Diffstat (limited to 'arch')
218 files changed, 6730 insertions(+), 2263 deletions(-)
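One change recurs across nearly every architecture below: each <asm/socket.h> gains SO_TIMESTAMPING (37) and SCM_TIMESTAMPING. A minimal userspace sketch of the new option, assuming libc headers new enough to expose the constant; the SOF_TIMESTAMPING_* flags come from <linux/net_tstamp.h>, added alongside this option:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* flags */

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	/* Request software receive timestamps; they are later delivered
	 * as SCM_TIMESTAMPING control messages on recvmsg(). */
	int flags = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
		       &flags, sizeof(flags)) < 0)
		perror("setsockopt(SO_TIMESTAMPING)");
	return 0;
}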
diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index a1057c2d95e7..3641ec1452f4 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -62,6 +62,9 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/alpha/include/asm/statfs.h b/arch/alpha/include/asm/statfs.h index de35cd438a10..ccd2e186bfd8 100644 --- a/arch/alpha/include/asm/statfs.h +++ b/arch/alpha/include/asm/statfs.h @@ -1,6 +1,8 @@ #ifndef _ALPHA_STATFS_H #define _ALPHA_STATFS_H +#include <linux/types.h> + /* Alpha is the only 64-bit platform with 32-bit statfs. And doesn't even seem to implement statfs64 */ #define __statfs_word __u32 diff --git a/arch/alpha/include/asm/swab.h b/arch/alpha/include/asm/swab.h index 68e7089e02d5..4d682b16c7c4 100644 --- a/arch/alpha/include/asm/swab.h +++ b/arch/alpha/include/asm/swab.h @@ -1,7 +1,7 @@ #ifndef _ALPHA_SWAB_H #define _ALPHA_SWAB_H -#include <asm/types.h> +#include <linux/types.h> #include <linux/compiler.h> #include <asm/compiler.h> diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index e4a54b615894..b45d913a51c3 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -903,8 +903,9 @@ sys_alpha_pipe: stq $26, 0($sp) .prologue 0 + mov $31, $17 lda $16, 8($sp) - jsr $26, do_pipe + jsr $26, do_pipe_flags ldq $26, 0($sp) bne $0, 1f diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 703731accda6..d3812eb84015 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -90,7 +90,7 @@ show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(irq)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[irq]); + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j)); #endif seq_printf(p, " %14s", irq_desc[irq].chip->typename); seq_printf(p, " %c%s", diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index e16aeb6e79ef..67c19f8a9944 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -64,7 +64,7 @@ do_entInt(unsigned long type, unsigned long vector, smp_percpu_timer_interrupt(regs); cpu = smp_processor_id(); if (cpu != boot_cpuid) { - kstat_cpu(cpu).irqs[RTC_IRQ]++; + kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ)); } else { handle_irq(RTC_IRQ); } diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index ae41f097864b..42ee05981e71 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -46,8 +46,6 @@ #include <asm/hwrpb.h> #include <asm/processor.h> -extern int do_pipe(int *); - /* * Brk needs to return an error. Still support Linux's brk(0) query idiom, * which OSF programs just shouldn't be doing. 
We're still not quite diff --git a/arch/arm/include/asm/a.out.h b/arch/arm/include/asm/a.out.h index 79489fdcc8b8..083894b2e3bc 100644 --- a/arch/arm/include/asm/a.out.h +++ b/arch/arm/include/asm/a.out.h @@ -2,7 +2,7 @@ #define __ARM_A_OUT_H__ #include <linux/personality.h> -#include <asm/types.h> +#include <linux/types.h> struct exec { diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index f2cd18a0932b..ee1304f22f94 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -14,7 +14,7 @@ #ifndef __ASMARM_SETUP_H #define __ASMARM_SETUP_H -#include <asm/types.h> +#include <linux/types.h> #define COMMAND_LINE_SIZE 1024 diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index 6817be9573a6..537de4e0ef50 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/arm/include/asm/swab.h b/arch/arm/include/asm/swab.h index 27a689be0856..ca2bf2f6d6ea 100644 --- a/arch/arm/include/asm/swab.h +++ b/arch/arm/include/asm/swab.h @@ -16,7 +16,7 @@ #define __ASM_ARM_SWAB_H #include <linux/compiler.h> -#include <asm/types.h> +#include <linux/types.h> #if !defined(__STRICT_ANSI__) || defined(__KERNEL__) # define __SWAB_64_THRU_32__ diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 363db186cb93..7296f0416286 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%3d: ", i); for_each_present_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); seq_printf(p, " %10s", irq_desc[i].chip->name ? 
: "-"); seq_printf(p, " %s", action->name); for (action = action->next; action; action = action->next) diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index b3404b7775b3..0d2074f51a59 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -231,14 +231,17 @@ static struct platform_device kirkwood_switch_device = { void __init kirkwood_ge00_switch_init(struct dsa_platform_data *d, int irq) { + int i; + if (irq != NO_IRQ) { kirkwood_switch_resources[0].start = irq; kirkwood_switch_resources[0].end = irq; kirkwood_switch_device.num_resources = 1; } - d->mii_bus = &kirkwood_ge00_shared.dev; d->netdev = &kirkwood_ge00.dev; + for (i = 0; i < d->nr_chips; i++) + d->chip[i].mii_bus = &kirkwood_ge00_shared.dev; kirkwood_switch_device.dev.platform_data = d; platform_device_register(&kirkwood_switch_device); diff --git a/arch/arm/mach-kirkwood/rd88f6281-setup.c b/arch/arm/mach-kirkwood/rd88f6281-setup.c index 9a0e905d10cd..e1c0516c4df3 100644 --- a/arch/arm/mach-kirkwood/rd88f6281-setup.c +++ b/arch/arm/mach-kirkwood/rd88f6281-setup.c @@ -75,7 +75,7 @@ static struct mv643xx_eth_platform_data rd88f6281_ge00_data = { .duplex = DUPLEX_FULL, }; -static struct dsa_platform_data rd88f6281_switch_data = { +static struct dsa_chip_data rd88f6281_switch_chip_data = { .port_names[0] = "lan1", .port_names[1] = "lan2", .port_names[2] = "lan3", @@ -83,6 +83,11 @@ static struct dsa_platform_data rd88f6281_switch_data = { .port_names[5] = "cpu", }; +static struct dsa_platform_data rd88f6281_switch_plat_data = { + .nr_chips = 1, + .chip = &rd88f6281_switch_chip_data, +}; + static struct mv643xx_eth_platform_data rd88f6281_ge01_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(11), }; @@ -105,12 +110,12 @@ static void __init rd88f6281_init(void) kirkwood_ge00_init(&rd88f6281_ge00_data); kirkwood_pcie_id(&dev, &rev); if (rev == MV88F6281_REV_A0) { - rd88f6281_switch_data.sw_addr = 10; + rd88f6281_switch_chip_data.sw_addr = 10; kirkwood_ge01_init(&rd88f6281_ge01_data); } else { - rd88f6281_switch_data.port_names[4] = "wan"; + rd88f6281_switch_chip_data.port_names[4] = "wan"; } - kirkwood_ge00_switch_init(&rd88f6281_switch_data, NO_IRQ); + kirkwood_ge00_switch_init(&rd88f6281_switch_plat_data, NO_IRQ); kirkwood_rtc_init(); kirkwood_sata_init(&rd88f6281_sata_data); diff --git a/arch/arm/mach-ns9xxx/irq.c b/arch/arm/mach-ns9xxx/irq.c index 22e0eb6e9ec4..feb0e54a91de 100644 --- a/arch/arm/mach-ns9xxx/irq.c +++ b/arch/arm/mach-ns9xxx/irq.c @@ -63,7 +63,6 @@ static struct irq_chip ns9xxx_chip = { #else static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) { - unsigned int cpu = smp_processor_id(); struct irqaction *action; irqreturn_t action_ret; @@ -72,7 +71,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) BUG_ON(desc->status & IRQ_INPROGRESS); desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); - kstat_cpu(cpu).irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, desc); action = desc->action; if (unlikely(!action || (desc->status & IRQ_DISABLED))) diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 8a0e49d84256..68cc3efae567 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -31,6 +31,7 @@ #include <plat/ehci-orion.h> #include <plat/mv_xor.h> #include <plat/orion_nand.h> +#include <plat/orion5x_wdt.h> #include <plat/time.h> #include "common.h" @@ -219,14 +220,17 @@ static struct platform_device orion5x_switch_device = { void __init orion5x_eth_switch_init(struct dsa_platform_data *d, 
int irq) { + int i; + if (irq != NO_IRQ) { orion5x_switch_resources[0].start = irq; orion5x_switch_resources[0].end = irq; orion5x_switch_device.num_resources = 1; } - d->mii_bus = &orion5x_eth_shared.dev; d->netdev = &orion5x_eth.dev; + for (i = 0; i < d->nr_chips; i++) + d->chip[i].mii_bus = &orion5x_eth_shared.dev; orion5x_switch_device.dev.platform_data = d; platform_device_register(&orion5x_switch_device); @@ -533,6 +537,29 @@ void __init orion5x_xor_init(void) /***************************************************************************** + * Watchdog + ****************************************************************************/ +static struct orion5x_wdt_platform_data orion5x_wdt_data = { + .tclk = 0, +}; + +static struct platform_device orion5x_wdt_device = { + .name = "orion5x_wdt", + .id = -1, + .dev = { + .platform_data = &orion5x_wdt_data, + }, + .num_resources = 0, +}; + +void __init orion5x_wdt_init(void) +{ + orion5x_wdt_data.tclk = orion5x_tclk; + platform_device_register(&orion5x_wdt_device); +} + + +/***************************************************************************** * Time handling ****************************************************************************/ int orion5x_tclk; @@ -631,6 +658,11 @@ void __init orion5x_init(void) printk(KERN_INFO "Orion: Applying 5281 D0 WFI workaround.\n"); disable_hlt(); } + + /* + * Register watchdog driver + */ + orion5x_wdt_init(); } /* diff --git a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c index 15f53235ee30..9c1ca41730ba 100644 --- a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c +++ b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c @@ -94,7 +94,7 @@ static struct mv643xx_eth_platform_data rd88f5181l_fxo_eth_data = { .duplex = DUPLEX_FULL, }; -static struct dsa_platform_data rd88f5181l_fxo_switch_data = { +static struct dsa_chip_data rd88f5181l_fxo_switch_chip_data = { .port_names[0] = "lan2", .port_names[1] = "lan1", .port_names[2] = "wan", @@ -103,6 +103,11 @@ static struct dsa_platform_data rd88f5181l_fxo_switch_data = { .port_names[7] = "lan3", }; +static struct dsa_platform_data rd88f5181l_fxo_switch_plat_data = { + .nr_chips = 1, + .chip = &rd88f5181l_fxo_switch_chip_data, +}; + static void __init rd88f5181l_fxo_init(void) { /* @@ -117,7 +122,7 @@ static void __init rd88f5181l_fxo_init(void) */ orion5x_ehci0_init(); orion5x_eth_init(&rd88f5181l_fxo_eth_data); - orion5x_eth_switch_init(&rd88f5181l_fxo_switch_data, NO_IRQ); + orion5x_eth_switch_init(&rd88f5181l_fxo_switch_plat_data, NO_IRQ); orion5x_uart0_init(); orion5x_setup_dev_boot_win(RD88F5181L_FXO_NOR_BOOT_BASE, diff --git a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c index 8ad3934399d4..ee1399ff0ced 100644 --- a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c +++ b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c @@ -95,7 +95,7 @@ static struct mv643xx_eth_platform_data rd88f5181l_ge_eth_data = { .duplex = DUPLEX_FULL, }; -static struct dsa_platform_data rd88f5181l_ge_switch_data = { +static struct dsa_chip_data rd88f5181l_ge_switch_chip_data = { .port_names[0] = "lan2", .port_names[1] = "lan1", .port_names[2] = "wan", @@ -104,6 +104,11 @@ static struct dsa_platform_data rd88f5181l_ge_switch_data = { .port_names[7] = "lan3", }; +static struct dsa_platform_data rd88f5181l_ge_switch_plat_data = { + .nr_chips = 1, + .chip = &rd88f5181l_ge_switch_chip_data, +}; + static struct i2c_board_info __initdata rd88f5181l_ge_i2c_rtc = { I2C_BOARD_INFO("ds1338", 0x68), }; @@ -122,7 +127,8 @@ 
static void __init rd88f5181l_ge_init(void) */ orion5x_ehci0_init(); orion5x_eth_init(&rd88f5181l_ge_eth_data); - orion5x_eth_switch_init(&rd88f5181l_ge_switch_data, gpio_to_irq(8)); + orion5x_eth_switch_init(&rd88f5181l_ge_switch_plat_data, + gpio_to_irq(8)); orion5x_i2c_init(); orion5x_uart0_init(); diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c index 262e25e4dace..7737cf9a8f50 100644 --- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c +++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c @@ -35,7 +35,7 @@ static struct mv643xx_eth_platform_data rd88f6183ap_ge_eth_data = { .duplex = DUPLEX_FULL, }; -static struct dsa_platform_data rd88f6183ap_ge_switch_data = { +static struct dsa_chip_data rd88f6183ap_ge_switch_chip_data = { .port_names[0] = "lan1", .port_names[1] = "lan2", .port_names[2] = "lan3", @@ -44,6 +44,11 @@ static struct dsa_platform_data rd88f6183ap_ge_switch_data = { .port_names[5] = "cpu", }; +static struct dsa_platform_data rd88f6183ap_ge_switch_plat_data = { + .nr_chips = 1, + .chip = &rd88f6183ap_ge_switch_chip_data, +}; + static struct mtd_partition rd88f6183ap_ge_partitions[] = { { .name = "kernel", @@ -89,7 +94,8 @@ static void __init rd88f6183ap_ge_init(void) */ orion5x_ehci0_init(); orion5x_eth_init(&rd88f6183ap_ge_eth_data); - orion5x_eth_switch_init(&rd88f6183ap_ge_switch_data, gpio_to_irq(3)); + orion5x_eth_switch_init(&rd88f6183ap_ge_switch_plat_data, + gpio_to_irq(3)); spi_register_board_info(rd88f6183ap_ge_spi_slave_info, ARRAY_SIZE(rd88f6183ap_ge_spi_slave_info)); orion5x_spi_init(); diff --git a/arch/arm/mach-orion5x/wrt350n-v2-setup.c b/arch/arm/mach-orion5x/wrt350n-v2-setup.c index cc8f89200865..1b4ad9d5e2eb 100644 --- a/arch/arm/mach-orion5x/wrt350n-v2-setup.c +++ b/arch/arm/mach-orion5x/wrt350n-v2-setup.c @@ -106,7 +106,7 @@ static struct mv643xx_eth_platform_data wrt350n_v2_eth_data = { .duplex = DUPLEX_FULL, }; -static struct dsa_platform_data wrt350n_v2_switch_data = { +static struct dsa_chip_data wrt350n_v2_switch_chip_data = { .port_names[0] = "lan2", .port_names[1] = "lan1", .port_names[2] = "wan", @@ -115,6 +115,11 @@ static struct dsa_platform_data wrt350n_v2_switch_data = { .port_names[7] = "lan4", }; +static struct dsa_platform_data wrt350n_v2_switch_plat_data = { + .nr_chips = 1, + .chip = &wrt350n_v2_switch_chip_data, +}; + static void __init wrt350n_v2_init(void) { /* @@ -129,7 +134,7 @@ static void __init wrt350n_v2_init(void) */ orion5x_ehci0_init(); orion5x_eth_init(&wrt350n_v2_eth_data); - orion5x_eth_switch_init(&wrt350n_v2_switch_data, NO_IRQ); + orion5x_eth_switch_init(&wrt350n_v2_switch_plat_data, NO_IRQ); orion5x_uart0_init(); orion5x_setup_dev_boot_win(WRT350N_V2_NOR_BOOT_BASE, diff --git a/arch/arm/plat-orion/include/plat/orion5x_wdt.h b/arch/arm/plat-orion/include/plat/orion5x_wdt.h new file mode 100644 index 000000000000..3c9cf6a305ef --- /dev/null +++ b/arch/arm/plat-orion/include/plat/orion5x_wdt.h @@ -0,0 +1,18 @@ +/* + * arch/arm/plat-orion/include/plat/orion5x_wdt.h + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. 
+ */ + +#ifndef __PLAT_ORION5X_WDT_H +#define __PLAT_ORION5X_WDT_H + +struct orion5x_wdt_platform_data { + u32 tclk; /* no <linux/clk.h> support yet */ +}; + + +#endif + diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 35863f260929..04c860619700 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/avr32/include/asm/swab.h b/arch/avr32/include/asm/swab.h index a14aa5b46d98..14cc737bbca6 100644 --- a/arch/avr32/include/asm/swab.h +++ b/arch/avr32/include/asm/swab.h @@ -4,7 +4,7 @@ #ifndef __ASM_AVR32_SWAB_H #define __ASM_AVR32_SWAB_H -#include <asm/types.h> +#include <linux/types.h> #include <linux/compiler.h> #define __SWAB_64_THRU_32__ diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c index a8e767d836aa..9f572229d318 100644 --- a/arch/avr32/kernel/irq.c +++ b/arch/avr32/kernel/irq.c @@ -58,7 +58,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%3d: ", i); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); seq_printf(p, " %8s", irq_desc[i].chip->name ? : "-"); seq_printf(p, " %s", action->name); for (action = action->next; action; action = action->next) diff --git a/arch/blackfin/include/asm/socket.h b/arch/blackfin/include/asm/socket.h index 2ca702e44d47..fac7fe9e1f8a 100644 --- a/arch/blackfin/include/asm/socket.h +++ b/arch/blackfin/include/asm/socket.h @@ -53,4 +53,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/blackfin/include/asm/swab.h b/arch/blackfin/include/asm/swab.h index 69a051b612bd..6403ad2932eb 100644 --- a/arch/blackfin/include/asm/swab.h +++ b/arch/blackfin/include/asm/swab.h @@ -1,7 +1,7 @@ #ifndef _BLACKFIN_SWAB_H #define _BLACKFIN_SWAB_H -#include <asm/types.h> +#include <linux/types.h> #include <linux/compiler.h> #if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__) diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index 7fd126564846..bd052a67032e 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -83,7 +83,7 @@ int show_interrupts(struct seq_file *p, void *v) goto skip; seq_printf(p, "%3d: ", i); for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); seq_printf(p, " %8s", irq_desc[i].chip->name); seq_printf(p, " %s", action->name); for (action = action->next; action; action = action->next) diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index 9df0ca82f5de..d5cf74005408 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -56,6 +56,9 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 2dfac8c79090..7f642fcffbfc 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -66,7 +66,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %14s", irq_desc[i].chip->typename); seq_printf(p, " %s", action->name); diff --git 
a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index 73abae767fdc..af3e824b91b3 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -74,7 +74,7 @@ int show_interrupts(struct seq_file *p, void *v) if (action) { seq_printf(p, "%3d: ", i); for_each_present_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-"); seq_printf(p, " %s", action->name); for (action = action->next; diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index da2520dbf254..602518a70a1a 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/swab.h b/arch/h8300/include/asm/swab.h index c108f39b8bc4..39abbf52807d 100644 --- a/arch/h8300/include/asm/swab.h +++ b/arch/h8300/include/asm/swab.h @@ -1,7 +1,7 @@ #ifndef _H8300_SWAB_H #define _H8300_SWAB_H -#include <asm/types.h> +#include <linux/types.h> #if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__) # define __SWAB_64_THRU_32__ diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c index ef4f0047067d..74f8dd7b34d2 100644 --- a/arch/h8300/kernel/irq.c +++ b/arch/h8300/kernel/irq.c @@ -183,7 +183,7 @@ asmlinkage void do_IRQ(int irq) #if defined(CONFIG_PROC_FS) int show_interrupts(struct seq_file *p, void *v) { - int i = *(loff_t *) v, j; + int i = *(loff_t *) v; struct irqaction * action; unsigned long flags; @@ -196,7 +196,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!action) goto unlock; seq_printf(p, "%3d: ",i); - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs(i)); seq_printf(p, " %14s", irq_desc[i].chip->name); seq_printf(p, "-%-8s", irq_desc[i].name); seq_printf(p, " %s", action->name); diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index a46f8395e9a5..af9405cd70e5 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -240,7 +240,7 @@ ia32_syscall_table: data8 sys_ni_syscall data8 sys_umask /* 60 */ data8 sys_chroot - data8 sys_ustat + data8 compat_sys_ustat data8 sys_dup2 data8 sys_getppid data8 sys_getpgrp /* 65 */ diff --git a/arch/ia64/include/asm/fpu.h b/arch/ia64/include/asm/fpu.h index 3859558ff0a4..0c26157cffa5 100644 --- a/arch/ia64/include/asm/fpu.h +++ b/arch/ia64/include/asm/fpu.h @@ -6,8 +6,6 @@ * David Mosberger-Tang <davidm@hpl.hp.com> */ -#include <asm/types.h> - /* floating point status register: */ #define FPSR_TRAP_VD (1 << 0) /* invalid op trap disabled */ #define FPSR_TRAP_DD (1 << 1) /* denormal trap disabled */ diff --git a/arch/ia64/include/asm/gcc_intrin.h b/arch/ia64/include/asm/gcc_intrin.h index 0f5b55921758..c2c5fd8fcac4 100644 --- a/arch/ia64/include/asm/gcc_intrin.h +++ b/arch/ia64/include/asm/gcc_intrin.h @@ -6,6 +6,7 @@ * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com> */ +#include <linux/types.h> #include <linux/compiler.h> /* define this macro to get some asm stmts included in 'c' files */ diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h index a3e44a5ed497..c47830e26cb7 100644 --- a/arch/ia64/include/asm/intrinsics.h +++ b/arch/ia64/include/asm/intrinsics.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ +#include <linux/types.h> /* include compiler specific intrinsics */ #include <asm/ia64regs.h> #ifdef __INTEL_COMPILER 
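The show_interrupts() hunks above all make the same substitution: per-IRQ counters move from the per-CPU kernel_stat array, kstat_cpu(cpu).irqs[irq], to the kstat_irqs_cpu(irq, cpu) accessor, so genirq can keep the counters with each irq_desc. A condensed sketch of the converted /proc/interrupts loop, not tied to any one architecture (kernel context assumed):

#include <linux/cpumask.h>
#include <linux/kernel_stat.h>
#include <linux/seq_file.h>

/* Sketch only: the seq_file plumbing matches the per-arch
 * show_interrupts() implementations above. */
static void show_one_irq(struct seq_file *p, int irq)
{
	int cpu;

	seq_printf(p, "%3d: ", irq);
	for_each_online_cpu(cpu)
		/* was: kstat_cpu(cpu).irqs[irq] */
		seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
	seq_putc(p, '\n');
}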
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index bfa86b6af7cd..18a7e49abbc5 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -21,8 +21,7 @@ * */ -#include <asm/types.h> - +#include <linux/types.h> #include <linux/ioctl.h> /* Select x86 specific features in <linux/kvm.h> */ @@ -166,7 +165,40 @@ struct saved_vpd { unsigned long vcpuid[5]; unsigned long vpsr; unsigned long vpr; - unsigned long vcr[128]; + union { + unsigned long vcr[128]; + struct { + unsigned long dcr; + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; + unsigned long rsv2[7]; + unsigned long ipsr; + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; + unsigned long lrr1; + unsigned long rsv6[46]; + }; + }; }; struct kvm_regs { @@ -214,4 +246,18 @@ struct kvm_sregs { struct kvm_fpu { }; +#define KVM_IA64_VCPU_STACK_SHIFT 16 +#define KVM_IA64_VCPU_STACK_SIZE (1UL << KVM_IA64_VCPU_STACK_SHIFT) + +struct kvm_ia64_vcpu_stack { + unsigned char stack[KVM_IA64_VCPU_STACK_SIZE]; +}; + +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 348663661659..4542651e6acb 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -112,7 +112,11 @@ #define VCPU_STRUCT_SHIFT 16 #define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT) -#define KVM_STK_OFFSET VCPU_STRUCT_SIZE +/* + * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h + */ +#define KVM_STK_SHIFT 16 +#define KVM_STK_OFFSET (__IA64_UL_CONST(1)<< KVM_STK_SHIFT) #define KVM_VM_STRUCT_SHIFT 19 #define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT) @@ -153,10 +157,10 @@ struct kvm_vm_data { struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS]; }; -#define VCPU_BASE(n) KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, vcpu_data[n]) -#define VM_BASE KVM_VM_DATA_BASE + \ - offsetof(struct kvm_vm_data, kvm_vm_struct) +#define VCPU_BASE(n) (KVM_VM_DATA_BASE + \ + offsetof(struct kvm_vm_data, vcpu_data[n])) +#define KVM_VM_BASE (KVM_VM_DATA_BASE + \ + offsetof(struct kvm_vm_data, kvm_vm_struct)) #define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \ offsetof(struct kvm_vm_data, kvm_mem_dirty_log) @@ -235,8 +239,6 @@ struct kvm_vm_data { struct kvm; struct kvm_vcpu; -struct kvm_guest_debug{ -}; struct kvm_mmio_req { uint64_t addr; /* physical address */ @@ -462,6 +464,8 @@ struct kvm_arch { unsigned long metaphysical_rr4; unsigned long vmm_init_rr; + int online_vcpus; + struct kvm_ioapic *vioapic; struct kvm_vm_stat stat; struct kvm_sal_data rdv_sal_data; diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h new file mode 100644 index 000000000000..592c1047a0c5 --- /dev/null +++ b/arch/ia64/include/asm/msidef.h @@ -0,0 +1,42 @@ +#ifndef _IA64_MSI_DEF_H +#define _IA64_MSI_DEF_H + +/* + * Shifts for APIC-based data + */ + +#define MSI_DATA_VECTOR_SHIFT 0 +#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) +#define MSI_DATA_VECTOR_MASK 0xffffff00 + +#define 
MSI_DATA_DELIVERY_MODE_SHIFT 8 +#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) +#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) + +#define MSI_DATA_LEVEL_SHIFT 14 +#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) +#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) + +#define MSI_DATA_TRIGGER_SHIFT 15 +#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) +#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) + +/* + * Shift/mask fields for APIC-based bus address + */ + +#define MSI_ADDR_DEST_ID_SHIFT 4 +#define MSI_ADDR_HEADER 0xfee00000 + +#define MSI_ADDR_DEST_ID_MASK 0xfff0000f +#define MSI_ADDR_DEST_ID_CPU(cpu) ((cpu) << MSI_ADDR_DEST_ID_SHIFT) + +#define MSI_ADDR_DEST_MODE_SHIFT 2 +#define MSI_ADDR_DEST_MODE_PHYS (0 << MSI_ADDR_DEST_MODE_SHIFT) +#define MSI_ADDR_DEST_MODE_LOGIC (1 << MSI_ADDR_DEST_MODE_SHIFT) + +#define MSI_ADDR_REDIRECTION_SHIFT 3 +#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) +#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) + +#endif/* _IA64_MSI_DEF_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index d5ef0aa3e312..745421225ec6 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -63,4 +63,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/ia64/include/asm/swab.h b/arch/ia64/include/asm/swab.h index 6aa58b699eea..c89a8cb5d8a5 100644 --- a/arch/ia64/include/asm/swab.h +++ b/arch/ia64/include/asm/swab.h @@ -6,7 +6,7 @@ * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co. */ -#include <asm/types.h> +#include <linux/types.h> #include <asm/intrinsics.h> #include <linux/compiler.h> diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index a58f64ca9f0e..4f596613bffd 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -80,7 +80,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) { - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); } #endif seq_printf(p, " %14s", irq_desc[i].chip->name); diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 890339339035..368ee4e5266d 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -7,44 +7,7 @@ #include <linux/msi.h> #include <linux/dmar.h> #include <asm/smp.h> - -/* - * Shifts for APIC-based data - */ - -#define MSI_DATA_VECTOR_SHIFT 0 -#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) -#define MSI_DATA_VECTOR_MASK 0xffffff00 - -#define MSI_DATA_DELIVERY_SHIFT 8 -#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT) -#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_SHIFT) - -#define MSI_DATA_LEVEL_SHIFT 14 -#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) -#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) - -#define MSI_DATA_TRIGGER_SHIFT 15 -#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) -#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) - -/* - * Shift/mask fields for APIC-based bus address - */ - -#define MSI_TARGET_CPU_SHIFT 4 -#define MSI_ADDR_HEADER 0xfee00000 - -#define MSI_ADDR_DESTID_MASK 0xfff0000f -#define MSI_ADDR_DESTID_CPU(cpu) ((cpu) << MSI_TARGET_CPU_SHIFT) - -#define MSI_ADDR_DESTMODE_SHIFT 2 -#define MSI_ADDR_DESTMODE_PHYS (0 << MSI_ADDR_DESTMODE_SHIFT) -#define 
MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT) - -#define MSI_ADDR_REDIRECTION_SHIFT 3 -#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) -#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) +#include <asm/msidef.h> static struct irq_chip ia64_msi_chip; @@ -65,8 +28,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, read_msi_msg(irq, &msg); addr = msg.address_lo; - addr &= MSI_ADDR_DESTID_MASK; - addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); + addr &= MSI_ADDR_DEST_ID_MASK; + addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); msg.address_lo = addr; data = msg.data; @@ -98,9 +61,9 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) msg.address_hi = 0; msg.address_lo = MSI_ADDR_HEADER | - MSI_ADDR_DESTMODE_PHYS | + MSI_ADDR_DEST_MODE_PHYS | MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DESTID_CPU(dest_phys_id); + MSI_ADDR_DEST_ID_CPU(dest_phys_id); msg.data = MSI_DATA_TRIGGER_EDGE | @@ -183,8 +146,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DESTID_MASK; - msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); dmar_msi_write(irq, &msg); irq_desc[irq].affinity = *mask; @@ -215,9 +178,9 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) msg->address_hi = 0; msg->address_lo = MSI_ADDR_HEADER | - MSI_ADDR_DESTMODE_PHYS | + MSI_ADDR_DEST_MODE_PHYS | MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DESTID_CPU(dest); + MSI_ADDR_DEST_ID_CPU(dest); msg->data = MSI_DATA_TRIGGER_EDGE | diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 0e499757309b..5c0f408cfd71 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2196,7 +2196,7 @@ pfmfs_delete_dentry(struct dentry *dentry) return 1; } -static struct dentry_operations pfmfs_dentry_operations = { +static const struct dentry_operations pfmfs_dentry_operations = { .d_delete = pfmfs_delete_dentry, }; diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index f833a0b4188d..0a2d6b86075a 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -4,6 +4,10 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + default y + menuconfig VIRTUALIZATION bool "Virtualization" depends on HAVE_KVM || IA64 diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h index c6786e8b1bf4..c0785a728271 100644 --- a/arch/ia64/kvm/irq.h +++ b/arch/ia64/kvm/irq.h @@ -23,6 +23,8 @@ #ifndef __IRQ_H #define __IRQ_H +#include "lapic.h" + static inline int irqchip_in_kernel(struct kvm *kvm) { return 1; diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 28f982045f29..076b00d1dbff 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -182,7 +182,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_IRQCHIP: case KVM_CAP_MP_STATE: - + case KVM_CAP_IRQ_INJECT_STATUS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -314,7 +314,7 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, union ia64_lid lid; int i; - for (i = 0; i < KVM_MAX_VCPUS; i++) { + for (i = 0; i < kvm->arch.online_vcpus; i++) { if (kvm->vcpus[i]) { lid.val = VCPU_LID(kvm->vcpus[i]); if (lid.id == id && lid.eid == eid) @@ -388,7 +388,7 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) call_data.ptc_g_data = 
p->u.ptc_g_data; - for (i = 0; i < KVM_MAX_VCPUS; i++) { + for (i = 0; i < kvm->arch.online_vcpus; i++) { if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == KVM_MP_STATE_UNINITIALIZED || vcpu == kvm->vcpus[i]) @@ -788,6 +788,8 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); kvm_init_vm(kvm); + kvm->arch.online_vcpus = 0; + return kvm; } @@ -919,7 +921,13 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_ioapic_init(kvm); if (r) goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vioapic); + goto out; + } break; + case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; @@ -927,10 +935,17 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { + __s32 status; mutex_lock(&kvm->lock); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); + if (ioctl == KVM_IRQ_LINE_STATUS) { + irq_event.status = status; + if (copy_to_user(argp, &irq_event, + sizeof irq_event)) + goto out; + } r = 0; } break; @@ -1149,7 +1164,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /*Initialize itc offset for vcpus*/ itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); - for (i = 0; i < KVM_MAX_VCPUS; i++) { + for (i = 0; i < kvm->arch.online_vcpus; i++) { v = (struct kvm_vcpu *)((char *)vcpu + sizeof(struct kvm_vcpu_data) * i); v->arch.itc_offset = itc_offset; @@ -1283,6 +1298,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, goto fail; } + kvm->arch.online_vcpus++; + return vcpu; fail: return ERR_PTR(r); @@ -1303,8 +1320,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -EINVAL; } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { return -EINVAL; } @@ -1421,6 +1438,23 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu, + struct kvm_ia64_vcpu_stack *stack) +{ + memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack)); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu, + struct kvm_ia64_vcpu_stack *stack) +{ + memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu), + sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu)); + + vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data; + return 0; +} + void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { @@ -1430,9 +1464,78 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) long kvm_arch_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) + unsigned int ioctl, unsigned long arg) { - return -EINVAL; + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + struct kvm_ia64_vcpu_stack *stack = NULL; + long r; + + switch (ioctl) { + case KVM_IA64_VCPU_GET_STACK: { + struct kvm_ia64_vcpu_stack __user *user_stack; + void __user *first_p = argp; + + r = -EFAULT; + if (copy_from_user(&user_stack, first_p, sizeof(void *))) + goto out; + + if (!access_ok(VERIFY_WRITE, user_stack, + sizeof(struct kvm_ia64_vcpu_stack))) { + printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: " + "Illegal user destination address for stack\n"); + goto out; + } + stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); + if (!stack) { + r = -ENOMEM; + goto out; + } + 
+ r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack); + if (r) + goto out; + + if (copy_to_user(user_stack, stack, + sizeof(struct kvm_ia64_vcpu_stack))) + goto out; + + break; + } + case KVM_IA64_VCPU_SET_STACK: { + struct kvm_ia64_vcpu_stack __user *user_stack; + void __user *first_p = argp; + + r = -EFAULT; + if (copy_from_user(&user_stack, first_p, sizeof(void *))) + goto out; + + if (!access_ok(VERIFY_READ, user_stack, + sizeof(struct kvm_ia64_vcpu_stack))) { + printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: " + "Illegal user address for stack\n"); + goto out; + } + stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); + if (!stack) { + r = -ENOMEM; + goto out; + } + if (copy_from_user(stack, user_stack, + sizeof(struct kvm_ia64_vcpu_stack))) + goto out; + + r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack); + break; + } + + default: + r = -EINVAL; + } + +out: + kfree(stack); + return r; } int kvm_arch_set_memory_region(struct kvm *kvm, @@ -1472,7 +1575,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm) } long kvm_arch_dev_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) + unsigned int ioctl, unsigned long arg) { return -EINVAL; } @@ -1737,7 +1840,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, struct kvm_vcpu *lvcpu = kvm->vcpus[0]; int i; - for (i = 1; i < KVM_MAX_VCPUS; i++) { + for (i = 1; i < kvm->arch.online_vcpus; i++) { if (!kvm->vcpus[i]) continue; if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c index cb7600bdff9d..a8ae52ed5635 100644 --- a/arch/ia64/kvm/kvm_fw.c +++ b/arch/ia64/kvm/kvm_fw.c @@ -227,6 +227,18 @@ static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) return result; } +static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result = {0, 0, 0, 0}; + long in0, in1, in2, in3; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + result.status = ia64_pal_register_info(in1, &result.v1, &result.v2); + + return result; +} + static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) { @@ -268,8 +280,12 @@ static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) { struct ia64_pal_retval result; + unsigned long in0, in1, in2, in3; - INIT_PAL_STATUS_UNIMPLEMENTED(result); + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + + result.status = ia64_pal_vm_info(in1, in2, + (pal_tc_info_u_t *)&result.v1, &result.v2); return result; } @@ -292,6 +308,108 @@ static void prepare_for_halt(struct kvm_vcpu *vcpu) vcpu->arch.timer_fired = 0; } +static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu) +{ + long status; + unsigned long in0, in1, in2, in3, r9; + unsigned long pm_buffer[16]; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + status = ia64_pal_perf_mon_info(pm_buffer, + (pal_perf_mon_info_u_t *) &r9); + if (status != 0) { + printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status); + } else { + if (in1) + memcpy((void *)in1, pm_buffer, sizeof(pm_buffer)); + else { + status = PAL_STATUS_EINVAL; + printk(KERN_WARNING"Invalid parameters " + "for PAL call:0x%lx!\n", in0); + } + } + return (struct ia64_pal_retval){status, r9, 0, 0}; +} + +static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu) +{ + unsigned long in0, in1, in2, in3; + long status; + unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32) + | (1UL << 61) | (1UL << 60); + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, 
&in3); + if (in1) { + memcpy((void *)in1, &res, sizeof(res)); + status = 0; + } else{ + status = PAL_STATUS_EINVAL; + printk(KERN_WARNING"Invalid parameters " + "for PAL call:0x%lx!\n", in0); + } + + return (struct ia64_pal_retval){status, 0, 0, 0}; +} + +static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu) +{ + unsigned long r9; + long status; + + status = ia64_pal_mem_attrib(&r9); + + return (struct ia64_pal_retval){status, r9, 0, 0}; +} + +static void remote_pal_prefetch_visibility(void *v) +{ + s64 trans_type = (s64)v; + ia64_pal_prefetch_visibility(trans_type); +} + +static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result = {0, 0, 0, 0}; + unsigned long in0, in1, in2, in3; + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + result.status = ia64_pal_prefetch_visibility(in1); + if (result.status == 0) { + /* Must be performed on all remote processors + in the coherence domain. */ + smp_call_function(remote_pal_prefetch_visibility, + (void *)in1, 1); + /* Unnecessary on remote processor for other vcpus!*/ + result.status = 1; + } + return result; +} + +static void remote_pal_mc_drain(void *v) +{ + ia64_pal_mc_drain(); +} + +static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result = {0, 0, 0, 0}; + unsigned long in0, in1, in2, in3; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + + if (in1 == 0 && in2) { + char brand_info[128]; + result.status = ia64_pal_get_brand_info(brand_info); + if (result.status == PAL_STATUS_SUCCESS) + memcpy((void *)in2, brand_info, 128); + } else { + result.status = PAL_STATUS_REQUIRES_MEMORY; + printk(KERN_WARNING"Invalid parameters for " + "PAL call:0x%lx!\n", in0); + } + + return result; +} + int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -300,14 +418,22 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) int ret = 1; gr28 = kvm_get_pal_call_index(vcpu); - /*printk("pal_call index:%lx\n",gr28);*/ switch (gr28) { case PAL_CACHE_FLUSH: result = pal_cache_flush(vcpu); break; + case PAL_MEM_ATTRIB: + result = pal_mem_attrib(vcpu); + break; case PAL_CACHE_SUMMARY: result = pal_cache_summary(vcpu); break; + case PAL_PERF_MON_INFO: + result = pal_perf_mon_info(vcpu); + break; + case PAL_HALT_INFO: + result = pal_halt_info(vcpu); + break; case PAL_HALT_LIGHT: { INIT_PAL_STATUS_SUCCESS(result); @@ -317,6 +443,16 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) } break; + case PAL_PREFETCH_VISIBILITY: + result = pal_prefetch_visibility(vcpu); + break; + case PAL_MC_DRAIN: + result.status = ia64_pal_mc_drain(); + /* FIXME: All vcpus likely call PAL_MC_DRAIN. + That causes the congestion. 
*/ + smp_call_function(remote_pal_mc_drain, NULL, 1); + break; + case PAL_FREQ_RATIOS: result = pal_freq_ratios(vcpu); break; @@ -346,6 +482,9 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) INIT_PAL_STATUS_SUCCESS(result); result.v1 = (1L << 32) | 1L; break; + case PAL_REGISTER_INFO: + result = pal_register_info(vcpu); + break; case PAL_VM_PAGE_SIZE: result.status = ia64_pal_vm_page_size(&result.v0, &result.v1); @@ -365,12 +504,18 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) result.status = ia64_pal_version( (pal_version_u_t *)&result.v0, (pal_version_u_t *)&result.v1); - break; case PAL_FIXED_ADDR: result.status = PAL_STATUS_SUCCESS; result.v0 = vcpu->vcpu_id; break; + case PAL_BRAND_INFO: + result = pal_get_brand_info(vcpu); + break; + case PAL_GET_PSTATE: + case PAL_CACHE_SHARED_INFO: + INIT_PAL_STATUS_UNIMPLEMENTED(result); + break; default: INIT_PAL_STATUS_UNIMPLEMENTED(result); printk(KERN_WARNING"kvm: Unsupported pal call," diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c index 230eae482f32..b1dc80952d91 100644 --- a/arch/ia64/kvm/process.c +++ b/arch/ia64/kvm/process.c @@ -167,7 +167,6 @@ static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) return (rr1.val); } - /* * Set vIFA & vITIR & vIHA, when vPSR.ic =1 * Parameter: @@ -222,8 +221,6 @@ void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); } - - /* * Data Nested TLB Fault * @ Data Nested TLB Vector @@ -245,7 +242,6 @@ void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); } - /* * Data TLB Fault * @ Data TLB vector @@ -265,8 +261,6 @@ static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) /* If vPSR.ic, IFA, ITIR, IHA*/ set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); - - } /* @@ -279,7 +273,6 @@ void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) _vhpt_fault(vcpu, vadr); } - /* * VHPT Data Fault * @ VHPT Translation vector @@ -290,8 +283,6 @@ void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) _vhpt_fault(vcpu, vadr); } - - /* * Deal with: * General Exception vector @@ -301,7 +292,6 @@ void _general_exception(struct kvm_vcpu *vcpu) inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); } - /* * Illegal Operation Fault * @ General Exception Vector @@ -419,19 +409,16 @@ static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); } - void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) { __page_not_present(vcpu, vadr); } - void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) { __page_not_present(vcpu, vadr); } - /* Deal with * Data access rights vector */ @@ -563,22 +550,64 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim, inject_guest_interruption(vcpu, vector); } +static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu, + unsigned long arg) +{ + struct thash_data *data; + unsigned long gpa, poff; + + if (!is_physical_mode(vcpu)) { + /* Depends on caller to provide the DTR or DTC mapping.*/ + data = vtlb_lookup(vcpu, arg, D_TLB); + if (data) + gpa = data->page_flags & _PAGE_PPN_MASK; + else { + data = vhpt_lookup(arg); + if (!data) + return 0; + gpa = data->gpaddr & _PAGE_PPN_MASK; + } + + poff = arg & (PSIZE(data->ps) - 1); + arg = PAGEALIGN(gpa, data->ps) | poff; + } + arg = kvm_gpa_to_mpa(arg << 1 >> 1); + + return (unsigned long)__va(arg); +} + static void set_pal_call_data(struct kvm_vcpu *vcpu) { struct exit_ctl_data *p = 
&vcpu->arch.exit_data; + unsigned long gr28 = vcpu_get_gr(vcpu, 28); + unsigned long gr29 = vcpu_get_gr(vcpu, 29); + unsigned long gr30 = vcpu_get_gr(vcpu, 30); /*FIXME:For static and stacked convention, firmware * has put the parameters in gr28-gr31 before * break to vmm !!*/ - p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28); - p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29); - p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); + switch (gr28) { + case PAL_PERF_MON_INFO: + case PAL_HALT_INFO: + p->u.pal_data.gr29 = kvm_trans_pal_call_args(vcpu, gr29); + p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); + break; + case PAL_BRAND_INFO: + p->u.pal_data.gr29 = gr29; + p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30); + break; + default: + p->u.pal_data.gr29 = gr29; + p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); + } + p->u.pal_data.gr28 = gr28; p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); + p->exit_reason = EXIT_REASON_PAL_CALL; } -static void set_pal_call_result(struct kvm_vcpu *vcpu) +static void get_pal_call_result(struct kvm_vcpu *vcpu) { struct exit_ctl_data *p = &vcpu->arch.exit_data; @@ -606,7 +635,7 @@ static void set_sal_call_data(struct kvm_vcpu *vcpu) p->exit_reason = EXIT_REASON_SAL_CALL; } -static void set_sal_call_result(struct kvm_vcpu *vcpu) +static void get_sal_call_result(struct kvm_vcpu *vcpu) { struct exit_ctl_data *p = &vcpu->arch.exit_data; @@ -629,13 +658,13 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, if (iim == DOMN_PAL_REQUEST) { set_pal_call_data(v); vmm_transition(v); - set_pal_call_result(v); + get_pal_call_result(v); vcpu_increment_iip(v); return; } else if (iim == DOMN_SAL_REQUEST) { set_sal_call_data(v); vmm_transition(v); - set_sal_call_result(v); + get_sal_call_result(v); vcpu_increment_iip(v); return; } @@ -703,7 +732,6 @@ void vhpi_detection(struct kvm_vcpu *vcpu) } } - void leave_hypervisor_tail(void) { struct kvm_vcpu *v = current_vcpu; @@ -737,7 +765,6 @@ void leave_hypervisor_tail(void) } } - static inline void handle_lds(struct kvm_pt_regs *regs) { regs->cr_ipsr |= IA64_PSR_ED; diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index ecd526b55323..d4d280505878 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c @@ -112,7 +112,6 @@ void switch_to_physical_rid(struct kvm_vcpu *vcpu) return; } - void switch_to_virtual_rid(struct kvm_vcpu *vcpu) { unsigned long psr; @@ -166,8 +165,6 @@ void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, return; } - - /* * In physical mode, insert tc/tr for region 0 and 4 uses * RID[0] and RID[4] which is for physical mode emulation. @@ -269,7 +266,6 @@ static inline unsigned long fph_index(struct kvm_pt_regs *regs, return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); } - /* * The inverse of the above: given bspstore and the number of * registers, calculate ar.bsp.
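The KVM_IA64_VCPU_GET_STACK and KVM_IA64_VCPU_SET_STACK handlers added to kvm-ia64.c earlier in this diff take their argument with an extra level of indirection: the kernel first copies a pointer from userspace, then transfers the 64 KiB stack image through it. A hedged sketch of the userspace calling convention (ia64 only; the ioctl numbers are assumed to come from the matching <linux/kvm.h>, and error handling is trimmed):

#include <linux/kvm.h>		/* struct kvm_ia64_vcpu_stack, ioctls */
#include <stdlib.h>
#include <sys/ioctl.h>

/* vcpu_fd is an open KVM vcpu file descriptor.  Note the &stack:
 * the kernel reads a pointer first, then fills the buffer behind it. */
int checkpoint_vcpu_stack(int vcpu_fd)
{
	struct kvm_ia64_vcpu_stack *stack = malloc(sizeof(*stack));

	if (!stack)
		return -1;
	if (ioctl(vcpu_fd, KVM_IA64_VCPU_GET_STACK, &stack) < 0)
		return -1;
	/* ... save or migrate the 64 KiB image ... */
	if (ioctl(vcpu_fd, KVM_IA64_VCPU_SET_STACK, &stack) < 0)
		return -1;
	free(stack);
	return 0;
}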
@@ -811,12 +807,15 @@ static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) { struct kvm_vcpu *v; + struct kvm *kvm; int i; long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); unsigned long vitv = VCPU(vcpu, itv); + kvm = (struct kvm *)KVM_VM_BASE; + if (vcpu->vcpu_id == 0) { - for (i = 0; i < KVM_MAX_VCPUS; i++) { + for (i = 0; i < kvm->arch.online_vcpus; i++) { v = (struct kvm_vcpu *)((char *)vcpu + sizeof(struct kvm_vcpu_data) * i); VMX(v, itc_offset) = itc_offset; @@ -1039,8 +1038,6 @@ u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) return key; } - - void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) { unsigned long thash, vadr; @@ -1050,7 +1047,6 @@ void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) vcpu_set_gr(vcpu, inst.M46.r1, thash, 0); } - void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) { unsigned long tag, vadr; @@ -1131,7 +1127,6 @@ int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) return IA64_NO_FAULT; } - int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) { unsigned long r1, r3; @@ -1154,7 +1149,6 @@ void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); } - /************************************ * Insert/Purge translation register/cache ************************************/ @@ -1385,7 +1379,6 @@ void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) vcpu_set_itc(vcpu, r2); } - void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) { unsigned long r1; @@ -1393,8 +1386,9 @@ void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) r1 = vcpu_get_itc(vcpu); vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); } + /************************************************************************** - struct kvm_vcpu*protection key register access routines + struct kvm_vcpu protection key register access routines **************************************************************************/ unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) @@ -1407,20 +1401,6 @@ void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) ia64_set_pkr(reg, val); } - -unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa) -{ - union ia64_rr rr, rr1; - - rr.val = vcpu_get_rr(vcpu, ifa); - rr1.val = 0; - rr1.ps = rr.ps; - rr1.rid = rr.rid; - return (rr1.val); -} - - - /******************************** * Moves to privileged registers ********************************/ @@ -1464,8 +1444,6 @@ unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, return (IA64_NO_FAULT); } - - void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) { unsigned long r3, r2; @@ -1510,8 +1488,6 @@ void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) vcpu_set_pkr(vcpu, r3, r2); } - - void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) { unsigned long r3, r1; @@ -1557,7 +1533,6 @@ void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); } - unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) { /* FIXME: This could get called as a result of a rsvd-reg fault */ @@ -1609,7 +1584,6 @@ unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) return 0; } - unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) { unsigned long tgt = inst.M33.r1; @@ -1633,8 +1607,6 @@ unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) return 0; } - - void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) { @@ -1776,9 +1748,6 @@ void vcpu_bsw1(struct kvm_vcpu *vcpu) } } - - - void 
vcpu_rfi(struct kvm_vcpu *vcpu) { unsigned long ifs, psr; @@ -1796,7 +1765,6 @@ void vcpu_rfi(struct kvm_vcpu *vcpu) regs->cr_iip = VCPU(vcpu, iip); } - /* VPSR can't keep track of below bits of guest PSR This function gets guest PSR diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h index b2f12a562bdf..042af92ced83 100644 --- a/arch/ia64/kvm/vcpu.h +++ b/arch/ia64/kvm/vcpu.h @@ -703,7 +703,7 @@ extern u64 guest_vhpt_lookup(u64 iha, u64 *pte); extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps); extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps); extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va); -extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, +extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 ifa, int type); extern void thash_purge_all(struct kvm_vcpu *v); extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v, @@ -738,7 +738,7 @@ void kvm_init_vhpt(struct kvm_vcpu *v); void thash_init(struct thash_cb *hcb, u64 sz); void panic_vm(struct kvm_vcpu *v, const char *fmt, ...); - +u64 kvm_gpa_to_mpa(u64 gpa); extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index 6b6307a3bd55..38232b37668b 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -164,11 +164,11 @@ static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) unsigned long ps, gpaddr; ps = itir_ps(itir); + rr.val = ia64_get_rr(ifa); - gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | - (ifa & ((1UL << ps) - 1)); + gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | + (ifa & ((1UL << ps) - 1)); - rr.val = ia64_get_rr(ifa); head = (struct thash_data *)ia64_thash(ifa); head->etag = INVALID_TI_TAG; ia64_mf(); @@ -412,16 +412,14 @@ u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) /* * Purge overlap TCs and then insert the new entry to emulate itc ops. - * Notes: Only TC entry can purge and insert. - * 1 indicates this is MMIO + * Notes: Only TC entry can purge and insert. */ -int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, +void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 ifa, int type) { u64 ps; u64 phy_pte, io_mask, index; union ia64_rr vrr, mrr; - int ret = 0; ps = itir_ps(itir); vrr.val = vcpu_get_rr(v, ifa); @@ -441,25 +439,19 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, phy_pte &= ~_PAGE_MA_MASK; } - if (pte & VTLB_PTE_IO) - ret = 1; - vtlb_purge(v, ifa, ps); vhpt_purge(v, ifa, ps); - if (ps == mrr.ps) { - if (!(pte&VTLB_PTE_IO)) { - vhpt_insert(phy_pte, itir, ifa, pte); - } else { - vtlb_insert(v, pte, itir, ifa); - vcpu_quick_region_set(VMX(v, tc_regions), ifa); - } - } else if (ps > mrr.ps) { + if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) { vtlb_insert(v, pte, itir, ifa); vcpu_quick_region_set(VMX(v, tc_regions), ifa); - if (!(pte&VTLB_PTE_IO)) - vhpt_insert(phy_pte, itir, ifa, pte); - } else { + } + if (pte & VTLB_PTE_IO) + return; + + if (ps >= mrr.ps) + vhpt_insert(phy_pte, itir, ifa, pte); + else { u64 psr; phy_pte &= ~PAGE_FLAGS_RV_MASK; psr = ia64_clear_ic(); @@ -469,7 +461,6 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, if (!(pte&VTLB_PTE_IO)) mark_pages_dirty(v, pte, ps); - return ret; } /* @@ -509,7 +500,6 @@ void thash_purge_all(struct kvm_vcpu *v) local_flush_tlb_all(); } - /* * Lookup the hash table and its collision chain to find an entry * covering this address rid:va or the entry. 
@@ -517,7 +507,6 @@ void thash_purge_all(struct kvm_vcpu *v) * INPUT: * in: TLB format for both VHPT & TLB. */ - struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) { struct thash_data *cch; @@ -547,7 +536,6 @@ struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) return NULL; } - /* * Initialize internal control data before service. */ @@ -573,6 +561,10 @@ void thash_init(struct thash_cb *hcb, u64 sz) u64 kvm_get_mpt_entry(u64 gpfn) { u64 *base = (u64 *) KVM_P2M_BASE; + + if (gpfn >= (KVM_P2M_SIZE >> 3)) + panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn); + return *(base + gpfn); } @@ -589,7 +581,6 @@ u64 kvm_gpa_to_mpa(u64 gpa) return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); } - /* * Fetch guest bundle code. * INPUT: @@ -631,7 +622,6 @@ int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) return IA64_NO_FAULT; } - void kvm_init_vhpt(struct kvm_vcpu *v) { v->arch.vhpt.num = VHPT_NUM_ENTRIES; diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c index 2aeae4670098..8dfd31e87c4c 100644 --- a/arch/m32r/kernel/irq.c +++ b/arch/m32r/kernel/irq.c @@ -49,7 +49,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %14s", irq_desc[i].chip->typename); seq_printf(p, " %s", action->name); diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 8133dbc44964..570d85c3f97f 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -117,3 +117,6 @@ endif archclean: rm -f vmlinux.gz vmlinux.bz2 + +install: + sh $(srctree)/arch/m68k/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)" diff --git a/arch/m68k/include/asm/irq_mm.h b/arch/m68k/include/asm/irq_mm.h index 226bfc0f21b1..0cab42cad79e 100644 --- a/arch/m68k/include/asm/irq_mm.h +++ b/arch/m68k/include/asm/irq_mm.h @@ -3,6 +3,7 @@ #include <linux/linkage.h> #include <linux/hardirq.h> +#include <linux/irqreturn.h> #include <linux/spinlock_types.h> /* @@ -80,7 +81,7 @@ struct pt_regs; * interrupt source (if it supports chaining). */ typedef struct irq_node { - int (*handler)(int, void *); + irqreturn_t (*handler)(int, void *); void *dev_id; struct irq_node *next; unsigned long flags; diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h index 05309f7e3d06..50db3591ca15 100644 --- a/arch/m68k/include/asm/macintosh.h +++ b/arch/m68k/include/asm/macintosh.h @@ -34,6 +34,7 @@ struct mac_model char scc_type; char ether_type; char nubus_type; + char floppy_type; }; #define MAC_ADB_NONE 0 @@ -71,6 +72,12 @@ struct mac_model #define MAC_NO_NUBUS 0 #define MAC_NUBUS 1 +#define MAC_FLOPPY_IWM 0 +#define MAC_FLOPPY_SWIM_ADDR1 1 +#define MAC_FLOPPY_SWIM_ADDR2 2 +#define MAC_FLOPPY_SWIM_IOP 3 +#define MAC_FLOPPY_AV 4 + /* * Gestalt numbers */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index dbc64e92c41a..ca87f938b03f 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/m68k/install.sh b/arch/m68k/install.sh new file mode 100644 index 000000000000..9c6bae6112e3 --- /dev/null +++ b/arch/m68k/install.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. 
See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for m68k architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + +# User may have a custom install script + +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi + +# Default install - same as make zlilo + +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/vmlinuz +cp $3 $4/System.map + +sync diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 98b6bcfb37bf..be017984a456 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -22,6 +22,7 @@ /* keyb */ #include <linux/init.h> #include <linux/vt_kern.h> +#include <linux/platform_device.h> #define BOOTINFO_COMPAT_1_0 #include <asm/setup.h> @@ -43,6 +44,10 @@ #include <asm/mac_oss.h> #include <asm/mac_psc.h> +/* platform device info */ + +#define SWIM_IO_SIZE 0x2000 /* SWIM IO resource size */ + /* Mac bootinfo struct */ struct mac_booter_data mac_bi_data; @@ -224,7 +229,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_IWM }, /* @@ -239,7 +245,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_IWM }, { .ident = MAC_MODEL_IIX, .name = "IIx", @@ -247,7 +254,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_IICX, .name = "IIcx", @@ -255,7 +263,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_SE30, .name = "SE/30", @@ -263,7 +272,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, /* @@ -280,7 +290,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_IIFX, .name = "IIfx", @@ -288,7 +299,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_IOP, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_IOP }, { .ident = MAC_MODEL_IISI, .name = "IIsi", @@ -296,7 +308,8 @@ static 
struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_IIVI, .name = "IIvi", @@ -304,7 +317,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_IIVX, .name = "IIvx", @@ -312,7 +326,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, /* @@ -326,7 +341,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_CCL, .name = "Color Classic", @@ -334,7 +350,9 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS}, + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 + }, /* * Some Mac LC machines. Basically the same as the IIci, ADB like IIsi @@ -347,7 +365,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_LCII, .name = "LC II", @@ -355,7 +374,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_LCIII, .name = "LC III", @@ -363,7 +383,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, /* @@ -383,7 +404,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_Q605_ACC, .name = "Quadra 605", @@ -391,7 +413,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_Q610, .name = "Quadra 610", @@ -400,7 +423,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_Q630, .name = "Quadra 630", @@ -410,7 +434,8 @@ static struct mac_model mac_data_table[] = { .ide_type = MAC_IDE_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_Q650, .name = "Quadra 650", @@ -419,7 +444,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + 
.nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, /* The Q700 does have a NS Sonic */ { @@ -430,7 +456,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA2, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_Q800, .name = "Quadra 800", @@ -439,7 +466,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_Q840, .name = "Quadra 840AV", @@ -448,7 +476,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA3, .scc_type = MAC_SCC_PSC, .ether_type = MAC_ETHER_MACE, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_AV }, { .ident = MAC_MODEL_Q900, .name = "Quadra 900", @@ -457,7 +486,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA2, .scc_type = MAC_SCC_IOP, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_IOP }, { .ident = MAC_MODEL_Q950, .name = "Quadra 950", @@ -466,7 +496,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA2, .scc_type = MAC_SCC_IOP, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_IOP }, /* @@ -480,7 +511,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_P475, .name = "Performa 475", @@ -488,7 +520,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_P475F, .name = "Performa 475", @@ -496,7 +529,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_P520, .name = "Performa 520", @@ -504,7 +538,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_P550, .name = "Performa 550", @@ -512,7 +547,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, /* These have the comm slot, and therefore the possibility of SONIC ethernet */ { @@ -523,7 +559,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_II, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_P588, .name = "Performa 588", @@ -533,7 +570,8 @@ static struct mac_model mac_data_table[] = { .ide_type = MAC_IDE_QUADRA, .scc_type = MAC_SCC_II, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, 
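The floppy_type additions running through this table follow the same idiom as the existing scsi_type/scc_type selectors: a per-model constant that later platform code switches on. Here is a standalone sketch of such a consumer, using the two SWIM offsets that mac_platform_init() applies further down; the helper and its return convention are illustrative, not part of the patch:

#include <stdio.h>

#define MAC_FLOPPY_IWM        0
#define MAC_FLOPPY_SWIM_ADDR1 1
#define MAC_FLOPPY_SWIM_ADDR2 2
#define MAC_FLOPPY_SWIM_IOP   3
#define MAC_FLOPPY_AV         4

/* Offset from VIA1_BASE where a memory-mapped SWIM decodes, or -1 for
 * machines whose floppy hardware (IWM, IOP, AV) is driven differently. */
static long swim_offset(int floppy_type)
{
    switch (floppy_type) {
    case MAC_FLOPPY_SWIM_ADDR1: return 0x1E000;
    case MAC_FLOPPY_SWIM_ADDR2: return 0x16000;
    default:                    return -1;
    }
}

int main(void)
{
    printf("Quadra 700 (SWIM_ADDR1): %#lx\n", swim_offset(MAC_FLOPPY_SWIM_ADDR1));
    printf("Mac IIcx   (SWIM_ADDR2): %#lx\n", swim_offset(MAC_FLOPPY_SWIM_ADDR2));
    printf("Mac II     (IWM)       : %ld (no memory-mapped SWIM)\n",
           swim_offset(MAC_FLOPPY_IWM));
    return 0;
}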
{ .ident = MAC_MODEL_TV, .name = "TV", @@ -541,7 +579,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_P600, .name = "Performa 600", @@ -549,7 +588,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, /* @@ -565,7 +605,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_C650, .name = "Centris 650", @@ -574,7 +615,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR1 }, { .ident = MAC_MODEL_C660, .name = "Centris 660AV", @@ -583,7 +625,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA3, .scc_type = MAC_SCC_PSC, .ether_type = MAC_ETHER_MACE, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_AV }, /* @@ -599,7 +642,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB145, .name = "PowerBook 145", @@ -607,7 +651,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB150, .name = "PowerBook 150", @@ -616,7 +661,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_OLD, .ide_type = MAC_IDE_PB, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB160, .name = "PowerBook 160", @@ -624,7 +670,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB165, .name = "PowerBook 165", @@ -632,7 +679,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB165C, .name = "PowerBook 165c", @@ -640,7 +688,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB170, .name = "PowerBook 170", @@ -648,7 +697,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB180, .name = "PowerBook 180", @@ -656,7 +706,8 @@ static struct mac_model mac_data_table[] = { .via_type 
= MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB180C, .name = "PowerBook 180c", @@ -664,7 +715,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB190, .name = "PowerBook 190", @@ -673,7 +725,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_OLD, .ide_type = MAC_IDE_BABOON, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB520, .name = "PowerBook 520", @@ -682,7 +735,8 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, /* @@ -702,7 +756,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB230, .name = "PowerBook Duo 230", @@ -710,7 +765,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB250, .name = "PowerBook Duo 250", @@ -718,7 +774,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB270C, .name = "PowerBook Duo 270c", @@ -726,7 +783,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB280, .name = "PowerBook Duo 280", @@ -734,7 +792,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, { .ident = MAC_MODEL_PB280C, .name = "PowerBook Duo 280c", @@ -742,7 +801,8 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IIci, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS + .nubus_type = MAC_NUBUS, + .floppy_type = MAC_FLOPPY_SWIM_ADDR2 }, /* @@ -815,3 +875,42 @@ static void mac_get_model(char *str) strcpy(str, "Macintosh "); strcat(str, macintosh_config->name); } + +static struct resource swim_resources[1]; + +static struct platform_device swim_device = { + .name = "swim", + .id = -1, + .num_resources = ARRAY_SIZE(swim_resources), + .resource = swim_resources, +}; + +static struct platform_device *mac_platform_devices[] __initdata = { + &swim_device +}; + +int __init mac_platform_init(void) +{ + u8 *swim_base; + + switch (macintosh_config->floppy_type) { + case MAC_FLOPPY_SWIM_ADDR1: + swim_base = (u8 *)(VIA1_BASE + 0x1E000); + break; + case MAC_FLOPPY_SWIM_ADDR2: + swim_base = (u8 *)(VIA1_BASE + 0x16000); + break; + default: + return 0; + } + + swim_resources[0].name = 
"swim-regs"; + swim_resources[0].start = (resource_size_t)swim_base; + swim_resources[0].end = (resource_size_t)(swim_base + SWIM_IO_SIZE); + swim_resources[0].flags = IORESOURCE_MEM; + + return platform_add_devices(mac_platform_devices, + ARRAY_SIZE(mac_platform_devices)); +} + +arch_initcall(mac_platform_init); diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 7d97ba54536e..11bce3cb6482 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -645,3 +645,12 @@ int via_irq_pending(int irq) } return 0; } + +void via1_set_head(int head) +{ + if (head == 0) + via1[vBufA] &= ~VIA1A_vHeadSel; + else + via1[vBufA] |= VIA1A_vHeadSel; +} +EXPORT_SYMBOL(via1_set_head); diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68knommu/platform/520x/config.c index 06d887cdcbfb..855fc6a79d72 100644 --- a/arch/m68knommu/platform/520x/config.c +++ b/arch/m68knommu/platform/520x/config.c @@ -49,8 +49,39 @@ static struct platform_device m520x_uart = { .dev.platform_data = m520x_uart_platform, }; +static struct resource m520x_fec_resources[] = { + { + .start = MCF_MBAR + 0x30000, + .end = MCF_MBAR + 0x30000 + 0x7ff, + .flags = IORESOURCE_MEM, + }, + { + .start = 64 + 36, + .end = 64 + 36, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 40, + .end = 64 + 40, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 42, + .end = 64 + 42, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device m520x_fec = { + .name = "fec", + .id = 0, + .num_resources = ARRAY_SIZE(m520x_fec_resources), + .resource = m520x_fec_resources, +}; + static struct platform_device *m520x_devices[] __initdata = { &m520x_uart, + &m520x_fec, }; /***************************************************************************/ @@ -103,6 +134,30 @@ static void __init m520x_uarts_init(void) /***************************************************************************/ +static void __init m520x_fec_init(void) +{ + u32 imr; + u8 v; + + /* Unmask FEC interrupts at ColdFire interrupt controller */ + writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 36); + writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 40); + writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 42); + + imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); + imr &= ~0x0001FFF0; + writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); + + /* Set multi-function pins to ethernet mode */ + v = readb(MCF_IPSBAR + MCF_GPIO_PAR_FEC); + writeb(v | 0xf0, MCF_IPSBAR + MCF_GPIO_PAR_FEC); + + v = readb(MCF_IPSBAR + MCF_GPIO_PAR_FECI2C); + writeb(v | 0x0f, MCF_IPSBAR + MCF_GPIO_PAR_FECI2C); +} + +/***************************************************************************/ + /* * Program the vector to be an auto-vectored. 
*/ @@ -118,6 +173,7 @@ void __init config_BSP(char *commandp, int size) { mach_reset = coldfire_reset; m520x_uarts_init(); + m520x_fec_init(); } /***************************************************************************/ diff --git a/arch/m68knommu/platform/523x/config.c b/arch/m68knommu/platform/523x/config.c index 13f02611ea23..74133f27b30c 100644 --- a/arch/m68knommu/platform/523x/config.c +++ b/arch/m68knommu/platform/523x/config.c @@ -50,8 +50,39 @@ static struct platform_device m523x_uart = { .dev.platform_data = m523x_uart_platform, }; +static struct resource m523x_fec_resources[] = { + { + .start = MCF_MBAR + 0x1000, + .end = MCF_MBAR + 0x1000 + 0x7ff, + .flags = IORESOURCE_MEM, + }, + { + .start = 64 + 23, + .end = 64 + 23, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 27, + .end = 64 + 27, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 29, + .end = 64 + 29, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device m523x_fec = { + .name = "fec", + .id = 0, + .num_resources = ARRAY_SIZE(m523x_fec_resources), + .resource = m523x_fec_resources, +}; + static struct platform_device *m523x_devices[] __initdata = { &m523x_uart, + &m523x_fec, }; /***************************************************************************/ @@ -83,6 +114,25 @@ static void __init m523x_uarts_init(void) /***************************************************************************/ +static void __init m523x_fec_init(void) +{ + u32 imr; + + /* Unmask FEC interrupts at ColdFire interrupt controller */ + writeb(0x28, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 23); + writeb(0x27, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 27); + writeb(0x26, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 29); + + imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); + imr &= ~0xf; + writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); + imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); + imr &= ~0xff800001; + writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); +} + +/***************************************************************************/ + void mcf_disableall(void) { *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff; @@ -103,6 +153,7 @@ void __init config_BSP(char *commandp, int size) mcf_disableall(); mach_reset = coldfire_reset; m523x_uarts_init(); + m523x_fec_init(); } /***************************************************************************/ diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68knommu/platform/5272/config.c index 230bae691a7f..e049245f4092 100644 --- a/arch/m68knommu/platform/5272/config.c +++ b/arch/m68knommu/platform/5272/config.c @@ -55,8 +55,39 @@ static struct platform_device m5272_uart = { .dev.platform_data = m5272_uart_platform, }; +static struct resource m5272_fec_resources[] = { + { + .start = MCF_MBAR + 0x840, + .end = MCF_MBAR + 0x840 + 0x1cf, + .flags = IORESOURCE_MEM, + }, + { + .start = 86, + .end = 86, + .flags = IORESOURCE_IRQ, + }, + { + .start = 87, + .end = 87, + .flags = IORESOURCE_IRQ, + }, + { + .start = 88, + .end = 88, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device m5272_fec = { + .name = "fec", + .id = 0, + .num_resources = ARRAY_SIZE(m5272_fec_resources), + .resource = m5272_fec_resources, +}; + static struct platform_device *m5272_devices[] __initdata = { &m5272_uart, + &m5272_fec, }; /***************************************************************************/ @@ -91,6 +122,22 @@ static void __init m5272_uarts_init(void) 
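Every ColdFire BSP touched here grows the same shape of FEC glue: a resource array with one IORESOURCE_MEM window and three IORESOURCE_IRQ entries, a platform_device named "fec" for the driver to bind against, and an init function that unmasks the sources at the interrupt controller before registration. A condensed kernel-style sketch of that pattern follows; FEC_BASE and the vector numbers are placeholders, since the real values are per-SoC as the hunks above show:

#include <linux/platform_device.h>
#include <linux/ioport.h>
#include <linux/init.h>

#define FEC_BASE    0xfc030000          /* placeholder, SoC-specific */
#define FEC_IRQ_RX  (64 + 36)           /* placeholder vector numbers */
#define FEC_IRQ_TX  (64 + 40)
#define FEC_IRQ_MII (64 + 42)

static struct resource bsp_fec_resources[] = {
	{ .start = FEC_BASE,    .end = FEC_BASE + 0x7ff, .flags = IORESOURCE_MEM },
	{ .start = FEC_IRQ_RX,  .end = FEC_IRQ_RX,       .flags = IORESOURCE_IRQ },
	{ .start = FEC_IRQ_TX,  .end = FEC_IRQ_TX,       .flags = IORESOURCE_IRQ },
	{ .start = FEC_IRQ_MII, .end = FEC_IRQ_MII,      .flags = IORESOURCE_IRQ },
};

static struct platform_device bsp_fec_device = {
	.name          = "fec",             /* matched by the FEC driver */
	.id            = 0,
	.num_resources = ARRAY_SIZE(bsp_fec_resources),
	.resource      = bsp_fec_resources,
};

static struct platform_device *bsp_devices[] __initdata = {
	&bsp_fec_device,
};

static int __init bsp_fec_register(void)
{
	/* The controller unmask (the *_fec_init() hunks) must run first;
	 * registering the device does not by itself route the IRQs. */
	return platform_add_devices(bsp_devices, ARRAY_SIZE(bsp_devices));
}
arch_initcall(bsp_fec_register);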
/***************************************************************************/ +static void __init m5272_fec_init(void) +{ + u32 imr; + + /* Unmask FEC interrupts at ColdFire interrupt controller */ + imr = readl(MCF_MBAR + MCFSIM_ICR3); + imr = (imr & ~0x00000fff) | 0x00000ddd; + writel(imr, MCF_MBAR + MCFSIM_ICR3); + + imr = readl(MCF_MBAR + MCFSIM_ICR1); + imr = (imr & ~0x0f000000) | 0x0d000000; + writel(imr, MCF_MBAR + MCFSIM_ICR1); +} + +/***************************************************************************/ + void mcf_disableall(void) { volatile unsigned long *icrp; @@ -155,6 +202,7 @@ void __init config_BSP(char *commandp, int size) static int __init init_BSP(void) { m5272_uarts_init(); + m5272_fec_init(); platform_add_devices(m5272_devices, ARRAY_SIZE(m5272_devices)); return 0; } diff --git a/arch/m68knommu/platform/527x/config.c b/arch/m68knommu/platform/527x/config.c index 73cd1aef4a90..49343fb157b0 100644 --- a/arch/m68knommu/platform/527x/config.c +++ b/arch/m68knommu/platform/527x/config.c @@ -50,8 +50,73 @@ static struct platform_device m527x_uart = { .dev.platform_data = m527x_uart_platform, }; +static struct resource m527x_fec0_resources[] = { + { + .start = MCF_MBAR + 0x1000, + .end = MCF_MBAR + 0x1000 + 0x7ff, + .flags = IORESOURCE_MEM, + }, + { + .start = 64 + 23, + .end = 64 + 23, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 27, + .end = 64 + 27, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 29, + .end = 64 + 29, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct resource m527x_fec1_resources[] = { + { + .start = MCF_MBAR + 0x1800, + .end = MCF_MBAR + 0x1800 + 0x7ff, + .flags = IORESOURCE_MEM, + }, + { + .start = 128 + 23, + .end = 128 + 23, + .flags = IORESOURCE_IRQ, + }, + { + .start = 128 + 27, + .end = 128 + 27, + .flags = IORESOURCE_IRQ, + }, + { + .start = 128 + 29, + .end = 128 + 29, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device m527x_fec[] = { + { + .name = "fec", + .id = 0, + .num_resources = ARRAY_SIZE(m527x_fec0_resources), + .resource = m527x_fec0_resources, + }, + { + .name = "fec", + .id = 1, + .num_resources = ARRAY_SIZE(m527x_fec1_resources), + .resource = m527x_fec1_resources, + }, +}; + static struct platform_device *m527x_devices[] __initdata = { &m527x_uart, + &m527x_fec[0], +#ifdef CONFIG_FEC2 + &m527x_fec[1], +#endif }; /***************************************************************************/ @@ -97,6 +162,51 @@ static void __init m527x_uarts_init(void) /***************************************************************************/ +static void __init m527x_fec_irq_init(int nr) +{ + unsigned long base; + u32 imr; + + base = MCF_IPSBAR + (nr ? 
MCFICM_INTC1 : MCFICM_INTC0); + + writeb(0x28, base + MCFINTC_ICR0 + 23); + writeb(0x27, base + MCFINTC_ICR0 + 27); + writeb(0x26, base + MCFINTC_ICR0 + 29); + + imr = readl(base + MCFINTC_IMRH); + imr &= ~0xf; + writel(imr, base + MCFINTC_IMRH); + imr = readl(base + MCFINTC_IMRL); + imr &= ~0xff800001; + writel(imr, base + MCFINTC_IMRL); +} + +static void __init m527x_fec_init(void) +{ + u16 par; + u8 v; + + m527x_fec_irq_init(0); + + /* Set multi-function pins to ethernet mode for fec0 */ + par = readw(MCF_IPSBAR + 0x100082); + writew(par | 0xf00, MCF_IPSBAR + 0x100082); + v = readb(MCF_IPSBAR + 0x100078); + writeb(v | 0xc0, MCF_IPSBAR + 0x100078); + +#ifdef CONFIG_FEC2 + m527x_fec_irq_init(1); + + /* Set multi-function pins to ethernet mode for fec1 */ + par = readw(MCF_IPSBAR + 0x100082); + writew(par | 0xa0, MCF_IPSBAR + 0x100082); + v = readb(MCF_IPSBAR + 0x100079); + writeb(v | 0xc0, MCF_IPSBAR + 0x100079); +#endif +} + +/***************************************************************************/ + void mcf_disableall(void) { *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff; @@ -116,13 +226,14 @@ void __init config_BSP(char *commandp, int size) { mcf_disableall(); mach_reset = coldfire_reset; + m527x_uarts_init(); + m527x_fec_init(); } /***************************************************************************/ static int __init init_BSP(void) { - m527x_uarts_init(); platform_add_devices(m527x_devices, ARRAY_SIZE(m527x_devices)); return 0; } diff --git a/arch/m68knommu/platform/528x/config.c b/arch/m68knommu/platform/528x/config.c index 44baeb225dc7..bee526f4d1af 100644 --- a/arch/m68knommu/platform/528x/config.c +++ b/arch/m68knommu/platform/528x/config.c @@ -57,8 +57,40 @@ static struct platform_device m528x_uart = { .dev.platform_data = m528x_uart_platform, }; +static struct resource m528x_fec_resources[] = { + { + .start = MCF_MBAR + 0x1000, + .end = MCF_MBAR + 0x1000 + 0x7ff, + .flags = IORESOURCE_MEM, + }, + { + .start = 64 + 23, + .end = 64 + 23, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 27, + .end = 64 + 27, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 29, + .end = 64 + 29, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device m528x_fec = { + .name = "fec", + .id = 0, + .num_resources = ARRAY_SIZE(m528x_fec_resources), + .resource = m528x_fec_resources, +}; + + static struct platform_device *m528x_devices[] __initdata = { &m528x_uart, + &m528x_fec, }; /***************************************************************************/ @@ -99,6 +131,31 @@ static void __init m528x_uarts_init(void) /***************************************************************************/ +static void __init m528x_fec_init(void) +{ + u32 imr; + u16 v16; + + /* Unmask FEC interrupts at ColdFire interrupt controller */ + writeb(0x28, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 23); + writeb(0x27, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 27); + writeb(0x26, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 29); + + imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); + imr &= ~0xf; + writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); + imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); + imr &= ~0xff800001; + writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); + + /* Set multi-function pins to ethernet mode for fec0 */ + v16 = readw(MCF_IPSBAR + 0x100056); + writew(v16 | 0xf00, MCF_IPSBAR + 0x100056); + writeb(0xc0, MCF_IPSBAR + 0x100058); +} + +/***************************************************************************/ + 
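The interrupt-unmask hunks lean on magic masks such as imr &= ~0xff800001 without spelling out the bit layout. The arithmetic below shows how that constant decomposes, assuming (as the mask itself implies) that bit 0 is the ColdFire "mask all" bit and that sources 23 through 31 carry the FEC events on these parts; this is purely a decode of the literal, not driver code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t clear = 1u << 0;             /* "mask all" bit must drop too */
    for (int src = 23; src <= 31; src++)  /* FEC interrupt sources */
        clear |= 1u << src;
    printf("IMRL bits to clear: 0x%08x\n", clear);  /* prints 0xff800001 */
    return 0;
}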
void mcf_disableall(void) { *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff; @@ -158,6 +215,7 @@ void __init config_BSP(char *commandp, int size) static int __init init_BSP(void) { m528x_uarts_init(); + m528x_fec_init(); platform_add_devices(m528x_devices, ARRAY_SIZE(m528x_devices)); return 0; } diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c index a347623d6ee6..591f2f801134 100644 --- a/arch/m68knommu/platform/532x/config.c +++ b/arch/m68knommu/platform/532x/config.c @@ -61,8 +61,38 @@ static struct platform_device m532x_uart = { .dev.platform_data = m532x_uart_platform, }; +static struct resource m532x_fec_resources[] = { + { + .start = 0xfc030000, + .end = 0xfc0307ff, + .flags = IORESOURCE_MEM, + }, + { + .start = 64 + 36, + .end = 64 + 36, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 40, + .end = 64 + 40, + .flags = IORESOURCE_IRQ, + }, + { + .start = 64 + 42, + .end = 64 + 42, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device m532x_fec = { + .name = "fec", + .id = 0, + .num_resources = ARRAY_SIZE(m532x_fec_resources), + .resource = m532x_fec_resources, +}; static struct platform_device *m532x_devices[] __initdata = { &m532x_uart, + &m532x_fec, }; /***************************************************************************/ @@ -93,6 +123,24 @@ static void __init m532x_uarts_init(void) for (line = 0; (line < nrlines); line++) m532x_uart_init_line(line, m532x_uart_platform[line].irq); } +/***************************************************************************/ + +static void __init m532x_fec_init(void) +{ + /* Unmask FEC interrupts at ColdFire interrupt controller */ + MCF_INTC0_ICR36 = 0x2; + MCF_INTC0_ICR40 = 0x2; + MCF_INTC0_ICR42 = 0x2; + + MCF_INTC0_IMRH &= ~(MCF_INTC_IMRH_INT_MASK36 | + MCF_INTC_IMRH_INT_MASK40 | MCF_INTC_IMRH_INT_MASK42); + + /* Set multi-function pins to ethernet mode for fec0 */ + MCF_GPIO_PAR_FECI2C |= (MCF_GPIO_PAR_FECI2C_PAR_MDC_EMDC | + MCF_GPIO_PAR_FECI2C_PAR_MDIO_EMDIO); + MCF_GPIO_PAR_FEC = (MCF_GPIO_PAR_FEC_PAR_FEC_7W_FEC | + MCF_GPIO_PAR_FEC_PAR_FEC_MII_FEC); +} /***************************************************************************/ @@ -150,6 +198,7 @@ void __init config_BSP(char *commandp, int size) static int __init init_BSP(void) { m532x_uarts_init(); + m532x_fec_init(); platform_add_devices(m532x_devices, ARRAY_SIZE(m532x_devices)); return 0; } diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h index 9ce0607d7a4e..9e89cf99d4e4 100644 --- a/arch/mips/include/asm/sigcontext.h +++ b/arch/mips/include/asm/sigcontext.h @@ -9,6 +9,7 @@ #ifndef _ASM_SIGCONTEXT_H #define _ASM_SIGCONTEXT_H +#include <linux/types.h> #include <asm/sgidefs.h> #if _MIPS_SIM == _MIPS_SIM_ABI32 diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index facc2d7a87ca..2abca1780169 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -75,6 +75,9 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. 
*/ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #ifdef __KERNEL__ /** sock_type - Socket types diff --git a/arch/mips/include/asm/swab.h b/arch/mips/include/asm/swab.h index 88f1f7d555cb..99993c0d6c12 100644 --- a/arch/mips/include/asm/swab.h +++ b/arch/mips/include/asm/swab.h @@ -9,7 +9,7 @@ #define _ASM_SWAB_H #include <linux/compiler.h> -#include <asm/types.h> +#include <linux/types.h> #define __SWAB_64_THRU_32__ diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 4b4007b3083a..7b845ba9dff4 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -108,7 +108,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %14s", irq_desc[i].chip->name); seq_printf(p, " %s", action->name); diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 49aac6e17df9..2a472713de8e 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -355,40 +355,6 @@ SYSCALL_DEFINE1(32_personality, unsigned long, personality) return ret; } -/* ustat compatibility */ -struct ustat32 { - compat_daddr_t f_tfree; - compat_ino_t f_tinode; - char f_fname[6]; - char f_fpack[6]; -}; - -extern asmlinkage long sys_ustat(dev_t dev, struct ustat __user * ubuf); - -SYSCALL_DEFINE2(32_ustat, dev_t, dev, struct ustat32 __user *, ubuf32) -{ - int err; - struct ustat tmp; - struct ustat32 tmp32; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - err = sys_ustat(dev, (struct ustat __user *)&tmp); - set_fs(old_fs); - - if (err) - goto out; - - memset(&tmp32, 0, sizeof(struct ustat32)); - tmp32.f_tfree = tmp.f_tfree; - tmp32.f_tinode = tmp.f_tinode; - - err = copy_to_user(ubuf32, &tmp32, sizeof(struct ustat32)) ? 
-EFAULT : 0; - -out: - return err; -} - SYSCALL_DEFINE4(32_sendfile, long, out_fd, long, in_fd, compat_off_t __user *, offset, s32, count) { diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 7438e92f8a01..f61d6b0e5731 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -253,7 +253,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_utime /* 6130 */ PTR sys_mknod PTR sys_32_personality - PTR sys_32_ustat + PTR compat_sys_ustat PTR compat_sys_statfs PTR compat_sys_fstatfs /* 6135 */ PTR sys_sysfs diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index b0fef4ff9827..60997f1f69d4 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -265,7 +265,7 @@ sys_call_table: PTR sys_olduname PTR sys_umask /* 4060 */ PTR sys_chroot - PTR sys_32_ustat + PTR compat_sys_ustat PTR sys_dup2 PTR sys_getppid PTR sys_getpgrp /* 4065 */ diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c index 56c64ccc9c21..50fdb5c16e0c 100644 --- a/arch/mn10300/kernel/irq.c +++ b/arch/mn10300/kernel/irq.c @@ -221,7 +221,7 @@ int show_interrupts(struct seq_file *p, void *v) if (action) { seq_printf(p, "%3d: ", i); for_each_present_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); seq_printf(p, " %14s.%u", irq_desc[i].chip->name, (GxICR(i) & GxICR_LEVEL) >> GxICR_LEVEL_SHIFT); diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index c584b00c6074..430f1aeea0b8 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -336,10 +336,11 @@ #define NUM_PDC_RESULT 32 #if !defined(__ASSEMBLY__) -#ifdef __KERNEL__ #include <linux/types.h> +#ifdef __KERNEL__ + extern int pdc_type; /* Values for pdc_type */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index fba402c95ac2..885472bf7b78 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -54,6 +54,9 @@ #define SO_MARK 0x401f +#define SO_TIMESTAMPING 0x4020 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
*/ diff --git a/arch/parisc/include/asm/swab.h b/arch/parisc/include/asm/swab.h index 3ff16c5a3358..e78403b129ef 100644 --- a/arch/parisc/include/asm/swab.h +++ b/arch/parisc/include/asm/swab.h @@ -1,7 +1,7 @@ #ifndef _PARISC_SWAB_H #define _PARISC_SWAB_H -#include <asm/types.h> +#include <linux/types.h> #include <linux/compiler.h> #define __SWAB_64_THRU_32__ diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 29e70e16ede8..adfd617b4c18 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -185,7 +185,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%3d: ", i); #ifdef CONFIG_SMP for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #else seq_printf(p, "%10u ", kstat_irqs(i)); #endif diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 303d2b647e41..03b9a01bc16c 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -130,7 +130,7 @@ ENTRY_OURS(newuname) ENTRY_SAME(umask) /* 60 */ ENTRY_SAME(chroot) - ENTRY_SAME(ustat) + ENTRY_COMP(ustat) ENTRY_SAME(dup2) ENTRY_SAME(getppid) ENTRY_SAME(getpgrp) /* 65 */ diff --git a/arch/powerpc/include/asm/bootx.h b/arch/powerpc/include/asm/bootx.h index 57b82e3f89ce..60a3c9ef3017 100644 --- a/arch/powerpc/include/asm/bootx.h +++ b/arch/powerpc/include/asm/bootx.h @@ -9,7 +9,7 @@ #ifndef __ASM_BOOTX_H__ #define __ASM_BOOTX_H__ -#include <asm/types.h> +#include <linux/types.h> #ifdef macintosh #include <Types.h> diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index cd46f023ec6d..b5600ce6055e 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -7,7 +7,7 @@ #include <asm/string.h> #endif -#include <asm/types.h> +#include <linux/types.h> #include <asm/ptrace.h> #include <asm/cputable.h> #include <asm/auxvec.h> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index f993e4198d5c..bb2de6aa5ce0 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -20,7 +20,7 @@ #ifndef __LINUX_KVM_POWERPC_H #define __LINUX_KVM_POWERPC_H -#include <asm/types.h> +#include <linux/types.h> struct kvm_regs { __u64 pc; @@ -52,4 +52,11 @@ struct kvm_fpu { __u64 fpr[32]; }; +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index f49031b632ca..d22d39942a92 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -28,6 +28,13 @@ * need to find some way of advertising it. */ #define KVM44x_GUEST_TLB_SIZE 64 +struct kvmppc_44x_tlbe { + u32 tid; /* Only the low 8 bits are used. 
*/ + u32 word0; + u32 word1; + u32 word2; +}; + struct kvmppc_44x_shadow_ref { struct page *page; u16 gtlb_index; diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 2197764796d9..56bfae59837f 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -42,7 +42,12 @@ #define BOOKE_INTERRUPT_DTLB_MISS 13 #define BOOKE_INTERRUPT_ITLB_MISS 14 #define BOOKE_INTERRUPT_DEBUG 15 -#define BOOKE_MAX_INTERRUPT 15 + +/* E500 */ +#define BOOKE_INTERRUPT_SPE_UNAVAIL 32 +#define BOOKE_INTERRUPT_SPE_FP_DATA 33 +#define BOOKE_INTERRUPT_SPE_FP_ROUND 34 +#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h new file mode 100644 index 000000000000..9d497ce49726 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, <yu.liu@freescale.com> + * + * Description: + * This file is derived from arch/powerpc/include/asm/kvm_44x.h, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_KVM_E500_H__ +#define __ASM_KVM_E500_H__ + +#include <linux/kvm_host.h> + +#define BOOKE_INTERRUPT_SIZE 36 + +#define E500_PID_NUM 3 +#define E500_TLB_NUM 2 + +struct tlbe{ + u32 mas1; + u32 mas2; + u32 mas3; + u32 mas7; +}; + +struct kvmppc_vcpu_e500 { + /* Unmodified copy of the guest's TLB. */ + struct tlbe *guest_tlb[E500_TLB_NUM]; + /* TLB that's actually used when the guest is running. */ + struct tlbe *shadow_tlb[E500_TLB_NUM]; + /* Pages which are referenced in the shadow TLB. */ + struct page **shadow_pages[E500_TLB_NUM]; + + unsigned int guest_tlb_size[E500_TLB_NUM]; + unsigned int shadow_tlb_size[E500_TLB_NUM]; + unsigned int guest_tlb_nv[E500_TLB_NUM]; + + u32 host_pid[E500_PID_NUM]; + u32 pid[E500_PID_NUM]; + + u32 mas0; + u32 mas1; + u32 mas2; + u32 mas3; + u32 mas4; + u32 mas5; + u32 mas6; + u32 mas7; + u32 l1csr1; + u32 hid0; + u32 hid1; + + struct kvm_vcpu vcpu; +}; + +static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) +{ + return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu); +} + +#endif /* __ASM_KVM_E500_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c1e436fe7738..dfdf13c9fefd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -64,13 +64,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvmppc_44x_tlbe { - u32 tid; /* Only the low 8 bits are used. 
*/ - u32 word0; - u32 word1; - u32 word2; -}; - enum kvm_exit_types { MMIO_EXITS, DCR_EXITS, @@ -118,11 +111,6 @@ struct kvm_arch { struct kvm_vcpu_arch { u32 host_stack; u32 host_pid; - u32 host_dbcr0; - u32 host_dbcr1; - u32 host_dbcr2; - u32 host_iac[4]; - u32 host_msr; u64 fpr[32]; ulong gpr[32]; @@ -157,7 +145,7 @@ struct kvm_vcpu_arch { u32 tbu; u32 tcr; u32 tsr; - u32 ivor[16]; + u32 ivor[64]; ulong ivpr; u32 pir; @@ -170,6 +158,7 @@ struct kvm_vcpu_arch { u32 ccr1; u32 dbcr0; u32 dbcr1; + u32 dbsr; #ifdef CONFIG_KVM_EXIT_TIMING struct kvmppc_exit_timing timing_exit; @@ -200,10 +189,4 @@ struct kvm_vcpu_arch { unsigned long pending_exceptions; }; -struct kvm_guest_debug { - int enabled; - unsigned long bp[4]; - int singlestep; -}; - #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 36d2a50a8487..2c6ee349df5e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -52,13 +52,19 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +/* Core-specific hooks */ + extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - u64 asid, u32 flags, u32 max_bytes, unsigned int gtlb_idx); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); - -/* Core-specific hooks */ +extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); +extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); +extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); +extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, + gva_t eaddr); +extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu); +extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); @@ -71,9 +77,6 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); -extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu); -extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu); - extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/mmu-fsl-booke.h b/arch/powerpc/include/asm/mmu-fsl-booke.h index 3f941c0f7e8e..4285b64a65e0 100644 --- a/arch/powerpc/include/asm/mmu-fsl-booke.h +++ b/arch/powerpc/include/asm/mmu-fsl-booke.h @@ -75,6 +75,8 @@ #ifndef __ASSEMBLY__ +extern unsigned int tlbcam_index; + typedef struct { unsigned int id; unsigned int active; diff --git a/arch/powerpc/include/asm/ps3fb.h b/arch/powerpc/include/asm/ps3fb.h index 3f121fe4010d..e7233a849680 100644 --- a/arch/powerpc/include/asm/ps3fb.h +++ b/arch/powerpc/include/asm/ps3fb.h @@ -19,6 +19,7 @@ #ifndef _ASM_POWERPC_PS3FB_H_ #define _ASM_POWERPC_PS3FB_H_ +#include <linux/types.h> #include <linux/ioctl.h> /* ioctl */ diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index f5a4e168e498..1e5cfad0e3f7 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -61,4 +61,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define 
SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/include/asm/spu_info.h b/arch/powerpc/include/asm/spu_info.h index 3545efbf9891..1286c823f0d8 100644 --- a/arch/powerpc/include/asm/spu_info.h +++ b/arch/powerpc/include/asm/spu_info.h @@ -23,9 +23,10 @@ #ifndef _SPU_INFO_H #define _SPU_INFO_H +#include <linux/types.h> + #ifdef __KERNEL__ #include <asm/spu.h> -#include <linux/types.h> #else struct mfc_cq_sr { __u64 mfc_cq_data0_RW; diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h index ef824ae4b79c..c581e3ef73ed 100644 --- a/arch/powerpc/include/asm/swab.h +++ b/arch/powerpc/include/asm/swab.h @@ -8,7 +8,7 @@ * 2 of the License, or (at your option) any later version. */ -#include <asm/types.h> +#include <linux/types.h> #include <linux/compiler.h> #ifdef __GNUC__ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 72353f6070a4..fe166491e9dc 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -65,7 +65,7 @@ SYSCALL(ni_syscall) SYSX(sys_ni_syscall,sys_olduname, sys_olduname) COMPAT_SYS_SPU(umask) SYSCALL_SPU(chroot) -SYSCALL(ustat) +COMPAT_SYS(ustat) SYSCALL_SPU(dup2) SYSCALL_SPU(getppid) SYSCALL_SPU(getpgrp) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 19ee491e9e23..42fe4da4e8ae 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -49,7 +49,7 @@ #include <asm/iseries/alpaca.h> #endif #ifdef CONFIG_KVM -#include <asm/kvm_44x.h> +#include <linux/kvm_host.h> #endif #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) @@ -361,8 +361,6 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); - DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 23b8b5e36f98..17efb7118db1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -190,7 +190,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%3d: ", i); #ifdef CONFIG_SMP for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #else seq_printf(p, "%10u ", kstat_irqs(i)); #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index a66bec57265a..0cef809cec21 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -28,72 +28,6 @@ #include "44x_tlb.h" -/* Note: clearing MSR[DE] just means that the debug interrupt will not be - * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. - * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt - * will be delivered as an "imprecise debug event" (which is indicated by - * DBSR[IDE]. 
- */ -static void kvm44x_disable_debug_interrupts(void) -{ - mtmsr(mfmsr() & ~MSR_DE); -} - -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ - kvm44x_disable_debug_interrupts(); - - mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); - mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); - mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); - mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); - mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); - mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); - mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); - mtmsr(vcpu->arch.host_msr); -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - u32 dbcr0 = 0; - - vcpu->arch.host_msr = mfmsr(); - kvm44x_disable_debug_interrupts(); - - /* Save host debug register state. */ - vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); - vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); - vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); - vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); - vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); - vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); - vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); - - /* set registers up for guest */ - - if (dbg->bp[0]) { - mtspr(SPRN_IAC1, dbg->bp[0]); - dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; - } - if (dbg->bp[1]) { - mtspr(SPRN_IAC2, dbg->bp[1]); - dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; - } - if (dbg->bp[2]) { - mtspr(SPRN_IAC3, dbg->bp[2]); - dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; - } - if (dbg->bp[3]) { - mtspr(SPRN_IAC4, dbg->bp[3]); - dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; - } - - mtspr(SPRN_DBCR0, dbcr0); - mtspr(SPRN_DBCR1, 0); - mtspr(SPRN_DBCR2, 0); -} - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvmppc_44x_tlb_load(vcpu); @@ -149,8 +83,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; int index; gva_t eaddr; u8 pid; @@ -166,9 +98,7 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } - gtlbe = &vcpu_44x->guest_tlb[index]; - - tr->physical_address = tlb_xlate(gtlbe, eaddr); + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); /* XXX what does "writeable" and "usermode" even mean? 
*/ tr->valid = 1; diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 82489a743a6f..61af58fcecee 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -27,25 +27,12 @@ #include "booke.h" #include "44x_tlb.h" -#define OP_RFI 19 - -#define XOP_RFI 50 -#define XOP_MFMSR 83 -#define XOP_WRTEE 131 -#define XOP_MTMSR 146 -#define XOP_WRTEEI 163 #define XOP_MFDCR 323 #define XOP_MTDCR 451 #define XOP_TLBSX 914 #define XOP_ICCCI 966 #define XOP_TLBWE 978 -static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); -} - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -59,48 +46,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int ws; switch (get_op(inst)) { - case OP_RFI: - switch (get_xop(inst)) { - case XOP_RFI: - kvmppc_emul_rfi(vcpu); - kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); - *advance = 0; - break; - - default: - emulated = EMULATE_FAIL; - break; - } - break; - case 31: switch (get_xop(inst)) { - case XOP_MFMSR: - rt = get_rt(inst); - vcpu->arch.gpr[rt] = vcpu->arch.msr; - kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); - break; - - case XOP_MTMSR: - rs = get_rs(inst); - kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); - break; - - case XOP_WRTEE: - rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (vcpu->arch.gpr[rs] & MSR_EE); - kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); - break; - - case XOP_WRTEEI: - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (inst & MSR_EE); - kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); - break; - case XOP_MFDCR: dcrn = get_dcrn(inst); rt = get_rt(inst); @@ -186,186 +134,51 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_FAIL; } + if (emulated == EMULATE_FAIL) + emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); + return emulated; } int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) { + int emulated = EMULATE_DONE; + switch (sprn) { - case SPRN_MMUCR: - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; case SPRN_PID: kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; case SPRN_CCR0: vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; case SPRN_CCR1: vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; - case SPRN_DEAR: - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; - case SPRN_ESR: - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR0: - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR1: - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; - case SPRN_TSR: - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; - case SPRN_TCR: - vcpu->arch.tcr = vcpu->arch.gpr[rs]; - kvmppc_emulate_dec(vcpu); - break; - - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. 
*/ - case SPRN_SPRG4: - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG5: - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG6: - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG7: - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; - - case SPRN_IVPR: - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR0: - vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR1: - vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR2: - vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR3: - vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR4: - vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR5: - vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR6: - vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR7: - vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR8: - vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR9: - vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR10: - vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR11: - vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR12: - vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR13: - vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR14: - vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR15: - vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; - break; - default: - return EMULATE_FAIL; + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); - return EMULATE_DONE; + return emulated; } int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) { + int emulated = EMULATE_DONE; + switch (sprn) { - /* 440 */ + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; case SPRN_MMUCR: vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; case SPRN_CCR0: vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; case SPRN_CCR1: vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; - - /* Book E */ - case SPRN_PID: - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; - case SPRN_IVPR: - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; - case SPRN_DEAR: - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; - case SPRN_ESR: - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; - case SPRN_DBCR0: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; - case SPRN_DBCR1: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; - - case SPRN_IVOR0: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; - break; - case SPRN_IVOR1: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; - break; - case SPRN_IVOR2: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; - break; - case SPRN_IVOR3: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; - break; - case SPRN_IVOR4: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; - break; - case SPRN_IVOR5: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; - break; - case SPRN_IVOR6: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; - break; - case SPRN_IVOR7: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; - 
break; - case SPRN_IVOR8: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; - break; - case SPRN_IVOR9: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; - break; - case SPRN_IVOR10: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; - break; - case SPRN_IVOR11: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; - break; - case SPRN_IVOR12: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; - break; - case SPRN_IVOR13: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; - break; - case SPRN_IVOR14: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; - break; - case SPRN_IVOR15: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; - break; - default: - return EMULATE_FAIL; + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); - return EMULATE_DONE; + return emulated; } diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 9a34b8edb9e2..4a16f472cc18 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -208,20 +208,38 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, return -1; } -int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, + gva_t eaddr) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; + unsigned int pgmask = get_tlb_bytes(gtlbe) - 1; + + return get_tlb_raddr(gtlbe) | (eaddr & pgmask); +} + +int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_IS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } -int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_DS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } +void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) +{ +} + static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, unsigned int stlb_index) { @@ -248,7 +266,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); } -void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; @@ -269,15 +287,19 @@ void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) * Caller must ensure that the specified guest TLB entry is safe to insert into * the shadow TLB. */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, - u32 flags, u32 max_bytes, unsigned int gtlb_index) +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, + unsigned int gtlb_index) { struct kvmppc_44x_tlbe stlbe; struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; struct kvmppc_44x_shadow_ref *ref; struct page *new_page; hpa_t hpaddr; gfn_t gfn; + u32 asid = gtlbe->tid; + u32 flags = gtlbe->word2; + u32 max_bytes = get_tlb_bytes(gtlbe); unsigned int victim; /* Select TLB entry to clobber. 
Indirectly guard against races with the TLB @@ -448,10 +470,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) } if (tlbe_is_host_safe(vcpu, tlbe)) { - u64 asid; gva_t eaddr; gpa_t gpaddr; - u32 flags; u32 bytes; eaddr = get_tlb_eaddr(tlbe); @@ -462,10 +482,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) eaddr &= ~(bytes - 1); gpaddr &= ~(bytes - 1); - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - flags = tlbe->word2 & 0xffff; - - kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); } KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h index 772191f29e62..a9ff80e51526 100644 --- a/arch/powerpc/kvm/44x_tlb.h +++ b/arch/powerpc/kvm/44x_tlb.h @@ -25,8 +25,6 @@ extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as); -extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); -extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc); @@ -85,11 +83,4 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) return (vcpu->arch.mmucr >> 16) & 0x1; } -static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr) -{ - unsigned int pgmask = get_tlb_bytes(tlbe) - 1; - - return get_tlb_raddr(tlbe) | (eaddr & pgmask); -} - #endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 6dbdc4817d80..5a152a52796f 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -2,6 +2,9 @@ # KVM configuration # +config HAVE_KVM_IRQCHIP + bool + menuconfig VIRTUALIZATION bool "Virtualization" ---help--- @@ -43,6 +46,19 @@ config KVM_EXIT_TIMING If unsure, say N. +config KVM_E500 + bool "KVM support for PowerPC E500 processors" + depends on EXPERIMENTAL && E500 + select KVM + ---help--- + Support running unmodified E500 guest kernels in virtual machines on + E500 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. 
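For illustration only, not part of the patch: after the kvmppc_mmu_map() change above, callers pass just a guest TLB index and the MMU code derives asid/flags/bytes from the stored entry itself, with kvmppc_mmu_xlate() doing the address math. A minimal standalone C model of that translation, assuming a simplified entry with a real base and a power-of-two size field:

#include <stdint.h>
#include <stdio.h>

/* Simplified guest TLB entry: real (physical) base + power-of-two size. */
struct tlbe_model {
	uint64_t raddr;
	uint64_t bytes;
};

/* Same math as kvmppc_mmu_xlate(): keep the entry's real page base,
 * splice in the offset bits of the effective address. */
static uint64_t xlate(const struct tlbe_model *e, uint64_t eaddr)
{
	return e->raddr | (eaddr & (e->bytes - 1));
}

int main(void)
{
	struct tlbe_model e = { .raddr = 0x10000000, .bytes = 4096 };

	/* 0xc0000abc -> 0x10000abc: the page offset survives translation */
	printf("0x%llx\n", (unsigned long long)xlate(&e, 0xc0000abcULL));
	return 0;
}

Deriving everything from the stored entry removes the risk of a call site passing asid/flags values that have drifted out of sync with the guest TLB.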
+ config KVM_TRACE bool "KVM trace support" depends on KVM && MARKERS && SYSFS diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index df7ba59e6d53..4b2df66c79d8 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -16,8 +16,18 @@ AFLAGS_booke_interrupts.o := -I$(obj) kvm-440-objs := \ booke.o \ + booke_emulate.o \ booke_interrupts.o \ 44x.o \ 44x_tlb.o \ 44x_emulate.o obj-$(CONFIG_KVM_440) += kvm-440.o + +kvm-e500-objs := \ + booke.o \ + booke_emulate.o \ + booke_interrupts.o \ + e500.o \ + e500_tlb.o \ + e500_emulate.o +obj-$(CONFIG_KVM_E500) += kvm-e500.o diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 35485dd6927e..642e4204cf25 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -30,10 +30,8 @@ #include <asm/kvm_ppc.h> #include "timing.h" #include <asm/cacheflush.h> -#include <asm/kvm_44x.h> #include "booke.h" -#include "44x_tlb.h" unsigned long kvmppc_booke_handlers; @@ -120,6 +118,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_DATA_STORAGE: case BOOKE_IRQPRIO_INST_STORAGE: case BOOKE_IRQPRIO_FP_UNAVAIL: + case BOOKE_IRQPRIO_SPE_UNAVAIL: + case BOOKE_IRQPRIO_SPE_FP_DATA: + case BOOKE_IRQPRIO_SPE_FP_ROUND: case BOOKE_IRQPRIO_AP_UNAVAIL: case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; @@ -165,7 +166,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) unsigned int priority; priority = __ffs(*pending); - while (priority <= BOOKE_MAX_INTERRUPT) { + while (priority <= BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) break; @@ -263,6 +264,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_SPE_UNAVAIL: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_SPE_FP_DATA: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_SPE_FP_ROUND: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); + r = RESUME_GUEST; + break; + case BOOKE_INTERRUPT_DATA_STORAGE: vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; @@ -284,29 +300,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; - /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_DTLB_MISS: { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.fault_dear; int gtlb_index; + gpa_t gpaddr; gfn_t gfn; /* Check the guest TLB. */ - gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); + gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); if (gtlb_index < 0) { /* The guest didn't have a mapping for it. 
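The delivery loop above scans the pending bitmap from the lowest-numbered (highest-priority) bit up to BOOKE_IRQPRIO_MAX. A simplified standalone sketch of that walk, using __builtin_ctzl() in place of the kernel's __ffs() and assuming every delivery succeeds (the real loop stops at the first priority kvmppc_booke_irqprio_deliver() accepts):

#include <stdio.h>

#define BOOKE_IRQPRIO_MAX 19

int main(void)
{
	unsigned long pending = (1UL << 3) | (1UL << 10); /* two queued priorities */

	while (pending) {
		/* __builtin_ctzl() finds the lowest set bit, like __ffs() */
		unsigned long priority = __builtin_ctzl(pending);

		if (priority > BOOKE_IRQPRIO_MAX)
			break;
		printf("deliver priority %lu\n", priority); /* 3, then 10 */
		pending &= ~(1UL << priority);
	}
	return 0;
}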
*/ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; + kvmppc_mmu_dtlb_miss(vcpu); kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); r = RESUME_GUEST; break; } - gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); - gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); + gfn = gpaddr >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { /* The guest TLB had a mapping, but the shadow TLB @@ -315,13 +329,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, - gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); r = RESUME_GUEST; } else { /* Guest has mapped and accessed a page which is not * actually RAM. */ + vcpu->arch.paddr_accessed = gpaddr; r = kvmppc_emulate_mmio(run, vcpu); kvmppc_account_exit(vcpu, MMIO_EXITS); } @@ -329,10 +343,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_ITLB_MISS: { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.pc; gpa_t gpaddr; gfn_t gfn; @@ -341,18 +352,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; /* Check the guest TLB. */ - gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); + gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); + kvmppc_mmu_itlb_miss(vcpu); kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); break; } kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); - gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - gpaddr = tlb_xlate(gtlbe, eaddr); + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); gfn = gpaddr >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { @@ -362,8 +373,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, - gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); } else { /* Guest mapped and leaped at non-RAM! 
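A compressed, illustrative model of the three-way DTLB-miss outcome above, with a stub flag standing in for kvm_is_visible_gfn(); the names here are hypothetical, not kernel API:

#include <stdbool.h>
#include <stdio.h>

enum action { DELIVER_TO_GUEST, MAP_SHADOW, EMULATE_MMIO };

/* gtlb_index < 0: the guest itself has no mapping, so reflect the fault.
 * Otherwise refill the host TLB for visible RAM, or emulate an MMIO access. */
static enum action dtlb_miss(int gtlb_index, bool gfn_is_visible_ram)
{
	if (gtlb_index < 0)
		return DELIVER_TO_GUEST;
	if (gfn_is_visible_ram)
		return MAP_SHADOW;
	return EMULATE_MMIO;
}

int main(void)
{
	printf("%d %d %d\n", dtlb_miss(-1, false), dtlb_miss(3, true),
	       dtlb_miss(3, false)); /* 0 1 2 */
	return 0;
}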
*/ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index cf7c94ca24bf..d59bcca1f9d8 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -22,6 +22,7 @@ #include <linux/types.h> #include <linux/kvm_host.h> +#include <asm/kvm_ppc.h> #include "timing.h" /* interrupt priority ordering */ @@ -30,17 +31,24 @@ #define BOOKE_IRQPRIO_ALIGNMENT 2 #define BOOKE_IRQPRIO_PROGRAM 3 #define BOOKE_IRQPRIO_FP_UNAVAIL 4 -#define BOOKE_IRQPRIO_SYSCALL 5 -#define BOOKE_IRQPRIO_AP_UNAVAIL 6 -#define BOOKE_IRQPRIO_DTLB_MISS 7 -#define BOOKE_IRQPRIO_ITLB_MISS 8 -#define BOOKE_IRQPRIO_MACHINE_CHECK 9 -#define BOOKE_IRQPRIO_DEBUG 10 -#define BOOKE_IRQPRIO_CRITICAL 11 -#define BOOKE_IRQPRIO_WATCHDOG 12 -#define BOOKE_IRQPRIO_EXTERNAL 13 -#define BOOKE_IRQPRIO_FIT 14 -#define BOOKE_IRQPRIO_DECREMENTER 15 +#define BOOKE_IRQPRIO_SPE_UNAVAIL 5 +#define BOOKE_IRQPRIO_SPE_FP_DATA 6 +#define BOOKE_IRQPRIO_SPE_FP_ROUND 7 +#define BOOKE_IRQPRIO_SYSCALL 8 +#define BOOKE_IRQPRIO_AP_UNAVAIL 9 +#define BOOKE_IRQPRIO_DTLB_MISS 10 +#define BOOKE_IRQPRIO_ITLB_MISS 11 +#define BOOKE_IRQPRIO_MACHINE_CHECK 12 +#define BOOKE_IRQPRIO_DEBUG 13 +#define BOOKE_IRQPRIO_CRITICAL 14 +#define BOOKE_IRQPRIO_WATCHDOG 15 +#define BOOKE_IRQPRIO_EXTERNAL 16 +#define BOOKE_IRQPRIO_FIT 17 +#define BOOKE_IRQPRIO_DECREMENTER 18 +#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 +#define BOOKE_IRQPRIO_MAX 19 + +extern unsigned long kvmppc_booke_handlers; /* Helper function for "full" MSR writes. No need to call this if only EE is * changing. */ @@ -57,4 +65,9 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) }; } +int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance); +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); + #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c new file mode 100644 index 000000000000..aebc65e93f4b --- /dev/null +++ b/arch/powerpc/kvm/booke_emulate.c @@ -0,0 +1,266 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp.
2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <asm/disassemble.h> + +#include "booke.h" + +#define OP_19_XOP_RFI 50 + +#define OP_31_XOP_MFMSR 83 +#define OP_31_XOP_WRTEE 131 +#define OP_31_XOP_MTMSR 146 +#define OP_31_XOP_WRTEEI 163 + +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + +int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int rs; + int rt; + + switch (get_op(inst)) { + case 19: + switch (get_xop(inst)) { + case OP_19_XOP_RFI: + kvmppc_emul_rfi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); + *advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case 31: + switch (get_xop(inst)) { + + case OP_31_XOP_MFMSR: + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); + break; + + case OP_31_XOP_MTMSR: + rs = get_rs(inst); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + + case OP_31_XOP_WRTEE: + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case OP_31_XOP_WRTEEI: + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DBSR: + vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break; + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. 
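The dispatch above keys on the PowerPC primary opcode (inst >> 26) and, for the form-19/form-31 instructions, the extended opcode. A standalone decode mirroring the get_op()/get_xop()/get_rt() accessors from asm/disassemble.h, checked here against the mfmsr encoding (illustrative, not from the patch):

#include <stdint.h>
#include <stdio.h>

static unsigned int get_op(uint32_t inst)  { return inst >> 26; }
static unsigned int get_xop(uint32_t inst) { return (inst >> 1) & 0x3ff; }
static unsigned int get_rt(uint32_t inst)  { return (inst >> 21) & 0x1f; }

int main(void)
{
	uint32_t inst = 0x7c6000a6; /* mfmsr r3 */

	/* prints op=31 xop=83 rt=3, matching OP_31_XOP_MFMSR above */
	printf("op=%u xop=%u rt=%u\n", get_op(inst), get_xop(inst), get_rt(inst));
	return 0;
}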
*/ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR0: + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR1: + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR2: + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR3: + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR4: + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR5: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR6: + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR7: + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR8: + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR9: + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR10: + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR11: + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR12: + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR13: + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR14: + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR15: + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; + case SPRN_DBSR: + vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break; + + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + break; + case 
SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 084ebcd7dd83..d0c6f841bbd1 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -86,6 +86,9 @@ KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS KVM_HANDLER BOOKE_INTERRUPT_DEBUG +KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL +KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA +KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND _GLOBAL(kvmppc_handler_len) .long kvmppc_handler_1 - kvmppc_handler_0 @@ -347,7 +350,9 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 +#ifdef CONFIG_44x iccci 0, 0 /* XXX hack */ +#endif /* Load some guest volatiles. */ lwz r0, VCPU_GPR(r0)(r4) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c new file mode 100644 index 000000000000..d8067fd81cdd --- /dev/null +++ b/arch/powerpc/kvm/e500.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, <yu.liu@freescale.com> + * + * Description: + * This file is derived from arch/powerpc/kvm/44x.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kvm_host.h> +#include <linux/err.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/tlbflush.h> +#include <asm/kvm_e500.h> +#include <asm/kvm_ppc.h> + +#include "booke.h" +#include "e500_tlb.h" + +void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + kvmppc_e500_tlb_load(vcpu, cpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvmppc_e500_tlb_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->cpu_name, "e500v2") == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + kvmppc_e500_tlb_setup(vcpu_e500); + + /* Use the same core version as the host's */ + vcpu->arch.pvr = mfspr(SPRN_PVR); + + return 0; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR. */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); + if (index < 0) { + tr->valid = 0; + return 0; + } + + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); + /* XXX what does "writeable" and "usermode" even mean?
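An illustrative standalone round-trip of the AS|PID|EADDR packing that kvmppc_core_vcpu_translate() above decodes from the 64-bit linear_address (the packing helper is hypothetical; only the unpacking shifts come from the code above):

#include <stdint.h>
#include <stdio.h>

static uint64_t pack_linear_address(uint32_t eaddr, uint8_t pid, unsigned int as)
{
	return ((uint64_t)(as & 1) << 40) | ((uint64_t)pid << 32) | eaddr;
}

int main(void)
{
	uint64_t la = pack_linear_address(0xc0001234, 0x2a, 1);

	/* the same unpacking as kvmppc_core_vcpu_translate() */
	printf("eaddr=0x%x pid=0x%x as=%u\n",
	       (uint32_t)la, (unsigned int)((la >> 32) & 0xff),
	       (unsigned int)((la >> 40) & 0x1));
	return 0;
}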
*/ + tr->valid = 1; + + return 0; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_e500 *vcpu_e500; + struct kvm_vcpu *vcpu; + int err; + + vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_e500) { + err = -ENOMEM; + goto out; + } + + vcpu = &vcpu_e500->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + err = kvmppc_e500_tlb_init(vcpu_e500); + if (err) + goto uninit_vcpu; + + return vcpu; + +uninit_vcpu: + kvm_vcpu_uninit(vcpu); +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + kvmppc_e500_tlb_uninit(vcpu_e500); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +} + +static int kvmppc_e500_init(void) +{ + int r, i; + unsigned long ivor[3]; + unsigned long max_ivor = 0; + + r = kvmppc_booke_init(); + if (r) + return r; + + /* copy extra E500 exception handlers */ + ivor[0] = mfspr(SPRN_IVOR32); + ivor[1] = mfspr(SPRN_IVOR33); + ivor[2] = mfspr(SPRN_IVOR34); + for (i = 0; i < 3; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + (i + 16) * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE); +} + +static void kvmppc_e500_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_e500_init); +module_exit(kvmppc_e500_exit); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c new file mode 100644 index 000000000000..3f760414b9f8 --- /dev/null +++ b/arch/powerpc/kvm/e500_emulate.c @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, <yu.liu@freescale.com> + * + * Description: + * This file is derived from arch/powerpc/kvm/44x_emulate.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ */ + +#include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include <asm/kvm_e500.h> + +#include "booke.h" +#include "e500_tlb.h" + +#define XOP_TLBIVAX 786 +#define XOP_TLBSX 914 +#define XOP_TLBRE 946 +#define XOP_TLBWE 978 + +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int ra; + int rb; + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + + case XOP_TLBRE: + emulated = kvmppc_e500_emul_tlbre(vcpu); + break; + + case XOP_TLBWE: + emulated = kvmppc_e500_emul_tlbwe(vcpu); + break; + + case XOP_TLBSX: + rb = get_rb(inst); + emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); + break; + + case XOP_TLBIVAX: + ra = get_ra(inst); + rb = get_rb(inst); + emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + if (emulated == EMULATE_FAIL) + emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); + + return emulated; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_PID: + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = vcpu->arch.gpr[rs]; + break; + case SPRN_PID1: + vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break; + case SPRN_PID2: + vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break; + case SPRN_MAS0: + vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS1: + vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS2: + vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS3: + vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS4: + vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS6: + vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS7: + vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break; + case SPRN_L1CSR1: + vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break; + case SPRN_HID0: + vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break; + case SPRN_HID1: + vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break; + + case SPRN_MMUCSR0: + emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, + vcpu->arch.gpr[rs]); + break; + + /* extra exceptions */ + case SPRN_IVOR32: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR33: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR34: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR35: + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs]; + break; + + default: + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); + } + + return emulated; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break; + case SPRN_PID1: + vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break; + case SPRN_PID2: + vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break; + case SPRN_MAS0: + vcpu->arch.gpr[rt] = vcpu_e500->mas0; break; + case SPRN_MAS1: + vcpu->arch.gpr[rt] = vcpu_e500->mas1; break; + case SPRN_MAS2: + vcpu->arch.gpr[rt] = vcpu_e500->mas2; break; + case SPRN_MAS3: + vcpu->arch.gpr[rt] = vcpu_e500->mas3; break; + case SPRN_MAS4: + vcpu->arch.gpr[rt] = vcpu_e500->mas4; break; + case SPRN_MAS6: + vcpu->arch.gpr[rt] = vcpu_e500->mas6; break; + case SPRN_MAS7: + 
vcpu->arch.gpr[rt] = vcpu_e500->mas7; break; + + case SPRN_TLB0CFG: + vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG); + vcpu->arch.gpr[rt] &= ~0xfffUL; + vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0]; + break; + + case SPRN_TLB1CFG: + vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG); + vcpu->arch.gpr[rt] &= ~0xfffUL; + vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1]; + break; + + case SPRN_L1CSR1: + vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break; + case SPRN_HID0: + vcpu->arch.gpr[rt] = vcpu_e500->hid0; break; + case SPRN_HID1: + vcpu->arch.gpr[rt] = vcpu_e500->hid1; break; + + case SPRN_MMUCSR0: + vcpu->arch.gpr[rt] = 0; break; + + /* extra exceptions */ + case SPRN_IVOR32: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + break; + case SPRN_IVOR33: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + break; + case SPRN_IVOR34: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + break; + case SPRN_IVOR35: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + break; + default: + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); + } + + return emulated; +} + diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c new file mode 100644 index 000000000000..0e773fc2d5e4 --- /dev/null +++ b/arch/powerpc/kvm/e500_tlb.c @@ -0,0 +1,757 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_e500.h> + +#include "../mm/mmu_decl.h" +#include "e500_tlb.h" + +#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) + +static unsigned int tlb1_entry_num; + +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe *tlbe; + int i, tlbsel; + + printk("| %8s | %8s | %8s | %8s | %8s |\n", + "nr", "mas1", "mas2", "mas3", "mas7"); + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + printk("Guest TLB%d:\n", tlbsel); + for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) { + tlbe = &vcpu_e500->guest_tlb[tlbsel][i]; + if (tlbe->mas1 & MAS1_VALID) + printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n", + tlbsel, i, tlbe->mas1, tlbe->mas2, + tlbe->mas3, tlbe->mas7); + } + } + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + printk("Shadow TLB%d:\n", tlbsel); + for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) { + tlbe = &vcpu_e500->shadow_tlb[tlbsel][i]; + if (tlbe->mas1 & MAS1_VALID) + printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n", + tlbsel, i, tlbe->mas1, tlbe->mas2, + tlbe->mas3, tlbe->mas7); + } + } +} + +static inline unsigned int tlb0_get_next_victim( + struct kvmppc_vcpu_e500 *vcpu_e500) +{ + unsigned int victim; + + victim = vcpu_e500->guest_tlb_nv[0]++; + if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) + vcpu_e500->guest_tlb_nv[0] = 0; + + return victim; +} + +static inline unsigned int tlb1_max_shadow_size(void) +{ + return tlb1_entry_num - tlbcam_index; +} + +static inline int tlbe_is_writable(struct tlbe *tlbe) +{ + return tlbe->mas3 & (MAS3_SW|MAS3_UW); +} + +static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) +{ + /* Mask off reserved bits. */ + mas3 &= MAS3_ATTRIB_MASK; + + if (!usermode) { + /* Guest is in supervisor mode, + * so we need to translate guest + * supervisor permissions into user permissions. 
*/ + mas3 &= ~E500_TLB_USER_PERM_MASK; + mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; + } + + return mas3 | E500_TLB_SUPER_PERM_MASK; +} + +static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) +{ +#ifdef CONFIG_SMP + return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; +#else + return mas2 & MAS2_ATTRIB_MASK; +#endif +} + +/* + * writing shadow tlb entry to host TLB + */ +static inline void __write_host_tlbe(struct tlbe *stlbe) +{ + mtspr(SPRN_MAS1, stlbe->mas1); + mtspr(SPRN_MAS2, stlbe->mas2); + mtspr(SPRN_MAS3, stlbe->mas3); + mtspr(SPRN_MAS7, stlbe->mas7); + __asm__ __volatile__ ("tlbwe\n" : : ); +} + +static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + + local_irq_disable(); + if (tlbsel == 0) { + __write_host_tlbe(stlbe); + } else { + unsigned register mas0; + + mas0 = mfspr(SPRN_MAS0); + + mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel))); + __write_host_tlbe(stlbe); + + mtspr(SPRN_MAS0, mas0); + } + local_irq_enable(); +} + +void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int i; + unsigned register mas0; + + /* Load all valid TLB1 entries to reduce guest tlb miss fault */ + local_irq_disable(); + mas0 = mfspr(SPRN_MAS0); + for (i = 0; i < tlb1_max_shadow_size(); i++) { + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i]; + + if (get_tlb_v(stlbe)) { + mtspr(SPRN_MAS0, MAS0_TLBSEL(1) + | MAS0_ESEL(to_htlb1_esel(i))); + __write_host_tlbe(stlbe); + } + } + mtspr(SPRN_MAS0, mas0); + local_irq_enable(); +} + +void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) +{ + _tlbil_all(); +} + +/* Search the guest TLB for a matching entry. */ +static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, + gva_t eaddr, int tlbsel, unsigned int pid, int as) +{ + int i; + + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) { + struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i]; + unsigned int tid; + + if (eaddr < get_tlb_eaddr(tlbe)) + continue; + + if (eaddr > get_tlb_end(tlbe)) + continue; + + tid = get_tlb_tid(tlbe); + if (tid && (tid != pid)) + continue; + + if (!get_tlb_v(tlbe)) + continue; + + if (get_tlb_ts(tlbe) != as && as != -1) + continue; + + return i; + } + + return -1; +} + +static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + struct page *page = vcpu_e500->shadow_pages[tlbsel][esel]; + + if (page) { + vcpu_e500->shadow_pages[tlbsel][esel] = NULL; + + if (get_tlb_v(stlbe)) { + if (tlbe_is_writable(stlbe)) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); + } + } +} + +static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + + kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); + stlbe->mas1 = 0; + KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel), + stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7, + handler); +} + +static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, + gva_t eaddr, gva_t eend, u32 tid) +{ + unsigned int pid = tid & 0xff; + unsigned int i; + + /* XXX Replace loop with fancy data structures. 
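e500_shadow_mas3_attrib() above works because, in the Freescale BookE MAS3 layout, each user permission bit sits one position above its supervisor counterpart, so a single shift mirrors supervisor rights into user rights. A standalone check, with bit values as defined in the kernel's BookE headers (treat them as assumptions and verify against the core manual):

#include <stdint.h>
#include <stdio.h>

/* MAS3 permission bits: user bit = supervisor bit << 1 */
#define MAS3_SR 0x01
#define MAS3_UR 0x02
#define MAS3_SW 0x04
#define MAS3_UW 0x08
#define MAS3_SX 0x10
#define MAS3_UX 0x20

#define SUPER_MASK (MAS3_SX | MAS3_SW | MAS3_SR)
#define USER_MASK  (MAS3_UX | MAS3_UW | MAS3_UR)

int main(void)
{
	uint32_t mas3 = MAS3_SR | MAS3_SX;	/* supervisor read + execute */

	mas3 &= ~USER_MASK;			/* drop the guest's user bits */
	mas3 |= (mas3 & SUPER_MASK) << 1;	/* mirror supervisor into user */
	printf("UR=%d UW=%d UX=%d\n",		/* UR=1 UW=0 UX=1 */
	       !!(mas3 & MAS3_UR), !!(mas3 & MAS3_UW), !!(mas3 & MAS3_UX));
	return 0;
}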
*/ + for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) { + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i]; + unsigned int tid; + + if (!get_tlb_v(stlbe)) + continue; + + if (eend < get_tlb_eaddr(stlbe)) + continue; + + if (eaddr > get_tlb_end(stlbe)) + continue; + + tid = get_tlb_tid(stlbe); + if (tid && (tid != pid)) + continue; + + kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); + write_host_tlbe(vcpu_e500, 1, i); + } +} + +static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, + unsigned int eaddr, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int victim, pidsel, tsized; + int tlbsel; + + /* since we only have two TLBs, only lower bit is used. */ + tlbsel = (vcpu_e500->mas4 >> 28) & 0x1; + victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; + pidsel = (vcpu_e500->mas4 >> 16) & 0xf; + tsized = (vcpu_e500->mas4 >> 8) & 0xf; + + vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) + | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) + | MAS1_TID(vcpu_e500->pid[pidsel]) + | MAS1_TSIZE(tsized); + vcpu_e500->mas2 = (eaddr & MAS2_EPN) + | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK); + vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1) + | (get_cur_pid(vcpu) << 16) + | (as ? MAS6_SAS : 0); + vcpu_e500->mas7 = 0; +} + +static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) +{ + struct page *new_page; + struct tlbe *stlbe; + hpa_t hpaddr; + + stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + + /* Get reference to new page. */ + new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn); + if (is_error_page(new_page)) { + printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); + kvm_release_page_clean(new_page); + return; + } + hpaddr = page_to_phys(new_page); + + /* Drop reference to old page. */ + kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); + + vcpu_e500->shadow_pages[tlbsel][esel] = new_page; + + /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K) + | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) + | e500_shadow_mas2_attrib(gtlbe->mas2, + vcpu_e500->vcpu.arch.msr & MSR_PR); + stlbe->mas3 = (hpaddr & MAS3_RPN) + | e500_shadow_mas3_attrib(gtlbe->mas3, + vcpu_e500->vcpu.arch.msr & MSR_PR); + stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; + + KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel), + stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7, + handler); +} + +/* XXX only map the one-one case, for now use TLB0 */ +static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *gtlbe; + + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + + kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), + get_tlb_raddr(gtlbe) >> PAGE_SHIFT, + gtlbe, tlbsel, esel); + + return esel; +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. 
*/ +/* XXX for both one-to-one and one-to-many; for now use TLB1 */ +static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe) +{ + unsigned int victim; + + victim = vcpu_e500->guest_tlb_nv[1]++; + + if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size())) + vcpu_e500->guest_tlb_nv[1] = 0; + + kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim); + + return victim; +} + +/* Invalidate all guest kernel mappings when entering usermode, + * so that when they fault back in they will get the + * proper permission bits. */ +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +{ + if (usermode) { + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int i; + + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i < tlb1_max_shadow_size(); i++) + kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); + + _tlbil_all(); + } +} + +static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + + if (unlikely(get_tlb_iprot(gtlbe))) + return -1; + + if (tlbsel == 1) { + kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe), + get_tlb_end(gtlbe), + get_tlb_tid(gtlbe)); + } else { + kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel); + } + + gtlbe->mas1 = 0; + + return 0; +} + +int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) +{ + int esel; + + if (value & MMUCSR0_TLB0FI) + for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); + if (value & MMUCSR0_TLB1FI) + for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); + + _tlbil_all(); + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int ia; + int esel, tlbsel; + gva_t ea; + + ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb]; + + ia = (ea >> 2) & 0x1; + + /* since we only have two TLBs, only lower bit is used.
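tlb0_get_next_victim() earlier and kvmppc_e500_tlb1_map() above share the same wrapping next-victim replacement policy. A minimal standalone model (illustrative only):

#include <stdio.h>

#define NENTRIES 4

static unsigned int next_victim_nv;

static unsigned int next_victim(void)
{
	unsigned int victim = next_victim_nv++;

	if (next_victim_nv >= NENTRIES)
		next_victim_nv = 0;	/* wrap around, like guest_tlb_nv[] */
	return victim;
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++)
		printf("%u ", next_victim());	/* 0 1 2 3 0 1 */
	printf("\n");
	return 0;
}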
*/ + tlbsel = (ea >> 3) & 0x1; + + if (ia) { + /* invalidate all entries */ + for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } else { + ea &= 0xfffff000; + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, + get_cur_pid(vcpu), -1); + if (esel >= 0) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } + + _tlbil_all(); + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int tlbsel, esel; + struct tlbe *gtlbe; + + tlbsel = get_tlb_tlbsel(vcpu_e500); + esel = get_tlb_esel(vcpu_e500, tlbsel); + + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + vcpu_e500->mas0 &= ~MAS0_NV(~0); + vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas1 = gtlbe->mas1; + vcpu_e500->mas2 = gtlbe->mas2; + vcpu_e500->mas3 = gtlbe->mas3; + vcpu_e500->mas7 = gtlbe->mas7; + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int as = !!get_cur_sas(vcpu_e500); + unsigned int pid = get_cur_spid(vcpu_e500); + int esel, tlbsel; + struct tlbe *gtlbe = NULL; + gva_t ea; + + ea = vcpu->arch.gpr[rb]; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); + if (esel >= 0) { + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + break; + } + } + + if (gtlbe) { + vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) + | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas1 = gtlbe->mas1; + vcpu_e500->mas2 = gtlbe->mas2; + vcpu_e500->mas3 = gtlbe->mas3; + vcpu_e500->mas7 = gtlbe->mas7; + } else { + int victim; + + /* since we only have two TLBs, only lower bit is used. */ + tlbsel = vcpu_e500->mas4 >> 28 & 0x1; + victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; + + vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) + | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0) + | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0)) + | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); + vcpu_e500->mas2 &= MAS2_EPN; + vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK; + vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu_e500->mas7 = 0; + } + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + u64 eaddr; + u64 raddr; + u32 tid; + struct tlbe *gtlbe; + int tlbsel, esel, stlbsel, sesel; + + tlbsel = get_tlb_tlbsel(vcpu_e500); + esel = get_tlb_esel(vcpu_e500, tlbsel); + + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + + if (get_tlb_v(gtlbe) && tlbsel == 1) { + eaddr = get_tlb_eaddr(gtlbe); + tid = get_tlb_tid(gtlbe); + kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr, + get_tlb_end(gtlbe), tid); + } + + gtlbe->mas1 = vcpu_e500->mas1; + gtlbe->mas2 = vcpu_e500->mas2; + gtlbe->mas3 = vcpu_e500->mas3; + gtlbe->mas7 = vcpu_e500->mas7; + + KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0, + gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7, + handler); + + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. 
*/ + if (tlbe_is_host_safe(vcpu, gtlbe)) { + switch (tlbsel) { + case 0: + /* TLB0 */ + gtlbe->mas1 &= ~MAS1_TSIZE(~0); + gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K); + + stlbsel = 0; + sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel); + + break; + + case 1: + /* TLB1 */ + eaddr = get_tlb_eaddr(gtlbe); + raddr = get_tlb_raddr(gtlbe); + + /* Create a 4KB mapping on the host. + * If the guest wanted a large page, + * only the first 4KB is mapped here and the rest + * are mapped on the fly. */ + stlbsel = 1; + sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, + raddr >> PAGE_SHIFT, gtlbe); + break; + + default: + BUG(); + } + write_host_tlbe(vcpu_e500, stlbsel, sesel); + } + + return EMULATE_DONE; +} + +int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_IS); + + return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); +} + +int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_DS); + + return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); +} + +void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_IS); + + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); +} + +void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_DS); + + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as); +} + +gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, + gva_t eaddr) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe *gtlbe = + &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)]; + u64 pgmask = get_tlb_bytes(gtlbe) - 1; + + return get_tlb_raddr(gtlbe) | (eaddr & pgmask); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int tlbsel, i; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) + for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) + kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i); + + /* discard all guest mapping */ + _tlbil_all(); +} + +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, + unsigned int index) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int tlbsel = tlbsel_of(index); + int esel = esel_of(index); + int stlbsel, sesel; + + switch (tlbsel) { + case 0: + stlbsel = 0; + sesel = esel; + break; + + case 1: { + gfn_t gfn = gpaddr >> PAGE_SHIFT; + struct tlbe *gtlbe + = &vcpu_e500->guest_tlb[tlbsel][esel]; + + stlbsel = 1; + sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe); + break; + } + + default: + BUG(); + break; + } + write_host_tlbe(vcpu_e500, stlbsel, sesel); +} + +int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, + gva_t eaddr, unsigned int pid, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel, tlbsel; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); + if (esel >= 0) + return index_of(tlbsel, esel); + } + + return -1; +} + +void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + struct tlbe *tlbe; + + /* Insert large initial mapping for guest. */ + tlbe = &vcpu_e500->guest_tlb[1][0]; + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M); + tlbe->mas2 = 0; + tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; + tlbe->mas7 = 0; + + /* 4K map for serial output. Used by kernel wrapper. 
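The TLB1 case above deliberately maps only the first 4KB of a large guest page and lets the rest fault in on demand, because MAS1[TSIZE] encodes power-of-4 page sizes that can reach 256MB. A standalone check of the get_tlb_bytes() math, assuming the BOOKE_PAGESZ_4K=1 / BOOKE_PAGESZ_256M=9 encoding used by these cores:

#include <stdint.h>
#include <stdio.h>

/* bytes = 1KB * 4^tsize, as in get_tlb_bytes() */
static uint64_t tsize_to_bytes(unsigned int tsize)
{
	return 1ULL << 10 << (tsize << 1);
}

int main(void)
{
	printf("%llu\n", (unsigned long long)tsize_to_bytes(1)); /* 4096: 4KB */
	printf("%llu\n", (unsigned long long)tsize_to_bytes(9)); /* 268435456: 256MB */
	return 0;
}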
*/ + tlbe = &vcpu_e500->guest_tlb[1][1]; + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K); + tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; + tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; + tlbe->mas7 = 0; +} + +int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF; + + vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE; + vcpu_e500->guest_tlb[0] = + kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); + if (vcpu_e500->guest_tlb[0] == NULL) + goto err_out; + + vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE; + vcpu_e500->shadow_tlb[0] = + kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); + if (vcpu_e500->shadow_tlb[0] == NULL) + goto err_out_guest0; + + vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE; + vcpu_e500->guest_tlb[1] = + kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); + if (vcpu_e500->guest_tlb[1] == NULL) + goto err_out_shadow0; + + vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num; + vcpu_e500->shadow_tlb[1] = + kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL); + if (vcpu_e500->shadow_tlb[1] == NULL) + goto err_out_guest1; + + vcpu_e500->shadow_pages[0] = (struct page **) + kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL); + if (vcpu_e500->shadow_pages[0] == NULL) + goto err_out_shadow1; + + vcpu_e500->shadow_pages[1] = (struct page **) + kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL); + if (vcpu_e500->shadow_pages[1] == NULL) + goto err_out_page0; + + return 0; + +err_out_page0: + kfree(vcpu_e500->shadow_pages[0]); +err_out_shadow1: + kfree(vcpu_e500->shadow_tlb[1]); +err_out_guest1: + kfree(vcpu_e500->guest_tlb[1]); +err_out_shadow0: + kfree(vcpu_e500->shadow_tlb[0]); +err_out_guest0: + kfree(vcpu_e500->guest_tlb[0]); +err_out: + return -1; +} + +void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kfree(vcpu_e500->shadow_pages[1]); + kfree(vcpu_e500->shadow_pages[0]); + kfree(vcpu_e500->shadow_tlb[1]); + kfree(vcpu_e500->guest_tlb[1]); + kfree(vcpu_e500->shadow_tlb[0]); + kfree(vcpu_e500->guest_tlb[0]); +} diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h new file mode 100644 index 000000000000..45b064b76906 --- /dev/null +++ b/arch/powerpc/kvm/e500_tlb.h @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.h, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ */ + +#ifndef __KVM_E500_TLB_H__ +#define __KVM_E500_TLB_H__ + +#include <linux/kvm_host.h> +#include <asm/mmu-fsl-booke.h> +#include <asm/tlb.h> +#include <asm/kvm_e500.h> + +#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */ +#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT) +#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1) + +#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* Not greater than 7 */ +#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT) +#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1) + +#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) +#define KVM_E500_TLB1_SIZE 16 + +#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF)) +#define tlbsel_of(index) ((index) >> 16) +#define esel_of(index) ((index) & 0xFFFF) + +#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) +#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) +#define MAS2_ATTRIB_MASK \ + (MAS2_X0 | MAS2_X1) +#define MAS3_ATTRIB_MASK \ + (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ + | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) + +extern void kvmppc_dump_tlbs(struct kvm_vcpu *); +extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong); +extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int); +extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int); +extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); +extern void kvmppc_e500_tlb_put(struct kvm_vcpu *); +extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); +extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); + +/* TLB helper functions */ +static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 8) & 0xf; +} + +static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +{ + return tlbe->mas2 & 0xfffff000; +} + +static inline u64 get_tlb_bytes(const struct tlbe *tlbe) +{ + unsigned int pgsize = get_tlb_size(tlbe); + return 1ULL << 10 << (pgsize << 1); +} + +static inline gva_t get_tlb_end(const struct tlbe *tlbe) +{ + u64 bytes = get_tlb_bytes(tlbe); + return get_tlb_eaddr(tlbe) + bytes - 1; +} + +static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +{ + u64 rpn = tlbe->mas7; + return (rpn << 32) | (tlbe->mas3 & 0xfffff000); +} + +static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 16) & 0xff; +} + +static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 12) & 0x1; +} + +static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 31) & 0x1; +} + +static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 30) & 0x1; +} + +static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.pid & 0xff; +} + +static inline unsigned int get_cur_spid( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + return (vcpu_e500->mas6 >> 16) & 0xff; +} + +static inline unsigned int get_cur_sas( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + return vcpu_e500->mas6 & 0x1; +} + +static inline unsigned int get_tlb_tlbsel( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + /* + * The manual says that tlbsel is 2 bits wide. + * Since we only have two TLBs, only lower bit is used.
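The index_of()/tlbsel_of()/esel_of() macros above flatten a (tlbsel, esel) pair into the single int that the generic kvmppc_mmu_* interface passes around. A standalone round-trip (illustrative only):

#include <stdio.h>

#define index_of(tlbsel, esel)	(((tlbsel) << 16) | ((esel) & 0xFFFF))
#define tlbsel_of(index)	((index) >> 16)
#define esel_of(index)		((index) & 0xFFFF)

int main(void)
{
	int index = index_of(1, 5);

	/* round-trips to tlbsel=1 esel=5 */
	printf("tlbsel=%d esel=%d\n", tlbsel_of(index), esel_of(index));
	return 0;
}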
+ */ + return (vcpu_e500->mas0 >> 28) & 0x1; +} + +static inline unsigned int get_tlb_nv_bit( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + return vcpu_e500->mas0 & 0xfff; +} + +static inline unsigned int get_tlb_esel_bit( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + return (vcpu_e500->mas0 >> 16) & 0xfff; +} + +static inline unsigned int get_tlb_esel( + const struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel) +{ + unsigned int esel = get_tlb_esel_bit(vcpu_e500); + + if (tlbsel == 0) { + esel &= KVM_E500_TLB0_WAY_NUM_MASK; + esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK) + << KVM_E500_TLB0_WAY_NUM_BIT; + } else { + esel &= KVM_E500_TLB1_SIZE - 1; + } + + return esel; +} + +static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct tlbe *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +#endif /* __KVM_E500_TLB_H__ */ diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index d1d38daa93fb..a561d6e8da1c 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -30,6 +30,39 @@ #include <asm/disassemble.h> #include "timing.h" +#define OP_TRAP 3 + +#define OP_31_XOP_LWZX 23 +#define OP_31_XOP_LBZX 87 +#define OP_31_XOP_STWX 151 +#define OP_31_XOP_STBX 215 +#define OP_31_XOP_STBUX 247 +#define OP_31_XOP_LHZX 279 +#define OP_31_XOP_LHZUX 311 +#define OP_31_XOP_MFSPR 339 +#define OP_31_XOP_STHX 407 +#define OP_31_XOP_STHUX 439 +#define OP_31_XOP_MTSPR 467 +#define OP_31_XOP_DCBI 470 +#define OP_31_XOP_LWBRX 534 +#define OP_31_XOP_TLBSYNC 566 +#define OP_31_XOP_STWBRX 662 +#define OP_31_XOP_LHBRX 790 +#define OP_31_XOP_STHBRX 918 + +#define OP_LWZ 32 +#define OP_LWZU 33 +#define OP_LBZ 34 +#define OP_LBZU 35 +#define OP_STW 36 +#define OP_STWU 37 +#define OP_STB 38 +#define OP_STBU 39 +#define OP_LHZ 40 +#define OP_LHZU 41 +#define OP_STH 44 +#define OP_STHU 45 + void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { if (vcpu->arch.tcr & TCR_DIE) { @@ -78,7 +111,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); switch (get_op(inst)) { - case 3: /* trap */ + case OP_TRAP: vcpu->arch.esr |= ESR_PTR; kvmppc_core_queue_program(vcpu); advance = 0; @@ -87,31 +120,31 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case 31: switch (get_xop(inst)) { - case 23: /* lwzx */ + case OP_31_XOP_LWZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; - case 87: /* lbzx */ + case OP_31_XOP_LBZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; - case 151: /* stwx */ + case OP_31_XOP_STWX: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 4, 1); break; - case 215: /* stbx */ + case OP_31_XOP_STBX: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 1, 1); break; - case 247: /* stbux */ + case OP_31_XOP_STBUX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -126,12 +159,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[rs] = ea; break; - case 279: /* lhzx */ + case OP_31_XOP_LHZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; - case 311: /* 
lhzux */ + case OP_31_XOP_LHZUX: rt = get_rt(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -144,7 +177,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 339: /* mfspr */ + case OP_31_XOP_MFSPR: sprn = get_sprn(inst); rt = get_rt(inst); @@ -185,7 +218,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; - case 407: /* sthx */ + case OP_31_XOP_STHX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -195,7 +228,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 2, 1); break; - case 439: /* sthux */ + case OP_31_XOP_STHUX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -210,7 +243,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 467: /* mtspr */ + case OP_31_XOP_MTSPR: sprn = get_sprn(inst); rs = get_rs(inst); switch (sprn) { @@ -246,7 +279,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; - case 470: /* dcbi */ + case OP_31_XOP_DCBI: /* Do nothing. The guest is performing dcbi because * hardware DMA is not snooped by the dcache, but * emulated DMA either goes through the dcache as @@ -254,15 +287,15 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) * coherence. */ break; - case 534: /* lwbrx */ + case OP_31_XOP_LWBRX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); break; - case 566: /* tlbsync */ + case OP_31_XOP_TLBSYNC: break; - case 662: /* stwbrx */ + case OP_31_XOP_STWBRX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -272,12 +305,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 0); break; - case 790: /* lhbrx */ + case OP_31_XOP_LHBRX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); break; - case 918: /* sthbrx */ + case OP_31_XOP_STHBRX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -293,37 +326,37 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; - case 32: /* lwz */ + case OP_LWZ: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; - case 33: /* lwzu */ + case OP_LWZU: ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 34: /* lbz */ + case OP_LBZ: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; - case 35: /* lbzu */ + case OP_LBZU: ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 36: /* stw */ + case OP_STW: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 4, 1); break; - case 37: /* stwu */ + case OP_STWU: ra = get_ra(inst); rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], @@ -331,13 +364,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 38: /* stb */ + case OP_STB: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 1, 1); break; - case 39: /* stbu */ + case OP_STBU: ra = get_ra(inst); rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], @@ -345,25 +378,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 
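/* the update forms also write the just-accessed address back into ra */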
break; - case 40: /* lhz */ + case OP_LHZ: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; - case 41: /* lhzu */ + case OP_LHZU: ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 44: /* sth */ + case OP_STH: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 2, 1); break; - case 45: /* sthu */ + case OP_STHU: ra = get_ra(inst); rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 5f81256287f5..9057335fdc61 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -216,46 +216,23 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - kvmppc_core_destroy_mmu(vcpu); + kvmppc_mmu_destroy(vcpu); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - if (vcpu->guest_debug.enabled) - kvmppc_core_load_guest_debugstate(vcpu); - kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { - if (vcpu->guest_debug.enabled) - kvmppc_core_load_host_debugstate(vcpu); - - /* Don't leave guest TLB entries resident when being de-scheduled. */ - /* XXX It would be nice to differentiate between heavyweight exit and - * sched_out here, since we could avoid the TLB flush for heavyweight - * exits. */ - _tlbil_all(); kvmppc_core_vcpu_put(vcpu); } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { - int i; - - vcpu->guest_debug.enabled = dbg->enabled; - if (vcpu->guest_debug.enabled) { - for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) { - if (dbg->breakpoints[i].enabled) - vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address; - else - vcpu->guest_debug.bp[i] = 0; - } - } - - return 0; + return -EINVAL; } static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 28c04dab2633..882e47080e74 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -237,8 +237,6 @@ extern int noirqdebug; static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) { - const unsigned int cpu = smp_processor_id(); - spin_lock(&desc->lock); desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); @@ -254,7 +252,7 @@ static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) goto out_eoi; } - kstat_cpu(cpu).irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 6a0ad196aeb3..f085369301b1 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -508,7 +508,7 @@ static void __spu_add_to_rq(struct spu_context *ctx) list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); set_bit(ctx->prio, spu_prio->bitmap); if (!spu_prio->nr_waiting++) - __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); } } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6b0a3538dc63..2a8af5e16345 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -343,13 +343,6 @@ source "mm/Kconfig" comment "I/O subsystem configuration" -config 
MACHCHK_WARNING - bool "Process warning machine checks" - help - Select this option if you want the machine check handler on IBM S/390 or - zSeries to process warning machine checks (e.g. on power failures). - If unsure, say "Y". - config QDIO tristate "QDIO support" ---help--- @@ -521,7 +514,7 @@ config APPLDATA_OS config APPLDATA_NET_SUM tristate "Monitor overall network statistics" - depends on APPLDATA_BASE + depends on APPLDATA_BASE && NET help This provides network related data to the Linux - VM Monitor Stream, currently there is only a total sum of network I/O statistics, no @@ -552,7 +545,7 @@ config KEXEC but is independent of hardware/microcode support. config ZFCPDUMP - tristate "zfcpdump support" + bool "zfcpdump support" select SMP default n help diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index eca724d229ec..b49c00ce65e9 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -201,8 +201,7 @@ out_free: static void __exit prng_exit(void) { /* wipe me */ - memset(p->buf, 0, prng_chunk_size); - kfree(p->buf); + kzfree(p->buf); kfree(p); misc_deregister(&prng_dev); diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 8e9243ae0c19..b30606f6d523 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -57,7 +57,7 @@ * with operation of the form "set_bit(bitnr, flags)". */ -/* bitmap tables from arch/S390/kernel/bitmap.S */ +/* bitmap tables from arch/s390/kernel/bitmap.c */ extern const char _oi_bitmap[]; extern const char _ni_bitmap[]; extern const char _zb_findmap[]; @@ -525,16 +525,16 @@ static inline unsigned long __ffs_word_loop(const unsigned long *addr, static inline unsigned long __ffz_word(unsigned long nr, unsigned long word) { #ifdef __s390x__ - if (likely((word & 0xffffffff) == 0xffffffff)) { + if ((word & 0xffffffff) == 0xffffffff) { word >>= 32; nr += 32; } #endif - if (likely((word & 0xffff) == 0xffff)) { + if ((word & 0xffff) == 0xffff) { word >>= 16; nr += 16; } - if (likely((word & 0xff) == 0xff)) { + if ((word & 0xff) == 0xff) { word >>= 8; nr += 8; } @@ -549,16 +549,16 @@ static inline unsigned long __ffz_word(unsigned long nr, unsigned long word) static inline unsigned long __ffs_word(unsigned long nr, unsigned long word) { #ifdef __s390x__ - if (likely((word & 0xffffffff) == 0)) { + if ((word & 0xffffffff) == 0) { word >>= 32; nr += 32; } #endif - if (likely((word & 0xffff) == 0)) { + if ((word & 0xffff) == 0) { word >>= 16; nr += 16; } - if (likely((word & 0xff) == 0)) { + if ((word & 0xff) == 0) { word >>= 8; nr += 8; } diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h new file mode 100644 index 000000000000..2185a6d619d3 --- /dev/null +++ b/arch/s390/include/asm/crw.h @@ -0,0 +1,68 @@ +/* + * Data definitions for channel report processing + * Copyright IBM Corp. 
2000,2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#ifndef _ASM_S390_CRW_H +#define _ASM_S390_CRW_H + +#include <linux/types.h> + +/* + * Channel Report Word + */ +struct crw { + __u32 res1 : 1; /* reserved zero */ + __u32 slct : 1; /* solicited */ + __u32 oflw : 1; /* overflow */ + __u32 chn : 1; /* chained */ + __u32 rsc : 4; /* reporting source code */ + __u32 anc : 1; /* ancillary report */ + __u32 res2 : 1; /* reserved zero */ + __u32 erc : 6; /* error-recovery code */ + __u32 rsid : 16; /* reporting-source ID */ +} __attribute__ ((packed)); + +typedef void (*crw_handler_t)(struct crw *, struct crw *, int); + +extern int crw_register_handler(int rsc, crw_handler_t handler); +extern void crw_unregister_handler(int rsc); +extern void crw_handle_channel_report(void); + +#define NR_RSCS 16 + +#define CRW_RSC_MONITOR 0x2 /* monitoring facility */ +#define CRW_RSC_SCH 0x3 /* subchannel */ +#define CRW_RSC_CPATH 0x4 /* channel path */ +#define CRW_RSC_CONFIG 0x9 /* configuration-alert facility */ +#define CRW_RSC_CSS 0xB /* channel subsystem */ + +#define CRW_ERC_EVENT 0x00 /* event information pending */ +#define CRW_ERC_AVAIL 0x01 /* available */ +#define CRW_ERC_INIT 0x02 /* initialized */ +#define CRW_ERC_TERROR 0x03 /* temporary error */ +#define CRW_ERC_IPARM 0x04 /* installed parm initialized */ +#define CRW_ERC_TERM 0x05 /* terminal */ +#define CRW_ERC_PERRN 0x06 /* perm. error, fac. not init */ +#define CRW_ERC_PERRI 0x07 /* perm. error, facility init */ +#define CRW_ERC_PMOD 0x08 /* installed parameters modified */ + +static inline int stcrw(struct crw *pcrw) +{ + int ccode; + + asm volatile( + " stcrw 0(%2)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (ccode), "=m" (*pcrw) + : "a" (pcrw) + : "cc" ); + return ccode; +} + +#endif /* _ASM_S390_CRW_H */ diff --git a/arch/s390/include/asm/dasd.h b/arch/s390/include/asm/dasd.h index e2db6f16d9c8..218bce81ec70 100644 --- a/arch/s390/include/asm/dasd.h +++ b/arch/s390/include/asm/dasd.h @@ -162,15 +162,15 @@ typedef struct dasd_profile_info_t { unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */ } dasd_profile_info_t; -/* +/* * struct format_data_t * represents all data necessary to format a dasd */ typedef struct format_data_t { - int start_unit; /* from track */ - int stop_unit; /* to track */ - int blksize; /* sectorsize */ - int intensity; + unsigned int start_unit; /* from track */ + unsigned int stop_unit; /* to track */ + unsigned int blksize; /* sectorsize */ + unsigned int intensity; } format_data_t; /* diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h index e82c10efe65a..aae276d00383 100644 --- a/arch/s390/include/asm/idals.h +++ b/arch/s390/include/asm/idals.h @@ -44,24 +44,18 @@ idal_is_needed(void *vaddr, unsigned int length) /* * Return the number of idal words needed for an address/length pair. */ -static inline unsigned int -idal_nr_words(void *vaddr, unsigned int length) +static inline unsigned int idal_nr_words(void *vaddr, unsigned int length) { -#ifdef __s390x__ - if (idal_is_needed(vaddr, length)) - return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length + - (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG; -#endif - return 0; + return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length + + (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG; } /* * Create the list of idal words for an address/length pair. 
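 * The first word holds the exact start address; each following word points * to the next IDA_BLOCK_SIZE boundary within the buffer.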
*/ -static inline unsigned long * -idal_create_words(unsigned long *idaws, void *vaddr, unsigned int length) +static inline unsigned long *idal_create_words(unsigned long *idaws, + void *vaddr, unsigned int length) { -#ifdef __s390x__ unsigned long paddr; unsigned int cidaw; @@ -74,7 +68,6 @@ idal_create_words(unsigned long *idaws, void *vaddr, unsigned int length) paddr += IDA_BLOCK_SIZE; *idaws++ = paddr; } -#endif return idaws; } diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index e1f54654e3ae..0b2f829f6d50 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -42,4 +42,11 @@ struct kvm_fpu { __u64 fprs[16]; }; +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3c55e4107dcc..c6e674f5fca9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -21,9 +21,6 @@ /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 -struct kvm_guest_debug { -}; - struct sca_entry { atomic_t scn; __u64 reserved; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index f3720defdd16..b349f1c7fdfa 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -11,129 +11,118 @@ #ifndef _ASM_S390_LOWCORE_H #define _ASM_S390_LOWCORE_H -#ifndef __s390x__ -#define __LC_EXT_OLD_PSW 0x018 -#define __LC_SVC_OLD_PSW 0x020 -#define __LC_PGM_OLD_PSW 0x028 -#define __LC_MCK_OLD_PSW 0x030 -#define __LC_IO_OLD_PSW 0x038 -#define __LC_EXT_NEW_PSW 0x058 -#define __LC_SVC_NEW_PSW 0x060 -#define __LC_PGM_NEW_PSW 0x068 -#define __LC_MCK_NEW_PSW 0x070 -#define __LC_IO_NEW_PSW 0x078 -#else /* !__s390x__ */ -#define __LC_EXT_OLD_PSW 0x0130 -#define __LC_SVC_OLD_PSW 0x0140 -#define __LC_PGM_OLD_PSW 0x0150 -#define __LC_MCK_OLD_PSW 0x0160 -#define __LC_IO_OLD_PSW 0x0170 -#define __LC_EXT_NEW_PSW 0x01b0 -#define __LC_SVC_NEW_PSW 0x01c0 -#define __LC_PGM_NEW_PSW 0x01d0 -#define __LC_MCK_NEW_PSW 0x01e0 -#define __LC_IO_NEW_PSW 0x01f0 -#endif /* !__s390x__ */ - -#define __LC_IPL_PARMBLOCK_PTR 0x014 -#define __LC_EXT_PARAMS 0x080 -#define __LC_CPU_ADDRESS 0x084 -#define __LC_EXT_INT_CODE 0x086 - -#define __LC_SVC_ILC 0x088 -#define __LC_SVC_INT_CODE 0x08A -#define __LC_PGM_ILC 0x08C -#define __LC_PGM_INT_CODE 0x08E +#define __LC_IPL_PARMBLOCK_PTR 0x0014 +#define __LC_EXT_PARAMS 0x0080 +#define __LC_CPU_ADDRESS 0x0084 +#define __LC_EXT_INT_CODE 0x0086 -#define __LC_PER_ATMID 0x096 -#define __LC_PER_ADDRESS 0x098 -#define __LC_PER_ACCESS_ID 0x0A1 -#define __LC_AR_MODE_ID 0x0A3 +#define __LC_SVC_ILC 0x0088 +#define __LC_SVC_INT_CODE 0x008a +#define __LC_PGM_ILC 0x008c +#define __LC_PGM_INT_CODE 0x008e -#define __LC_SUBCHANNEL_ID 0x0B8 -#define __LC_SUBCHANNEL_NR 0x0BA -#define __LC_IO_INT_PARM 0x0BC -#define __LC_IO_INT_WORD 0x0C0 -#define __LC_MCCK_CODE 0x0E8 +#define __LC_PER_ATMID 0x0096 +#define __LC_PER_ADDRESS 0x0098 +#define __LC_PER_ACCESS_ID 0x00a1 +#define __LC_AR_MODE_ID 0x00a3 -#define __LC_LAST_BREAK 0x110 - -#define __LC_RETURN_PSW 0x200 - -#define __LC_SAVE_AREA 0xC00 - -#ifndef __s390x__ -#define __LC_IRB 0x208 -#define __LC_SYNC_ENTER_TIMER 0x248 -#define __LC_ASYNC_ENTER_TIMER 0x250 -#define __LC_EXIT_TIMER 0x258 -#define __LC_USER_TIMER 0x260 -#define __LC_SYSTEM_TIMER 0x268 -#define __LC_STEAL_TIMER 0x270 -#define __LC_LAST_UPDATE_TIMER 0x278 -#define __LC_LAST_UPDATE_CLOCK 0x280 -#define __LC_RETURN_MCCK_PSW 0x288
-#define __LC_KERNEL_STACK 0xC40 -#define __LC_THREAD_INFO 0xC44 -#define __LC_ASYNC_STACK 0xC48 -#define __LC_KERNEL_ASCE 0xC4C -#define __LC_USER_ASCE 0xC50 -#define __LC_PANIC_STACK 0xC54 -#define __LC_CPUID 0xC60 -#define __LC_CPUADDR 0xC68 -#define __LC_IPLDEV 0xC7C -#define __LC_CURRENT 0xC90 -#define __LC_INT_CLOCK 0xC98 -#else /* __s390x__ */ -#define __LC_IRB 0x210 -#define __LC_SYNC_ENTER_TIMER 0x250 -#define __LC_ASYNC_ENTER_TIMER 0x258 -#define __LC_EXIT_TIMER 0x260 -#define __LC_USER_TIMER 0x268 -#define __LC_SYSTEM_TIMER 0x270 -#define __LC_STEAL_TIMER 0x278 -#define __LC_LAST_UPDATE_TIMER 0x280 -#define __LC_LAST_UPDATE_CLOCK 0x288 -#define __LC_RETURN_MCCK_PSW 0x290 -#define __LC_KERNEL_STACK 0xD40 -#define __LC_THREAD_INFO 0xD48 -#define __LC_ASYNC_STACK 0xD50 -#define __LC_KERNEL_ASCE 0xD58 -#define __LC_USER_ASCE 0xD60 -#define __LC_PANIC_STACK 0xD68 -#define __LC_CPUID 0xD80 -#define __LC_CPUADDR 0xD88 -#define __LC_IPLDEV 0xDB8 -#define __LC_CURRENT 0xDD8 -#define __LC_INT_CLOCK 0xDE8 -#define __LC_VDSO_PER_CPU 0xE38 -#endif /* __s390x__ */ +#define __LC_SUBCHANNEL_ID 0x00b8 +#define __LC_SUBCHANNEL_NR 0x00ba +#define __LC_IO_INT_PARM 0x00bc +#define __LC_IO_INT_WORD 0x00c0 +#define __LC_MCCK_CODE 0x00e8 -#define __LC_PASTE 0xE40 +#define __LC_DUMP_REIPL 0x0e00 -#define __LC_PANIC_MAGIC 0xE00 #ifndef __s390x__ -#define __LC_PFAULT_INTPARM 0x080 -#define __LC_CPU_TIMER_SAVE_AREA 0x0D8 -#define __LC_CLOCK_COMP_SAVE_AREA 0x0E0 -#define __LC_PSW_SAVE_AREA 0x100 -#define __LC_PREFIX_SAVE_AREA 0x108 -#define __LC_AREGS_SAVE_AREA 0x120 -#define __LC_FPREGS_SAVE_AREA 0x160 -#define __LC_GPREGS_SAVE_AREA 0x180 -#define __LC_CREGS_SAVE_AREA 0x1C0 +#define __LC_EXT_OLD_PSW 0x0018 +#define __LC_SVC_OLD_PSW 0x0020 +#define __LC_PGM_OLD_PSW 0x0028 +#define __LC_MCK_OLD_PSW 0x0030 +#define __LC_IO_OLD_PSW 0x0038 +#define __LC_EXT_NEW_PSW 0x0058 +#define __LC_SVC_NEW_PSW 0x0060 +#define __LC_PGM_NEW_PSW 0x0068 +#define __LC_MCK_NEW_PSW 0x0070 +#define __LC_IO_NEW_PSW 0x0078 +#define __LC_SAVE_AREA 0x0200 +#define __LC_RETURN_PSW 0x0240 +#define __LC_RETURN_MCCK_PSW 0x0248 +#define __LC_SYNC_ENTER_TIMER 0x0250 +#define __LC_ASYNC_ENTER_TIMER 0x0258 +#define __LC_EXIT_TIMER 0x0260 +#define __LC_USER_TIMER 0x0268 +#define __LC_SYSTEM_TIMER 0x0270 +#define __LC_STEAL_TIMER 0x0278 +#define __LC_LAST_UPDATE_TIMER 0x0280 +#define __LC_LAST_UPDATE_CLOCK 0x0288 +#define __LC_CURRENT 0x0290 +#define __LC_THREAD_INFO 0x0294 +#define __LC_KERNEL_STACK 0x0298 +#define __LC_ASYNC_STACK 0x029c +#define __LC_PANIC_STACK 0x02a0 +#define __LC_KERNEL_ASCE 0x02a4 +#define __LC_USER_ASCE 0x02a8 +#define __LC_USER_EXEC_ASCE 0x02ac +#define __LC_CPUID 0x02b0 +#define __LC_INT_CLOCK 0x02c8 +#define __LC_IRB 0x0300 +#define __LC_PFAULT_INTPARM 0x0080 +#define __LC_CPU_TIMER_SAVE_AREA 0x00d8 +#define __LC_CLOCK_COMP_SAVE_AREA 0x00e0 +#define __LC_PSW_SAVE_AREA 0x0100 +#define __LC_PREFIX_SAVE_AREA 0x0108 +#define __LC_AREGS_SAVE_AREA 0x0120 +#define __LC_FPREGS_SAVE_AREA 0x0160 +#define __LC_GPREGS_SAVE_AREA 0x0180 +#define __LC_CREGS_SAVE_AREA 0x01c0 #else /* __s390x__ */ -#define __LC_PFAULT_INTPARM 0x11B8 +#define __LC_LAST_BREAK 0x0110 +#define __LC_EXT_OLD_PSW 0x0130 +#define __LC_SVC_OLD_PSW 0x0140 +#define __LC_PGM_OLD_PSW 0x0150 +#define __LC_MCK_OLD_PSW 0x0160 +#define __LC_IO_OLD_PSW 0x0170 +#define __LC_EXT_NEW_PSW 0x01b0 +#define __LC_SVC_NEW_PSW 0x01c0 +#define __LC_PGM_NEW_PSW 0x01d0 +#define __LC_MCK_NEW_PSW 0x01e0 +#define __LC_IO_NEW_PSW 0x01f0 +#define __LC_SAVE_AREA 0x0200 +#define 
__LC_RETURN_PSW 0x0280 +#define __LC_RETURN_MCCK_PSW 0x0290 +#define __LC_SYNC_ENTER_TIMER 0x02a0 +#define __LC_ASYNC_ENTER_TIMER 0x02a8 +#define __LC_EXIT_TIMER 0x02b0 +#define __LC_USER_TIMER 0x02b8 +#define __LC_SYSTEM_TIMER 0x02c0 +#define __LC_STEAL_TIMER 0x02c8 +#define __LC_LAST_UPDATE_TIMER 0x02d0 +#define __LC_LAST_UPDATE_CLOCK 0x02d8 +#define __LC_CURRENT 0x02e0 +#define __LC_THREAD_INFO 0x02e8 +#define __LC_KERNEL_STACK 0x02f0 +#define __LC_ASYNC_STACK 0x02f8 +#define __LC_PANIC_STACK 0x0300 +#define __LC_KERNEL_ASCE 0x0308 +#define __LC_USER_ASCE 0x0310 +#define __LC_USER_EXEC_ASCE 0x0318 +#define __LC_CPUID 0x0320 +#define __LC_INT_CLOCK 0x0340 +#define __LC_VDSO_PER_CPU 0x0350 +#define __LC_IRB 0x0380 +#define __LC_PASTE 0x03c0 +#define __LC_PFAULT_INTPARM 0x11b8 #define __LC_FPREGS_SAVE_AREA 0x1200 -#define __LC_GPREGS_SAVE_AREA 0x1280 +#define __LC_GPREGS_SAVE_AREA 0x1280 #define __LC_PSW_SAVE_AREA 0x1300 #define __LC_PREFIX_SAVE_AREA 0x1318 -#define __LC_FP_CREG_SAVE_AREA 0x131C +#define __LC_FP_CREG_SAVE_AREA 0x131c #define __LC_TODREG_SAVE_AREA 0x1324 -#define __LC_CPU_TIMER_SAVE_AREA 0x1328 +#define __LC_CPU_TIMER_SAVE_AREA 0x1328 #define __LC_CLOCK_COMP_SAVE_AREA 0x1331 -#define __LC_AREGS_SAVE_AREA 0x1340 -#define __LC_CREGS_SAVE_AREA 0x1380 +#define __LC_AREGS_SAVE_AREA 0x1340 +#define __LC_CREGS_SAVE_AREA 0x1380 #endif /* __s390x__ */ #ifndef __ASSEMBLY__ @@ -198,222 +187,240 @@ union save_area { struct _lowcore { #ifndef __s390x__ - /* prefix area: defined by architecture */ - psw_t restart_psw; /* 0x000 */ - __u32 ccw2[4]; /* 0x008 */ - psw_t external_old_psw; /* 0x018 */ - psw_t svc_old_psw; /* 0x020 */ - psw_t program_old_psw; /* 0x028 */ - psw_t mcck_old_psw; /* 0x030 */ - psw_t io_old_psw; /* 0x038 */ - __u8 pad1[0x58-0x40]; /* 0x040 */ - psw_t external_new_psw; /* 0x058 */ - psw_t svc_new_psw; /* 0x060 */ - psw_t program_new_psw; /* 0x068 */ - psw_t mcck_new_psw; /* 0x070 */ - psw_t io_new_psw; /* 0x078 */ - __u32 ext_params; /* 0x080 */ - __u16 cpu_addr; /* 0x084 */ - __u16 ext_int_code; /* 0x086 */ - __u16 svc_ilc; /* 0x088 */ - __u16 svc_code; /* 0x08a */ - __u16 pgm_ilc; /* 0x08c */ - __u16 pgm_code; /* 0x08e */ - __u32 trans_exc_code; /* 0x090 */ - __u16 mon_class_num; /* 0x094 */ - __u16 per_perc_atmid; /* 0x096 */ - __u32 per_address; /* 0x098 */ - __u32 monitor_code; /* 0x09c */ - __u8 exc_access_id; /* 0x0a0 */ - __u8 per_access_id; /* 0x0a1 */ - __u8 pad2[0xB8-0xA2]; /* 0x0a2 */ - __u16 subchannel_id; /* 0x0b8 */ - __u16 subchannel_nr; /* 0x0ba */ - __u32 io_int_parm; /* 0x0bc */ - __u32 io_int_word; /* 0x0c0 */ - __u8 pad3[0xc8-0xc4]; /* 0x0c4 */ - __u32 stfl_fac_list; /* 0x0c8 */ - __u8 pad4[0xd4-0xcc]; /* 0x0cc */ - __u32 extended_save_area_addr; /* 0x0d4 */ - __u32 cpu_timer_save_area[2]; /* 0x0d8 */ - __u32 clock_comp_save_area[2]; /* 0x0e0 */ - __u32 mcck_interruption_code[2]; /* 0x0e8 */ - __u8 pad5[0xf4-0xf0]; /* 0x0f0 */ - __u32 external_damage_code; /* 0x0f4 */ - __u32 failing_storage_address; /* 0x0f8 */ - __u8 pad6[0x100-0xfc]; /* 0x0fc */ - __u32 st_status_fixed_logout[4];/* 0x100 */ - __u8 pad7[0x120-0x110]; /* 0x110 */ - __u32 access_regs_save_area[16];/* 0x120 */ - __u32 floating_pt_save_area[8]; /* 0x160 */ - __u32 gpregs_save_area[16]; /* 0x180 */ - __u32 cregs_save_area[16]; /* 0x1c0 */ - - psw_t return_psw; /* 0x200 */ - __u8 irb[64]; /* 0x208 */ - __u64 sync_enter_timer; /* 0x248 */ - __u64 async_enter_timer; /* 0x250 */ - __u64 exit_timer; /* 0x258 */ - __u64 user_timer; /* 0x260 */ - __u64 system_timer; /* 0x268 */ - __u64 
steal_timer; /* 0x270 */ - __u64 last_update_timer; /* 0x278 */ - __u64 last_update_clock; /* 0x280 */ - psw_t return_mcck_psw; /* 0x288 */ - __u8 pad8[0xc00-0x290]; /* 0x290 */ - - /* System info area */ - __u32 save_area[16]; /* 0xc00 */ - __u32 kernel_stack; /* 0xc40 */ - __u32 thread_info; /* 0xc44 */ - __u32 async_stack; /* 0xc48 */ - __u32 kernel_asce; /* 0xc4c */ - __u32 user_asce; /* 0xc50 */ - __u32 panic_stack; /* 0xc54 */ - __u32 user_exec_asce; /* 0xc58 */ - __u8 pad10[0xc60-0xc5c]; /* 0xc5c */ - /* entry.S sensitive area start */ - struct cpuinfo_S390 cpu_data; /* 0xc60 */ - __u32 ipl_device; /* 0xc7c */ - /* entry.S sensitive area end */ - - /* SMP info area: defined by DJB */ - __u64 clock_comparator; /* 0xc80 */ - __u32 ext_call_fast; /* 0xc88 */ - __u32 percpu_offset; /* 0xc8c */ - __u32 current_task; /* 0xc90 */ - __u32 softirq_pending; /* 0xc94 */ - __u64 int_clock; /* 0xc98 */ - __u8 pad11[0xe00-0xca0]; /* 0xca0 */ - - /* 0xe00 is used as indicator for dump tools */ - /* whether the kernel died with panic() or not */ - __u32 panic_magic; /* 0xe00 */ - - /* Align to the top 1k of prefix area */ - __u8 pad12[0x1000-0xe04]; /* 0xe04 */ + /* 0x0000 - 0x01ff: defined by architecture */ + psw_t restart_psw; /* 0x0000 */ + __u32 ccw2[4]; /* 0x0008 */ + psw_t external_old_psw; /* 0x0018 */ + psw_t svc_old_psw; /* 0x0020 */ + psw_t program_old_psw; /* 0x0028 */ + psw_t mcck_old_psw; /* 0x0030 */ + psw_t io_old_psw; /* 0x0038 */ + __u8 pad_0x0040[0x0058-0x0040]; /* 0x0040 */ + psw_t external_new_psw; /* 0x0058 */ + psw_t svc_new_psw; /* 0x0060 */ + psw_t program_new_psw; /* 0x0068 */ + psw_t mcck_new_psw; /* 0x0070 */ + psw_t io_new_psw; /* 0x0078 */ + __u32 ext_params; /* 0x0080 */ + __u16 cpu_addr; /* 0x0084 */ + __u16 ext_int_code; /* 0x0086 */ + __u16 svc_ilc; /* 0x0088 */ + __u16 svc_code; /* 0x008a */ + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + __u32 trans_exc_code; /* 0x0090 */ + __u16 mon_class_num; /* 0x0094 */ + __u16 per_perc_atmid; /* 0x0096 */ + __u32 per_address; /* 0x0098 */ + __u32 monitor_code; /* 0x009c */ + __u8 exc_access_id; /* 0x00a0 */ + __u8 per_access_id; /* 0x00a1 */ + __u8 pad_0x00a2[0x00b8-0x00a2]; /* 0x00a2 */ + __u16 subchannel_id; /* 0x00b8 */ + __u16 subchannel_nr; /* 0x00ba */ + __u32 io_int_parm; /* 0x00bc */ + __u32 io_int_word; /* 0x00c0 */ + __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */ + __u32 stfl_fac_list; /* 0x00c8 */ + __u8 pad_0x00cc[0x00d4-0x00cc]; /* 0x00cc */ + __u32 extended_save_area_addr; /* 0x00d4 */ + __u32 cpu_timer_save_area[2]; /* 0x00d8 */ + __u32 clock_comp_save_area[2]; /* 0x00e0 */ + __u32 mcck_interruption_code[2]; /* 0x00e8 */ + __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */ + __u32 external_damage_code; /* 0x00f4 */ + __u32 failing_storage_address; /* 0x00f8 */ + __u8 pad_0x00fc[0x0100-0x00fc]; /* 0x00fc */ + __u32 st_status_fixed_logout[4]; /* 0x0100 */ + __u8 pad_0x0110[0x0120-0x0110]; /* 0x0110 */ + + /* CPU register save area: defined by architecture */ + __u32 access_regs_save_area[16]; /* 0x0120 */ + __u32 floating_pt_save_area[8]; /* 0x0160 */ + __u32 gpregs_save_area[16]; /* 0x0180 */ + __u32 cregs_save_area[16]; /* 0x01c0 */ + + /* Return psws. 
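+ * (the entry/exit save area together with the PSWs loaded on the return + * paths)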
*/ + __u32 save_area[16]; /* 0x0200 */ + psw_t return_psw; /* 0x0240 */ + psw_t return_mcck_psw; /* 0x0248 */ + + /* CPU time accounting values */ + __u64 sync_enter_timer; /* 0x0250 */ + __u64 async_enter_timer; /* 0x0258 */ + __u64 exit_timer; /* 0x0260 */ + __u64 user_timer; /* 0x0268 */ + __u64 system_timer; /* 0x0270 */ + __u64 steal_timer; /* 0x0278 */ + __u64 last_update_timer; /* 0x0280 */ + __u64 last_update_clock; /* 0x0288 */ + + /* Current process. */ + __u32 current_task; /* 0x0290 */ + __u32 thread_info; /* 0x0294 */ + __u32 kernel_stack; /* 0x0298 */ + + /* Interrupt and panic stack. */ + __u32 async_stack; /* 0x029c */ + __u32 panic_stack; /* 0x02a0 */ + + /* Address space pointer. */ + __u32 kernel_asce; /* 0x02a4 */ + __u32 user_asce; /* 0x02a8 */ + __u32 user_exec_asce; /* 0x02ac */ + + /* SMP info area */ + cpuid_t cpu_id; /* 0x02b0 */ + __u32 cpu_nr; /* 0x02b8 */ + __u32 softirq_pending; /* 0x02bc */ + __u32 percpu_offset; /* 0x02c0 */ + __u32 ext_call_fast; /* 0x02c4 */ + __u64 int_clock; /* 0x02c8 */ + __u64 clock_comparator; /* 0x02d0 */ + __u8 pad_0x02d8[0x0300-0x02d8]; /* 0x02d8 */ + + /* Interrupt response block */ + __u8 irb[64]; /* 0x0300 */ + + __u8 pad_0x0400[0x0e00-0x0400]; /* 0x0400 */ + + /* + * 0xe00 contains the address of the IPL Parameter Information + * block. Dump tools need IPIB for IPL after dump. + * Note: do not change the position of any fields in 0x0e00-0x0f00 + */ + __u32 ipib; /* 0x0e00 */ + __u32 ipib_checksum; /* 0x0e04 */ + + /* Align to the top 1k of prefix area */ + __u8 pad_0x0e08[0x1000-0x0e08]; /* 0x0e08 */ #else /* !__s390x__ */ - /* prefix area: defined by architecture */ - __u32 ccw1[2]; /* 0x000 */ - __u32 ccw2[4]; /* 0x008 */ - __u8 pad1[0x80-0x18]; /* 0x018 */ - __u32 ext_params; /* 0x080 */ - __u16 cpu_addr; /* 0x084 */ - __u16 ext_int_code; /* 0x086 */ - __u16 svc_ilc; /* 0x088 */ - __u16 svc_code; /* 0x08a */ - __u16 pgm_ilc; /* 0x08c */ - __u16 pgm_code; /* 0x08e */ - __u32 data_exc_code; /* 0x090 */ - __u16 mon_class_num; /* 0x094 */ - __u16 per_perc_atmid; /* 0x096 */ - addr_t per_address; /* 0x098 */ - __u8 exc_access_id; /* 0x0a0 */ - __u8 per_access_id; /* 0x0a1 */ - __u8 op_access_id; /* 0x0a2 */ - __u8 ar_access_id; /* 0x0a3 */ - __u8 pad2[0xA8-0xA4]; /* 0x0a4 */ - addr_t trans_exc_code; /* 0x0A0 */ - addr_t monitor_code; /* 0x09c */ - __u16 subchannel_id; /* 0x0b8 */ - __u16 subchannel_nr; /* 0x0ba */ - __u32 io_int_parm; /* 0x0bc */ - __u32 io_int_word; /* 0x0c0 */ - __u8 pad3[0xc8-0xc4]; /* 0x0c4 */ - __u32 stfl_fac_list; /* 0x0c8 */ - __u8 pad4[0xe8-0xcc]; /* 0x0cc */ - __u32 mcck_interruption_code[2]; /* 0x0e8 */ - __u8 pad5[0xf4-0xf0]; /* 0x0f0 */ - __u32 external_damage_code; /* 0x0f4 */ - addr_t failing_storage_address; /* 0x0f8 */ - __u8 pad6[0x120-0x100]; /* 0x100 */ - psw_t restart_old_psw; /* 0x120 */ - psw_t external_old_psw; /* 0x130 */ - psw_t svc_old_psw; /* 0x140 */ - psw_t program_old_psw; /* 0x150 */ - psw_t mcck_old_psw; /* 0x160 */ - psw_t io_old_psw; /* 0x170 */ - __u8 pad7[0x1a0-0x180]; /* 0x180 */ - psw_t restart_psw; /* 0x1a0 */ - psw_t external_new_psw; /* 0x1b0 */ - psw_t svc_new_psw; /* 0x1c0 */ - psw_t program_new_psw; /* 0x1d0 */ - psw_t mcck_new_psw; /* 0x1e0 */ - psw_t io_new_psw; /* 0x1f0 */ - psw_t return_psw; /* 0x200 */ - __u8 irb[64]; /* 0x210 */ - __u64 sync_enter_timer; /* 0x250 */ - __u64 async_enter_timer; /* 0x258 */ - __u64 exit_timer; /* 0x260 */ - __u64 user_timer; /* 0x268 */ - __u64 system_timer; /* 0x270 */ - __u64 steal_timer; /* 0x278 */ - __u64 last_update_timer; /* 
0x280 */ - __u64 last_update_clock; /* 0x288 */ - psw_t return_mcck_psw; /* 0x290 */ - __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */ - /* System info area */ - __u64 save_area[16]; /* 0xc00 */ - __u8 pad9[0xd40-0xc80]; /* 0xc80 */ - __u64 kernel_stack; /* 0xd40 */ - __u64 thread_info; /* 0xd48 */ - __u64 async_stack; /* 0xd50 */ - __u64 kernel_asce; /* 0xd58 */ - __u64 user_asce; /* 0xd60 */ - __u64 panic_stack; /* 0xd68 */ - __u64 user_exec_asce; /* 0xd70 */ - __u8 pad10[0xd80-0xd78]; /* 0xd78 */ - /* entry.S sensitive area start */ - struct cpuinfo_S390 cpu_data; /* 0xd80 */ - __u32 ipl_device; /* 0xdb8 */ - __u32 pad11; /* 0xdbc */ - /* entry.S sensitive area end */ - - /* SMP info area: defined by DJB */ - __u64 clock_comparator; /* 0xdc0 */ - __u64 ext_call_fast; /* 0xdc8 */ - __u64 percpu_offset; /* 0xdd0 */ - __u64 current_task; /* 0xdd8 */ - __u32 softirq_pending; /* 0xde0 */ - __u32 pad_0x0de4; /* 0xde4 */ - __u64 int_clock; /* 0xde8 */ - __u8 pad12[0xe00-0xdf0]; /* 0xdf0 */ - - /* 0xe00 is used as indicator for dump tools */ - /* whether the kernel died with panic() or not */ - __u32 panic_magic; /* 0xe00 */ + /* 0x0000 - 0x01ff: defined by architecture */ + __u32 ccw1[2]; /* 0x0000 */ + __u32 ccw2[4]; /* 0x0008 */ + __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ + __u32 ext_params; /* 0x0080 */ + __u16 cpu_addr; /* 0x0084 */ + __u16 ext_int_code; /* 0x0086 */ + __u16 svc_ilc; /* 0x0088 */ + __u16 svc_code; /* 0x008a */ + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + __u32 data_exc_code; /* 0x0090 */ + __u16 mon_class_num; /* 0x0094 */ + __u16 per_perc_atmid; /* 0x0096 */ + addr_t per_address; /* 0x0098 */ + __u8 exc_access_id; /* 0x00a0 */ + __u8 per_access_id; /* 0x00a1 */ + __u8 op_access_id; /* 0x00a2 */ + __u8 ar_access_id; /* 0x00a3 */ + __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */ + addr_t trans_exc_code; /* 0x00a8 */ + addr_t monitor_code; /* 0x00b0 */ + __u16 subchannel_id; /* 0x00b8 */ + __u16 subchannel_nr; /* 0x00ba */ + __u32 io_int_parm; /* 0x00bc */ + __u32 io_int_word; /* 0x00c0 */ + __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */ + __u32 stfl_fac_list; /* 0x00c8 */ + __u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */ + __u32 mcck_interruption_code[2]; /* 0x00e8 */ + __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */ + __u32 external_damage_code; /* 0x00f4 */ + addr_t failing_storage_address; /* 0x00f8 */ + __u8 pad_0x0100[0x0120-0x0100]; /* 0x0100 */ + psw_t restart_old_psw; /* 0x0120 */ + psw_t external_old_psw; /* 0x0130 */ + psw_t svc_old_psw; /* 0x0140 */ + psw_t program_old_psw; /* 0x0150 */ + psw_t mcck_old_psw; /* 0x0160 */ + psw_t io_old_psw; /* 0x0170 */ + __u8 pad_0x0180[0x01a0-0x0180]; /* 0x0180 */ + psw_t restart_psw; /* 0x01a0 */ + psw_t external_new_psw; /* 0x01b0 */ + psw_t svc_new_psw; /* 0x01c0 */ + psw_t program_new_psw; /* 0x01d0 */ + psw_t mcck_new_psw; /* 0x01e0 */ + psw_t io_new_psw; /* 0x01f0 */ + + /* Entry/exit save area & return psws. */ + __u64 save_area[16]; /* 0x0200 */ + psw_t return_psw; /* 0x0280 */ + psw_t return_mcck_psw; /* 0x0290 */ + + /* CPU accounting and timing values. */ + __u64 sync_enter_timer; /* 0x02a0 */ + __u64 async_enter_timer; /* 0x02a8 */ + __u64 exit_timer; /* 0x02b0 */ + __u64 user_timer; /* 0x02b8 */ + __u64 system_timer; /* 0x02c0 */ + __u64 steal_timer; /* 0x02c8 */ + __u64 last_update_timer; /* 0x02d0 */ + __u64 last_update_clock; /* 0x02d8 */ + + /* Current process. */ + __u64 current_task; /* 0x02e0 */ + __u64 thread_info; /* 0x02e8 */ + __u64 kernel_stack; /* 0x02f0 */ + + /* Interrupt and panic stack. 
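+ * (async_stack is used for interrupt handling, panic_stack serves as the + * emergency stack)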
*/ + __u64 async_stack; /* 0x02f8 */ + __u64 panic_stack; /* 0x0300 */ + + /* Address space pointer. */ + __u64 kernel_asce; /* 0x0308 */ + __u64 user_asce; /* 0x0310 */ + __u64 user_exec_asce; /* 0x0318 */ + + /* SMP info area */ + cpuid_t cpu_id; /* 0x0320 */ + __u32 cpu_nr; /* 0x0328 */ + __u32 softirq_pending; /* 0x032c */ + __u64 percpu_offset; /* 0x0330 */ + __u64 ext_call_fast; /* 0x0338 */ + __u64 int_clock; /* 0x0340 */ + __u64 clock_comparator; /* 0x0348 */ + __u64 vdso_per_cpu_data; /* 0x0350 */ + __u8 pad_0x0358[0x0380-0x0358]; /* 0x0358 */ + + /* Interrupt response block. */ + __u8 irb[64]; /* 0x0380 */ /* Per cpu primary space access list */ - __u8 pad_0xe04[0xe38-0xe04]; /* 0xe04 */ - __u64 vdso_per_cpu_data; /* 0xe38 */ - __u32 paste[16]; /* 0xe40 */ - - __u8 pad13[0x11b8-0xe80]; /* 0xe80 */ - - /* 64 bit extparam used for pfault, diag 250 etc */ - __u64 ext_params2; /* 0x11B8 */ - - __u8 pad14[0x1200-0x11C0]; /* 0x11C0 */ - - /* System info area */ - - __u64 floating_pt_save_area[16]; /* 0x1200 */ - __u64 gpregs_save_area[16]; /* 0x1280 */ - __u32 st_status_fixed_logout[4]; /* 0x1300 */ - __u8 pad15[0x1318-0x1310]; /* 0x1310 */ - __u32 prefixreg_save_area; /* 0x1318 */ - __u32 fpt_creg_save_area; /* 0x131c */ - __u8 pad16[0x1324-0x1320]; /* 0x1320 */ - __u32 tod_progreg_save_area; /* 0x1324 */ - __u32 cpu_timer_save_area[2]; /* 0x1328 */ - __u32 clock_comp_save_area[2]; /* 0x1330 */ - __u8 pad17[0x1340-0x1338]; /* 0x1338 */ - __u32 access_regs_save_area[16]; /* 0x1340 */ - __u64 cregs_save_area[16]; /* 0x1380 */ + __u32 paste[16]; /* 0x03c0 */ + + __u8 pad_0x0400[0x0e00-0x0400]; /* 0x0400 */ + + /* + * 0xe00 contains the address of the IPL Parameter Information + * block. Dump tools need IPIB for IPL after dump. + * Note: do not change the position of any fields in 0x0e00-0x0f00 + */ + __u64 ipib; /* 0x0e00 */ + __u32 ipib_checksum; /* 0x0e08 */ + __u8 pad_0x0e0c[0x11b8-0x0e0c]; /* 0x0e0c */ + + /* 64 bit extparam used for pfault/diag 250: defined by architecture */ + __u64 ext_params2; /* 0x11B8 */ + __u8 pad_0x11c0[0x1200-0x11C0]; /* 0x11C0 */ + + /* CPU register save area: defined by architecture */ + __u64 floating_pt_save_area[16]; /* 0x1200 */ + __u64 gpregs_save_area[16]; /* 0x1280 */ + __u32 st_status_fixed_logout[4]; /* 0x1300 */ + __u8 pad_0x1310[0x1318-0x1310]; /* 0x1310 */ + __u32 prefixreg_save_area; /* 0x1318 */ + __u32 fpt_creg_save_area; /* 0x131c */ + __u8 pad_0x1320[0x1324-0x1320]; /* 0x1320 */ + __u32 tod_progreg_save_area; /* 0x1324 */ + __u32 cpu_timer_save_area[2]; /* 0x1328 */ + __u32 clock_comp_save_area[2]; /* 0x1330 */ + __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */ + __u32 access_regs_save_area[16]; /* 0x1340 */ + __u64 cregs_save_area[16]; /* 0x1380 */ /* align to the top of the prefix area */ - - __u8 pad18[0x2000-0x1400]; /* 0x1400 */ + __u8 pad_0x1400[0x2000-0x1400]; /* 0x1400 */ #endif /* !__s390x__ */ } __attribute__((packed)); /* End structure*/ @@ -433,8 +440,6 @@ static inline __u32 store_prefix(void) return address; } -#define __PANIC_MAGIC 0xDEADC0DE - #endif #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 28ec870655af..fc7edd6f41b6 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -74,7 +74,7 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { - cpu_set(smp_processor_id(), next->cpu_vm_mask); + 
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); update_mm(next, tsk); } diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h new file mode 100644 index 000000000000..f4b60441adca --- /dev/null +++ b/arch/s390/include/asm/nmi.h @@ -0,0 +1,66 @@ +/* + * Machine check handler definitions + * + * Copyright IBM Corp. 2000,2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#ifndef _ASM_S390_NMI_H +#define _ASM_S390_NMI_H + +#include <linux/types.h> + +struct mci { + __u32 sd : 1; /* 00 system damage */ + __u32 pd : 1; /* 01 instruction-processing damage */ + __u32 sr : 1; /* 02 system recovery */ + __u32 : 1; /* 03 */ + __u32 cd : 1; /* 04 timing-facility damage */ + __u32 ed : 1; /* 05 external damage */ + __u32 : 1; /* 06 */ + __u32 dg : 1; /* 07 degradation */ + __u32 w : 1; /* 08 warning pending */ + __u32 cp : 1; /* 09 channel-report pending */ + __u32 sp : 1; /* 10 service-processor damage */ + __u32 ck : 1; /* 11 channel-subsystem damage */ + __u32 : 2; /* 12-13 */ + __u32 b : 1; /* 14 backed up */ + __u32 : 1; /* 15 */ + __u32 se : 1; /* 16 storage error uncorrected */ + __u32 sc : 1; /* 17 storage error corrected */ + __u32 ke : 1; /* 18 storage-key error uncorrected */ + __u32 ds : 1; /* 19 storage degradation */ + __u32 wp : 1; /* 20 psw mwp validity */ + __u32 ms : 1; /* 21 psw mask and key validity */ + __u32 pm : 1; /* 22 psw program mask and cc validity */ + __u32 ia : 1; /* 23 psw instruction address validity */ + __u32 fa : 1; /* 24 failing storage address validity */ + __u32 : 1; /* 25 */ + __u32 ec : 1; /* 26 external damage code validity */ + __u32 fp : 1; /* 27 floating point register validity */ + __u32 gr : 1; /* 28 general register validity */ + __u32 cr : 1; /* 29 control register validity */ + __u32 : 1; /* 30 */ + __u32 st : 1; /* 31 storage logical validity */ + __u32 ie : 1; /* 32 indirect storage error */ + __u32 ar : 1; /* 33 access register validity */ + __u32 da : 1; /* 34 delayed access exception */ + __u32 : 7; /* 35-41 */ + __u32 pr : 1; /* 42 tod programmable register validity */ + __u32 fc : 1; /* 43 fp control register validity */ + __u32 ap : 1; /* 44 ancillary report */ + __u32 : 1; /* 45 */ + __u32 ct : 1; /* 46 cpu timer validity */ + __u32 cc : 1; /* 47 clock comparator validity */ + __u32 : 16; /* 48-63 */ +}; + +struct pt_regs; + +extern void s390_handle_mcck(void); +extern void s390_do_machine_check(struct pt_regs *regs); + +#endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index db4523fe38ac..61862b3ac794 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -42,22 +42,8 @@ static inline void get_cpu_id(cpuid_t *ptr) asm volatile("stidp 0(%1)" : "=m" (*ptr) : "a" (ptr)); } -struct cpuinfo_S390 -{ - cpuid_t cpu_id; - __u16 cpu_addr; - __u16 cpu_nr; - unsigned long loops_per_jiffy; - unsigned long *pgd_quick; -#ifdef __s390x__ - unsigned long *pmd_quick; -#endif /* __s390x__ */ - unsigned long *pte_quick; - unsigned long pgtable_cache_sz; -}; - extern void s390_adjust_jiffies(void); -extern void print_cpu_info(struct cpuinfo_S390 *); +extern void print_cpu_info(void); extern int get_cpu_capability(unsigned int *); /* diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 8920025c3c02..f1b051630c50 100644 --- a/arch/s390/include/asm/ptrace.h +++ 
b/arch/s390/include/asm/ptrace.h @@ -172,6 +172,8 @@ #define NUM_CRS 16 #define NUM_ACRS 16 +#define NUM_CR_WORDS 3 + #define FPR_SIZE 8 #define FPC_SIZE 4 #define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ @@ -334,7 +336,7 @@ struct pt_regs */ typedef struct { - unsigned long cr[3]; + unsigned long cr[NUM_CR_WORDS]; } per_cr_words; #define PER_EM_MASK 0xE8000000UL diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 27fc1746de15..402d6dcf0d26 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -314,6 +314,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, int, int, unsigned long); /* qdio errors reported to the upper-layer program */ +#define QDIO_ERROR_SIGA_TARGET 0x02 #define QDIO_ERROR_SIGA_ACCESS_EXCEPTION 0x10 #define QDIO_ERROR_SIGA_BUSY 0x20 #define QDIO_ERROR_ACTIVATE_CHECK_CONDITION 0x40 diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 024b91e06239..2009158a4502 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -50,12 +50,7 @@ extern void machine_power_off_smp(void); #define PROC_CHANGE_PENALTY 20 /* Schedule penalty */ -#define raw_smp_processor_id() (S390_lowcore.cpu_data.cpu_nr) - -static inline __u16 hard_smp_processor_id(void) -{ - return stap(); -} +#define raw_smp_processor_id() (S390_lowcore.cpu_nr) /* * returns 1 if cpu is in stopped/check stopped state or not operational diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index c786ab623b2d..02330c50241b 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -62,4 +62,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index d074673a6d9b..cd0241db5a46 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -100,6 +100,7 @@ static inline char *strcat(char *dst, const char *src) static inline char *strcpy(char *dst, const char *src) { +#if __GNUC__ < 4 register int r0 asm("0") = 0; char *ret = dst; @@ -109,10 +110,14 @@ static inline char *strcpy(char *dst, const char *src) : "+&a" (dst), "+&a" (src) : "d" (r0) : "cc", "memory"); return ret; +#else + return __builtin_strcpy(dst, src); +#endif } static inline size_t strlen(const char *s) { +#if __GNUC__ < 4 register unsigned long r0 asm("0") = 0; const char *tmp = s; @@ -121,6 +126,9 @@ static inline size_t strlen(const char *s) " jo 0b" : "+d" (r0), "+a" (tmp) : : "cc"); return r0 - (unsigned long) s; +#else + return __builtin_strlen(s); +#endif } static inline size_t strnlen(const char * s, size_t n) @@ -135,7 +143,13 @@ static inline size_t strnlen(const char * s, size_t n) : "+a" (end), "+a" (tmp) : "d" (r0) : "cc"); return end - s; } - +#else /* IN_ARCH_STRING_C */ +void *memchr(const void * s, int c, size_t n); +void *memscan(void *s, int c, size_t n); +char *strcat(char *dst, const char *src); +char *strcpy(char *dst, const char *src); +size_t strlen(const char *s); +size_t strnlen(const char * s, size_t n); #endif /* !IN_ARCH_STRING_C */ #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index ad93212d9e16..9d70057d828c 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -100,6 +100,7 @@ struct sysinfo_3_2_2 { char reserved_1[24]; } vm[8]; + char reserved_544[3552]; }; static inline int stsi(void *sysinfo, int 
fc, int sel1, int sel2) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index d60394b9745e..304cffa623e1 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -51,7 +51,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) * If the process only ran on the local cpu, do a local flush. */ local_cpumask = cpumask_of_cpu(smp_processor_id()); - if (cpus_equal(mm->cpu_vm_mask, local_cpumask)) + if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) __tlb_flush_local(); else __tlb_flush_global(); @@ -73,7 +73,7 @@ static inline void __tlb_flush_idte(unsigned long asce) static inline void __tlb_flush_mm(struct mm_struct * mm) { - if (unlikely(cpus_empty(mm->cpu_vm_mask))) + if (unlikely(cpumask_empty(mm_cpumask(mm)))) return; /* * If the machine has IDTE we prefer to do a per mm flush diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index c979c3b56ab0..5e0ad618dc45 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -5,7 +5,6 @@ #define mc_capable() (1) -cpumask_t cpu_coregroup_map(unsigned int cpu); const struct cpumask *cpu_coregroup_mask(unsigned int cpu); extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/arch/s390/include/asm/vtoc.h b/arch/s390/include/asm/vtoc.h index 3a5267d90d29..8406a2b3157a 100644 --- a/arch/s390/include/asm/vtoc.h +++ b/arch/s390/include/asm/vtoc.h @@ -39,7 +39,7 @@ struct vtoc_labeldate __u16 day; } __attribute__ ((packed)); -struct vtoc_volume_label +struct vtoc_volume_label_cdl { char volkey[4]; /* volume key = volume label */ char vollbl[4]; /* volume label */ @@ -56,6 +56,14 @@ struct vtoc_volume_label char res3[29]; /* reserved */ } __attribute__ ((packed)); +struct vtoc_volume_label_ldl { + char vollbl[4]; /* volume label */ + char volid[6]; /* volume identifier */ + char res3[69]; /* reserved */ + char ldl_version; /* version number, valid for ldl format */ + __u64 formatted_blocks; /* valid when ldl_version >= f2 */ +} __attribute__ ((packed)); + struct vtoc_extent { __u8 typeind; /* extent type indicator */ @@ -140,7 +148,11 @@ struct vtoc_format4_label char res2[10]; /* reserved */ __u8 DS4EFLVL; /* extended free-space management level */ struct vtoc_cchhb DS4EFPTR; /* pointer to extended free-space info */ - char res3[9]; /* reserved */ + char res3; /* reserved */ + __u32 DS4DCYL; /* number of logical cyls */ + char res4[2]; /* reserved */ + __u8 DS4DEVF2; /* device flags */ + char res5; /* reserved */ } __attribute__ ((packed)); struct vtoc_ds5ext diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 3edc6c6f258b..228e3105ded7 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -17,10 +17,12 @@ CFLAGS_smp.o := -Wno-nonnull # CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' +CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w + obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \ - vdso.o vtime.o + vdso.o vtime.o sysinfo.o nmi.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/bitmap.S b/arch/s390/kernel/bitmap.S deleted file mode 100644 index dfb41f946e23..000000000000 --- a/arch/s390/kernel/bitmap.S +++ /dev/null @@ -1,56 +0,0 @@ -/* - * arch/s390/kernel/bitmap.S - * Bitmaps for set_bit, clear_bit, test_and_set_bit, ... 
- * See include/asm-s390/{bitops.h|posix_types.h} for details - * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - */ - - .globl _oi_bitmap -_oi_bitmap: - .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 - - .globl _ni_bitmap -_ni_bitmap: - .byte 0xFE,0xFD,0xFB,0xF7,0xEF,0xDF,0xBF,0x7F - - .globl _zb_findmap -_zb_findmap: - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 - - .globl _sb_findmap -_sb_findmap: - .byte 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - diff --git a/arch/s390/kernel/bitmap.c b/arch/s390/kernel/bitmap.c new file mode 100644 index 000000000000..3ae4757b006a --- /dev/null +++ b/arch/s390/kernel/bitmap.c @@ -0,0 +1,54 @@ +/* + * Bitmaps for set_bit, clear_bit, test_and_set_bit, ... + * See include/asm/{bitops.h|posix_types.h} for details + * + * Copyright IBM Corp. 
1999,2009 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#include <linux/bitops.h> +#include <linux/module.h> + +const char _oi_bitmap[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 }; +EXPORT_SYMBOL(_oi_bitmap); + +const char _ni_bitmap[] = { 0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f }; +EXPORT_SYMBOL(_ni_bitmap); + +const char _zb_findmap[] = { + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 }; +EXPORT_SYMBOL(_zb_findmap); + +const char _sb_findmap[] = { + 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, + 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 }; +EXPORT_SYMBOL(_sb_findmap); diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h index a2be3a978d5c..123dd660d7fb 100644 --- a/arch/s390/kernel/compat_ptrace.h +++ b/arch/s390/kernel/compat_ptrace.h @@ -1,10 +1,11 @@ #ifndef _PTRACE32_H #define _PTRACE32_H +#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ #include "compat_linux.h" /* needed for psw_compat_t */ typedef struct { - __u32 cr[3]; + __u32 cr[NUM_CR_WORDS]; } per_cr_words32; typedef struct { diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 62c706eb0de6..87cf5a79a351 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -252,7 +252,7 @@ sys32_chroot_wrapper: sys32_ustat_wrapper: llgfr %r2,%r2 # dev_t llgtr %r3,%r3 # struct ustat * - jg sys_ustat + jg compat_sys_ustat .globl sys32_dup2_wrapper sys32_dup2_wrapper: diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index ba03fc0a3a56..be8bceaf37d9 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -603,7 +603,7 @@ debug_input(struct file *file, const char __user *user_buf, size_t length, static int debug_open(struct inode *inode, struct file *file) { - int i = 0, rc = 0; + int i, rc = 0; file_private_info_t *p_info; debug_info_t *debug_info, *debug_info_snapshot; @@ -642,8 +642,7 @@ found: p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); if(!p_info){ - if(debug_info_snapshot) - debug_info_free(debug_info_snapshot); + debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } @@ -698,8 +697,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, if ((uid != 0) || (gid != 0)) pr_warning("Root becomes the owner of all s390dbf files " "in sysfs\n"); - if (!initialized) - BUG(); + BUG_ON(!initialized); mutex_lock(&debug_mutex); /* create new debug_info */ @@ -1156,7 +1154,6 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view) else { 
debugfs_remove(id->debugfs_entries[i]); id->views[i] = NULL; - rc = 0; } spin_unlock_irqrestore(&id->lock, flags); out: diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 2a2ca268b1dd..4d221c81c849 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -6,6 +6,7 @@ * Heiko Carstens <heiko.carstens@de.ibm.com> */ +#include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/string.h> @@ -20,6 +21,7 @@ #include <asm/processor.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/sclp.h> #include "entry.h" @@ -173,19 +175,21 @@ static noinline __init void init_kernel_storage_key(void) page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); } +static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE); + static noinline __init void detect_machine_type(void) { - struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data; - - get_cpu_id(&S390_lowcore.cpu_data.cpu_id); - - /* Running under z/VM ? */ - if (cpuinfo->cpu_id.version == 0xff) - machine_flags |= MACHINE_FLAG_VM; + /* No VM information? Looks like LPAR */ + if (stsi(&vmms, 3, 2, 2) == -ENOSYS) + return; + if (!vmms.count) + return; - /* Running under KVM ? */ - if (cpuinfo->cpu_id.version == 0xfe) + /* Running under KVM? If not we assume z/VM */ + if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3)) machine_flags |= MACHINE_FLAG_KVM; + else + machine_flags |= MACHINE_FLAG_VM; } static __init void early_pgm_check_handler(void) @@ -348,7 +352,6 @@ static void __init setup_boot_command_line(void) /* copy arch command line */ strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); - boot_command_line[ARCH_COMMAND_LINE_SIZE - 1] = 0; /* append IPL PARM data to the boot command line */ if (MACHINE_IS_VM) { diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index ec7e35f6055b..1046c2c9f8d1 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -469,6 +469,8 @@ start: .org 0x10000 startup:basr %r13,0 # get base .LPG0: + xc 0x200(256),0x200 # partially clear lowcore + xc 0x300(256),0x300 #ifndef CONFIG_MARCH_G5 # check processor version against MARCH_{G5,Z900,Z990,Z9_109,Z10} diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index db476d114caa..2ced846065b7 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -20,7 +20,6 @@ startup_continue: lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore - mvc __LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12) # # Setup stack # diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index f9f70aa15244..65667b2e65ce 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -86,7 +86,6 @@ startup_continue: lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore - mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) lghi %r0,__LC_PASTE stg %r0,__LC_VDSO_PER_CPU # diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 2dcf590faba6..6f3711a0eaaa 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -23,7 +23,7 @@ #include <asm/ebcdic.h> #include <asm/reset.h> #include <asm/sclp.h> -#include <asm/setup.h> +#include <asm/checksum.h> #define IPL_PARM_BLOCK_VERSION 0 @@ -56,13 +56,14 @@ struct shutdown_trigger { }; /* - * Five shutdown action types are supported: + * The following shutdown action 
types are supported: */ #define SHUTDOWN_ACTION_IPL_STR "ipl" #define SHUTDOWN_ACTION_REIPL_STR "reipl" #define SHUTDOWN_ACTION_DUMP_STR "dump" #define SHUTDOWN_ACTION_VMCMD_STR "vmcmd" #define SHUTDOWN_ACTION_STOP_STR "stop" +#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl" struct shutdown_action { char *name; @@ -146,6 +147,7 @@ static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT; static struct ipl_parameter_block *reipl_block_fcp; static struct ipl_parameter_block *reipl_block_ccw; static struct ipl_parameter_block *reipl_block_nss; +static struct ipl_parameter_block *reipl_block_actual; static int dump_capabilities = DUMP_TYPE_NONE; static enum dump_type dump_type = DUMP_TYPE_NONE; @@ -835,6 +837,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_CCW_VM; else reipl_method = REIPL_METHOD_CCW_CIO; + reipl_block_actual = reipl_block_ccw; break; case IPL_TYPE_FCP: if (diag308_set_works) @@ -843,6 +846,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_FCP_RO_VM; else reipl_method = REIPL_METHOD_FCP_RO_DIAG; + reipl_block_actual = reipl_block_fcp; break; case IPL_TYPE_FCP_DUMP: reipl_method = REIPL_METHOD_FCP_DUMP; @@ -852,6 +856,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_NSS_DIAG; else reipl_method = REIPL_METHOD_NSS; + reipl_block_actual = reipl_block_nss; break; case IPL_TYPE_UNKNOWN: reipl_method = REIPL_METHOD_DEFAULT; @@ -960,7 +965,6 @@ static void reipl_run(struct shutdown_trigger *trigger) diag308(DIAG308_IPL, NULL); break; case REIPL_METHOD_FCP_DUMP: - default: break; } disabled_wait((unsigned long) __builtin_return_address(0)); @@ -1069,10 +1073,12 @@ static int __init reipl_fcp_init(void) { int rc; - if ((!diag308_set_works) && (ipl_info.type != IPL_TYPE_FCP)) - return 0; - if ((!diag308_set_works) && (ipl_info.type == IPL_TYPE_FCP)) - make_attrs_ro(reipl_fcp_attrs); + if (!diag308_set_works) { + if (ipl_info.type == IPL_TYPE_FCP) + make_attrs_ro(reipl_fcp_attrs); + else + return 0; + } reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); if (!reipl_block_fcp) @@ -1253,7 +1259,6 @@ static void dump_run(struct shutdown_trigger *trigger) diag308(DIAG308_DUMP, NULL); break; case DUMP_METHOD_NONE: - default: return; } printk(KERN_EMERG "Dump failed!\n"); @@ -1332,6 +1337,49 @@ static struct shutdown_action __refdata dump_action = { .init = dump_init, }; +static void dump_reipl_run(struct shutdown_trigger *trigger) +{ + preempt_disable(); + /* + * Bypass dynamic address translation (DAT) when storing IPL parameter + * information block address and checksum into the prefix area + * (corresponding to absolute addresses 0-8191). + * When enhanced DAT applies and the STE format control is one, + * the absolute address is formed without prefixing. In this case a + * normal store (stg/st) into the prefix area would no longer match + * absolute addresses 0-8191.
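The new dump_reipl action stores two values into the prefix area for the dump tool to pick up: the absolute address of the re-IPL parameter block and a checksum over its hdr.len bytes. A consumer-side sketch of that contract, with hypothetical field names and a simplified ones'-complement sum standing in for the kernel's csum_partial():

	#include <stdint.h>

	/* Simplified stand-in for the kernel's csum_partial(): a 16-bit
	 * ones'-complement sum with the carry folded back in (assumption;
	 * the kernel routine is the authoritative definition). */
	static uint32_t csum_sketch(const uint8_t *p, uint32_t len, uint32_t sum)
	{
		while (len > 1) {
			sum += ((uint32_t)p[0] << 8) | p[1];
			sum = (sum & 0xffff) + (sum >> 16);
			p += 2;
			len -= 2;
		}
		if (len)
			sum += (uint32_t)p[0] << 8;
		return (sum & 0xffff) + (sum >> 16);
	}

	/* Hypothetical consumer-side view of the two fields dump_reipl_run()
	 * stores; names and widths are illustrative. */
	struct ipib_info_sketch {
		uint64_t ipib;		/* absolute address of the re-IPL block */
		uint32_t ipib_checksum;	/* checksum over hdr.len bytes of it */
	};

	static int ipib_plausible(const struct ipib_info_sketch *info,
				  const uint8_t *block, uint32_t hdr_len)
	{
		return info->ipib != 0 &&
		       csum_sketch(block, hdr_len, 0) == info->ipib_checksum;
	}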
+ */ +#ifdef CONFIG_64BIT + asm volatile("sturg %0,%1" + :: "a" ((unsigned long) reipl_block_actual), + "a" (&lowcore_ptr[smp_processor_id()]->ipib)); +#else + asm volatile("stura %0,%1" + :: "a" ((unsigned long) reipl_block_actual), + "a" (&lowcore_ptr[smp_processor_id()]->ipib)); +#endif + asm volatile("stura %0,%1" + :: "a" (csum_partial(reipl_block_actual, + reipl_block_actual->hdr.len, 0)), + "a" (&lowcore_ptr[smp_processor_id()]->ipib_checksum)); + preempt_enable(); + dump_run(trigger); +} + +static int __init dump_reipl_init(void) +{ + if (!diag308_set_works) + return -EOPNOTSUPP; + else + return 0; +} + +static struct shutdown_action __refdata dump_reipl_action = { + .name = SHUTDOWN_ACTION_DUMP_REIPL_STR, + .fn = dump_reipl_run, + .init = dump_reipl_init, +}; + /* * vmcmd shutdown action: Trigger vm command on shutdown. */ @@ -1421,7 +1469,8 @@ static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, /* action list */ static struct shutdown_action *shutdown_actions_list[] = { - &ipl_action, &reipl_action, &dump_action, &vmcmd_action, &stop_action}; + &ipl_action, &reipl_action, &dump_reipl_action, &dump_action, + &vmcmd_action, &stop_action}; #define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *)) /* @@ -1434,11 +1483,11 @@ static int set_trigger(const char *buf, struct shutdown_trigger *trigger, size_t len) { int i; + for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { if (!shutdown_actions_list[i]) continue; - if (strncmp(buf, shutdown_actions_list[i]->name, - strlen(shutdown_actions_list[i]->name)) == 0) { + if (sysfs_streq(buf, shutdown_actions_list[i]->name)) { trigger->action = shutdown_actions_list[i]; return len; } @@ -1672,7 +1721,7 @@ static int on_panic_notify(struct notifier_block *self, static struct notifier_block on_panic_nb = { .notifier_call = on_panic_notify, - .priority = 0, + .priority = INT_MIN, }; void __init setup_ipl(void) @@ -1696,7 +1745,6 @@ void __init setup_ipl(void) sizeof(ipl_info.data.nss.name)); break; case IPL_TYPE_UNKNOWN: - default: /* We have no info to copy */ break; } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 59b4e796680a..eed4a00cb676 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -310,15 +310,20 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, info->plt_initialized = 1; } if (r_type == R_390_PLTOFF16 || - r_type == R_390_PLTOFF32 - || r_type == R_390_PLTOFF64 - ) + r_type == R_390_PLTOFF32 || + r_type == R_390_PLTOFF64) val = me->arch.plt_offset - me->arch.got_offset + info->plt_offset + rela->r_addend; - else - val = (Elf_Addr) me->module_core + - me->arch.plt_offset + info->plt_offset + - rela->r_addend - loc; + else { + if (!((r_type == R_390_PLT16DBL && + val - loc + 0xffffUL < 0x1ffffeUL) || + (r_type == R_390_PLT32DBL && + val - loc + 0xffffffffULL < 0x1fffffffeULL))) + val = (Elf_Addr) me->module_core + + me->arch.plt_offset + + info->plt_offset; + val += rela->r_addend - loc; + } if (r_type == R_390_PLT16DBL) *(unsigned short *) loc = val >> 1; else if (r_type == R_390_PLTOFF16) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c new file mode 100644 index 000000000000..4bfdc421d7e9 --- /dev/null +++ b/arch/s390/kernel/nmi.c @@ -0,0 +1,376 @@ +/* + * Machine check handler + * + * Copyright IBM Corp. 
2000,2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/module.h> +#include <asm/lowcore.h> +#include <asm/smp.h> +#include <asm/etr.h> +#include <asm/cpu.h> +#include <asm/nmi.h> +#include <asm/crw.h> + +struct mcck_struct { + int kill_task; + int channel_report; + int warning; + unsigned long long mcck_code; +}; + +static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); + +static NORET_TYPE void s390_handle_damage(char *msg) +{ + smp_send_stop(); + disabled_wait((unsigned long) __builtin_return_address(0)); + while (1); +} + +/* + * Main machine check handler function. Will be called with interrupts enabled + * or disabled and machine checks enabled or disabled. + */ +void s390_handle_mcck(void) +{ + unsigned long flags; + struct mcck_struct mcck; + + /* + * Disable machine checks and get the current state of accumulated + * machine checks. Afterwards delete the old state and enable machine + * checks again. + */ + local_irq_save(flags); + local_mcck_disable(); + mcck = __get_cpu_var(cpu_mcck); + memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); + clear_thread_flag(TIF_MCCK_PENDING); + local_mcck_enable(); + local_irq_restore(flags); + + if (mcck.channel_report) + crw_handle_channel_report(); + /* + * A warning may remain for a prolonged period on the bare iron. + * (actually until the machine is powered off, or the problem is gone) + * So we just stop listening for the WARNING MCH and avoid continuously + * being interrupted. One caveat, however, is that we must do this per + * processor and cannot use the smp version of ctl_clear_bit(). + * On VM we only get one interrupt per virtually presented machine check. + * Though one suffices, we may get one interrupt per (virtual) cpu. + */ + if (mcck.warning) { /* WARNING pending ? */ + static int mchchk_wng_posted = 0; + + /* Use single cpu clear, as we cannot handle smp here. */ + __ctl_clear_bit(14, 24); /* Disable WARNING MCH */ + if (xchg(&mchchk_wng_posted, 1) == 0) + kill_cad_pid(SIGPWR, 1); + } + if (mcck.kill_task) { + local_irq_enable(); + printk(KERN_EMERG "mcck: Terminating task because of machine " + "malfunction (code 0x%016llx).\n", mcck.mcck_code); + printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", + current->comm, current->pid); + do_exit(SIGSEGV); + } +} +EXPORT_SYMBOL_GPL(s390_handle_mcck); + +/* + * returns 0 if all registers could be validated + * returns 1 otherwise + */ +static int notrace s390_revalidate_registers(struct mci *mci) +{ + int kill_task; + u64 tmpclock; + u64 zero; + void *fpt_save_area, *fpt_creg_save_area; + + kill_task = 0; + zero = 0; + + if (!mci->gr) { + /* + * General purpose registers couldn't be restored and have + * unknown contents. Process needs to be terminated. + */ + kill_task = 1; + } + if (!mci->fp) { + /* + * Floating point registers can't be restored and + * therefore the process needs to be terminated.
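The mci bits tested here are validity flags: the hardware sets a bit only when the corresponding register set was stored intact across the machine check. A rough sketch of the resulting policy (the bitfield layout is illustrative; the real struct mci lives in asm/nmi.h):

	/* Illustrative only; the real struct mci maps the machine-check
	 * interruption code bit-for-bit. */
	struct mci_sketch {
		unsigned int gr : 1;	/* general registers valid */
		unsigned int fp : 1;	/* floating point registers valid */
		unsigned int fc : 1;	/* floating point control register valid */
		unsigned int ar : 1;	/* access registers valid */
		unsigned int cr : 1;	/* control registers valid */
	};

	enum mcck_action { MCCK_OK, MCCK_KILL_TASK, MCCK_STOP_MACHINE };

	static enum mcck_action classify(const struct mci_sketch *mci)
	{
		if (!mci->cr)		/* kernel state lost: unrecoverable */
			return MCCK_STOP_MACHINE;
		if (!mci->gr || !mci->fp || !mci->fc || !mci->ar)
			return MCCK_KILL_TASK;	/* user state lost: kill the task */
		return MCCK_OK;
	}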
+ */ + kill_task = 1; + } +#ifndef CONFIG_64BIT + asm volatile( + " ld 0,0(%0)\n" + " ld 2,8(%0)\n" + " ld 4,16(%0)\n" + " ld 6,24(%0)" + : : "a" (&S390_lowcore.floating_pt_save_area)); +#endif + + if (MACHINE_HAS_IEEE) { +#ifdef CONFIG_64BIT + fpt_save_area = &S390_lowcore.floating_pt_save_area; + fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; +#else + fpt_save_area = (void *) S390_lowcore.extended_save_area_addr; + fpt_creg_save_area = fpt_save_area + 128; +#endif + if (!mci->fc) { + /* + * Floating point control register can't be restored. + * Task will be terminated. + */ + asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero)); + kill_task = 1; + + } else + asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); + + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } + /* Revalidate access registers */ + asm volatile( + " lam 0,15,0(%0)" + : : "a" (&S390_lowcore.access_regs_save_area)); + if (!mci->ar) { + /* + * Access registers have unknown contents. + * Terminating task. + */ + kill_task = 1; + } + /* Revalidate control registers */ + if (!mci->cr) { + /* + * Control registers have unknown contents. + * Can't recover and therefore stopping machine. + */ + s390_handle_damage("invalid control registers."); + } else { +#ifdef CONFIG_64BIT + asm volatile( + " lctlg 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#else + asm volatile( + " lctl 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#endif + } + /* + * We don't even try to revalidate the TOD register, since we simply + * can't write something sensible into that register. + */ +#ifdef CONFIG_64BIT + /* + * See if we can revalidate the TOD programmable register with its + * old contents (should be zero) otherwise set it to zero. + */ + if (!mci->pr) + asm volatile( + " sr 0,0\n" + " sckpf" + : : : "0", "cc"); + else + asm volatile( + " l 0,0(%0)\n" + " sckpf" + : : "a" (&S390_lowcore.tod_progreg_save_area) + : "0", "cc"); +#endif + /* Revalidate clock comparator register */ + asm volatile( + " stck 0(%1)\n" + " sckc 0(%1)" + : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory"); + + /* Check if old PSW is valid */ + if (!mci->wp) + /* + * Can't tell if we come from user or kernel mode + * -> stopping machine. + */ + s390_handle_damage("old psw invalid."); + + if (!mci->ms || !mci->pm || !mci->ia) + kill_task = 1; + + return kill_task; +} + +#define MAX_IPD_COUNT 29 +#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ + +#define ED_STP_ISLAND 6 /* External damage STP island check */ +#define ED_STP_SYNC 7 /* External damage STP sync check */ +#define ED_ETR_SYNC 12 /* External damage ETR sync check */ +#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ + +/* + * machine check handler. 
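MAX_IPD_COUNT and MAX_IPD_TIME form a simple rate limit used below: repeated instruction-processing-damage retries are tolerated only while consecutive events arrive less than five minutes apart, and at most 29 times. The same pattern in plain C (microsecond timestamps assumed; the kernel derives them by shifting the TOD clock right by 12):

	#include <stdint.h>

	#define IPD_MAX_COUNT	29			/* mirrors MAX_IPD_COUNT */
	#define IPD_WINDOW_US	(5ULL * 60 * 1000000)	/* mirrors MAX_IPD_TIME */

	/* Returns nonzero once retries come too fast to be survivable. */
	static int ipd_rate_exceeded(uint64_t now_us)
	{
		static uint64_t last_us;
		static int count;

		if (now_us - last_us < IPD_WINDOW_US)
			count++;	/* previous event was recent: accumulate */
		else
			count = 1;	/* long quiet gap: start a new window */
		last_us = now_us;
		return count >= IPD_MAX_COUNT;
	}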
+ */ +void notrace s390_do_machine_check(struct pt_regs *regs) +{ + static int ipd_count; + static DEFINE_SPINLOCK(ipd_lock); + static unsigned long long last_ipd; + struct mcck_struct *mcck; + unsigned long long tmp; + struct mci *mci; + int umode; + + lockdep_off(); + s390_idle_check(); + + mci = (struct mci *) &S390_lowcore.mcck_interruption_code; + mcck = &__get_cpu_var(cpu_mcck); + umode = user_mode(regs); + + if (mci->sd) { + /* System damage -> stopping machine */ + s390_handle_damage("received system damage machine check."); + } + if (mci->pd) { + if (mci->b) { + /* Processing backup -> verify if we can survive this */ + u64 z_mcic, o_mcic, t_mcic; +#ifdef CONFIG_64BIT + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 | + 1ULL<<16); +#else + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 | + 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16); +#endif + t_mcic = *(u64 *)mci; + + if (((t_mcic & z_mcic) != 0) || + ((t_mcic & o_mcic) != o_mcic)) { + s390_handle_damage("processing backup machine " + "check with damage."); + } + + /* + * Nullifying exigent condition, therefore we might + * retry this instruction. + */ + spin_lock(&ipd_lock); + tmp = get_clock(); + if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME) + ipd_count++; + else + ipd_count = 1; + last_ipd = tmp; + if (ipd_count == MAX_IPD_COUNT) + s390_handle_damage("too many ipd retries."); + spin_unlock(&ipd_lock); + } else { + /* Processing damage -> stopping machine */ + s390_handle_damage("received instruction processing " + "damage machine check."); + } + } + if (s390_revalidate_registers(mci)) { + if (umode) { + /* + * Couldn't restore all register contents while in + * user mode -> mark task for termination. + */ + mcck->kill_task = 1; + mcck->mcck_code = *(unsigned long long *) mci; + set_thread_flag(TIF_MCCK_PENDING); + } else { + /* + * Couldn't restore all register contents while in + * kernel mode -> stopping machine. 
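The z_mcic/o_mcic test above is a two-mask idiom over the 64-bit machine-check interruption code: z_mcic collects bits that must be zero, o_mcic bits that must all be one for the check to be survivable. In isolation:

	/* z_mcic: bits that must be zero; o_mcic: bits that must all be one. */
	static int mcic_survivable(unsigned long long mcic,
				   unsigned long long z_mcic,
				   unsigned long long o_mcic)
	{
		if (mcic & z_mcic)			/* a forbidden bit is set */
			return 0;
		if ((mcic & o_mcic) != o_mcic)		/* a required bit is missing */
			return 0;
		return 1;
	}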
+ */ + s390_handle_damage("unable to revalidate registers."); + } + } + if (mci->cd) { + /* Timing facility damage */ + s390_handle_damage("TOD clock damaged"); + } + if (mci->ed && mci->ec) { + /* External damage */ + if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) + etr_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) + etr_switch_to_local(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) + stp_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) + stp_island_check(); + } + if (mci->se) + /* Storage error uncorrected */ + s390_handle_damage("received storage error uncorrected " + "machine check."); + if (mci->ke) + /* Storage key-error uncorrected */ + s390_handle_damage("received storage key-error uncorrected " + "machine check."); + if (mci->ds && mci->fa) + /* Storage degradation */ + s390_handle_damage("received storage degradation machine " + "check."); + if (mci->cp) { + /* Channel report word pending */ + mcck->channel_report = 1; + set_thread_flag(TIF_MCCK_PENDING); + } + if (mci->w) { + /* Warning pending */ + mcck->warning = 1; + set_thread_flag(TIF_MCCK_PENDING); + } + lockdep_on(); +} + +static int __init machine_check_init(void) +{ + ctl_set_bit(14, 25); /* enable external damage MCH */ + ctl_set_bit(14, 27); /* enable system recovery MCH */ + ctl_set_bit(14, 24); /* enable warning MCH */ + return 0; +} +arch_initcall(machine_check_init); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 5cd38a90e64d..b48e961a38f6 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -1,18 +1,10 @@ /* - * arch/s390/kernel/process.c + * This file handles the architecture dependent parts of process handling. * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Hartmut Penner (hp@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * - * Derived from "arch/i386/kernel/process.c" - * Copyright (C) 1995, Linus Torvalds - */ - -/* - * This file handles the architecture-dependent parts of process handling.. + * Copyright IBM Corp. 1999,2009 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Hartmut Penner <hp@de.ibm.com>, + * Denis Joseph Barrow, */ #include <linux/compiler.h> @@ -47,6 +39,7 @@ #include <asm/processor.h> #include <asm/irq.h> #include <asm/timer.h> +#include <asm/nmi.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -76,7 +69,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -extern void s390_handle_mcck(void); /* * The idle loop on a S390... */ @@ -149,6 +141,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL(kernel_thread); /* * Free current thread data structures etc.. 
@@ -168,34 +161,35 @@ void release_thread(struct task_struct *dead_task) } int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, - unsigned long unused, - struct task_struct * p, struct pt_regs * regs) + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) { - struct fake_frame - { - struct stack_frame sf; - struct pt_regs childregs; - } *frame; - - frame = container_of(task_pt_regs(p), struct fake_frame, childregs); - p->thread.ksp = (unsigned long) frame; + struct thread_info *ti; + struct fake_frame + { + struct stack_frame sf; + struct pt_regs childregs; + } *frame; + + frame = container_of(task_pt_regs(p), struct fake_frame, childregs); + p->thread.ksp = (unsigned long) frame; /* Store access registers to kernel stack of new process. */ - frame->childregs = *regs; + frame->childregs = *regs; frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ - frame->childregs.gprs[15] = new_stackp; - frame->sf.back_chain = 0; + frame->childregs.gprs[15] = new_stackp; + frame->sf.back_chain = 0; - /* new return point is ret_from_fork */ - frame->sf.gprs[8] = (unsigned long) ret_from_fork; + /* new return point is ret_from_fork */ + frame->sf.gprs[8] = (unsigned long) ret_from_fork; - /* fake return stack for resume(), don't go back to schedule */ - frame->sf.gprs[9] = (unsigned long) frame; + /* fake return stack for resume(), don't go back to schedule */ + frame->sf.gprs[9] = (unsigned long) frame; /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); #ifndef CONFIG_64BIT - /* + /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the child. */ @@ -220,10 +214,13 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, #endif /* CONFIG_64BIT */ /* start new process with ar4 pointing to the correct address space */ p->thread.mm_segment = get_fs(); - /* Don't copy debug registers */ - memset(&p->thread.per_info,0,sizeof(p->thread.per_info)); - - return 0; + /* Don't copy debug registers */ + memset(&p->thread.per_info, 0, sizeof(p->thread.per_info)); + /* Initialize per thread user and system timer values */ + ti = task_thread_info(p); + ti->user_timer = 0; + ti->system_timer = 0; + return 0; } SYSCALL_DEFINE0(fork) @@ -311,7 +308,7 @@ out: int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) { #ifndef CONFIG_64BIT - /* + /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the dump. 
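The fake_frame trick above leans on the container_of() idiom: task_pt_regs() points at the childregs member, and copy_thread() walks back to the enclosing frame to derive the kernel stack pointer. Reduced to its essentials (simplified stand-in types):

	#include <stddef.h>

	/* Simplified stand-ins; the kernel types carry much more state. */
	struct pt_regs_sketch { long gprs[16]; };
	struct stack_frame_sketch { long back_chain; long gprs[10]; };

	struct fake_frame_sketch {
		struct stack_frame_sketch sf;
		struct pt_regs_sketch childregs;
	};

	#define container_of_sketch(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	/* Given the childregs pointer (what task_pt_regs() returns),
	 * recover the enclosing frame, whose address becomes thread.ksp. */
	static struct fake_frame_sketch *frame_of(struct pt_regs_sketch *regs)
	{
		return container_of_sketch(regs, struct fake_frame_sketch, childregs);
	}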
*/ @@ -322,6 +319,7 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) #endif /* CONFIG_64BIT */ return 1; } +EXPORT_SYMBOL(dump_fpu); unsigned long get_wchan(struct task_struct *p) { @@ -346,4 +344,3 @@ unsigned long get_wchan(struct task_struct *p) } return 0; } - diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 82c1872cfe80..802c8ab247f3 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -18,10 +18,11 @@ #include <asm/lowcore.h> #include <asm/param.h> -void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo) +void __cpuinit print_cpu_info(void) { pr_info("Processor %d started, address %d, identification %06X\n", - cpuinfo->cpu_nr, cpuinfo->cpu_addr, cpuinfo->cpu_id.ident); + S390_lowcore.cpu_nr, S390_lowcore.cpu_addr, + S390_lowcore.cpu_id.ident); } /* @@ -30,48 +31,46 @@ void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo) static int show_cpuinfo(struct seq_file *m, void *v) { - static const char *hwcap_str[8] = { + static const char *hwcap_str[9] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat" + "edat", "etf3eh" }; - struct cpuinfo_S390 *cpuinfo; - unsigned long n = (unsigned long) v - 1; - int i; + struct _lowcore *lc; + unsigned long n = (unsigned long) v - 1; + int i; - s390_adjust_jiffies(); - preempt_disable(); - if (!n) { - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); - seq_puts(m, "features\t: "); - for (i = 0; i < 8; i++) - if (hwcap_str[i] && (elf_hwcap & (1UL << i))) - seq_printf(m, "%s ", hwcap_str[i]); - seq_puts(m, "\n"); - } + s390_adjust_jiffies(); + preempt_disable(); + if (!n) { + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_puts(m, "features\t: "); + for (i = 0; i < 9; i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + seq_puts(m, "\n"); + } - if (cpu_online(n)) { + if (cpu_online(n)) { #ifdef CONFIG_SMP - if (smp_processor_id() == n) - cpuinfo = &S390_lowcore.cpu_data; - else - cpuinfo = &lowcore_ptr[n]->cpu_data; + lc = (smp_processor_id() == n) ? 
+ &S390_lowcore : lowcore_ptr[n]; #else - cpuinfo = &S390_lowcore.cpu_data; + lc = &S390_lowcore; #endif - seq_printf(m, "processor %li: " - "version = %02X, " - "identification = %06X, " - "machine = %04X\n", - n, cpuinfo->cpu_id.version, - cpuinfo->cpu_id.ident, - cpuinfo->cpu_id.machine); - } - preempt_enable(); - return 0; + seq_printf(m, "processor %li: " + "version = %02X, " + "identification = %06X, " + "machine = %04X\n", + n, lc->cpu_id.version, + lc->cpu_id.ident, + lc->cpu_id.machine); + } + preempt_enable(); + return 0; } static void *c_start(struct seq_file *m, loff_t *pos) diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index c41930499a5f..774147824c3d 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -1,10 +1,7 @@ /* - * arch/s390/kernel/reipl.S - * - * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com) - Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * Copyright IBM Corp 2000,2009 + * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Denis Joseph Barrow, */ #include <asm/lowcore.h> @@ -30,7 +27,7 @@ do_reipl_asm: basr %r13,0 mvc __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10) stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1) stckc .Lclkcmp-.Lpg0(%r13) - mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(8,%r1),.Lclkcmp-.Lpg0(%r13) + mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(7,%r1),.Lclkcmp-.Lpg0(%r13) stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1) stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1) diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 46b90cb03707..656fcbb9bd83 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -1,49 +1,5 @@ -/* - * arch/s390/kernel/s390_ksyms.c - * - * S390 version - */ -#include <linux/highuid.h> #include <linux/module.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/syscalls.h> -#include <linux/interrupt.h> -#include <asm/checksum.h> -#include <asm/cpcmd.h> -#include <asm/delay.h> -#include <asm/pgalloc.h> -#include <asm/setup.h> #include <asm/ftrace.h> -#ifdef CONFIG_IP_MULTICAST -#include <net/arp.h> -#endif - -/* - * memory management - */ -EXPORT_SYMBOL(_oi_bitmap); -EXPORT_SYMBOL(_ni_bitmap); -EXPORT_SYMBOL(_zb_findmap); -EXPORT_SYMBOL(_sb_findmap); - -/* - * binfmt_elf loader - */ -extern int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(empty_zero_page); - -/* - * misc. - */ -EXPORT_SYMBOL(machine_flags); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(csum_fold); -EXPORT_SYMBOL(console_mode); -EXPORT_SYMBOL(console_devno); -EXPORT_SYMBOL(console_irq); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c5cfb6185eac..06201b93cbbf 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -74,9 +74,17 @@ EXPORT_SYMBOL(uaccess); * Machine setup.. 
*/ unsigned int console_mode = 0; +EXPORT_SYMBOL(console_mode); + unsigned int console_devno = -1; +EXPORT_SYMBOL(console_devno); + unsigned int console_irq = -1; +EXPORT_SYMBOL(console_irq); + unsigned long machine_flags; +EXPORT_SYMBOL(machine_flags); + unsigned long elf_hwcap = 0; char elf_platform[ELF_PLATFORM_SIZE]; @@ -86,6 +94,10 @@ volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ int __initdata memory_end_set; unsigned long __initdata memory_end; +/* An array with a pointer to the lowcore of every CPU. */ +struct _lowcore *lowcore_ptr[NR_CPUS]; +EXPORT_SYMBOL(lowcore_ptr); + /* * This is set up by the setup-routine at boot-time * for S390 need to find out, what we have to setup @@ -109,13 +121,10 @@ static struct resource data_resource = { */ void __cpuinit cpu_init(void) { - int addr = hard_smp_processor_id(); - /* * Store processor id in lowcore (used e.g. in timer_interrupt) */ - get_cpu_id(&S390_lowcore.cpu_data.cpu_id); - S390_lowcore.cpu_data.cpu_addr = addr; + get_cpu_id(&S390_lowcore.cpu_id); /* * Force FPU initialization: @@ -125,8 +134,7 @@ void __cpuinit cpu_init(void) atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - if (current->mm) - BUG(); + BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); } @@ -217,7 +225,7 @@ static void __init conmode_default(void) } } -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE) +#ifdef CONFIG_ZFCPDUMP static void __init setup_zfcpdump(unsigned int console_devno) { static char str[41]; @@ -289,11 +297,7 @@ static int __init early_parse_mem(char *p) early_param("mem", early_parse_mem); #ifdef CONFIG_S390_SWITCH_AMODE -#ifdef CONFIG_PGSTE -unsigned int switch_amode = 1; -#else unsigned int switch_amode = 0; -#endif EXPORT_SYMBOL_GPL(switch_amode); static int set_amode_and_uaccess(unsigned long user_amode, @@ -414,7 +418,6 @@ setup_lowcore(void) PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; lc->io_new_psw.mask = psw_kernel_bits; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; - lc->ipl_device = S390_lowcore.ipl_device; lc->clock_comparator = -1ULL; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; lc->async_stack = (unsigned long) @@ -434,6 +437,7 @@ setup_lowcore(void) lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif set_prefix((u32)(unsigned long) lc); + lowcore_ptr[0] = lc; } static void __init @@ -510,7 +514,7 @@ static void __init setup_memory_end(void) unsigned long max_mem; int i; -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE) +#ifdef CONFIG_ZFCPDUMP if (ipl_info.type == IPL_TYPE_FCP_DUMP) { memory_end = ZFCPDUMP_HSA_SIZE; memory_end_set = 1; @@ -677,7 +681,6 @@ setup_memory(void) static void __init setup_hwcaps(void) { static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; - struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data; unsigned long long facility_list_extended; unsigned int facility_list; int i; @@ -693,15 +696,22 @@ static void __init setup_hwcaps(void) * Bit 17: the message-security assist is installed * Bit 19: the long-displacement facility is installed * Bit 21: the extended-immediate facility is installed + * Bit 22: extended-translation facility 3 is installed + * Bit 30: extended-translation facility 3 enhancement facility * These get translated to: * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1, * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3, - * HWCAP_S390_LDISP bit 4, and HWCAP_S390_EIMM bit 5. 
+ * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and + * HWCAP_S390_ETF3EH bit 8 (22 && 30). */ for (i = 0; i < 6; i++) if (facility_list & (1UL << (31 - stfl_bits[i]))) elf_hwcap |= 1UL << i; + if ((facility_list & (1UL << (31 - 22))) + && (facility_list & (1UL << (31 - 30)))) + elf_hwcap |= 1UL << 8; + /* * Check for additional facilities with store-facility-list-extended. * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0 @@ -710,20 +720,22 @@ static void __init setup_hwcaps(void) * How many facility words are stored depends on the number of * doublewords passed to the instruction. The additional facilites * are: - * Bit 43: decimal floating point facility is installed + * Bit 42: decimal floating point facility is installed + * Bit 44: perform floating point operation facility is installed * translated to: - * HWCAP_S390_DFP bit 6. + * HWCAP_S390_DFP bit 6 (42 && 44). */ if ((elf_hwcap & (1UL << 2)) && __stfle(&facility_list_extended, 1) > 0) { - if (facility_list_extended & (1ULL << (64 - 43))) + if ((facility_list_extended & (1ULL << (63 - 42))) + && (facility_list_extended & (1ULL << (63 - 44)))) elf_hwcap |= 1UL << 6; } if (MACHINE_HAS_HPAGE) elf_hwcap |= 1UL << 7; - switch (cpuinfo->cpu_id.machine) { + switch (S390_lowcore.cpu_id.machine) { case 0x9672: #if !defined(CONFIG_64BIT) default: /* Use "g5" as default for 31 bit kernels. */ @@ -816,7 +828,7 @@ setup_arch(char **cmdline_p) setup_lowcore(); cpu_init(); - __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr; + __cpu_logical_map[0] = stap(); s390_init_cpu_topology(); /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2d337cbb9329..006ed5016eb4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -32,6 +32,7 @@ #include <linux/delay.h> #include <linux/cache.h> #include <linux/interrupt.h> +#include <linux/irqflags.h> #include <linux/cpu.h> #include <linux/timex.h> #include <linux/bootmem.h> @@ -50,12 +51,6 @@ #include <asm/vdso.h> #include "entry.h" -/* - * An array with a pointer the lowcore of every CPU. 
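Once setup_hwcaps() has translated the facility bits, user space reads them back through the ELF auxiliary vector. A hedged example of testing the bits discussed above (the mask constants are spelled out here as assumptions rather than taken from a uapi header):

	#include <stdio.h>
	#include <sys/auxv.h>	/* getauxval(), glibc 2.16+ */

	/* Bit positions per the comment above. */
	#define HWCAP_DFP_SKETCH	(1UL << 6)
	#define HWCAP_HPAGE_SKETCH	(1UL << 7)
	#define HWCAP_ETF3EH_SKETCH	(1UL << 8)

	int main(void)
	{
		unsigned long hwcap = getauxval(AT_HWCAP);

		printf("dfp:    %s\n", (hwcap & HWCAP_DFP_SKETCH) ? "yes" : "no");
		printf("edat:   %s\n", (hwcap & HWCAP_HPAGE_SKETCH) ? "yes" : "no");
		printf("etf3eh: %s\n", (hwcap & HWCAP_ETF3EH_SKETCH) ? "yes" : "no");
		return 0;
	}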
- */ -struct _lowcore *lowcore_ptr[NR_CPUS]; -EXPORT_SYMBOL(lowcore_ptr); - static struct task_struct *current_set[NR_CPUS]; static u8 smp_cpu_type; @@ -81,9 +76,7 @@ void smp_send_stop(void) /* Disable all interrupts/machine checks */ __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); - - /* write magic number to zero page (absolute 0) */ - lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC; + trace_hardirqs_off(); /* stop all processors */ for_each_online_cpu(cpu) { @@ -233,7 +226,7 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); */ #define CPU_INIT_NO 1 -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE) +#ifdef CONFIG_ZFCPDUMP /* * zfcpdump_prefix_array holds prefix registers for the following scenario: @@ -274,7 +267,7 @@ EXPORT_SYMBOL_GPL(zfcpdump_save_areas); static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { } -#endif /* CONFIG_ZFCPDUMP || CONFIG_ZFCPDUMP_MODULE */ +#endif /* CONFIG_ZFCPDUMP */ static int cpu_stopped(int cpu) { @@ -304,8 +297,8 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) { int cpu_id, logical_cpu; - logical_cpu = first_cpu(avail); - if (logical_cpu == NR_CPUS) + logical_cpu = cpumask_first(&avail); + if (logical_cpu >= nr_cpu_ids) return 0; for (cpu_id = 0; cpu_id <= 65535; cpu_id++) { if (cpu_known(cpu_id)) @@ -316,8 +309,8 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) continue; cpu_set(logical_cpu, cpu_present_map); smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = next_cpu(logical_cpu, avail); - if (logical_cpu == NR_CPUS) + logical_cpu = cpumask_next(logical_cpu, &avail); + if (logical_cpu >= nr_cpu_ids) break; } return 0; @@ -329,8 +322,8 @@ static int smp_rescan_cpus_sclp(cpumask_t avail) int cpu_id, logical_cpu, cpu; int rc; - logical_cpu = first_cpu(avail); - if (logical_cpu == NR_CPUS) + logical_cpu = cpumask_first(&avail); + if (logical_cpu >= nr_cpu_ids) return 0; info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -351,8 +344,8 @@ static int smp_rescan_cpus_sclp(cpumask_t avail) smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; else smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = next_cpu(logical_cpu, avail); - if (logical_cpu == NR_CPUS) + logical_cpu = cpumask_next(logical_cpu, &avail); + if (logical_cpu >= nr_cpu_ids) break; } out: @@ -379,7 +372,7 @@ static void __init smp_detect_cpus(void) c_cpus = 1; s_cpus = 0; - boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr; + boot_cpu_addr = __cpu_logical_map[0]; info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); @@ -453,7 +446,7 @@ int __cpuinit start_secondary(void *cpuvoid) /* Switch on interrupts */ local_irq_enable(); /* Print info about this processor */ - print_cpu_info(&S390_lowcore.cpu_data); + print_cpu_info(); /* cpu_idle will call schedule for us */ cpu_idle(); return 0; @@ -515,7 +508,6 @@ out: return -ENOMEM; } -#ifdef CONFIG_HOTPLUG_CPU static void smp_free_lowcore(int cpu) { struct _lowcore *lowcore; @@ -534,7 +526,6 @@ static void smp_free_lowcore(int cpu) free_pages((unsigned long) lowcore, lc_order); lowcore_ptr[cpu] = NULL; } -#endif /* CONFIG_HOTPLUG_CPU */ /* Upping and downing of CPUs */ int __cpuinit __cpu_up(unsigned int cpu) @@ -543,16 +534,23 @@ int __cpuinit __cpu_up(unsigned int cpu) struct _lowcore *cpu_lowcore; struct stack_frame *sf; sigp_ccode ccode; + u32 lowcore; if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED) return -EIO; if (smp_alloc_lowcore(cpu)) return -ENOMEM; - - ccode = signal_processor_p((__u32)(unsigned 
long)(lowcore_ptr[cpu]), - cpu, sigp_set_prefix); - if (ccode) - return -EIO; + do { + ccode = signal_processor(cpu, sigp_initial_cpu_reset); + if (ccode == sigp_busy) + udelay(10); + if (ccode == sigp_not_operational) + goto err_out; + } while (ccode == sigp_busy); + + lowcore = (u32)(unsigned long)lowcore_ptr[cpu]; + while (signal_processor_p(lowcore, cpu, sigp_set_prefix) == sigp_busy) + udelay(10); idle = current_set[cpu]; cpu_lowcore = lowcore_ptr[cpu]; @@ -571,9 +569,8 @@ int __cpuinit __cpu_up(unsigned int cpu) : : "a" (&cpu_lowcore->access_regs_save_area) : "memory"); cpu_lowcore->percpu_offset = __per_cpu_offset[cpu]; cpu_lowcore->current_task = (unsigned long) idle; - cpu_lowcore->cpu_data.cpu_nr = cpu; + cpu_lowcore->cpu_nr = cpu; cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; - cpu_lowcore->ipl_device = S390_lowcore.ipl_device; eieio(); while (signal_processor(cpu, sigp_restart) == sigp_busy) @@ -582,6 +579,10 @@ int __cpuinit __cpu_up(unsigned int cpu) while (!cpu_online(cpu)) cpu_relax(); return 0; + +err_out: + smp_free_lowcore(cpu); + return -EIO; } static int __init setup_possible_cpus(char *s) @@ -589,9 +590,8 @@ static int __init setup_possible_cpus(char *s) int pcpus, cpu; pcpus = simple_strtoul(s, NULL, 0); - cpu_possible_map = cpumask_of_cpu(0); - for (cpu = 1; cpu < pcpus && cpu < NR_CPUS; cpu++) - cpu_set(cpu, cpu_possible_map); + for (cpu = 0; cpu < pcpus && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); return 0; } early_param("possible_cpus", setup_possible_cpus); @@ -663,7 +663,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); - print_cpu_info(&S390_lowcore.cpu_data); + print_cpu_info(); /* Reallocate current lowcore, but keep its contents. */ lc_order = sizeof(long) == 8 ? 1 : 0; diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c new file mode 100644 index 000000000000..b5e75e1061c8 --- /dev/null +++ b/arch/s390/kernel/sysinfo.c @@ -0,0 +1,428 @@ +/* + * Copyright IBM Corp. 2001, 2009 + * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <asm/ebcdic.h> +#include <asm/sysinfo.h> +#include <asm/cpcmd.h> + +/* Sigh, math-emu. Don't ask. */ +#include <asm/sfp-util.h> +#include <math-emu/soft-fp.h> +#include <math-emu/single.h> + +static inline int stsi_0(void) +{ + int rc = stsi(NULL, 0, 0, 0); + return rc == -ENOSYS ? 
rc : (((unsigned int) rc) >> 28); +} + +static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) +{ + if (stsi(info, 1, 1, 1) == -ENOSYS) + return len; + + EBCASC(info->manufacturer, sizeof(info->manufacturer)); + EBCASC(info->type, sizeof(info->type)); + EBCASC(info->model, sizeof(info->model)); + EBCASC(info->sequence, sizeof(info->sequence)); + EBCASC(info->plant, sizeof(info->plant)); + EBCASC(info->model_capacity, sizeof(info->model_capacity)); + EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap)); + EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap)); + len += sprintf(page + len, "Manufacturer: %-16.16s\n", + info->manufacturer); + len += sprintf(page + len, "Type: %-4.4s\n", + info->type); + if (info->model[0] != '\0') + /* + * Sigh: the model field has been renamed with System z9 + * to model_capacity and a new model field has been added + * after the plant field. To avoid confusing older programs + * the "Model:" prints "model_capacity model" or just + * "model_capacity" if the model string is empty . + */ + len += sprintf(page + len, + "Model: %-16.16s %-16.16s\n", + info->model_capacity, info->model); + else + len += sprintf(page + len, "Model: %-16.16s\n", + info->model_capacity); + len += sprintf(page + len, "Sequence Code: %-16.16s\n", + info->sequence); + len += sprintf(page + len, "Plant: %-4.4s\n", + info->plant); + len += sprintf(page + len, "Model Capacity: %-16.16s %08u\n", + info->model_capacity, *(u32 *) info->model_cap_rating); + if (info->model_perm_cap[0] != '\0') + len += sprintf(page + len, + "Model Perm. Capacity: %-16.16s %08u\n", + info->model_perm_cap, + *(u32 *) info->model_perm_cap_rating); + if (info->model_temp_cap[0] != '\0') + len += sprintf(page + len, + "Model Temp. Capacity: %-16.16s %08u\n", + info->model_temp_cap, + *(u32 *) info->model_temp_cap_rating); + return len; +} + +static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len) +{ + struct sysinfo_1_2_2_extension *ext; + int i; + + if (stsi(info, 1, 2, 2) == -ENOSYS) + return len; + ext = (struct sysinfo_1_2_2_extension *) + ((unsigned long) info + info->acc_offset); + + len += sprintf(page + len, "\n"); + len += sprintf(page + len, "CPUs Total: %d\n", + info->cpus_total); + len += sprintf(page + len, "CPUs Configured: %d\n", + info->cpus_configured); + len += sprintf(page + len, "CPUs Standby: %d\n", + info->cpus_standby); + len += sprintf(page + len, "CPUs Reserved: %d\n", + info->cpus_reserved); + + if (info->format == 1) { + /* + * Sigh 2. According to the specification the alternate + * capability field is a 32 bit floating point number + * if the higher order 8 bits are not zero. Printing + * a floating point number in the kernel is a no-no, + * always print the number as 32 bit unsigned integer. + * The user-space needs to know about the strange + * encoding of the alternate cpu capability. 
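A consumer of /proc/sysinfo therefore has to undo the alternate-capability encoding itself. A minimal sketch of the decode rule just described, using the nine-bit test that s390_adjust_jiffies() applies later in this patch (assumes the raw value is read in the source's big-endian byte order):

	#include <stdint.h>
	#include <string.h>

	/* Plain integer when the high-order bits are zero, otherwise the raw
	 * bit pattern of an IEEE single-precision float. */
	static double decode_capability(uint32_t raw)
	{
		float f;

		if ((raw >> 23) == 0)		/* top nine bits zero: integer */
			return (double)raw;
		memcpy(&f, &raw, sizeof(f));	/* reinterpret, no aliasing tricks */
		return (double)f;
	}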
+ */ + len += sprintf(page + len, "Capability: %u %u\n", + info->capability, ext->alt_capability); + for (i = 2; i <= info->cpus_total; i++) + len += sprintf(page + len, + "Adjustment %02d-way: %u %u\n", + i, info->adjustment[i-2], + ext->alt_adjustment[i-2]); + + } else { + len += sprintf(page + len, "Capability: %u\n", + info->capability); + for (i = 2; i <= info->cpus_total; i++) + len += sprintf(page + len, + "Adjustment %02d-way: %u\n", + i, info->adjustment[i-2]); + } + + if (info->secondary_capability != 0) + len += sprintf(page + len, "Secondary Capability: %d\n", + info->secondary_capability); + return len; +} + +static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len) +{ + if (stsi(info, 2, 2, 2) == -ENOSYS) + return len; + + EBCASC(info->name, sizeof(info->name)); + + len += sprintf(page + len, "\n"); + len += sprintf(page + len, "LPAR Number: %d\n", + info->lpar_number); + + len += sprintf(page + len, "LPAR Characteristics: "); + if (info->characteristics & LPAR_CHAR_DEDICATED) + len += sprintf(page + len, "Dedicated "); + if (info->characteristics & LPAR_CHAR_SHARED) + len += sprintf(page + len, "Shared "); + if (info->characteristics & LPAR_CHAR_LIMITED) + len += sprintf(page + len, "Limited "); + len += sprintf(page + len, "\n"); + + len += sprintf(page + len, "LPAR Name: %-8.8s\n", + info->name); + + len += sprintf(page + len, "LPAR Adjustment: %d\n", + info->caf); + + len += sprintf(page + len, "LPAR CPUs Total: %d\n", + info->cpus_total); + len += sprintf(page + len, "LPAR CPUs Configured: %d\n", + info->cpus_configured); + len += sprintf(page + len, "LPAR CPUs Standby: %d\n", + info->cpus_standby); + len += sprintf(page + len, "LPAR CPUs Reserved: %d\n", + info->cpus_reserved); + len += sprintf(page + len, "LPAR CPUs Dedicated: %d\n", + info->cpus_dedicated); + len += sprintf(page + len, "LPAR CPUs Shared: %d\n", + info->cpus_shared); + return len; +} + +static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len) +{ + int i; + + if (stsi(info, 3, 2, 2) == -ENOSYS) + return len; + for (i = 0; i < info->count; i++) { + EBCASC(info->vm[i].name, sizeof(info->vm[i].name)); + EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi)); + len += sprintf(page + len, "\n"); + len += sprintf(page + len, "VM%02d Name: %-8.8s\n", + i, info->vm[i].name); + len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n", + i, info->vm[i].cpi); + + len += sprintf(page + len, "VM%02d Adjustment: %d\n", + i, info->vm[i].caf); + + len += sprintf(page + len, "VM%02d CPUs Total: %d\n", + i, info->vm[i].cpus_total); + len += sprintf(page + len, "VM%02d CPUs Configured: %d\n", + i, info->vm[i].cpus_configured); + len += sprintf(page + len, "VM%02d CPUs Standby: %d\n", + i, info->vm[i].cpus_standby); + len += sprintf(page + len, "VM%02d CPUs Reserved: %d\n", + i, info->vm[i].cpus_reserved); + } + return len; +} + +static int proc_read_sysinfo(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + unsigned long info = get_zeroed_page(GFP_KERNEL); + int level, len; + + if (!info) + return 0; + + len = 0; + level = stsi_0(); + if (level >= 1) + len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len); + + if (level >= 1) + len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len); + + if (level >= 2) + len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len); + + if (level >= 3) + len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len); + + free_page(info); + return len; +} + +static __init int create_proc_sysinfo(void) +{ + 
create_proc_read_entry("sysinfo", 0444, NULL, + proc_read_sysinfo, NULL); + return 0; +} +device_initcall(create_proc_sysinfo); + +/* + * Service levels interface. + */ + +static DECLARE_RWSEM(service_level_sem); +static LIST_HEAD(service_level_list); + +int register_service_level(struct service_level *slr) +{ + struct service_level *ptr; + + down_write(&service_level_sem); + list_for_each_entry(ptr, &service_level_list, list) + if (ptr == slr) { + up_write(&service_level_sem); + return -EEXIST; + } + list_add_tail(&slr->list, &service_level_list); + up_write(&service_level_sem); + return 0; +} +EXPORT_SYMBOL(register_service_level); + +int unregister_service_level(struct service_level *slr) +{ + struct service_level *ptr, *next; + int rc = -ENOENT; + + down_write(&service_level_sem); + list_for_each_entry_safe(ptr, next, &service_level_list, list) { + if (ptr != slr) + continue; + list_del(&ptr->list); + rc = 0; + break; + } + up_write(&service_level_sem); + return rc; +} +EXPORT_SYMBOL(unregister_service_level); + +static void *service_level_start(struct seq_file *m, loff_t *pos) +{ + down_read(&service_level_sem); + return seq_list_start(&service_level_list, *pos); +} + +static void *service_level_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &service_level_list, pos); +} + +static void service_level_stop(struct seq_file *m, void *p) +{ + up_read(&service_level_sem); +} + +static int service_level_show(struct seq_file *m, void *p) +{ + struct service_level *slr; + + slr = list_entry(p, struct service_level, list); + slr->seq_print(m, slr); + return 0; +} + +static const struct seq_operations service_level_seq_ops = { + .start = service_level_start, + .next = service_level_next, + .stop = service_level_stop, + .show = service_level_show +}; + +static int service_level_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &service_level_seq_ops); +} + +static const struct file_operations service_level_ops = { + .open = service_level_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void service_level_vm_print(struct seq_file *m, + struct service_level *slr) +{ + char *query_buffer, *str; + + query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA); + if (!query_buffer) + return; + cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL); + str = strchr(query_buffer, '\n'); + if (str) + *str = 0; + seq_printf(m, "VM: %s\n", query_buffer); + kfree(query_buffer); +} + +static struct service_level service_level_vm = { + .seq_print = service_level_vm_print +}; + +static __init int create_proc_service_level(void) +{ + proc_create("service_levels", 0, NULL, &service_level_ops); + if (MACHINE_IS_VM) + register_service_level(&service_level_vm); + return 0; +} +subsys_initcall(create_proc_service_level); + +/* + * Bogomips calculation based on cpu capability. + */ +int get_cpu_capability(unsigned int *capability) +{ + struct sysinfo_1_2_2 *info; + int rc; + + info = (void *) get_zeroed_page(GFP_KERNEL); + if (!info) + return -ENOMEM; + rc = stsi(info, 1, 2, 2); + if (rc == -ENOSYS) + goto out; + rc = 0; + *capability = info->capability; +out: + free_page((unsigned long) info); + return rc; +} + +/* + * CPU capability might have changed. Therefore recalculate loops_per_jiffy. + */ +void s390_adjust_jiffies(void) +{ + struct sysinfo_1_2_2 *info; + const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. 
*/ + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + unsigned int capability; + + info = (void *) get_zeroed_page(GFP_KERNEL); + if (!info) + return; + + if (stsi(info, 1, 2, 2) != -ENOSYS) { + /* + * Major sigh. The cpu capability encoding is "special". + * If the first 9 bits of info->capability are 0 then it + * is a 32 bit unsigned integer in the range 0 .. 2^23. + * If the first 9 bits are != 0 then it is a 32 bit float. + * In addition a lower value indicates a proportionally + * higher cpu capacity. Bogomips are the other way round. + * To get to a halfway suitable number we divide 1e7 + * by the cpu capability number. Yes, that means a floating + * point division .. math-emu here we come :-) + */ + FP_UNPACK_SP(SA, &fmil); + if ((info->capability >> 23) == 0) + FP_FROM_INT_S(SB, info->capability, 32, int); + else + FP_UNPACK_SP(SB, &info->capability); + FP_DIV_S(SR, SA, SB); + FP_TO_INT_S(capability, SR, 32, 0); + } else + /* + * Really old machine without stsi block for basic + * cpu information. Report 42.0 bogomips. + */ + capability = 42; + loops_per_jiffy = capability * (500000/HZ); + free_page((unsigned long) info); +} + +/* + * calibrate the delay loop + */ +void __cpuinit calibrate_delay(void) +{ + s390_adjust_jiffies(); + /* Print the good old Bogomips line .. */ + printk(KERN_DEBUG "Calibrating delay loop (skipped)... " + "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); +} diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index fc468cae4460..f72d41068dc2 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -331,6 +331,7 @@ static unsigned long long adjust_time(unsigned long long old, } static DEFINE_PER_CPU(atomic_t, clock_sync_word); +static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; #define CLOCK_SYNC_HAS_ETR 0 @@ -394,6 +395,20 @@ static void enable_sync_clock(void) atomic_set_mask(0x80000000, sw_ptr); } +/* + * Function to check if the clock is in sync. + */ +static inline int check_sync_clock(void) +{ + atomic_t *sw_ptr; + int rc; + + sw_ptr = &get_cpu_var(clock_sync_word); + rc = (atomic_read(sw_ptr) & 0x80000000U) != 0; + put_cpu_var(clock_sync_sync); + return rc; +} + /* Single threaded workqueue used for etr and stp sync events */ static struct workqueue_struct *time_sync_wq; @@ -485,6 +500,8 @@ static void etr_reset(void) if (etr_setr(&etr_eacr) == 0) { etr_tolec = get_clock(); set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); } else if (etr_port0_online || etr_port1_online) { pr_warning("The real or virtual hardware system does " "not provide an ETR interface\n"); @@ -533,8 +550,7 @@ void etr_switch_to_local(void) { if (!etr_eacr.sl) return; - if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) - disable_sync_clock(NULL); + disable_sync_clock(NULL); set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events); queue_work(time_sync_wq, &etr_work); } @@ -549,8 +565,7 @@ void etr_sync_check(void) { if (!etr_eacr.es) return; - if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) - disable_sync_clock(NULL); + disable_sync_clock(NULL); set_bit(ETR_EVENT_SYNC_CHECK, &etr_events); queue_work(time_sync_wq, &etr_work); } @@ -914,7 +929,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib, * Do not try to get the alternate port aib if the clock * is not in sync yet. 
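check_sync_clock() above collapses the earlier CLOCK_SYNC_ETR/CLOCK_SYNC_STP flag juggling into one question: is the high bit of this CPU's clock_sync_word set? The bit protocol in isolation (a plain integer standing in for the per-cpu atomic_t):

	#define CLOCK_IN_SYNC	0x80000000U	/* high bit of clock_sync_word */

	/* enable_sync_clock() sets the bit, disable_sync_clock() clears it;
	 * check_sync_clock() merely tests it. */
	static unsigned int sync_set(unsigned int word)
	{
		return word | CLOCK_IN_SYNC;
	}

	static unsigned int sync_clear(unsigned int word)
	{
		return word & ~CLOCK_IN_SYNC;
	}

	static int clock_is_in_sync(unsigned int word)
	{
		return (word & CLOCK_IN_SYNC) != 0;
	}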
*/ - if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags) && !eacr.es) + if (!check_sync_clock()) return eacr; /* @@ -997,7 +1012,6 @@ static void etr_work_fn(struct work_struct *work) on_each_cpu(disable_sync_clock, NULL, 1); del_timer_sync(&etr_timer); etr_update_eacr(eacr); - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); goto out_unlock; } @@ -1071,18 +1085,13 @@ static void etr_work_fn(struct work_struct *work) /* Both ports not usable. */ eacr.es = eacr.sl = 0; sync_port = -1; - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); } - if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) - eacr.es = 0; - /* * If the clock is in sync just update the eacr and return. * If there is no valid sync port wait for a port update. */ - if (test_bit(CLOCK_SYNC_STP, &clock_sync_flags) || - eacr.es || sync_port < 0) { + if (check_sync_clock() || sync_port < 0) { etr_update_eacr(eacr); etr_set_tolec_timeout(now); goto out_unlock; @@ -1103,13 +1112,11 @@ static void etr_work_fn(struct work_struct *work) * and set up a timer to try again after 0.5 seconds */ etr_update_eacr(eacr); - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); if (now < etr_tolec + (1600000 << 12) || etr_sync_clock_stop(&aib, sync_port) != 0) { /* Sync failed. Try again in 1/2 second. */ eacr.es = 0; etr_update_eacr(eacr); - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); etr_set_sync_timeout(); } else etr_set_tolec_timeout(now); @@ -1191,19 +1198,30 @@ static ssize_t etr_online_store(struct sys_device *dev, return -EINVAL; if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) return -EOPNOTSUPP; + mutex_lock(&clock_sync_mutex); if (dev == &etr_port0_dev) { if (etr_port0_online == value) - return count; /* Nothing to do. */ + goto out; /* Nothing to do. */ etr_port0_online = value; + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); queue_work(time_sync_wq, &etr_work); } else { if (etr_port1_online == value) - return count; /* Nothing to do. */ + goto out; /* Nothing to do. 
*/ etr_port1_online = value; + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); queue_work(time_sync_wq, &etr_work); } +out: + mutex_unlock(&clock_sync_mutex); return count; } @@ -1471,8 +1489,6 @@ static void stp_timing_alert(struct stp_irq_parm *intparm) */ void stp_sync_check(void) { - if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) - return; disable_sync_clock(NULL); queue_work(time_sync_wq, &stp_work); } @@ -1485,8 +1501,6 @@ void stp_sync_check(void) */ void stp_island_check(void) { - if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) - return; disable_sync_clock(NULL); queue_work(time_sync_wq, &stp_work); } @@ -1513,10 +1527,6 @@ static int stp_sync_clock(void *data) enable_sync_clock(); - set_bit(CLOCK_SYNC_STP, &clock_sync_flags); - if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) - queue_work(time_sync_wq, &etr_work); - rc = 0; if (stp_info.todoff[0] || stp_info.todoff[1] || stp_info.todoff[2] || stp_info.todoff[3] || @@ -1535,9 +1545,6 @@ static int stp_sync_clock(void *data) if (rc) { disable_sync_clock(NULL); stp_sync->in_sync = -EAGAIN; - clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); - if (etr_port0_online || etr_port1_online) - queue_work(time_sync_wq, &etr_work); } else stp_sync->in_sync = 1; xchg(&first, 0); @@ -1569,6 +1576,10 @@ static void stp_work_fn(struct work_struct *work) if (rc || stp_info.c == 0) goto out_unlock; + /* Skip synchronization if the clock is already in sync. */ + if (check_sync_clock()) + goto out_unlock; + memset(&stp_sync, 0, sizeof(stp_sync)); get_online_cpus(); atomic_set(&stp_sync.cpus, num_online_cpus() - 1); @@ -1684,8 +1695,14 @@ static ssize_t stp_online_store(struct sysdev_class *class, return -EINVAL; if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return -EOPNOTSUPP; + mutex_lock(&clock_sync_mutex); stp_online = value; + if (stp_online) + set_bit(CLOCK_SYNC_STP, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); queue_work(time_sync_wq, &stp_work); + mutex_unlock(&clock_sync_mutex); return count; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index cc362c9ea8f1..3c72c9cf22b6 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(topology_lock); cpumask_t cpu_core_map[NR_CPUS]; -cpumask_t cpu_coregroup_map(unsigned int cpu) +static cpumask_t cpu_coregroup_map(unsigned int cpu) { struct core_info *core = &core_info; unsigned long flags; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 4584d81984c0..c2e42cc65ce7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -61,9 +61,11 @@ extern pgm_check_handler_t do_asce_exception; #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) #ifndef CONFIG_64BIT +#define LONG "%08lx " #define FOURLONG "%08lx %08lx %08lx %08lx\n" static int kstack_depth_to_print = 12; #else /* CONFIG_64BIT */ +#define LONG "%016lx " #define FOURLONG "%016lx %016lx %016lx %016lx\n" static int kstack_depth_to_print = 20; #endif /* CONFIG_64BIT */ @@ -155,7 +157,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) break; if (i && ((i * sizeof (long) % 32) == 0)) printk("\n "); - printk("%p ", (void *)*stack++); + printk(LONG, *stack++); } printk("\n"); show_trace(task, sp); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 690e17819686..89b2e7f1b7a9 100644 --- 
a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -144,7 +144,6 @@ out: return -ENOMEM; } -#ifdef CONFIG_HOTPLUG_CPU void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; @@ -163,7 +162,6 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) free_page(page_table); free_pages(segment_table, SEGMENT_ORDER); } -#endif /* CONFIG_HOTPLUG_CPU */ static void __vdso_init_cr5(void *dummy) { diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index d796d05c9c01..7a2063eb88f0 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -108,6 +108,8 @@ SECTIONS EXIT_TEXT } + /* early.c uses stsi, which requires page aligned data. */ + . = ALIGN(PAGE_SIZE); .init.data : { INIT_DATA } diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index e051cad1f1e0..3e260b7e37b2 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -4,6 +4,9 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + menuconfig VIRTUALIZATION bool "Virtualization" default y diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 61236102203e..9d19803111ba 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -103,7 +103,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu) static intercept_handler_t instruction_handlers[256] = { [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, - [0xb2] = kvm_s390_handle_priv, + [0xb2] = kvm_s390_handle_b2, [0xb7] = handle_lctl, [0xeb] = handle_lctlg, }; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f4fe28a2521a..0189356fe209 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -555,9 +555,14 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", s390int->parm); break; + case KVM_S390_SIGP_SET_PREFIX: + inti->prefix.address = s390int->parm; + inti->type = s390int->type; + VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", + s390int->parm); + break; case KVM_S390_SIGP_STOP: case KVM_S390_RESTART: - case KVM_S390_SIGP_SET_PREFIX: case KVM_S390_INT_EMERGENCY: VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); inti->type = s390int->type; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0d33893e1e89..f4d56e9939c9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -23,7 +23,7 @@ #include <linux/timer.h> #include <asm/lowcore.h> #include <asm/pgtable.h> - +#include <asm/nmi.h> #include "kvm-s390.h" #include "gaccess.h" @@ -286,7 +286,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, (unsigned long) vcpu); get_cpu_id(&vcpu->arch.cpu_id); - vcpu->arch.cpu_id.version = 0xfe; + vcpu->arch.cpu_id.version = 0xff; return 0; } @@ -422,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { return -EINVAL; /* not implemented yet */ } @@ -440,8 +440,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } -extern void s390_handle_mcck(void); - static void __vcpu_run(struct kvm_vcpu *vcpu) { memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h 
index 3893cf12eacf..00bbe69b78da 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -50,7 +50,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); /* implemented in priv.c */ -int kvm_s390_handle_priv(struct kvm_vcpu *vcpu); +int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 3605df45dd41..4b88834b8dd8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -304,12 +304,24 @@ static intercept_handler_t priv_handlers[256] = { [0xb1] = handle_stfl, }; -int kvm_s390_handle_priv(struct kvm_vcpu *vcpu) +int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) { intercept_handler_t handler; + /* + * A lot of B2 instructions are privileged. We first check for + * the privileged ones that we can handle in the kernel. If the + * kernel can handle this instruction, we check for the problem + * state bit and (a) handle the instruction or (b) send a code 2 + * program check. + * Anything else goes to userspace. */ handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) - return handler(vcpu); + if (handler) { + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + else + return handler(vcpu); + } return -ENOTSUPP; } diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 2a01b9e02801..f27dbedf0866 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -153,8 +153,6 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) switch (parameter & 0xff) { case 0: - printk(KERN_WARNING "kvm: request to switch to ESA/390 mode" " not supported"); rc = 3; /* not operational */ break; case 1: diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 6ccb9fab055a..3f5f680726ed 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/delay.h> #include <linux/timex.h> +#include <linux/module.h> #include <linux/irqflags.h> #include <linux/interrupt.h> @@ -92,6 +93,7 @@ out: local_irq_restore(flags); preempt_enable(); } +EXPORT_SYMBOL(__udelay); /* * Simple udelay variant. To be used on startup and reboot diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index ae5cf5d03d41..4143b7c19096 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -44,7 +44,11 @@ static inline char *__strnend(const char *s, size_t n) */ size_t strlen(const char *s) { +#if __GNUC__ < 4 return __strend(s) - s; +#else + return __builtin_strlen(s); +#endif } EXPORT_SYMBOL(strlen); @@ -70,6 +74,7 @@ EXPORT_SYMBOL(strnlen); */ char *strcpy(char *dest, const char *src) { +#if __GNUC__ < 4 register int r0 asm("0") = 0; char *ret = dest; @@ -78,6 +83,9 @@ char *strcpy(char *dest, const char *src) : "+&a" (dest), "+&a" (src) : "d" (r0) : "cc", "memory" ); return ret; +#else + return __builtin_strcpy(dest, src); +#endif } EXPORT_SYMBOL(strcpy); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 4d537205e83c..833e8366c351 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -200,29 +200,6 @@ static void do_low_address(struct pt_regs *regs, unsigned long error_code) do_no_context(regs, error_code, 0); } -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully.
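The fault.c hunks that follow delete this per-architecture fallback (retry for init, kill everything else) and defer to the generic OOM path instead. A minimal sketch of the resulting arch-side pattern, mirroring the new s390 code below; pagefault_out_of_memory() is the generic helper in mm/oom_kill.c of this kernel:

    fault = handle_mm_fault(mm, vma, address, write);
    if (unlikely(fault & VM_FAULT_ERROR)) {
            if (fault & VM_FAULT_OOM) {
                    /* Never enter OOM handling with mmap_sem held. */
                    up_read(&mm->mmap_sem);
                    /* Generic policy: OOM-kill somebody or wait. */
                    pagefault_out_of_memory();
                    return;
            }
            if (fault & VM_FAULT_SIGBUS) {
                    do_sigbus(regs, error_code, address);
                    return;
            }
            BUG();
    }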
- */ -static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - - up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - return 1; - } - printk("VM: killing process %s\n", tsk->comm); - if (regs->psw.mask & PSW_MASK_PSTATE) - do_group_exit(SIGKILL); - do_no_context(regs, error_code, address); - return 0; -} - static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -367,7 +344,6 @@ good_area: goto bad_area; } -survive: if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; /* @@ -378,8 +354,8 @@ survive: fault = handle_mm_fault(mm, vma, address, write); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { - if (do_out_of_memory(regs, error_code, address)) - goto survive; + up_read(&mm->mmap_sem); + pagefault_out_of_memory(); return; } else if (fault & VM_FAULT_SIGBUS) { do_sigbus(regs, error_code, address); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f0258ca3b17e..c634dfbe92e9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -40,7 +40,9 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); + char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); +EXPORT_SYMBOL(empty_zero_page); /* * paging_init() sets up the page tables diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 6b6ddc4ea02b..be6c1cf4ad5a 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -258,6 +258,10 @@ int s390_enable_sie(void) struct task_struct *tsk = current; struct mm_struct *mm, *old_mm; + /* Do we have switched amode? If not, we cannot do SIE */ + if (!switch_amode) + return -EINVAL; + /* Do we have pgstes?
if yes, we are done */ if (tsk->mm->context.has_pgste) return 0; @@ -292,7 +296,7 @@ int s390_enable_sie(void) tsk->mm = tsk->active_mm = mm; preempt_disable(); update_mm(mm, tsk); - cpu_set(smp_processor_id(), mm->cpu_vm_mask); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); preempt_enable(); task_unlock(tsk); mmput(old_mm); diff --git a/arch/sh/include/asm/socket.h b/arch/sh/include/asm/socket.h index 6d4bf6512959..345653b96826 100644 --- a/arch/sh/include/asm/socket.h +++ b/arch/sh/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* __ASM_SH_SOCKET_H */ diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 90d63aefd275..3f1372eb0091 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v) goto unlock; seq_printf(p, "%3d: ",i); for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); seq_printf(p, " %14s", irq_desc[i].chip->name); seq_printf(p, "-%-8s", irq_desc[i].name); seq_printf(p, " %s", action->name); diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index bf50d0c2d583..982a12f959f4 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -50,6 +50,9 @@ #define SO_MARK 0x0022 +#define SO_TIMESTAMPING 0x0023 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index ec81cdedef2c..ee38e731bfa6 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -57,6 +57,8 @@ static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned i static inline void tlb_flush_mmu(struct mmu_gather *mp) { + if (!mp->fullmm) + flush_tlb_pending(); if (mp->need_flush) { free_pages_and_swap_cache(mp->pages, mp->pages_nr); mp->pages_nr = 0; @@ -78,8 +80,6 @@ static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, un if (mp->fullmm) mp->fullmm = 0; - else - flush_tlb_pending(); /* keep the page table cache within bounds */ check_pgt_cache(); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 1c378d8e90c5..8ba064f08a6f 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -185,7 +185,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %9s", irq_desc[i].chip->typename); seq_printf(p, " %s", action->name); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 6cd1a5b65067..79457f682b5a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1031,7 +1031,7 @@ void smp_fetch_global_regs(void) * If the address space is non-shared (ie. mm->count == 1) we avoid * cross calls when we want to flush the currently running process's * tlb state. This is done by clearing all cpu bits except the current - * processor's in current->active_mm->cpu_vm_mask and performing the + * processor's in current->mm->cpu_vm_mask and performing the * flush locally only. 
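On the userspace side, the SO_TIMESTAMPING/SCM_TIMESTAMPING values added to the per-arch socket.h files above are driven roughly as follows. A sketch, assuming the SOF_TIMESTAMPING_* flags and the three-timespec SCM_TIMESTAMPING payload (software, hw converted, raw hardware) that linux/net_tstamp.h defines in this series:

    #include <stdio.h>
    #include <time.h>
    #include <sys/socket.h>
    #include <linux/net_tstamp.h>   /* SOF_TIMESTAMPING_* (new in this series) */

    #ifndef SO_TIMESTAMPING
    #define SO_TIMESTAMPING 37      /* value used by most arches above; sparc differs */
    #endif

    static int enable_rx_timestamps(int sock)
    {
            int val = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;

            return setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
    }

    static void print_timestamp(struct msghdr *msg)  /* call after recvmsg() */
    {
            struct cmsghdr *cmsg;

            for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
                    if (cmsg->cmsg_level == SOL_SOCKET &&
                        cmsg->cmsg_type == SCM_TIMESTAMPING) {
                            /* Payload: ts[0] software, ts[1] hw converted, ts[2] raw hw. */
                            struct timespec *ts = (struct timespec *)CMSG_DATA(cmsg);
                            printf("sw rx stamp: %ld.%09ld\n",
                                   (long)ts[0].tv_sec, ts[0].tv_nsec);
                    }
            }
    }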
This will force any subsequent cpus which run * this task to flush the context from the local tlb if the process * migrates to another cpu (again). @@ -1074,7 +1074,7 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long u32 ctx = CTX_HWBITS(mm->context); int cpu = get_cpu(); - if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) + if (mm == current->mm && atomic_read(&mm->mm_users) == 1) mm->cpu_vm_mask = cpumask_of_cpu(cpu); else smp_cross_call_masked(&xcall_flush_tlb_pending, diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index f93c42a2b522..a8000b1cda74 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -51,7 +51,7 @@ sys_call_table32: /*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 .word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount /*160*/ .word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys32_getdomainname, sys32_setdomainname, sys_nis_syscall - .word sys_quotactl, sys_set_tid_address, compat_sys_mount, sys_ustat, sys32_setxattr + .word sys_quotactl, sys_set_tid_address, compat_sys_mount, compat_sys_ustat, sys32_setxattr /*170*/ .word sys32_lsetxattr, sys32_fsetxattr, sys_getxattr, sys_lgetxattr, compat_sys_getdents .word sys_setsid, sys_fchdir, sys32_fgetxattr, sys_listxattr, sys_llistxattr /*180*/ .word sys32_flistxattr, sys_removexattr, sys_lremovexattr, compat_sys_sigpending, sys_ni_syscall diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 2db3c2229b95..4ee2e48c4b39 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -36,10 +36,10 @@ #include <linux/clocksource.h> #include <linux/of_device.h> #include <linux/platform_device.h> +#include <linux/irq.h> #include <asm/oplib.h> #include <asm/timer.h> -#include <asm/irq.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/starfire.h> @@ -724,12 +724,14 @@ void timer_interrupt(int irq, struct pt_regs *regs) unsigned long tick_mask = tick_ops->softint_mask; int cpu = smp_processor_id(); struct clock_event_device *evt = &per_cpu(sparc64_events, cpu); + struct irq_desc *desc; clear_softint(tick_mask); irq_enter(); - kstat_this_cpu.irqs[0]++; + desc = irq_to_desc(0); + kstat_incr_irqs_this_cpu(0, desc); if (unlikely(!evt->event_handler)) { printk(KERN_WARNING diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index fde510b664d3..434224e2229f 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -86,7 +86,7 @@ static int uml_net_rx(struct net_device *dev) drop_skb->dev = dev; /* Read a packet into drop_skb and don't do anything with it. 
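The net_kern.c hunk that this comment opens converts the driver from a private struct net_device_stats (the field also leaves uml_net_private below) to the generic dev->stats, after which uml_net_get_stats can simply be deleted: when a driver registers no get_stats hook, the core hands back &dev->stats itself. The pattern, sketched for a hypothetical driver (demo_xmit is not from this patch):

    /* Counters live in the net_device itself; no private copy, no get_stats. */
    static int demo_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            /* ... hand skb to the transport ... */
            dev->stats.tx_packets++;
            dev->stats.tx_bytes += skb->len;
            dev_kfree_skb(skb);
            return 0;
    }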
*/ (*lp->read)(lp->fd, drop_skb, lp); - lp->stats.rx_dropped++; + dev->stats.rx_dropped++; return 0; } @@ -99,8 +99,8 @@ static int uml_net_rx(struct net_device *dev) skb_trim(skb, pkt_len); skb->protocol = (*lp->protocol)(skb); - lp->stats.rx_bytes += skb->len; - lp->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; netif_rx(skb); return pkt_len; } @@ -224,8 +224,8 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) len = (*lp->write)(lp->fd, skb, lp); if (len == skb->len) { - lp->stats.tx_packets++; - lp->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; dev->trans_start = jiffies; netif_start_queue(dev); @@ -234,7 +234,7 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) } else if (len == 0) { netif_start_queue(dev); - lp->stats.tx_dropped++; + dev->stats.tx_dropped++; } else { netif_start_queue(dev); @@ -248,12 +248,6 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static struct net_device_stats *uml_net_get_stats(struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - return &lp->stats; -} - static void uml_net_set_multicast_list(struct net_device *dev) { return; @@ -377,6 +371,17 @@ static void net_device_release(struct device *dev) free_netdev(netdev); } +static const struct net_device_ops uml_netdev_ops = { + .ndo_open = uml_net_open, + .ndo_stop = uml_net_close, + .ndo_start_xmit = uml_net_start_xmit, + .ndo_set_multicast_list = uml_net_set_multicast_list, + .ndo_tx_timeout = uml_net_tx_timeout, + .ndo_set_mac_address = uml_net_set_mac, + .ndo_change_mtu = uml_net_change_mtu, + .ndo_validate_addr = eth_validate_addr, +}; + /* * Ensures that platform_driver_register is called only once by * eth_configure. Will be set in an initcall.
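The uml_netdev_ops table above, together with the eth_configure hunk below, is the standard net_device_ops conversion: the per-device function pointers move off struct net_device into one shared const ops table. Sketched with hypothetical handlers (demo_* are not from this patch):

    static const struct net_device_ops demo_netdev_ops = {
            .ndo_open       = demo_open,
            .ndo_stop       = demo_close,
            .ndo_start_xmit = demo_xmit,
    };

    /* In the probe path, instead of dev->open = demo_open; etc.: */
    dev->netdev_ops = &demo_netdev_ops;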
@@ -473,14 +479,7 @@ static void eth_configure(int n, void *init, char *mac, set_ether_mac(dev, device->mac); dev->mtu = transport->user->mtu; - dev->open = uml_net_open; - dev->hard_start_xmit = uml_net_start_xmit; - dev->stop = uml_net_close; - dev->get_stats = uml_net_get_stats; - dev->set_multicast_list = uml_net_set_multicast_list; - dev->tx_timeout = uml_net_tx_timeout; - dev->set_mac_address = uml_net_set_mac; - dev->change_mtu = uml_net_change_mtu; + dev->netdev_ops = &uml_netdev_ops; dev->ethtool_ops = &uml_net_ethtool_ops; dev->watchdog_timeo = (HZ >> 1); dev->irq = UM_ETH_IRQ; diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h index d843c7924a7c..5c367f22595b 100644 --- a/arch/um/include/shared/net_kern.h +++ b/arch/um/include/shared/net_kern.h @@ -26,7 +26,7 @@ struct uml_net_private { spinlock_t lock; struct net_device *dev; struct timer_list tl; - struct net_device_stats stats; + struct work_struct work; int fd; unsigned char mac[ETH_ALEN]; diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 3d7aad09b171..336b61569072 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -42,7 +42,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %14s", irq_desc[i].chip->typename); seq_printf(p, " %s", action->name); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bc2fbadff9f9..3a330a437c6f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -165,6 +165,9 @@ config GENERIC_HARDIRQS bool default y +config GENERIC_HARDIRQS_NO__DO_IRQ + def_bool y + config GENERIC_IRQ_PROBE bool default y diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 5a0d76dc56a4..8ef8876666b2 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -557,7 +557,7 @@ ia32_sys_call_table: .quad sys32_olduname .quad sys_umask /* 60 */ .quad sys_chroot - .quad sys32_ustat + .quad compat_sys_ustat .quad sys_dup2 .quad sys_getppid .quad sys_getpgrp /* 65 */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 6c0d7f6231af..efac92fd1efb 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -638,28 +638,6 @@ long sys32_uname(struct old_utsname __user *name) return err ?
-EFAULT : 0; } -long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) -{ - struct ustat u; - mm_segment_t seg; - int ret; - - seg = get_fs(); - set_fs(KERNEL_DS); - ret = sys_ustat(dev, (struct ustat __user *)&u); - set_fs(seg); - if (ret < 0) - return ret; - - if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) || - __put_user((__u32) u.f_tfree, &u32p->f_tfree) || - __put_user((__u32) u.f_tinode, &u32p->f_tfree) || - __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || - __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) - ret = -EFAULT; - return ret; -} - asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs) { diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 50ca486fd88c..1f7e62517284 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -129,13 +129,6 @@ typedef struct compat_siginfo { } _sifields; } compat_siginfo_t; -struct ustat32 { - __u32 f_tfree; - compat_ino_t f_tinode; - char f_fname[6]; - char f_fpack[6]; -}; - #define IA32_STACK_TOP IA32_PAGE_OFFSET #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 886c9402ec45..dc3f6cf11704 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -15,6 +15,7 @@ #define __KVM_HAVE_DEVICE_ASSIGNMENT #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI +#define __KVM_HAVE_GUEST_DEBUG /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -212,7 +213,30 @@ struct kvm_pit_channel_state { __s64 count_load_time; }; +struct kvm_debug_exit_arch { + __u32 exception; + __u32 pad; + __u64 pc; + __u64 dr6; + __u64 dr7; +}; + +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 +#define KVM_GUESTDBG_USE_HW_BP 0x00020000 +#define KVM_GUESTDBG_INJECT_DB 0x00040000 +#define KVM_GUESTDBG_INJECT_BP 0x00080000 + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { + __u64 debugreg[8]; +}; + struct kvm_pit_state { struct kvm_pit_channel_state channels[3]; }; + +struct kvm_reinject_control { + __u8 pit_reinject; + __u8 reserved[31]; +}; #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 730843d1d2fb..f0faf58044ff 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -22,6 +22,7 @@ #include <asm/pvclock-abi.h> #include <asm/desc.h> #include <asm/mtrr.h> +#include <asm/msr-index.h> #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 @@ -134,11 +135,18 @@ enum { #define KVM_NR_MEM_OBJS 40 -struct kvm_guest_debug { - int enabled; - unsigned long bp[4]; - int singlestep; -}; +#define KVM_NR_DB_REGS 4 + +#define DR6_BD (1 << 13) +#define DR6_BS (1 << 14) +#define DR6_FIXED_1 0xffff0ff0 +#define DR6_VOLATILE 0x0000e00f + +#define DR7_BP_EN_MASK 0x000000ff +#define DR7_GE (1 << 9) +#define DR7_GD (1 << 13) +#define DR7_FIXED_1 0x00000400 +#define DR7_VOLATILE 0xffff23ff /* * We don't want allocation failures within the mmu code, so we preallocate @@ -162,7 +170,8 @@ struct kvm_pte_chain { * bits 0:3 - total guest paging levels (2-4, or zero for real mode) * bits 4:7 - page table level for this shadow (1-4) * bits 8:9 - page table quadrant for 2-level guests - * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) + * bit 16 - direct mapping of virtual to physical mapping at gfn + * used for real mode and two-dimensional paging * bits 17:19 - common access permissions for all ptes in this shadow page */ union kvm_mmu_page_role { 
@@ -172,9 +181,10 @@ union kvm_mmu_page_role { unsigned level:4; unsigned quadrant:2; unsigned pad_for_nice_hex_output:6; - unsigned metaphysical:1; + unsigned direct:1; unsigned access:3; unsigned invalid:1; + unsigned cr4_pge:1; }; }; @@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer { char buf[512] __aligned(sizeof(long)); }; +struct kvm_pio_request { + unsigned long count; + int cur_count; + gva_t guest_gva; + int in; + int port; + int size; + int string; + int down; + int rep; +}; + /* * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level * 32-bit). The kvm_mmu structure abstracts the details of the current mmu @@ -236,6 +258,7 @@ struct kvm_mmu { hpa_t root_hpa; int root_level; int shadow_root_level; + union kvm_mmu_page_role base_role; u64 *pae_root; }; @@ -258,6 +281,7 @@ struct kvm_vcpu_arch { unsigned long cr3; unsigned long cr4; unsigned long cr8; + u32 hflags; u64 pdptrs[4]; /* pae */ u64 shadow_efer; u64 apic_base; @@ -338,6 +362,15 @@ struct kvm_vcpu_arch { struct mtrr_state_type mtrr_state; u32 pat; + + int switch_db_regs; + unsigned long host_db[KVM_NR_DB_REGS]; + unsigned long host_dr6; + unsigned long host_dr7; + unsigned long db[KVM_NR_DB_REGS]; + unsigned long dr6; + unsigned long dr7; + unsigned long eff_db[KVM_NR_DB_REGS]; }; struct kvm_mem_alias { @@ -378,6 +411,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; + u64 vm_init_tsc; }; struct kvm_vm_stat { @@ -446,8 +480,7 @@ struct kvm_x86_ops { void (*vcpu_put)(struct kvm_vcpu *vcpu); int (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg); - void (*guest_debug_pre)(struct kvm_vcpu *vcpu); + struct kvm_guest_debug *dbg); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); @@ -583,16 +616,12 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, u32 error_code); -void kvm_pic_set_irq(void *opaque, int irq, int level); +int kvm_pic_set_irq(void *opaque, int irq, int level); void kvm_inject_nmi(struct kvm_vcpu *vcpu); void fx_init(struct kvm_vcpu *vcpu); -int emulator_read_std(unsigned long addr, - void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu); int emulator_write_emulated(unsigned long addr, const void *val, unsigned int bytes, @@ -737,6 +766,10 @@ enum { TASK_SWITCH_GATE = 3, }; +#define HF_GIF_MASK (1 << 0) +#define HF_HIF_MASK (1 << 1) +#define HF_VINTR_MASK (1 << 2) + /* * Hardware virtualization extension instructions may fault if a * reboot turns off virtualization while processes are running. diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 358acc59ae04..f4e505f286bc 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -18,11 +18,15 @@ #define _EFER_LME 8 /* Long mode enable */ #define _EFER_LMA 10 /* Long mode active (read-only) */ #define _EFER_NX 11 /* No execute enable */ +#define _EFER_SVME 12 /* Enable virtualization */ +#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) #define EFER_LMA (1<<_EFER_LMA) #define EFER_NX (1<<_EFER_NX) +#define EFER_SVME (1<<_EFER_SVME) +#define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. 
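The kvm_x86_ops change above (set_guest_debug now taking the new struct kvm_guest_debug) pairs with the KVM_GUESTDBG_* flags and struct kvm_guest_debug_arch defined earlier in this patch. From a userspace VMM the interface is driven roughly as below; a sketch assuming the KVM_SET_GUEST_DEBUG ioctl and the generic KVM_GUESTDBG_ENABLE flag that accompany this change in linux/kvm.h, with vcpu_fd a hypothetical vcpu file descriptor:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Arm a hardware breakpoint on a guest vcpu. */
    static int arm_hw_breakpoint(int vcpu_fd, unsigned long addr)
    {
            struct kvm_guest_debug dbg;

            memset(&dbg, 0, sizeof(dbg));
            dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
            dbg.arch.debugreg[0] = addr;          /* DR0: breakpoint address */
            dbg.arch.debugreg[7] = (1 << 10) | 1; /* DR7: fixed-1 bit + L0 enable */
            return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }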
Some also available on other CPUs */ #define MSR_IA32_PERFCTR0 0x000000c1 @@ -360,4 +364,9 @@ #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +/* AMD-V MSRs */ + +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_HSAVE_PA 0xc0010117 + #endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h index a8894647dd9a..3ac5032fae09 100644 --- a/arch/x86/include/asm/prctl.h +++ b/arch/x86/include/asm/prctl.h @@ -6,8 +6,4 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 -#ifdef CONFIG_X86_64 -extern long sys_arch_prctl(int, unsigned long); -#endif /* CONFIG_X86_64 */ - #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a3..c2308f5250fd 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -1,27 +1,12 @@ #ifndef _ASM_X86_SETUP_H #define _ASM_X86_SETUP_H +#ifdef __KERNEL__ + #define COMMAND_LINE_SIZE 2048 #ifndef __ASSEMBLY__ -/* Interrupt control for vSMPowered x86_64 systems */ -void vsmp_init(void); - - -void setup_bios_corruption_check(void); - - -#ifdef CONFIG_X86_VISWS -extern void visws_early_detect(void); -extern int is_visws_box(void); -#else -static inline void visws_early_detect(void) { } -static inline int is_visws_box(void) { return 0; } -#endif - -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); -extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); /* * Any setup quirks to be performed? */ @@ -48,16 +33,8 @@ struct x86_quirks { int (*update_genapic)(void); }; -extern struct x86_quirks *x86_quirks; -extern unsigned long saved_video_mode; - -#ifndef CONFIG_PARAVIRT -#define paravirt_post_allocator_init() do {} while (0) -#endif #endif /* __ASSEMBLY__ */ -#ifdef __KERNEL__ - #ifdef __i386__ #include <linux/pfn.h> @@ -78,6 +55,28 @@ extern unsigned long saved_video_mode; #ifndef __ASSEMBLY__ #include <asm/bootparam.h> +/* Interrupt control for vSMPowered x86_64 systems */ +void vsmp_init(void); + +void setup_bios_corruption_check(void); + +#ifdef CONFIG_X86_VISWS +extern void visws_early_detect(void); +extern int is_visws_box(void); +#else +static inline void visws_early_detect(void) { } +static inline int is_visws_box(void) { return 0; } +#endif + +extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); +extern struct x86_quirks *x86_quirks; +extern unsigned long saved_video_mode; + +#ifndef CONFIG_PARAVIRT +#define paravirt_post_allocator_init() do {} while (0) +#endif + #ifndef _SETUP /* diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index 8ab9cc8b2ecc..ca8bf2cd0ba9 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_X86_SOCKET_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 1b8afa78e869..82ada75f3ebf 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_CPUID_FEATURE_SHIFT 2 #define SVM_CPUID_FUNC 0x8000000a -#define MSR_EFER_SVME_MASK (1ULL << 12) -#define MSR_VM_CR 0xc0010114 -#define MSR_VM_HSAVE_PA 0xc0010117ULL - #define SVM_VM_CR_SVM_DISABLE 4 #define SVM_SELECTOR_S_SHIFT 4 diff --git a/arch/x86/include/asm/sys_ia32.h 
b/arch/x86/include/asm/sys_ia32.h index ffb08be2a530..72a6dcd1299b 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -70,8 +70,6 @@ struct old_utsname; asmlinkage long sys32_olduname(struct oldold_utsname __user *); long sys32_uname(struct old_utsname __user *); -long sys32_ustat(unsigned, struct ustat32 __user *); - asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, compat_uptr_t __user *, struct pt_regs *); asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index c0b0bda754ee..e26d34b0bc79 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -74,6 +74,7 @@ asmlinkage long sys_vfork(struct pt_regs *); asmlinkage long sys_execve(char __user *, char __user * __user *, char __user * __user *, struct pt_regs *); +long sys_arch_prctl(int, unsigned long); /* kernel/ioport.c */ asmlinkage long sys_iopl(unsigned int, struct pt_regs *); diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 593636275238..e0f9aa16358b 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void) wrmsrl(MSR_VM_HSAVE_PA, 0); rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); + wrmsrl(MSR_EFER, efer & ~EFER_SVME); } /** Makes sure SVM is disabled, if it is supported on the CPU diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index d0238e6151d8..498f944010b9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -270,8 +270,9 @@ enum vmcs_field { #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ -#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ +#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define GUEST_INTR_STATE_STI 0x00000001 @@ -311,7 +312,7 @@ enum vmcs_field { #define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ #define TYPE_MOV_TO_DR (0 << 4) #define TYPE_MOV_FROM_DR (1 << 4) -#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */ +#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */ /* segment AR */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 24ff26a38ade..5fff00c70de0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -4,6 +4,7 @@ #include <linux/string.h> #include <linux/bitops.h> #include <linux/smp.h> +#include <linux/sched.h> #include <linux/thread_info.h> #include <linux/module.h> @@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate - * with P/T states and does not stop in deep C-states + * with P/T states and does not stop in deep C-states. + * + * It is also reliable across cores and sockets. (but not across + * cabinets - we turn it off in that case explicitly.) 
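The early_init_intel hunk around this comment keys off CPUID leaf 0x80000007, EDX bit 8 (invariant TSC): the c->x86_power & (1 << 8) test just below. For reference, the same bit can be probed from userspace; a small self-contained check using GCC's cpuid.h:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* Leaf 0x80000007, EDX bit 8: TSC runs at a constant rate
             * and does not stop in deep C-states (invariant TSC). */
            if (__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx) && (edx & (1 << 8)))
                    puts("invariant TSC");
            else
                    puts("TSC varies with P/C-states");
            return 0;
    }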
*/ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); + sched_clock_stable = 1; } } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a00545fe5cdd..648b3a2a3a44 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -80,6 +80,7 @@ static inline void hpet_clear_mapping(void) */ static int boot_hpet_disable; int hpet_force_user; +static int hpet_verbose; static int __init hpet_setup(char *str) { @@ -88,6 +89,8 @@ static int __init hpet_setup(char *str) boot_hpet_disable = 1; if (!strncmp("force", str, 5)) hpet_force_user = 1; + if (!strncmp("verbose", str, 7)) + hpet_verbose = 1; } return 1; } @@ -119,6 +122,43 @@ int is_hpet_enabled(void) } EXPORT_SYMBOL_GPL(is_hpet_enabled); +static void _hpet_print_config(const char *function, int line) +{ + u32 i, timers, l, h; + printk(KERN_INFO "hpet: %s(%d):\n", function, line); + l = hpet_readl(HPET_ID); + h = hpet_readl(HPET_PERIOD); + timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; + printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h); + l = hpet_readl(HPET_CFG); + h = hpet_readl(HPET_STATUS); + printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h); + l = hpet_readl(HPET_COUNTER); + h = hpet_readl(HPET_COUNTER+4); + printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); + + for (i = 0; i < timers; i++) { + l = hpet_readl(HPET_Tn_CFG(i)); + h = hpet_readl(HPET_Tn_CFG(i)+4); + printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", + i, l, h); + l = hpet_readl(HPET_Tn_CMP(i)); + h = hpet_readl(HPET_Tn_CMP(i)+4); + printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", + i, l, h); + l = hpet_readl(HPET_Tn_ROUTE(i)); + h = hpet_readl(HPET_Tn_ROUTE(i)+4); + printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", + i, l, h); + } +} + +#define hpet_print_config() \ +do { \ + if (hpet_verbose) \ + _hpet_print_config(__FUNCTION__, __LINE__); \ +} while (0) + /* * When the hpet driver (/dev/hpet) is enabled, we need to reserve * timer 0 and timer 1 in case of RTC emulation. 
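The hpet_set_mode rework in the next hunks drops the old comparator setup, which relied on the quirky "first write after TN_SETVAL sets the count, second write sets the period" behaviour plus a udelay(1), in favour of an explicit stop/program/start of the main counter. The new periodic programming, distilled from the hunk below (cfg, delta and timer are locals of hpet_set_mode):

    hpet_stop_counter();                    /* halt and zero the main counter */
    cfg = hpet_readl(HPET_Tn_CFG(timer));
    cfg &= ~HPET_TN_LEVEL;                  /* edge triggered interrupts */
    cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT;
    hpet_writel(cfg, HPET_Tn_CFG(timer));
    hpet_writel((unsigned long)delta, HPET_Tn_CMP(timer));
    hpet_start_counter();                   /* counter restarts from 0 */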
@@ -191,27 +231,37 @@ static struct clock_event_device hpet_clockevent = { .rating = 50, }; -static void hpet_start_counter(void) +static void hpet_stop_counter(void) { unsigned long cfg = hpet_readl(HPET_CFG); - cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); hpet_writel(0, HPET_COUNTER); hpet_writel(0, HPET_COUNTER + 4); +} + +static void hpet_start_counter(void) +{ + unsigned long cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } +static void hpet_restart_counter(void) +{ + hpet_stop_counter(); + hpet_start_counter(); +} + static void hpet_resume_device(void) { force_hpet_resume(); } -static void hpet_restart_counter(void) +static void hpet_resume_counter(void) { hpet_resume_device(); - hpet_start_counter(); + hpet_restart_counter(); } static void hpet_enable_legacy_int(void) @@ -259,29 +309,23 @@ static int hpet_setup_msi_irq(unsigned int irq); static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { - unsigned long cfg, cmp, now; + unsigned long cfg; uint64_t delta; switch (mode) { case CLOCK_EVT_MODE_PERIODIC: + hpet_stop_counter(); delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; delta >>= evt->shift; - now = hpet_readl(HPET_COUNTER); - cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); /* Make sure we use edge triggered interrupts */ cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); - /* - * The first write after writing TN_SETVAL to the - * config register sets the counter value, the second - * write sets the period. - */ - hpet_writel(cmp, HPET_Tn_CMP(timer)); - udelay(1); hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); + hpet_start_counter(); + hpet_print_config(); break; case CLOCK_EVT_MODE_ONESHOT: @@ -308,6 +352,7 @@ static void hpet_set_mode(enum clock_event_mode mode, irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } + hpet_print_config(); break; } } @@ -526,6 +571,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); num_timers++; /* Value read out starts from 0 */ + hpet_print_config(); hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); if (!hpet_devs) @@ -695,7 +741,7 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .shift = HPET_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = hpet_restart_counter, + .resume = hpet_resume_counter, #ifdef CONFIG_X86_64 .vread = vread_hpet, #endif @@ -707,7 +753,7 @@ static int hpet_clocksource_register(void) cycle_t t1; /* Start the counter */ - hpet_start_counter(); + hpet_restart_counter(); /* Verify whether hpet counter works */ t1 = read_hpet(); @@ -793,6 +839,7 @@ int __init hpet_enable(void) * information and the number of channels */ id = hpet_readl(HPET_ID); + hpet_print_config(); #ifdef CONFIG_HPET_EMULATE_RTC /* @@ -845,6 +892,7 @@ static __init int hpet_late_init(void) return -ENODEV; hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + hpet_print_config(); for_each_online_cpu(cpu) { hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 309949e9e1c1..697d1b78cfbf 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -172,7 +172,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 
PCI_DEVICE_ID_INTEL_ICH9_7, ich_force_enable_hpet); - +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */ + ich_force_enable_hpet); static struct pci_dev *cached_dev; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b8e7aaf7ef75..08afa1579e6d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -17,20 +17,21 @@ #include <asm/delay.h> #include <asm/hypervisor.h> -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); -unsigned int tsc_khz; + +unsigned int __read_mostly tsc_khz; EXPORT_SYMBOL(tsc_khz); /* * TSC can be unstable due to cpufreq or due to unsynced TSCs */ -static int tsc_unstable; +static int __read_mostly tsc_unstable; /* native_sched_clock() is called before tsc_init(), so we must start with the TSC soft disabled to prevent erroneous rdtsc usage on !cpu_has_tsc processors */ -static int tsc_disabled = -1; +static int __read_mostly tsc_disabled = -1; static int tsc_clocksource_reliable; /* diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b81125f0bdee..0a303c3ed11f 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -4,6 +4,10 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + default y + menuconfig VIRTUALIZATION bool "Virtualization" depends on HAVE_KVM || X86 diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 72bd275a9b5c..c13bb92d3157 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) if (!atomic_inc_and_test(&pt->pending)) set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); + if (!pt->reinject) + atomic_set(&pt->pending, 1); + if (vcpu0 && waitqueue_active(&vcpu0->wq)) wake_up_interruptible(&vcpu0->wq); @@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit) pit->pit_state.irq_ack = 1; } +static void pit_mask_notifier(struct kvm_irq_mask_notifier *kimn, bool mask) +{ + struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); + + if (!mask) { + atomic_set(&pit->pit_state.pit_timer.pending, 0); + pit->pit_state.irq_ack = 1; + } +} + struct kvm_pit *kvm_create_pit(struct kvm *kvm) { struct kvm_pit *pit; @@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) if (!pit) return NULL; - mutex_lock(&kvm->lock); pit->irq_source_id = kvm_request_irq_source_id(kvm); - mutex_unlock(&kvm->lock); if (pit->irq_source_id < 0) { kfree(pit); return NULL; @@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) pit_state->irq_ack_notifier.gsi = 0; pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); + pit_state->pit_timer.reinject = true; mutex_unlock(&pit->pit_state.lock); kvm_pit_reset(pit); + pit->mask_notifier.func = pit_mask_notifier; + kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); + return pit; } @@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm) struct hrtimer *timer; if (kvm->arch.vpit) { + kvm_unregister_irq_mask_notifier(kvm, 0, + &kvm->arch.vpit->mask_notifier); mutex_lock(&kvm->arch.vpit->pit_state.lock); timer = &kvm->arch.vpit->pit_state.pit_timer.timer; hrtimer_cancel(timer); diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 4178022b97aa..6acbe4b505d5 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -9,6 +9,7 @@ struct kvm_kpit_timer { s64 period; /* unit: ns */ s64 scheduled; atomic_t pending; + bool reinject; }; struct kvm_kpit_channel_state { @@
-45,6 +46,7 @@ struct kvm_pit { struct kvm *kvm; struct kvm_kpit_state pit_state; int irq_source_id; + struct kvm_irq_mask_notifier mask_notifier; }; #define KVM_PIT_BASE_ADDRESS 0x40 diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 179dcb0103fd..1ccb50c74f18 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -32,11 +32,13 @@ #include <linux/kvm_host.h> static void pic_lock(struct kvm_pic *s) + __acquires(&s->lock) { spin_lock(&s->lock); } static void pic_unlock(struct kvm_pic *s) + __releases(&s->lock) { struct kvm *kvm = s->kvm; unsigned acks = s->pending_acks; @@ -49,7 +51,8 @@ static void pic_unlock(struct kvm_pic *s) spin_unlock(&s->lock); while (acks) { - kvm_notify_acked_irq(kvm, __ffs(acks)); + kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)), + __ffs(acks)); acks &= acks - 1; } @@ -76,12 +79,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm) /* * set irq level. If an edge is detected, then the IRR is set to 1 */ -static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) +static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) { - int mask; + int mask, ret = 1; mask = 1 << irq; if (s->elcr & mask) /* level triggered */ if (level) { + ret = !(s->irr & mask); s->irr |= mask; s->last_irr |= mask; } else { @@ -90,11 +94,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) } else /* edge triggered */ if (level) { - if ((s->last_irr & mask) == 0) + if ((s->last_irr & mask) == 0) { + ret = !(s->irr & mask); s->irr |= mask; + } s->last_irr |= mask; } else s->last_irr &= ~mask; + + return (s->imr & mask) ? -1 : ret; } /* @@ -171,16 +179,19 @@ void kvm_pic_update_irq(struct kvm_pic *s) pic_unlock(s); } -void kvm_pic_set_irq(void *opaque, int irq, int level) +int kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; + int ret = -1; pic_lock(s); if (irq >= 0 && irq < PIC_NUM_PINS) { - pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); + ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); } pic_unlock(s); + + return ret; } /* @@ -232,7 +243,7 @@ int kvm_pic_read_irq(struct kvm *kvm) } pic_update_irq(s); pic_unlock(s); - kvm_notify_acked_irq(kvm, irq); + kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq); return intno; } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 82579ee538d0..9f593188129e 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -32,6 +32,8 @@ #include "lapic.h" #define PIC_NUM_PINS 16 +#define SELECT_PIC(irq) \ + ((irq) < 8 ? 
KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) struct kvm; struct kvm_vcpu; diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 8e5ee99551f6..ed66e4c078dc 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = { }; #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) -#define NUM_DB_REGS 4 struct kvm_vcpu; @@ -29,18 +28,23 @@ struct vcpu_svm { struct svm_cpu_data *svm_data; uint64_t asid_generation; - unsigned long db_regs[NUM_DB_REGS]; - u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; u64 host_gs_base; unsigned long host_cr2; - unsigned long host_db_regs[NUM_DB_REGS]; - unsigned long host_dr6; - unsigned long host_dr7; u32 *msrpm; + struct vmcb *hsave; + u64 hsave_msr; + + u64 nested_vmcb; + + /* These are the merged vectors */ + u32 *nested_msrpm; + + /* gpa pointers to the real vectors */ + u64 nested_vmcb_msrpm; }; #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2d4477c71473..2a36f7f7c4c7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -145,11 +145,20 @@ struct kvm_rmap_desc { struct kvm_rmap_desc *more; }; -struct kvm_shadow_walk { - int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu, - u64 addr, u64 *spte, int level); +struct kvm_shadow_walk_iterator { + u64 addr; + hpa_t shadow_addr; + int level; + u64 *sptep; + unsigned index; }; +#define for_each_shadow_entry(_vcpu, _addr, _walker) \ + for (shadow_walk_init(&(_walker), _vcpu, _addr); \ + shadow_walk_okay(&(_walker)); \ + shadow_walk_next(&(_walker))) + + struct kvm_unsync_walk { int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); }; @@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, BUG_ON(!mc->nobjs); p = mc->objects[--mc->nobjs]; - memset(p, 0, size); return p; } @@ -794,10 +802,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&sp->oos_link); - ASSERT(is_empty_shadow_page(sp->spt)); bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; - sp->global = 1; sp->parent_pte = parent_pte; --vcpu->kvm->arch.n_free_mmu_pages; return sp; @@ -983,8 +989,8 @@ struct kvm_mmu_pages { idx < 512; \ idx = find_next_bit(bitmap, 512, idx+1)) -int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, - int idx) +static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, + int idx) { int i; @@ -1059,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; hlist_for_each_entry(sp, node, bucket, hash_link) - if (sp->gfn == gfn && !sp->role.metaphysical + if (sp->gfn == gfn && !sp->role.direct && !sp->role.invalid) { pgprintk("%s: found role %x\n", __func__, sp->role.word); @@ -1115,8 +1121,9 @@ struct mmu_page_path { i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ i = mmu_pages_next(&pvec, &parents, i)) -int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, - int i) +static int mmu_pages_next(struct kvm_mmu_pages *pvec, + struct mmu_page_path *parents, + int i) { int n; @@ -1135,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, return n; } -void mmu_pages_clear_parents(struct mmu_page_path *parents) +static void mmu_pages_clear_parents(struct 
mmu_page_path *parents) { struct kvm_mmu_page *sp; unsigned int level = 0; @@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gaddr, unsigned level, - int metaphysical, + int direct, unsigned access, u64 *parent_pte) { @@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp; struct hlist_node *node, *tmp; - role.word = 0; - role.glevels = vcpu->arch.mmu.root_level; + role = vcpu->arch.mmu.base_role; role.level = level; - role.metaphysical = metaphysical; + role.direct = direct; role.access = access; if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); @@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); sp->gfn = gfn; sp->role = role; + sp->global = role.cr4_pge; hlist_add_head(&sp->hash_link, bucket); - if (!metaphysical) { + if (!direct) { if (rmap_write_protect(vcpu->kvm, gfn)) kvm_flush_remote_tlbs(vcpu->kvm); account_shadowed(vcpu->kvm, gfn); @@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, return sp; } -static int walk_shadow(struct kvm_shadow_walk *walker, - struct kvm_vcpu *vcpu, u64 addr) +static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, + struct kvm_vcpu *vcpu, u64 addr) { - hpa_t shadow_addr; - int level; - int r; - u64 *sptep; - unsigned index; - - shadow_addr = vcpu->arch.mmu.root_hpa; - level = vcpu->arch.mmu.shadow_root_level; - if (level == PT32E_ROOT_LEVEL) { - shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; - shadow_addr &= PT64_BASE_ADDR_MASK; - if (!shadow_addr) - return 1; - --level; + iterator->addr = addr; + iterator->shadow_addr = vcpu->arch.mmu.root_hpa; + iterator->level = vcpu->arch.mmu.shadow_root_level; + if (iterator->level == PT32E_ROOT_LEVEL) { + iterator->shadow_addr + = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; + iterator->shadow_addr &= PT64_BASE_ADDR_MASK; + --iterator->level; + if (!iterator->shadow_addr) + iterator->level = 0; } +} - while (level >= PT_PAGE_TABLE_LEVEL) { - index = SHADOW_PT_INDEX(addr, level); - sptep = ((u64 *)__va(shadow_addr)) + index; - r = walker->entry(walker, vcpu, addr, sptep, level); - if (r) - return r; - shadow_addr = *sptep & PT64_BASE_ADDR_MASK; - --level; - } - return 0; +static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) +{ + if (iterator->level < PT_PAGE_TABLE_LEVEL) + return false; + iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); + iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; + return true; +} + +static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) +{ + iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK; + --iterator->level; } static void kvm_mmu_page_unlink_children(struct kvm *kvm, @@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_page_unlink_children(kvm, sp); kvm_mmu_unlink_parents(kvm, sp); kvm_flush_remote_tlbs(kvm); - if (!sp->role.invalid && !sp->role.metaphysical) + if (!sp->role.invalid && !sp->role.direct) unaccount_shadowed(kvm, sp->gfn); if (sp->unsync) kvm_unlink_unsync_page(kvm, sp); @@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) - if 
(sp->gfn == gfn && !sp->role.metaphysical) { + if (sp->gfn == gfn && !sp->role.direct) { pgprintk("%s: gfn %lx role %x\n", __func__, gfn, sp->role.word); r = 1; @@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) { + unsigned index; + struct hlist_head *bucket; struct kvm_mmu_page *sp; + struct hlist_node *node, *nn; - while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { - pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); - kvm_mmu_zap_page(kvm, sp); + index = kvm_page_table_hashfn(gfn); + bucket = &kvm->arch.mmu_page_hash[index]; + hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { + if (sp->gfn == gfn && !sp->role.direct + && !sp->role.invalid) { + pgprintk("%s: zap %lx %x\n", + __func__, gfn, sp->role.word); + kvm_mmu_zap_page(kvm, sp); + } } } @@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) bucket = &vcpu->kvm->arch.mmu_page_hash[index]; /* don't unsync if pagetable is shadowed with multiple roles */ hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { - if (s->gfn != sp->gfn || s->role.metaphysical) + if (s->gfn != sp->gfn || s->role.direct) continue; if (s->role.word != sp->role.word) return 1; @@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, u64 mt_mask = shadow_mt_mask; struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); - if (!(vcpu->arch.cr4 & X86_CR4_PGE)) - global = 0; if (!global && sp->global) { sp->global = 0; if (sp->unsync) { @@ -1777,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, pgprintk("hfn old %lx new %lx\n", spte_to_pfn(*shadow_pte), pfn); rmap_remove(vcpu->kvm, shadow_pte); - } else { - if (largepage) - was_rmapped = is_large_pte(*shadow_pte); - else - was_rmapped = 1; - } + } else + was_rmapped = 1; } if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, dirty, largepage, global, gfn, pfn, speculative, true)) { @@ -1820,67 +1830,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } -struct direct_shadow_walk { - struct kvm_shadow_walk walker; - pfn_t pfn; - int write; - int largepage; - int pt_write; -}; - -static int direct_map_entry(struct kvm_shadow_walk *_walk, - struct kvm_vcpu *vcpu, - u64 addr, u64 *sptep, int level) +static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, + int largepage, gfn_t gfn, pfn_t pfn) { - struct direct_shadow_walk *walk = - container_of(_walk, struct direct_shadow_walk, walker); + struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; + int pt_write = 0; gfn_t pseudo_gfn; - gfn_t gfn = addr >> PAGE_SHIFT; - - if (level == PT_PAGE_TABLE_LEVEL - || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { - mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, - 0, walk->write, 1, &walk->pt_write, - walk->largepage, 0, gfn, walk->pfn, false); - ++vcpu->stat.pf_fixed; - return 1; - } - if (*sptep == shadow_trap_nonpresent_pte) { - pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; - sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1, - 1, ACC_ALL, sptep); - if (!sp) { - pgprintk("nonpaging_map: ENOMEM\n"); - kvm_release_pfn_clean(walk->pfn); - return -ENOMEM; + for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { + if (iterator.level == PT_PAGE_TABLE_LEVEL + || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { + mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, + 0, write, 1, &pt_write, + largepage, 0, gfn, pfn, false); + 
++vcpu->stat.pf_fixed; + break; } - set_shadow_pte(sptep, - __pa(sp->spt) - | PT_PRESENT_MASK | PT_WRITABLE_MASK - | shadow_user_mask | shadow_x_mask); - } - return 0; -} + if (*iterator.sptep == shadow_trap_nonpresent_pte) { + pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; + sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, + iterator.level - 1, + 1, ACC_ALL, iterator.sptep); + if (!sp) { + pgprintk("nonpaging_map: ENOMEM\n"); + kvm_release_pfn_clean(pfn); + return -ENOMEM; + } -static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, - int largepage, gfn_t gfn, pfn_t pfn) -{ - int r; - struct direct_shadow_walk walker = { - .walker = { .entry = direct_map_entry, }, - .pfn = pfn, - .largepage = largepage, - .write = write, - .pt_write = 0, - }; - - r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT); - if (r < 0) - return r; - return walker.pt_write; + set_shadow_pte(iterator.sptep, + __pa(sp->spt) + | PT_PRESENT_MASK | PT_WRITABLE_MASK + | shadow_user_mask | shadow_x_mask); + } + } + return pt_write; } static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) @@ -1962,7 +1947,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) int i; gfn_t root_gfn; struct kvm_mmu_page *sp; - int metaphysical = 0; + int direct = 0; root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; @@ -1971,18 +1956,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); if (tdp_enabled) - metaphysical = 1; + direct = 1; sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - PT64_ROOT_LEVEL, metaphysical, + PT64_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; vcpu->arch.mmu.root_hpa = root; return; } - metaphysical = !is_paging(vcpu); + direct = !is_paging(vcpu); if (tdp_enabled) - metaphysical = 1; + direct = 1; for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; @@ -1996,7 +1981,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) } else if (vcpu->arch.mmu.root_level == 0) root_gfn = 0; sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, - PT32_ROOT_LEVEL, metaphysical, + PT32_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; @@ -2251,17 +2236,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) static int init_kvm_softmmu(struct kvm_vcpu *vcpu) { + int r; + ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); if (!is_paging(vcpu)) - return nonpaging_init_context(vcpu); + r = nonpaging_init_context(vcpu); else if (is_long_mode(vcpu)) - return paging64_init_context(vcpu); + r = paging64_init_context(vcpu); else if (is_pae(vcpu)) - return paging32E_init_context(vcpu); + r = paging32E_init_context(vcpu); else - return paging32_init_context(vcpu); + r = paging32_init_context(vcpu); + + vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; + + return r; } static int init_kvm_mmu(struct kvm_vcpu *vcpu) @@ -2492,7 +2483,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { - if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid) + if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) continue; pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 
4 : 8; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); @@ -3130,7 +3121,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) gfn_t gfn; list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { - if (sp->role.metaphysical) + if (sp->role.direct) continue; gfn = unalias_gfn(vcpu->kvm, sp->gfn); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 258e5d56298e..eaab2145f62b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) static inline int is_long_mode(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - return vcpu->arch.shadow_efer & EFER_LME; + return vcpu->arch.shadow_efer & EFER_LMA; #else return 0; #endif diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 9fd78b6e17ad..6bd70206c561 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -25,7 +25,6 @@ #if PTTYPE == 64 #define pt_element_t u64 #define guest_walker guest_walker64 - #define shadow_walker shadow_walker64 #define FNAME(name) paging##64_##name #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK @@ -42,7 +41,6 @@ #elif PTTYPE == 32 #define pt_element_t u32 #define guest_walker guest_walker32 - #define shadow_walker shadow_walker32 #define FNAME(name) paging##32_##name #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK @@ -73,18 +71,6 @@ struct guest_walker { u32 error_code; }; -struct shadow_walker { - struct kvm_shadow_walk walker; - struct guest_walker *guest_walker; - int user_fault; - int write_fault; - int largepage; - int *ptwrite; - pfn_t pfn; - u64 *sptep; - gpa_t pte_gpa; -}; - static gfn_t gpte_to_gfn(pt_element_t gpte) { return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; @@ -283,91 +269,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, /* * Fetch a shadow pte for a specific level in the paging hierarchy. 
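Most of the mmu.c and paging_tmpl.h churn in this patch is a single refactor: the callback-based walk_shadow()/struct kvm_shadow_walk is replaced by the open-coded kvm_shadow_walk_iterator, so each walker (direct map, fetch, invlpg) becomes an ordinary loop with local state instead of a container_of dance. The resulting usage pattern, distilled from the hunks here:

    struct kvm_shadow_walk_iterator it;

    for_each_shadow_entry(vcpu, addr, it) {
            /* it.level is the current level, it.sptep the spte to look at. */
            if (it.level == PT_PAGE_TABLE_LEVEL) {
                    /* Leaf reached: install or inspect the final spte. */
                    break;
            }
            if (*it.sptep == shadow_trap_nonpresent_pte) {
                    /* Missing intermediate table: allocate a shadow page here. */
            }
    }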
*/ -static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, - struct kvm_vcpu *vcpu, u64 addr, - u64 *sptep, int level) +static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + struct guest_walker *gw, + int user_fault, int write_fault, int largepage, + int *ptwrite, pfn_t pfn) { - struct shadow_walker *sw = - container_of(_sw, struct shadow_walker, walker); - struct guest_walker *gw = sw->guest_walker; unsigned access = gw->pt_access; struct kvm_mmu_page *shadow_page; - u64 spte; - int metaphysical; + u64 spte, *sptep; + int direct; gfn_t table_gfn; int r; + int level; pt_element_t curr_pte; + struct kvm_shadow_walk_iterator iterator; - if (level == PT_PAGE_TABLE_LEVEL - || (sw->largepage && level == PT_DIRECTORY_LEVEL)) { - mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, - sw->user_fault, sw->write_fault, - gw->ptes[gw->level-1] & PT_DIRTY_MASK, - sw->ptwrite, sw->largepage, - gw->ptes[gw->level-1] & PT_GLOBAL_MASK, - gw->gfn, sw->pfn, false); - sw->sptep = sptep; - return 1; - } + if (!is_present_pte(gw->ptes[gw->level - 1])) + return NULL; - if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) - return 0; + for_each_shadow_entry(vcpu, addr, iterator) { + level = iterator.level; + sptep = iterator.sptep; + if (level == PT_PAGE_TABLE_LEVEL + || (largepage && level == PT_DIRECTORY_LEVEL)) { + mmu_set_spte(vcpu, sptep, access, + gw->pte_access & access, + user_fault, write_fault, + gw->ptes[gw->level-1] & PT_DIRTY_MASK, + ptwrite, largepage, + gw->ptes[gw->level-1] & PT_GLOBAL_MASK, + gw->gfn, pfn, false); + break; + } - if (is_large_pte(*sptep)) { - set_shadow_pte(sptep, shadow_trap_nonpresent_pte); - kvm_flush_remote_tlbs(vcpu->kvm); - rmap_remove(vcpu->kvm, sptep); - } + if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) + continue; - if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) { - metaphysical = 1; - if (!is_dirty_pte(gw->ptes[level - 1])) - access &= ~ACC_WRITE_MASK; - table_gfn = gpte_to_gfn(gw->ptes[level - 1]); - } else { - metaphysical = 0; - table_gfn = gw->table_gfn[level - 2]; - } - shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1, - metaphysical, access, sptep); - if (!metaphysical) { - r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], - &curr_pte, sizeof(curr_pte)); - if (r || curr_pte != gw->ptes[level - 2]) { - kvm_mmu_put_page(shadow_page, sptep); - kvm_release_pfn_clean(sw->pfn); - sw->sptep = NULL; - return 1; + if (is_large_pte(*sptep)) { + rmap_remove(vcpu->kvm, sptep); + set_shadow_pte(sptep, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(vcpu->kvm); } - } - spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK - | PT_WRITABLE_MASK | PT_USER_MASK; - *sptep = spte; - return 0; -} - -static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, - struct guest_walker *guest_walker, - int user_fault, int write_fault, int largepage, - int *ptwrite, pfn_t pfn) -{ - struct shadow_walker walker = { - .walker = { .entry = FNAME(shadow_walk_entry), }, - .guest_walker = guest_walker, - .user_fault = user_fault, - .write_fault = write_fault, - .largepage = largepage, - .ptwrite = ptwrite, - .pfn = pfn, - }; - - if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1])) - return NULL; + if (level == PT_DIRECTORY_LEVEL + && gw->level == PT_DIRECTORY_LEVEL) { + direct = 1; + if (!is_dirty_pte(gw->ptes[level - 1])) + access &= ~ACC_WRITE_MASK; + table_gfn = gpte_to_gfn(gw->ptes[level - 1]); + } else { + direct = 0; + table_gfn = gw->table_gfn[level - 2]; + } + 
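
The body of FNAME(fetch) continues below. The step worth a second look is the kvm_read_guest_atomic() revalidation after a new indirect shadow page is linked; written out, the race it closes is:

    /*
     * The guest walk runs lockless against other vcpus, so the ptes
     * cached in guest_walker can go stale before the shadow entry is
     * wired up:
     *
     *   vcpu0                              vcpu1
     *   -----                              -----
     *   walk guest tables, cache ptes
     *   in guest_walker
     *                                      rewrite a guest PTE
     *   kvm_mmu_get_page()
     *   re-read the PTE, compare with
     *   the cached value
     *   mismatch: kvm_mmu_put_page(),
     *   release the pfn, bail out and
     *   let the fault be retaken
     */
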
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, + direct, access, sptep); + if (!direct) { + r = kvm_read_guest_atomic(vcpu->kvm, + gw->pte_gpa[level - 2], + &curr_pte, sizeof(curr_pte)); + if (r || curr_pte != gw->ptes[level - 2]) { + kvm_mmu_put_page(shadow_page, sptep); + kvm_release_pfn_clean(pfn); + sptep = NULL; + break; + } + } - walk_shadow(&walker.walker, vcpu, addr); + spte = __pa(shadow_page->spt) + | PT_PRESENT_MASK | PT_ACCESSED_MASK + | PT_WRITABLE_MASK | PT_USER_MASK; + *sptep = spte; + } - return walker.sptep; + return sptep; } /* @@ -465,54 +439,56 @@ out_unlock: return 0; } -static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, - struct kvm_vcpu *vcpu, u64 addr, - u64 *sptep, int level) +static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { - struct shadow_walker *sw = - container_of(_sw, struct shadow_walker, walker); - - /* FIXME: properly handle invlpg on large guest pages */ - if (level == PT_PAGE_TABLE_LEVEL || - ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); + struct kvm_shadow_walk_iterator iterator; + pt_element_t gpte; + gpa_t pte_gpa = -1; + int level; + u64 *sptep; + int need_flush = 0; - sw->pte_gpa = (sp->gfn << PAGE_SHIFT); - sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); + spin_lock(&vcpu->kvm->mmu_lock); - if (is_shadow_present_pte(*sptep)) { - rmap_remove(vcpu->kvm, sptep); - if (is_large_pte(*sptep)) - --vcpu->kvm->stat.lpages; + for_each_shadow_entry(vcpu, gva, iterator) { + level = iterator.level; + sptep = iterator.sptep; + + /* FIXME: properly handle invlpg on large guest pages */ + if (level == PT_PAGE_TABLE_LEVEL || + ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + + pte_gpa = (sp->gfn << PAGE_SHIFT); + pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); + + if (is_shadow_present_pte(*sptep)) { + rmap_remove(vcpu->kvm, sptep); + if (is_large_pte(*sptep)) + --vcpu->kvm->stat.lpages; + need_flush = 1; + } + set_shadow_pte(sptep, shadow_trap_nonpresent_pte); + break; } - set_shadow_pte(sptep, shadow_trap_nonpresent_pte); - return 1; - } - if (!is_shadow_present_pte(*sptep)) - return 1; - return 0; -} -static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) -{ - pt_element_t gpte; - struct shadow_walker walker = { - .walker = { .entry = FNAME(shadow_invlpg_entry), }, - .pte_gpa = -1, - }; + if (!is_shadow_present_pte(*sptep)) + break; + } - spin_lock(&vcpu->kvm->mmu_lock); - walk_shadow(&walker.walker, vcpu, gva); + if (need_flush) + kvm_flush_remote_tlbs(vcpu->kvm); spin_unlock(&vcpu->kvm->mmu_lock); - if (walker.pte_gpa == -1) + + if (pte_gpa == -1) return; - if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, + if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, sizeof(pt_element_t))) return; if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { if (mmu_topup_memory_caches(vcpu)) return; - kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, + kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, sizeof(pt_element_t), 0); } } @@ -540,7 +516,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, pt_element_t pt[256 / sizeof(pt_element_t)]; gpa_t pte_gpa; - if (sp->role.metaphysical + if (sp->role.direct || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { nonpaging_prefetch_page(vcpu, sp); return; @@ -619,7 +595,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) #undef pt_element_t #undef guest_walker -#undef 
shadow_walker #undef FNAME #undef PT_BASE_ADDR_MASK #undef PT_INDEX diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a9e769e4e251..1821c2078199 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -38,9 +38,6 @@ MODULE_LICENSE("GPL"); #define IOPM_ALLOC_ORDER 2 #define MSRPM_ALLOC_ORDER 1 -#define DR7_GD_MASK (1 << 13) -#define DR6_BD_MASK (1 << 13) - #define SEG_TYPE_LDT 2 #define SEG_TYPE_BUSY_TSS16 3 @@ -50,6 +47,15 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +/* Turn on to get debugging output*/ +/* #define NESTED_DEBUG */ + +#ifdef NESTED_DEBUG +#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) +#else +#define nsvm_printk(fmt, args...) do {} while(0) +#endif + /* enable NPT for AMD64 and X86 with PAE */ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) static bool npt_enabled = true; @@ -60,14 +66,29 @@ static int npt = 1; module_param(npt, int, S_IRUGO); +static int nested = 0; +module_param(nested, int, S_IRUGO); + static void kvm_reput_irq(struct vcpu_svm *svm); static void svm_flush_tlb(struct kvm_vcpu *vcpu); +static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); +static int nested_svm_vmexit(struct vcpu_svm *svm); +static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb, + void *arg2, void *opaque); +static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, + bool has_error_code, u32 error_code); + static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_svm, vcpu); } +static inline bool is_nested(struct vcpu_svm *svm) +{ + return svm->nested_vmcb; +} + static unsigned long iopm_base; struct kvm_ldttss_desc { @@ -157,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val) asm volatile ("mov %0, %%cr2" :: "r" (val)); } -static inline unsigned long read_dr6(void) -{ - unsigned long dr6; - - asm volatile ("mov %%dr6, %0" : "=r" (dr6)); - return dr6; -} - -static inline void write_dr6(unsigned long val) -{ - asm volatile ("mov %0, %%dr6" :: "r" (val)); -} - -static inline unsigned long read_dr7(void) -{ - unsigned long dr7; - - asm volatile ("mov %%dr7, %0" : "=r" (dr7)); - return dr7; -} - -static inline void write_dr7(unsigned long val) -{ - asm volatile ("mov %0, %%dr7" :: "r" (val)); -} - static inline void force_new_asid(struct kvm_vcpu *vcpu) { to_svm(vcpu)->asid_generation--; @@ -198,7 +193,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!npt_enabled && !(efer & EFER_LMA)) efer &= ~EFER_LME; - to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; + to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; vcpu->arch.shadow_efer = efer; } @@ -207,6 +202,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, { struct vcpu_svm *svm = to_svm(vcpu); + /* If we are within a nested VM we'd better #VMEXIT and let the + guest handle the exception */ + if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) + return; + svm->vmcb->control.event_inj = nr | SVM_EVTINJ_VALID | (has_error_code ? 
SVM_EVTINJ_VALID_ERR : 0) @@ -242,7 +242,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, svm->next_rip); svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; - vcpu->arch.interrupt_window_open = 1; + vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK); } static int has_svm(void) @@ -250,7 +250,7 @@ static int has_svm(void) const char *msg; if (!cpu_has_svm(&msg)) { - printk(KERN_INFO "has_svn: %s\n", msg); + printk(KERN_INFO "has_svm: %s\n", msg); return 0; } @@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage) svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK); + wrmsrl(MSR_EFER, efer | EFER_SVME); wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(svm_data->save_area) << PAGE_SHIFT); @@ -417,6 +417,14 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); + if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) + kvm_enable_efer_bits(EFER_FFXSR); + + if (nested) { + printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); + kvm_enable_efer_bits(EFER_SVME); + } + for_each_online_cpu(cpu) { r = svm_cpu_init(cpu); if (r) @@ -559,7 +567,7 @@ static void init_vmcb(struct vcpu_svm *svm) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); - save->efer = MSR_EFER_SVME_MASK; + save->efer = EFER_SVME; save->dr6 = 0xffff0ff0; save->dr7 = 0x400; save->rflags = 2; @@ -591,6 +599,9 @@ static void init_vmcb(struct vcpu_svm *svm) save->cr4 = 0; } force_new_asid(&svm->vcpu); + + svm->nested_vmcb = 0; + svm->vcpu.arch.hflags = HF_GIF_MASK; } static int svm_vcpu_reset(struct kvm_vcpu *vcpu) @@ -615,6 +626,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct vcpu_svm *svm; struct page *page; struct page *msrpm_pages; + struct page *hsave_page; + struct page *nested_msrpm_pages; int err; svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); @@ -637,14 +650,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!msrpm_pages) goto uninit; + + nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + if (!nested_msrpm_pages) + goto uninit; + svm->msrpm = page_address(msrpm_pages); svm_vcpu_init_msrpm(svm->msrpm); + hsave_page = alloc_page(GFP_KERNEL); + if (!hsave_page) + goto uninit; + svm->hsave = page_address(hsave_page); + + svm->nested_msrpm = page_address(nested_msrpm_pages); + svm->vmcb = page_address(page); clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; - memset(svm->db_regs, 0, sizeof(svm->db_regs)); init_vmcb(svm); fx_init(&svm->vcpu); @@ -669,6 +693,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); + __free_page(virt_to_page(svm->hsave)); + __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -718,6 +744,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } +static void svm_set_vintr(struct vcpu_svm *svm) +{ + svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR; +} + +static void svm_clear_vintr(struct vcpu_svm *svm) +{ + svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); +} + static struct vmcb_seg *svm_seg(struct kvm_vcpu 
*vcpu, int seg) { struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; @@ -760,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; - /* - * SVM always stores 0 for the 'G' bit in the CS selector in - * the VMCB on a VMEXIT. This hurts cross-vendor migration: - * Intel's VMENTRY has a check on the 'G' bit. - */ - if (seg == VCPU_SREG_CS) + switch (seg) { + case VCPU_SREG_CS: + /* + * SVM always stores 0 for the 'G' bit in the CS selector in + * the VMCB on a VMEXIT. This hurts cross-vendor migration: + * Intel's VMENTRY has a check on the 'G' bit. + */ var->g = s->limit > 0xfffff; - - /* - * Work around a bug where the busy flag in the tr selector - * isn't exposed - */ - if (seg == VCPU_SREG_TR) + break; + case VCPU_SREG_TR: + /* + * Work around a bug where the busy flag in the tr selector + * isn't exposed + */ var->type |= 0x2; + break; + case VCPU_SREG_DS: + case VCPU_SREG_ES: + case VCPU_SREG_FS: + case VCPU_SREG_GS: + /* + * The accessed bit must always be set in the segment + * descriptor cache, although it can be cleared in the + * descriptor, the cached bit always remains at 1. Since + * Intel has a check on this, set it here to support + * cross-vendor migration. + */ + if (!var->unusable) + var->type |= 0x1; + break; + } var->unusable = !var->present; } @@ -905,9 +958,37 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, } -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EOPNOTSUPP; + int old_debug = vcpu->guest_debug; + struct vcpu_svm *svm = to_svm(vcpu); + + vcpu->guest_debug = dbg->control; + + svm->vmcb->control.intercept_exceptions &= + ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); + if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + svm->vmcb->control.intercept_exceptions |= + 1 << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + svm->vmcb->control.intercept_exceptions |= + 1 << BP_VECTOR; + } else + vcpu->guest_debug = 0; + + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; + else + svm->vmcb->save.dr7 = vcpu->arch.dr7; + + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) + svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + + return 0; } static int svm_get_irq(struct kvm_vcpu *vcpu) @@ -949,7 +1030,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) { - unsigned long val = to_svm(vcpu)->db_regs[dr]; + struct vcpu_svm *svm = to_svm(vcpu); + unsigned long val; + + switch (dr) { + case 0 ... 
3: + val = vcpu->arch.db[dr]; + break; + case 6: + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + val = vcpu->arch.dr6; + else + val = svm->vmcb->save.dr6; + break; + case 7: + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + val = vcpu->arch.dr7; + else + val = svm->vmcb->save.dr7; + break; + default: + val = 0; + } + KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); return val; } @@ -959,33 +1062,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, { struct vcpu_svm *svm = to_svm(vcpu); - *exception = 0; + KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler); - if (svm->vmcb->save.dr7 & DR7_GD_MASK) { - svm->vmcb->save.dr7 &= ~DR7_GD_MASK; - svm->vmcb->save.dr6 |= DR6_BD_MASK; - *exception = DB_VECTOR; - return; - } + *exception = 0; switch (dr) { case 0 ... 3: - svm->db_regs[dr] = value; + vcpu->arch.db[dr] = value; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + vcpu->arch.eff_db[dr] = value; return; case 4 ... 5: - if (vcpu->arch.cr4 & X86_CR4_DE) { + if (vcpu->arch.cr4 & X86_CR4_DE) *exception = UD_VECTOR; + return; + case 6: + if (value & 0xffffffff00000000ULL) { + *exception = GP_VECTOR; return; } - case 7: { - if (value & ~((1ULL << 32) - 1)) { + vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; + return; + case 7: + if (value & 0xffffffff00000000ULL) { *exception = GP_VECTOR; return; } - svm->vmcb->save.dr7 = value; + vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { + svm->vmcb->save.dr7 = vcpu->arch.dr7; + vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); + } return; - } default: + /* FIXME: Possible case? */ printk(KERN_DEBUG "%s: unexpected dr %u\n", __func__, dr); *exception = UD_VECTOR; @@ -1031,6 +1141,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } +static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (!(svm->vcpu.guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + kvm_queue_exception(&svm->vcpu, DB_VECTOR); + return 1; + } + kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; + kvm_run->debug.arch.exception = DB_VECTOR; + return 0; +} + +static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; + kvm_run->debug.arch.exception = BP_VECTOR; + return 0; +} + static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { int er; @@ -1080,7 +1211,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? 
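
db_interception() and bp_interception() are the kernel half of the reworked guest-debug ABI (struct kvm_guest_debug via KVM_SET_GUEST_DEBUG, replacing the old struct kvm_debug_guest). A hypothetical userspace counterpart, assuming vcpu_fd and the mmap'ed run area come from the usual KVM_CREATE_VCPU setup:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* trap guest INT3s: they surface as KVM_EXIT_DEBUG instead of #BP */
    static int arm_sw_breakpoints(int vcpu_fd)
    {
            struct kvm_guest_debug dbg = {
                    .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
            };

            return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }

    /* after KVM_RUN returns */
    static void report_debug_exit(struct kvm_run *run)
    {
            if (run->exit_reason != KVM_EXIT_DEBUG)
                    return;
            /* fields filled in by db_interception()/bp_interception() */
            printf("debug exit: pc=0x%llx vector=%u\n",
                   (unsigned long long)run->debug.arch.pc,
                   run->debug.arch.exception);
    }
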
*/ - int size, down, in, string, rep; + int size, in, string; unsigned port; ++svm->vcpu.stat.io_exits; @@ -1099,8 +1230,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) in = (io_info & SVM_IOIO_TYPE_MASK) != 0; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; - rep = (io_info & SVM_IOIO_REP_MASK) != 0; - down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; skip_emulated_instruction(&svm->vcpu); return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); @@ -1139,6 +1268,567 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } +static int nested_svm_check_permissions(struct vcpu_svm *svm) +{ + if (!(svm->vcpu.arch.shadow_efer & EFER_SVME) + || !is_paging(&svm->vcpu)) { + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; + } + + if (svm->vmcb->save.cpl) { + kvm_inject_gp(&svm->vcpu, 0); + return 1; + } + + return 0; +} + +static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, + bool has_error_code, u32 error_code) +{ + if (is_nested(svm)) { + svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = error_code; + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + if (nested_svm_exit_handled(svm, false)) { + nsvm_printk("VMexit -> EXCP 0x%x\n", nr); + + nested_svm_vmexit(svm); + return 1; + } + } + + return 0; +} + +static inline int nested_svm_intr(struct vcpu_svm *svm) +{ + if (is_nested(svm)) { + if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) + return 0; + + if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) + return 0; + + svm->vmcb->control.exit_code = SVM_EXIT_INTR; + + if (nested_svm_exit_handled(svm, false)) { + nsvm_printk("VMexit -> INTR\n"); + nested_svm_vmexit(svm); + return 1; + } + } + + return 0; +} + +static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa) +{ + struct page *page; + + down_read(¤t->mm->mmap_sem); + page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); + + if (is_error_page(page)) { + printk(KERN_INFO "%s: could not find page at 0x%llx\n", + __func__, gpa); + kvm_release_page_clean(page); + kvm_inject_gp(&svm->vcpu, 0); + return NULL; + } + return page; +} + +static int nested_svm_do(struct vcpu_svm *svm, + u64 arg1_gpa, u64 arg2_gpa, void *opaque, + int (*handler)(struct vcpu_svm *svm, + void *arg1, + void *arg2, + void *opaque)) +{ + struct page *arg1_page; + struct page *arg2_page = NULL; + void *arg1; + void *arg2 = NULL; + int retval; + + arg1_page = nested_svm_get_page(svm, arg1_gpa); + if(arg1_page == NULL) + return 1; + + if (arg2_gpa) { + arg2_page = nested_svm_get_page(svm, arg2_gpa); + if(arg2_page == NULL) { + kvm_release_page_clean(arg1_page); + return 1; + } + } + + arg1 = kmap_atomic(arg1_page, KM_USER0); + if (arg2_gpa) + arg2 = kmap_atomic(arg2_page, KM_USER1); + + retval = handler(svm, arg1, arg2, opaque); + + kunmap_atomic(arg1, KM_USER0); + if (arg2_gpa) + kunmap_atomic(arg2, KM_USER1); + + kvm_release_page_dirty(arg1_page); + if (arg2_gpa) + kvm_release_page_dirty(arg2_page); + + return retval; +} + +static int nested_svm_exit_handled_real(struct vcpu_svm *svm, + void *arg1, + void *arg2, + void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + bool kvm_overrides = *(bool *)opaque; + u32 exit_code = svm->vmcb->control.exit_code; + + if (kvm_overrides) { + switch (exit_code) { + case SVM_EXIT_INTR: + case SVM_EXIT_NMI: + return 0; + /* For now we are always handling NPFs when using them 
*/ + case SVM_EXIT_NPF: + if (npt_enabled) + return 0; + break; + /* When we're shadowing, trap PFs */ + case SVM_EXIT_EXCP_BASE + PF_VECTOR: + if (!npt_enabled) + return 0; + break; + default: + break; + } + } + + switch (exit_code) { + case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { + u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); + if (nested_vmcb->control.intercept_cr_read & cr_bits) + return 1; + break; + } + case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: { + u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0); + if (nested_vmcb->control.intercept_cr_write & cr_bits) + return 1; + break; + } + case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: { + u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0); + if (nested_vmcb->control.intercept_dr_read & dr_bits) + return 1; + break; + } + case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: { + u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0); + if (nested_vmcb->control.intercept_dr_write & dr_bits) + return 1; + break; + } + case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { + u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); + if (nested_vmcb->control.intercept_exceptions & excp_bits) + return 1; + break; + } + default: { + u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); + nsvm_printk("exit code: 0x%x\n", exit_code); + if (nested_vmcb->control.intercept & exit_bits) + return 1; + } + } + + return 0; +} + +static int nested_svm_exit_handled_msr(struct vcpu_svm *svm, + void *arg1, void *arg2, + void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + u8 *msrpm = (u8 *)arg2; + u32 t0, t1; + u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + u32 param = svm->vmcb->control.exit_info_1 & 1; + + if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT))) + return 0; + + switch(msr) { + case 0 ... 0x1fff: + t0 = (msr * 2) % 8; + t1 = msr / 8; + break; + case 0xc0000000 ... 0xc0001fff: + t0 = (8192 + msr - 0xc0000000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + case 0xc0010000 ... 
0xc0011fff: + t0 = (16384 + msr - 0xc0010000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + default: + return 1; + break; + } + if (msrpm[t1] & ((1 << param) << t0)) + return 1; + + return 0; +} + +static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override) +{ + bool k = kvm_override; + + switch (svm->vmcb->control.exit_code) { + case SVM_EXIT_MSR: + return nested_svm_do(svm, svm->nested_vmcb, + svm->nested_vmcb_msrpm, NULL, + nested_svm_exit_handled_msr); + default: break; + } + + return nested_svm_do(svm, svm->nested_vmcb, 0, &k, + nested_svm_exit_handled_real); +} + +static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1, + void *arg2, void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + struct vmcb *hsave = svm->hsave; + u64 nested_save[] = { nested_vmcb->save.cr0, + nested_vmcb->save.cr3, + nested_vmcb->save.cr4, + nested_vmcb->save.efer, + nested_vmcb->control.intercept_cr_read, + nested_vmcb->control.intercept_cr_write, + nested_vmcb->control.intercept_dr_read, + nested_vmcb->control.intercept_dr_write, + nested_vmcb->control.intercept_exceptions, + nested_vmcb->control.intercept, + nested_vmcb->control.msrpm_base_pa, + nested_vmcb->control.iopm_base_pa, + nested_vmcb->control.tsc_offset }; + + /* Give the current vmcb to the guest */ + memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb)); + nested_vmcb->save.cr0 = nested_save[0]; + if (!npt_enabled) + nested_vmcb->save.cr3 = nested_save[1]; + nested_vmcb->save.cr4 = nested_save[2]; + nested_vmcb->save.efer = nested_save[3]; + nested_vmcb->control.intercept_cr_read = nested_save[4]; + nested_vmcb->control.intercept_cr_write = nested_save[5]; + nested_vmcb->control.intercept_dr_read = nested_save[6]; + nested_vmcb->control.intercept_dr_write = nested_save[7]; + nested_vmcb->control.intercept_exceptions = nested_save[8]; + nested_vmcb->control.intercept = nested_save[9]; + nested_vmcb->control.msrpm_base_pa = nested_save[10]; + nested_vmcb->control.iopm_base_pa = nested_save[11]; + nested_vmcb->control.tsc_offset = nested_save[12]; + + /* We always set V_INTR_MASKING and remember the old value in hflags */ + if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) + nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; + + if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) && + (nested_vmcb->control.int_vector)) { + nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n", + nested_vmcb->control.int_vector); + } + + /* Restore the original control entries */ + svm->vmcb->control = hsave->control; + + /* Kill any pending exceptions */ + if (svm->vcpu.arch.exception.pending == true) + nsvm_printk("WARNING: Pending Exception\n"); + svm->vcpu.arch.exception.pending = false; + + /* Restore selected save entries */ + svm->vmcb->save.es = hsave->save.es; + svm->vmcb->save.cs = hsave->save.cs; + svm->vmcb->save.ss = hsave->save.ss; + svm->vmcb->save.ds = hsave->save.ds; + svm->vmcb->save.gdtr = hsave->save.gdtr; + svm->vmcb->save.idtr = hsave->save.idtr; + svm->vmcb->save.rflags = hsave->save.rflags; + svm_set_efer(&svm->vcpu, hsave->save.efer); + svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); + svm_set_cr4(&svm->vcpu, hsave->save.cr4); + if (npt_enabled) { + svm->vmcb->save.cr3 = hsave->save.cr3; + svm->vcpu.arch.cr3 = hsave->save.cr3; + } else { + kvm_set_cr3(&svm->vcpu, hsave->save.cr3); + } + kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); + kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); + kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); + 
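
The lookup in nested_svm_exit_handled_msr() above packs two intercept bits per MSR (bit 0 for reads, bit 1 for writes) and concatenates three 8192-MSR ranges into one bitmap. Working the numbers for a guest hypervisor writing EFER (MSR 0xc0000080):

    /* 0xc0000080 falls in the 0xc0000000..0xc0001fff range, which sits
     * 8192 MSR slots into the map */
    t0    = (8192 + 0xc0000080 - 0xc0000000) * 2;  /* = 16640, bit offset  */
    t1    = t0 / 8;                                /* = 2080,  byte offset */
    t0   %= 8;                                     /* = 0,     bit in byte */
    param = exit_info_1 & 1;                       /* = 1 for a write      */

    /* the exit is reflected to the guest hypervisor iff bit 1 of byte
     * 2080 of its MSR permission map is set: msrpm[2080] & (1 << 1) */
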
svm->vmcb->save.dr7 = 0; + svm->vmcb->save.cpl = 0; + svm->vmcb->control.exit_int_info = 0; + + svm->vcpu.arch.hflags &= ~HF_GIF_MASK; + /* Exit nested SVM mode */ + svm->nested_vmcb = 0; + + return 0; +} + +static int nested_svm_vmexit(struct vcpu_svm *svm) +{ + nsvm_printk("VMexit\n"); + if (nested_svm_do(svm, svm->nested_vmcb, 0, + NULL, nested_svm_vmexit_real)) + return 1; + + kvm_mmu_reset_context(&svm->vcpu); + kvm_mmu_load(&svm->vcpu); + + return 0; +} + +static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1, + void *arg2, void *opaque) +{ + int i; + u32 *nested_msrpm = (u32*)arg1; + for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) + svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; + svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm); + + return 0; +} + +static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1, + void *arg2, void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + struct vmcb *hsave = svm->hsave; + + /* nested_vmcb is our indicator if nested SVM is activated */ + svm->nested_vmcb = svm->vmcb->save.rax; + + /* Clear internal status */ + svm->vcpu.arch.exception.pending = false; + + /* Save the old vmcb, so we don't need to pick what we save, but + can restore everything when a VMEXIT occurs */ + memcpy(hsave, svm->vmcb, sizeof(struct vmcb)); + /* We need to remember the original CR3 in the SPT case */ + if (!npt_enabled) + hsave->save.cr3 = svm->vcpu.arch.cr3; + hsave->save.cr4 = svm->vcpu.arch.cr4; + hsave->save.rip = svm->next_rip; + + if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + svm->vcpu.arch.hflags |= HF_HIF_MASK; + else + svm->vcpu.arch.hflags &= ~HF_HIF_MASK; + + /* Load the nested guest state */ + svm->vmcb->save.es = nested_vmcb->save.es; + svm->vmcb->save.cs = nested_vmcb->save.cs; + svm->vmcb->save.ss = nested_vmcb->save.ss; + svm->vmcb->save.ds = nested_vmcb->save.ds; + svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; + svm->vmcb->save.idtr = nested_vmcb->save.idtr; + svm->vmcb->save.rflags = nested_vmcb->save.rflags; + svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); + svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); + svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); + if (npt_enabled) { + svm->vmcb->save.cr3 = nested_vmcb->save.cr3; + svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; + } else { + kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); + kvm_mmu_reset_context(&svm->vcpu); + } + svm->vmcb->save.cr2 = nested_vmcb->save.cr2; + kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); + kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); + kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); + /* In case we don't even reach vcpu_run, the fields are not updated */ + svm->vmcb->save.rax = nested_vmcb->save.rax; + svm->vmcb->save.rsp = nested_vmcb->save.rsp; + svm->vmcb->save.rip = nested_vmcb->save.rip; + svm->vmcb->save.dr7 = nested_vmcb->save.dr7; + svm->vmcb->save.dr6 = nested_vmcb->save.dr6; + svm->vmcb->save.cpl = nested_vmcb->save.cpl; + + /* We don't want a nested guest to be more powerful than the guest, + so all intercepts are ORed */ + svm->vmcb->control.intercept_cr_read |= + nested_vmcb->control.intercept_cr_read; + svm->vmcb->control.intercept_cr_write |= + nested_vmcb->control.intercept_cr_write; + svm->vmcb->control.intercept_dr_read |= + nested_vmcb->control.intercept_dr_read; + svm->vmcb->control.intercept_dr_write |= + nested_vmcb->control.intercept_dr_write; + svm->vmcb->control.intercept_exceptions |= + 
nested_vmcb->control.intercept_exceptions; + + svm->vmcb->control.intercept |= nested_vmcb->control.intercept; + + svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; + + force_new_asid(&svm->vcpu); + svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; + svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; + svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; + if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { + nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", + nested_vmcb->control.int_ctl); + } + if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) + svm->vcpu.arch.hflags |= HF_VINTR_MASK; + else + svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; + + nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", + nested_vmcb->control.exit_int_info, + nested_vmcb->control.int_state); + + svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; + svm->vmcb->control.int_state = nested_vmcb->control.int_state; + svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; + if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) + nsvm_printk("Injecting Event: 0x%x\n", + nested_vmcb->control.event_inj); + svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; + svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; + + svm->vcpu.arch.hflags |= HF_GIF_MASK; + + return 0; +} + +static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +{ + to_vmcb->save.fs = from_vmcb->save.fs; + to_vmcb->save.gs = from_vmcb->save.gs; + to_vmcb->save.tr = from_vmcb->save.tr; + to_vmcb->save.ldtr = from_vmcb->save.ldtr; + to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base; + to_vmcb->save.star = from_vmcb->save.star; + to_vmcb->save.lstar = from_vmcb->save.lstar; + to_vmcb->save.cstar = from_vmcb->save.cstar; + to_vmcb->save.sfmask = from_vmcb->save.sfmask; + to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; + to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; + to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; + + return 1; +} + +static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb, + void *arg2, void *opaque) +{ + return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb); +} + +static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb, + void *arg2, void *opaque) +{ + return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb); +} + +static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload); + + return 1; +} + +static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave); + + return 1; +} + +static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + nsvm_printk("VMrun\n"); + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + if (nested_svm_do(svm, svm->vmcb->save.rax, 0, + NULL, nested_svm_vmrun)) + return 1; + + if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0, + NULL, nested_svm_vmrun_msrpm)) + return 1; + + return 1; 
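
The fixed kvm_rip_read() + 3 in these handlers is safe because VMRUN, VMLOAD, VMSAVE, STGI and CLGI are all three-byte 0F 01 /x opcodes whose only operand is the implicit rAX. Seen from the guest hypervisor, the sequence being intercepted looks roughly like KVM's own world switch (illustrative sketch; older assemblers may need the raw byte encodings):

    static void nested_world_switch(unsigned long vmcb_pa)
    {
            asm volatile("vmload\n\t"  /* pull FS/GS/TR/LDTR etc. from VMCB  */
                         "vmrun\n\t"   /* run the nested guest until #VMEXIT */
                         "vmsave\n\t"  /* stash that state back to the VMCB  */
                         : : "a"(vmcb_pa)
                         : "memory", "cc");
    }
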
+} + +static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + svm->vcpu.arch.hflags |= HF_GIF_MASK; + + return 1; +} + +static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + svm->vcpu.arch.hflags &= ~HF_GIF_MASK; + + /* After a CLGI no interrupts should come */ + svm_clear_vintr(svm); + svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; + + return 1; +} + static int invalid_op_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { @@ -1250,6 +1940,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_LASTINTTOIP: *data = svm->vmcb->save.last_excp_to; break; + case MSR_VM_HSAVE_PA: + *data = svm->hsave_msr; + break; + case MSR_VM_CR: + *data = 0; + break; + case MSR_IA32_UCODE_REV: + *data = 0x01000065; + break; default: return kvm_get_msr_common(vcpu, ecx, data); } @@ -1344,6 +2043,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_VM_HSAVE_PA: + svm->hsave_msr = data; + break; default: return kvm_set_msr_common(vcpu, ecx, data); } @@ -1380,7 +2082,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm, { KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler); - svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); + svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; /* * If the user space waits to inject interrupts, exit as soon as @@ -1417,6 +2119,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_WRITE_DR3] = emulate_on_interception, [SVM_EXIT_WRITE_DR5] = emulate_on_interception, [SVM_EXIT_WRITE_DR7] = emulate_on_interception, + [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, + [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, @@ -1436,12 +2140,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_MSR] = msr_interception, [SVM_EXIT_TASK_SWITCH] = task_switch_interception, [SVM_EXIT_SHUTDOWN] = shutdown_interception, - [SVM_EXIT_VMRUN] = invalid_op_interception, + [SVM_EXIT_VMRUN] = vmrun_interception, [SVM_EXIT_VMMCALL] = vmmcall_interception, - [SVM_EXIT_VMLOAD] = invalid_op_interception, - [SVM_EXIT_VMSAVE] = invalid_op_interception, - [SVM_EXIT_STGI] = invalid_op_interception, - [SVM_EXIT_CLGI] = invalid_op_interception, + [SVM_EXIT_VMLOAD] = vmload_interception, + [SVM_EXIT_VMSAVE] = vmsave_interception, + [SVM_EXIT_STGI] = stgi_interception, + [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = invalid_op_interception, [SVM_EXIT_WBINVD] = emulate_on_interception, [SVM_EXIT_MONITOR] = invalid_op_interception, @@ -1457,6 +2161,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); + if (is_nested(svm)) { + nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", + exit_code, svm->vmcb->control.exit_info_1, + svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); + if (nested_svm_exit_handled(svm, true)) { + 
nested_svm_vmexit(svm); + nsvm_printk("-> #VMEXIT\n"); + return 1; + } + } + if (npt_enabled) { int mmu_reload = 0; if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { @@ -1544,6 +2259,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_svm *svm = to_svm(vcpu); + nested_svm_intr(svm); + svm_inject_irq(svm, irq); } @@ -1589,11 +2306,17 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) if (!kvm_cpu_has_interrupt(vcpu)) goto out; + if (nested_svm_intr(svm)) + goto out; + + if (!(svm->vcpu.arch.hflags & HF_GIF_MASK)) + goto out; + if (!(vmcb->save.rflags & X86_EFLAGS_IF) || (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { /* unable to deliver irq, set pending irq */ - vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); + svm_set_vintr(svm); svm_inject_irq(svm, 0x0); goto out; } @@ -1615,7 +2338,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm) } svm->vcpu.arch.interrupt_window_open = - !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); + !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && + (svm->vcpu.arch.hflags & HF_GIF_MASK); } static void svm_do_inject_vector(struct vcpu_svm *svm) @@ -1637,9 +2361,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; + if (nested_svm_intr(svm)) + return; + svm->vcpu.arch.interrupt_window_open = (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && - (svm->vmcb->save.rflags & X86_EFLAGS_IF)); + (svm->vmcb->save.rflags & X86_EFLAGS_IF) && + (svm->vcpu.arch.hflags & HF_GIF_MASK)); if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) /* @@ -1652,9 +2380,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, */ if (!svm->vcpu.arch.interrupt_window_open && (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window)) - control->intercept |= 1ULL << INTERCEPT_VINTR; - else - control->intercept &= ~(1ULL << INTERCEPT_VINTR); + svm_set_vintr(svm); + else + svm_clear_vintr(svm); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -1662,22 +2390,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void save_db_regs(unsigned long *db_regs) -{ - asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0])); - asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1])); - asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2])); - asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3])); -} - -static void load_db_regs(unsigned long *db_regs) -{ - asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0])); - asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1])); - asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2])); - asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3])); -} - static void svm_flush_tlb(struct kvm_vcpu *vcpu) { force_new_asid(vcpu); @@ -1736,19 +2448,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) gs_selector = kvm_read_gs(); ldt_selector = kvm_read_ldt(); svm->host_cr2 = kvm_read_cr2(); - svm->host_dr6 = read_dr6(); - svm->host_dr7 = read_dr7(); - svm->vmcb->save.cr2 = vcpu->arch.cr2; + if (!is_nested(svm)) + svm->vmcb->save.cr2 = vcpu->arch.cr2; /* required for live migration with NPT */ if (npt_enabled) svm->vmcb->save.cr3 = vcpu->arch.cr3; - if (svm->vmcb->save.dr7 & 0xff) { - write_dr7(0); - save_db_regs(svm->host_db_regs); - load_db_regs(svm->db_regs); - } - clgi(); local_irq_enable(); @@ -1824,16 +2529,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) #endif ); 
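
A thread running through all of the SVM hunks above is AMD's global interrupt flag. GIF has no guest-visible register, so KVM mirrors it in vcpu->arch.hflags:

    /*
     * STGI  ->  hflags |=  HF_GIF_MASK   (interrupt delivery possible)
     * CLGI  ->  hflags &= ~HF_GIF_MASK   (interrupts and NMIs held off)
     *
     * which is why interrupt_window_open, do_interrupt_requests() and
     * svm_intr_assist() now test HF_GIF_MASK alongside RFLAGS.IF and the
     * interrupt shadow before injecting anything.
     */
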
- if ((svm->vmcb->save.dr7 & 0xff)) - load_db_regs(svm->host_db_regs); - vcpu->arch.cr2 = svm->vmcb->save.cr2; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; - write_dr6(svm->host_dr6); - write_dr7(svm->host_dr7); kvm_write_cr2(svm->host_cr2); kvm_load_fs(fs_selector); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7611af576829..bb481330716f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -91,6 +91,7 @@ struct vcpu_vmx { } rmode; int vpid; bool emulation_required; + enum emulation_result invalid_state_emulation_result; /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; @@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_no_device(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_invalid_opcode(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_external_interrupt(u32 intr_info) @@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); if (!vcpu->fpu_active) eb |= 1u << NM_VECTOR; - if (vcpu->guest_debug.enabled) - eb |= 1u << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + eb |= 1u << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + eb |= 1u << BP_VECTOR; + } if (vcpu->arch.rmode.active) eb = ~0; if (vm_need_ept()) @@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 intr_info = nr | INTR_INFO_VALID_MASK; - if (has_error_code) + if (has_error_code) { vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + intr_info |= INTR_INFO_DELIVER_CODE_MASK; + } if (vcpu->arch.rmode.active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = nr; vmx->rmode.irq.rip = kvm_rip_read(vcpu); - if (nr == BP_VECTOR) + if (nr == BP_VECTOR || nr == OF_VECTOR) vmx->rmode.irq.rip++; - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - nr | INTR_TYPE_SOFT_INTR - | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) - | INTR_INFO_VALID_MASK); + intr_info |= INTR_TYPE_SOFT_INTR; + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); return; } - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - nr | INTR_TYPE_EXCEPTION - | (has_error_code ? 
INTR_INFO_DELIVER_CODE_MASK : 0) - | INTR_INFO_VALID_MASK); + if (nr == BP_VECTOR || nr == OF_VECTOR) { + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + intr_info |= INTR_TYPE_SOFT_EXCEPTION; + } else + intr_info |= INTR_TYPE_HARD_EXCEPTION; + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); } static bool vmx_exception_injected(struct kvm_vcpu *vcpu) @@ -856,11 +866,8 @@ static u64 guest_read_tsc(void) * writes 'guest_tsc' into guest's timestamp counter "register" * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc */ -static void guest_write_tsc(u64 guest_tsc) +static void guest_write_tsc(u64 guest_tsc, u64 host_tsc) { - u64 host_tsc; - - rdtscll(host_tsc); vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); } @@ -925,14 +932,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry *msr; + u64 host_tsc; int ret = 0; switch (msr_index) { -#ifdef CONFIG_X86_64 case MSR_EFER: vmx_load_host_state(vmx); ret = kvm_set_msr_common(vcpu, msr_index, data); break; +#ifdef CONFIG_X86_64 case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -950,7 +958,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_TIME_STAMP_COUNTER: - guest_write_tsc(data); + rdtscll(host_tsc); + guest_write_tsc(data, host_tsc); break; case MSR_P6_PERFCTR0: case MSR_P6_PERFCTR1: @@ -999,40 +1008,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - unsigned long dr7 = 0x400; - int old_singlestep; - - old_singlestep = vcpu->guest_debug.singlestep; - - vcpu->guest_debug.enabled = dbg->enabled; - if (vcpu->guest_debug.enabled) { - int i; + int old_debug = vcpu->guest_debug; + unsigned long flags; - dr7 |= 0x200; /* exact */ - for (i = 0; i < 4; ++i) { - if (!dbg->breakpoints[i].enabled) - continue; - vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address; - dr7 |= 2 << (i*2); /* global enable */ - dr7 |= 0 << (i*4+16); /* execution breakpoint */ - } + vcpu->guest_debug = dbg->control; + if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) + vcpu->guest_debug = 0; - vcpu->guest_debug.singlestep = dbg->singlestep; - } else - vcpu->guest_debug.singlestep = 0; - - if (old_singlestep && !vcpu->guest_debug.singlestep) { - unsigned long flags; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); + else + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); - flags = vmcs_readl(GUEST_RFLAGS); + flags = vmcs_readl(GUEST_RFLAGS); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - vmcs_writel(GUEST_RFLAGS, flags); - } + vmcs_writel(GUEST_RFLAGS, flags); update_exception_bitmap(vcpu); - vmcs_writel(GUEST_DR7, dr7); return 0; } @@ -1433,6 +1430,29 @@ continue_rmode: init_rmode(vcpu->kvm); } +static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); + + vcpu->arch.shadow_efer = efer; + if (!msr) + return; + if (efer & EFER_LMA) { + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) | + VM_ENTRY_IA32E_MODE); + msr->data = efer; + } else { + vmcs_write32(VM_ENTRY_CONTROLS, + 
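
The vmx_queue_exception() rework above is about injection types: VMX distinguishes how an event originally occurred, and KVM now tells it.

    /*
     * #BP (INT3) and #OF (INTO) are software exceptions, so they are
     * injected as INTR_TYPE_SOFT_EXCEPTION together with a valid
     * VM_ENTRY_INSTRUCTION_LEN; the CPU uses that length to advance the
     * guest RIP past the injected instruction, as if it had executed.
     * Every other vector is INTR_TYPE_HARD_EXCEPTION, which is also why
     * is_page_fault()/is_no_device()/is_invalid_opcode() above switched
     * from the old INTR_TYPE_EXCEPTION name to INTR_TYPE_HARD_EXCEPTION.
     */
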
vmcs_read32(VM_ENTRY_CONTROLS) & + ~VM_ENTRY_IA32E_MODE); + + msr->data = efer & ~EFER_LME; + } + setup_msrs(vmx); +} + #ifdef CONFIG_X86_64 static void enter_lmode(struct kvm_vcpu *vcpu) @@ -1447,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) (guest_tr_ar & ~AR_TYPE_MASK) | AR_TYPE_BUSY_64_TSS); } - vcpu->arch.shadow_efer |= EFER_LMA; - - find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME; - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) - | VM_ENTRY_IA32E_MODE); + vmx_set_efer(vcpu, vcpu->arch.shadow_efer); } static void exit_lmode(struct kvm_vcpu *vcpu) @@ -1612,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(GUEST_CR4, hw_cr4); } -static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); - - vcpu->arch.shadow_efer = efer; - if (!msr) - return; - if (efer & EFER_LMA) { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) | - VM_ENTRY_IA32E_MODE); - msr->data = efer; - - } else { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) & - ~VM_ENTRY_IA32E_MODE); - - msr->data = efer & ~EFER_LME; - } - setup_msrs(vmx); -} - static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -1653,7 +1644,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = vmcs_read32(sf->limit); var->selector = vmcs_read16(sf->selector); ar = vmcs_read32(sf->ar_bytes); - if (ar & AR_UNUSABLE_MASK) + if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; var->type = ar & 15; var->s = (ar >> 4) & 1; @@ -1788,14 +1779,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); cs_rpl = cs.selector & SELECTOR_RPL_MASK; + if (cs.unusable) + return false; if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) return false; if (!cs.s) return false; - if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) { + if (cs.type & AR_TYPE_WRITEABLE_MASK) { if (cs.dpl > cs_rpl) return false; - } else if (cs.type & AR_TYPE_CODE_MASK) { + } else { if (cs.dpl != cs_rpl) return false; } @@ -1814,7 +1807,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); ss_rpl = ss.selector & SELECTOR_RPL_MASK; - if ((ss.type != 3) || (ss.type != 7)) + if (ss.unusable) + return true; + if (ss.type != 3 && ss.type != 7) return false; if (!ss.s) return false; @@ -1834,6 +1829,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) vmx_get_segment(vcpu, &var, seg); rpl = var.selector & SELECTOR_RPL_MASK; + if (var.unusable) + return true; if (!var.s) return false; if (!var.present) @@ -1855,9 +1852,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); + if (tr.unusable) + return false; if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ return false; - if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */ + if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ return false; if (!tr.present) return false; @@ -1871,6 +1870,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); + if (ldtr.unusable) + return true; if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ return false; if (ldtr.type != 2) @@ -2112,7 +2113,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { u32 host_sysenter_cs, 
msr_low, msr_high; u32 junk; - u64 host_pat; + u64 host_pat, tsc_this, tsc_base; unsigned long a; struct descriptor_table dt; int i; @@ -2240,6 +2241,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); + tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; + rdtscll(tsc_this); + if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc) + tsc_base = tsc_this; + + guest_write_tsc(0, tsc_base); return 0; } @@ -2319,7 +2326,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, 0); kvm_register_write(vcpu, VCPU_REGS_RSP, 0); - /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */ vmcs_writel(GUEST_DR7, 0x400); vmcs_writel(GUEST_GDTR_BASE, 0); @@ -2332,8 +2338,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); - guest_write_tsc(0); - /* Special registers */ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); @@ -2486,6 +2490,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, { vmx_update_window_states(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { if (vcpu->arch.interrupt.pending) { enable_nmi_window(vcpu); @@ -2536,24 +2545,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - - set_debugreg(dbg->bp[0], 0); - set_debugreg(dbg->bp[1], 1); - set_debugreg(dbg->bp[2], 2); - set_debugreg(dbg->bp[3], 3); - - if (dbg->singlestep) { - unsigned long flags; - - flags = vmcs_readl(GUEST_RFLAGS); - flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - vmcs_writel(GUEST_RFLAGS, flags); - } -} - static int handle_rmode_exception(struct kvm_vcpu *vcpu, int vec, u32 err_code) { @@ -2570,9 +2561,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * the required debugging infrastructure rework. 
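
The TSC initialization that moved into vmx_vcpu_setup() above follows from the identity the existing comment states, TSC_OFFSET = guest_tsc - host_tsc. A sketch of the arithmetic (the reading of the vm_init_tsc fallback is my interpretation, not spelled out in the patch):

    /*
     * guest_write_tsc(0, tsc_base) sets TSC_OFFSET = 0 - tsc_base, so a
     * fresh vcpu reads
     *
     *      guest_tsc = host_tsc_now - tsc_base
     *
     * With tsc_base = kvm->arch.vm_init_tsc every vcpu of a VM shares the
     * same time origin regardless of when it was created.  The
     * "tsc_this < vm_init_tsc" check falls back to the local TSC when the
     * host TSCs are not synchronized, where the common base would make
     * guest_tsc start out as an enormous unsigned value.
     */
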
*/ switch (vec) { - case DE_VECTOR: case DB_VECTOR: + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return 0; + kvm_queue_exception(vcpu, vec); + return 1; case BP_VECTOR: + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + return 0; + /* fall through */ + case DE_VECTOR: case OF_VECTOR: case BR_VECTOR: case UD_VECTOR: @@ -2589,8 +2588,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 intr_info, error_code; - unsigned long cr2, rip; + u32 intr_info, ex_no, error_code; + unsigned long cr2, rip, dr6; u32 vect_info; enum emulation_result er; @@ -2649,14 +2648,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } - if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == - (INTR_TYPE_EXCEPTION | 1)) { + ex_no = intr_info & INTR_INFO_VECTOR_MASK; + switch (ex_no) { + case DB_VECTOR: + dr6 = vmcs_readl(EXIT_QUALIFICATION); + if (!(vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + vcpu->arch.dr6 = dr6 | DR6_FIXED_1; + kvm_queue_exception(vcpu, DB_VECTOR); + return 1; + } + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); + /* fall through */ + case BP_VECTOR: kvm_run->exit_reason = KVM_EXIT_DEBUG; - return 0; + kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; + kvm_run->debug.arch.exception = ex_no; + break; + default: + kvm_run->exit_reason = KVM_EXIT_EXCEPTION; + kvm_run->ex.exception = ex_no; + kvm_run->ex.error_code = error_code; + break; } - kvm_run->exit_reason = KVM_EXIT_EXCEPTION; - kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK; - kvm_run->ex.error_code = error_code; return 0; } @@ -2677,7 +2692,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { unsigned long exit_qualification; - int size, down, in, string, rep; + int size, in, string; unsigned port; ++vcpu->stat.io_exits; @@ -2693,8 +2708,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) size = (exit_qualification & 7) + 1; in = (exit_qualification & 8) != 0; - down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; - rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; skip_emulated_instruction(vcpu); @@ -2795,21 +2808,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) unsigned long val; int dr, reg; - /* - * FIXME: this code assumes the host is debugging the guest. - * need to deal with guest debugging itself too. - */ + dr = vmcs_readl(GUEST_DR7); + if (dr & DR7_GD) { + /* + * As the vm-exit takes precedence over the debug trap, we + * need to emulate the latter, either for the host or the + * guest debugging itself. 
+ */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { + kvm_run->debug.arch.dr6 = vcpu->arch.dr6; + kvm_run->debug.arch.dr7 = dr; + kvm_run->debug.arch.pc = + vmcs_readl(GUEST_CS_BASE) + + vmcs_readl(GUEST_RIP); + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + return 0; + } else { + vcpu->arch.dr7 &= ~DR7_GD; + vcpu->arch.dr6 |= DR6_BD; + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); + kvm_queue_exception(vcpu, DB_VECTOR); + return 1; + } + } + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & 7; - reg = (exit_qualification >> 8) & 15; - if (exit_qualification & 16) { - /* mov from dr */ + dr = exit_qualification & DEBUG_REG_ACCESS_NUM; + reg = DEBUG_REG_ACCESS_REG(exit_qualification); + if (exit_qualification & TYPE_MOV_FROM_DR) { switch (dr) { + case 0 ... 3: + val = vcpu->arch.db[dr]; + break; case 6: - val = 0xffff0ff0; + val = vcpu->arch.dr6; break; case 7: - val = 0x400; + val = vcpu->arch.dr7; break; default: val = 0; @@ -2817,7 +2853,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_register_write(vcpu, reg, val); KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); } else { - /* mov to dr */ + val = vcpu->arch.regs[reg]; + switch (dr) { + case 0 ... 3: + vcpu->arch.db[dr] = val; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + vcpu->arch.eff_db[dr] = val; + break; + case 4 ... 5: + if (vcpu->arch.cr4 & X86_CR4_DE) + kvm_queue_exception(vcpu, UD_VECTOR); + break; + case 6: + if (val & 0xffffffff00000000ULL) { + kvm_queue_exception(vcpu, GP_VECTOR); + break; + } + vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; + break; + case 7: + if (val & 0xffffffff00000000ULL) { + kvm_queue_exception(vcpu, GP_VECTOR); + break; + } + vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); + vcpu->arch.switch_db_regs = + (val & DR7_BP_EN_MASK); + } + break; + } + KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler); } skip_emulated_instruction(vcpu); return 1; @@ -2968,17 +3035,25 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } tss_selector = exit_qualification; - return kvm_task_switch(vcpu, tss_selector, reason); + if (!kvm_task_switch(vcpu, tss_selector, reason)) + return 0; + + /* clear all local breakpoint enable flags */ + vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); + + /* + * TODO: What about debug traps on tss switch? + * Are we supposed to inject them and update dr6? 
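The mov-to-DR6/DR7 paths added to handle_dr() reject writes with any of bits 63:32 set (they inject #GP) and force the architectural fixed bits on. A simplified model of the DR6 case; the two masks mirror the kernel's DR6_FIXED_1 and DR6_VOLATILE definitions as we read them, so treat the exact values as an assumption:

    #include <stdint.h>

    #define DR6_FIXED_1  0xffff0ff0ULL /* bits that always read as one */
    #define DR6_VOLATILE 0x0000e00fULL /* guest-writable status bits */

    /* Returns 0 on success, -1 if the write must raise #GP. */
    static int emulate_dr6_write(uint64_t val, uint64_t *dr6)
    {
        if (val & 0xffffffff00000000ULL)
            return -1;
        *dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
        return 0;
    }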
+ */ + + return 1; } static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; - enum emulation_result er; gpa_t gpa; - unsigned long hva; int gla_validity; - int r; exit_qualification = vmcs_read64(EXIT_QUALIFICATION); @@ -3001,32 +3076,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); - if (!kvm_is_error_hva(hva)) { - r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); - if (r < 0) { - printk(KERN_ERR "EPT: Not enough memory!\n"); - return -ENOMEM; - } - return 1; - } else { - /* must be MMIO */ - er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); - - if (er == EMULATE_FAIL) { - printk(KERN_ERR - "EPT: Fail to handle EPT violation vmexit!er is %d\n", - er); - printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", - (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), - (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); - printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", - (long unsigned int)exit_qualification); - return -ENOTSUPP; - } else if (er == EMULATE_DO_MMIO) - return 0; - } - return 1; + return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); } static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -3046,7 +3096,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int err; + enum emulation_result err = EMULATE_DONE; preempt_enable(); local_irq_enable(); @@ -3071,10 +3121,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, local_irq_disable(); preempt_disable(); - /* Guest state should be valid now except if we need to - * emulate an MMIO */ - if (guest_state_valid(vcpu)) - vmx->emulation_required = 0; + vmx->invalid_state_emulation_result = err; } /* @@ -3123,8 +3170,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* If we need to emulate an MMIO from handle_invalid_guest_state * we just return 0 */ - if (vmx->emulation_required && emulate_invalid_guest_state) - return 0; + if (vmx->emulation_required && emulate_invalid_guest_state) { + if (guest_state_valid(vcpu)) + vmx->emulation_required = 0; + return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; + } /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. 
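Both the simplified handle_ept_violation() and the reworked invalid-guest-state path lean on the exit-handler return convention used throughout this file: positive means the exit was handled in kernel and the guest can resume, zero means the exit completes in userspace, negative is an error. A hypothetical handler illustrating the three cases:

    #include <errno.h>

    /* Illustrative only: maps an imagined exit outcome onto KVM's
     * handler return convention. */
    static int handle_example_exit(int resolved_in_kernel, int needs_userspace)
    {
        if (resolved_in_kernel)
            return 1;       /* re-enter the guest */
        if (needs_userspace)
            return 0;       /* KVM_RUN returns; kvm_run says why */
        return -EINVAL;     /* propagated as an ioctl error */
    }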
*/ @@ -3238,7 +3288,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) vmx->vcpu.arch.nmi_injected = false; } kvm_clear_exception_queue(&vmx->vcpu); - if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) { + if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION || + type == INTR_TYPE_SOFT_EXCEPTION)) { if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { error = vmcs_read32(IDT_VECTORING_ERROR_CODE); kvm_queue_exception_e(&vmx->vcpu, vector, error); @@ -3259,6 +3310,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) vmx_update_window_states(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { if (vcpu->arch.interrupt.pending) { enable_nmi_window(vcpu); @@ -3347,6 +3403,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) */ vmcs_writel(HOST_CR0, read_cr0()); + set_debugreg(vcpu->arch.dr6, 6); + asm( /* Store host registers */ "push %%"R"dx; push %%"R"bp;" @@ -3441,6 +3499,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); vcpu->arch.regs_dirty = 0; + get_debugreg(vcpu->arch.dr6, 6); + vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); @@ -3595,7 +3655,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_put = vmx_vcpu_put, .set_guest_debug = set_guest_debug, - .guest_debug_pre = kvm_guest_debug_pre, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 758b7a155ae9..8ca100a9ecac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -36,6 +36,7 @@ #include <linux/highmem.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> +#include <linux/cpufreq.h> #include <asm/uaccess.h> #include <asm/msr.h> @@ -69,6 +70,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, + u32 function, u32 index); struct kvm_x86_ops *kvm_x86_ops; EXPORT_SYMBOL_GPL(kvm_x86_ops); @@ -173,6 +176,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, u32 error_code) { ++vcpu->stat.pf_guest; + if (vcpu->arch.exception.pending) { if (vcpu->arch.exception.nr == PF_VECTOR) { printk(KERN_DEBUG "kvm: inject_page_fault:" @@ -361,6 +365,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; + vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } @@ -442,6 +447,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + /* * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 
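The relocated bit() helper turns an X86_FEATURE_* bit number into a mask within its 32-bit CPUID word; set_efer() uses it to refuse EFER.FFXSR and EFER.SVME when the corresponding CPUID feature bit is absent. A self-contained sketch of that test:

    #include <stdint.h>

    static inline uint32_t bit(int bitno)
    {
        return 1u << (bitno & 31);
    }

    /* 'reg' is the relevant CPUID output register (ecx or edx),
     * 'bitno' the feature's bit index within that register. */
    static int cpuid_feature_present(uint32_t reg, int bitno)
    {
        return (reg & bit(bitno)) != 0;
    }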
@@ -456,7 +466,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT + MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; static unsigned num_msrs_to_save; @@ -481,6 +491,28 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) return; } + if (efer & EFER_FFXSR) { + struct kvm_cpuid_entry2 *feat; + + feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); + if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { + printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); + kvm_inject_gp(vcpu, 0); + return; + } + } + + if (efer & EFER_SVME) { + struct kvm_cpuid_entry2 *feat; + + feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); + if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { + printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); + kvm_inject_gp(vcpu, 0); + return; + } + } + kvm_x86_ops->set_efer(vcpu, efer); efer &= ~EFER_LMA; @@ -586,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info * hv_clock->tsc_to_system_mul); } +static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); + static void kvm_write_guest_time(struct kvm_vcpu *v) { struct timespec ts; @@ -596,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) if ((!vcpu->time_page)) return; - if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { - kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); - vcpu->hv_clock_tsc_khz = tsc_khz; + if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { + kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); + vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); } /* Keep irq disabled to prevent changes to the clock */ @@ -629,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); } +static int kvm_request_guest_time_update(struct kvm_vcpu *v) +{ + struct kvm_vcpu_arch *vcpu = &v->arch; + + if (!vcpu->time_page) + return 0; + set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); + return 1; +} + static bool msr_mtrr_valid(unsigned msr) { switch (msr) { @@ -722,6 +766,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: + case MSR_VM_HSAVE_PA: break; case 0x200 ... 
0x2ff: return set_msr_mtrr(vcpu, msr, data); @@ -758,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) vcpu->arch.time_page = NULL; } - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); break; } default: @@ -843,6 +888,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: + case MSR_VM_HSAVE_PA: data = 0; break; case MSR_MTRRcap: @@ -967,10 +1013,13 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: case KVM_CAP_SYNC_MMU: + case KVM_CAP_REINJECT_CONTROL: + case KVM_CAP_IRQ_INJECT_STATUS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -991,9 +1040,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; - case KVM_CAP_CLOCKSOURCE: - r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); - break; default: r = 0; break; @@ -1044,7 +1090,7 @@ long kvm_arch_dev_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; @@ -1064,7 +1110,7 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -1142,8 +1188,8 @@ out: } static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, - struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { int r; @@ -1162,8 +1208,8 @@ out: } static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, - struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { int r; @@ -1172,7 +1218,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, goto out; r = -EFAULT; if (copy_to_user(entries, &vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) goto out; return 0; @@ -1181,18 +1227,13 @@ out: return r; } -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index) + u32 index) { entry->function = function; entry->index = index; cpuid_count(entry->function, entry->index, - &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); + &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); entry->flags = 0; } @@ -1222,15 +1263,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, #ifdef CONFIG_X86_64 bit(X86_FEATURE_LM) | #endif + bit(X86_FEATURE_FXSR_OPT) | bit(X86_FEATURE_MMXEXT) | bit(X86_FEATURE_3DNOWEXT) | bit(X86_FEATURE_3DNOW); const u32 kvm_supported_word3_x86_features = bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); const u32 kvm_supported_word6_x86_features = - bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY); + bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | + bit(X86_FEATURE_SVM); - /* all func 2 cpuid_count() should be called on the same cpu */ + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); do_cpuid_1_ent(entry, function, index); ++*nent; @@ -1304,7 +1347,7 @@ static void do_cpuid_ent(struct 
kvm_cpuid_entry2 *entry, u32 function, } static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid_entry2 __user *entries) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG; @@ -1321,7 +1364,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[0].eax; for (func = 1; func <= limit && nent < cpuid->nent; ++func) do_cpuid_ent(&cpuid_entries[nent], func, 0, - &nent, cpuid->nent); + &nent, cpuid->nent); r = -E2BIG; if (nent >= cpuid->nent) goto out_free; @@ -1330,10 +1373,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) do_cpuid_ent(&cpuid_entries[nent], func, 0, - &nent, cpuid->nent); + &nent, cpuid->nent); r = -EFAULT; if (copy_to_user(entries, cpuid_entries, - nent * sizeof(struct kvm_cpuid_entry2))) + nent * sizeof(struct kvm_cpuid_entry2))) goto out_free; cpuid->nent = nent; r = 0; @@ -1477,7 +1520,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; break; @@ -1490,7 +1533,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; r = -EFAULT; @@ -1710,6 +1753,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) return r; } +static int kvm_vm_ioctl_reinject(struct kvm *kvm, + struct kvm_reinject_control *control) +{ + if (!kvm->arch.vpit) + return -ENXIO; + kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; + return 0; +} + /* * Get (and clear) the dirty memory log for a memory slot. 
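kvm_dev_ioctl_get_supported_cpuid() walks the standard leaves first and the extended (0x80000000) range second, in each range reading the upper bound from EAX of the range's base leaf. The sketch below reproduces that enumeration shape; cpuid_eax() is a hypothetical stand-in for the real CPUID accessor:

    #include <stdint.h>

    extern uint32_t cpuid_eax(uint32_t leaf);

    /* Visit every supported standard and extended CPUID leaf. */
    static void enumerate_leaves(void (*visit)(uint32_t leaf))
    {
        uint32_t limit, func;

        limit = cpuid_eax(0);
        for (func = 0; func <= limit; ++func)
            visit(func);

        limit = cpuid_eax(0x80000000u);
        for (func = 0x80000000u; func <= limit; ++func)
            visit(func);
    }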
*/ @@ -1807,13 +1859,26 @@ long kvm_arch_vm_ioctl(struct file *filp, } } else goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vpic); + kfree(kvm->arch.vioapic); + goto out; + } break; case KVM_CREATE_PIT: + mutex_lock(&kvm->lock); + r = -EEXIST; + if (kvm->arch.vpit) + goto create_pit_unlock; r = -ENOMEM; kvm->arch.vpit = kvm_create_pit(kvm); if (kvm->arch.vpit) r = 0; + create_pit_unlock: + mutex_unlock(&kvm->lock); break; + case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; @@ -1821,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { + __s32 status; mutex_lock(&kvm->lock); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event.irq, irq_event.level); + status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); + if (ioctl == KVM_IRQ_LINE_STATUS) { + irq_event.status = status; + if (copy_to_user(argp, &irq_event, + sizeof irq_event)) + goto out; + } r = 0; } break; @@ -1907,6 +1979,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_REINJECT_CONTROL: { + struct kvm_reinject_control control; + r = -EFAULT; + if (copy_from_user(&control, argp, sizeof(control))) + goto out; + r = kvm_vm_ioctl_reinject(kvm, &control); + if (r) + goto out; + r = 0; + break; + } default: ; } @@ -1960,10 +2043,38 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, return dev; } -int emulator_read_std(unsigned long addr, - void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu) +static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) +{ + void *data = val; + int r = X86EMUL_CONTINUE; + + while (bytes) { + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); + unsigned offset = addr & (PAGE_SIZE-1); + unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); + int ret; + + if (gpa == UNMAPPED_GVA) { + r = X86EMUL_PROPAGATE_FAULT; + goto out; + } + ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); + if (ret < 0) { + r = X86EMUL_UNHANDLEABLE; + goto out; + } + + bytes -= toread; + data += toread; + addr += toread; + } +out: + return r; +} + +static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) { void *data = val; int r = X86EMUL_CONTINUE; @@ -1971,27 +2082,27 @@ int emulator_read_std(unsigned long addr, while (bytes) { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); unsigned offset = addr & (PAGE_SIZE-1); - unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); + unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); int ret; if (gpa == UNMAPPED_GVA) { r = X86EMUL_PROPAGATE_FAULT; goto out; } - ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); + ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); if (ret < 0) { r = X86EMUL_UNHANDLEABLE; goto out; } - bytes -= tocopy; - data += tocopy; - addr += tocopy; + bytes -= towrite; + data += towrite; + addr += towrite; } out: return r; } -EXPORT_SYMBOL_GPL(emulator_read_std); + static int emulator_read_emulated(unsigned long addr, void *val, @@ -2013,8 +2124,8 @@ static int emulator_read_emulated(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (emulator_read_std(addr, val, bytes, vcpu) - == X86EMUL_CONTINUE) + if (kvm_read_guest_virt(addr, val, bytes, vcpu) + == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; if (gpa == UNMAPPED_GVA) return X86EMUL_PROPAGATE_FAULT; @@ 
-2217,7 +2328,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); - emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); + kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); @@ -2225,7 +2336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); static struct x86_emulate_ops emulate_ops = { - .read_std = emulator_read_std, + .read_std = kvm_read_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, @@ -2327,40 +2438,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(emulate_instruction); -static void free_pio_guest_pages(struct kvm_vcpu *vcpu) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i) - if (vcpu->arch.pio.guest_pages[i]) { - kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]); - vcpu->arch.pio.guest_pages[i] = NULL; - } -} - static int pio_copy_data(struct kvm_vcpu *vcpu) { void *p = vcpu->arch.pio_data; - void *q; + gva_t q = vcpu->arch.pio.guest_gva; unsigned bytes; - int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1; + int ret; - q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE, - PAGE_KERNEL); - if (!q) { - free_pio_guest_pages(vcpu); - return -ENOMEM; - } - q += vcpu->arch.pio.guest_page_offset; bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; if (vcpu->arch.pio.in) - memcpy(q, p, bytes); + ret = kvm_write_guest_virt(q, p, bytes, vcpu); else - memcpy(p, q, bytes); - q -= vcpu->arch.pio.guest_page_offset; - vunmap(q); - free_pio_guest_pages(vcpu); - return 0; + ret = kvm_read_guest_virt(q, p, bytes, vcpu); + return ret; } int complete_pio(struct kvm_vcpu *vcpu) @@ -2471,7 +2561,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.in = in; vcpu->arch.pio.string = 0; vcpu->arch.pio.down = 0; - vcpu->arch.pio.guest_page_offset = 0; vcpu->arch.pio.rep = 0; if (vcpu->run->io.direction == KVM_EXIT_IO_IN) @@ -2499,9 +2588,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, gva_t address, int rep, unsigned port) { unsigned now, in_page; - int i, ret = 0; - int nr_pages = 1; - struct page *page; + int ret = 0; struct kvm_io_device *pio_dev; vcpu->run->exit_reason = KVM_EXIT_IO; @@ -2513,7 +2600,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.in = in; vcpu->arch.pio.string = 1; vcpu->arch.pio.down = down; - vcpu->arch.pio.guest_page_offset = offset_in_page(address); vcpu->arch.pio.rep = rep; if (vcpu->run->io.direction == KVM_EXIT_IO_IN) @@ -2533,15 +2619,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, else in_page = offset_in_page(address) + size; now = min(count, (unsigned long)in_page / size); - if (!now) { - /* - * String I/O straddles page boundary. Pin two guest pages - * so that we satisfy atomicity constraints. Do just one - * transaction to avoid complexity. - */ - nr_pages = 2; + if (!now) now = 1; - } if (down) { /* * String I/O in reverse. Yuck. Kill the guest, fix later. 
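kvm_read_guest_virt() and kvm_write_guest_virt(), which replace emulator_read_std() and the vmap()-based PIO copy, share one loop shape: chunk the buffer at guest page boundaries, translate each page separately (contiguous guest-virtual pages need not be contiguous guest-physical), and bail out on an unmapped address. A sketch with hypothetical translate()/copy_chunk() callbacks standing in for gva_to_gpa and kvm_read_guest/kvm_write_guest:

    #include <stddef.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096u

    static int copy_guest_virt(uint64_t gva, void *buf, size_t bytes,
                               int (*translate)(uint64_t gva, uint64_t *gpa),
                               int (*copy_chunk)(uint64_t gpa, void *p, size_t n))
    {
        uint8_t *data = buf;

        while (bytes) {
            size_t offset = gva & (PAGE_SIZE - 1);
            size_t chunk = bytes < PAGE_SIZE - offset ? bytes
                                                      : PAGE_SIZE - offset;
            uint64_t gpa;

            if (translate(gva, &gpa))
                return -1;          /* unmapped: propagate the fault */
            if (copy_chunk(gpa, data, chunk))
                return -1;

            bytes -= chunk;
            data  += chunk;
            gva   += chunk;
        }
        return 0;
    }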
@@ -2556,15 +2635,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) kvm_x86_ops->skip_emulated_instruction(vcpu); - for (i = 0; i < nr_pages; ++i) { - page = gva_to_page(vcpu, address + i * PAGE_SIZE); - vcpu->arch.pio.guest_pages[i] = page; - if (!page) { - kvm_inject_gp(vcpu, 0); - free_pio_guest_pages(vcpu); - return 1; - } - } + vcpu->arch.pio.guest_gva = address; pio_dev = vcpu_find_pio_dev(vcpu, port, vcpu->arch.pio.cur_count, @@ -2572,7 +2643,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, if (!vcpu->arch.pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); - if (ret >= 0 && pio_dev) { + if (ret == X86EMUL_PROPAGATE_FAULT) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if (ret == 0 && pio_dev) { pio_string_write(pio_dev, vcpu); complete_pio(vcpu); if (vcpu->arch.pio.count == 0) @@ -2587,9 +2662,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); +static void bounce_off(void *info) +{ + /* nothing */ +} + +static unsigned int ref_freq; +static unsigned long tsc_khz_ref; + +static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int i, send_ipi = 0; + + if (!ref_freq) + ref_freq = freq->old; + + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) + return 0; + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) + return 0; + per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (vcpu->cpu != freq->cpu) + continue; + if (!kvm_request_guest_time_update(vcpu)) + continue; + if (vcpu->cpu != smp_processor_id()) + send_ipi++; + } + } + spin_unlock(&kvm_lock); + + if (freq->old < freq->new && send_ipi) { + /* + * We upscale the frequency. Must make the guest + * doesn't see old kvmclock values while running with + * the new frequency, otherwise we risk the guest sees + * time go backwards. + * + * In case we update the frequency for another cpu + * (which might be in guest context) send an interrupt + * to kick the cpu out of guest context. Next time + * guest context is entered kvmclock will be updated, + * so the guest will not see stale values. 
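The per-cpu rate update in kvmclock_cpufreq_notifier() is plain proportional scaling, matching cpufreq_scale(old, div, mult) = old * mult / div: the TSC rate measured at ref_freq is rescaled to the new frequency. Sketch:

    #include <stdint.h>

    /* new_khz = ref_khz * new_freq / ref_freq; 64-bit intermediate
     * math avoids overflow for kHz-range inputs. */
    static uint32_t scale_tsc_khz(uint32_t ref_khz, uint32_t ref_freq,
                                  uint32_t new_freq)
    {
        return (uint32_t)(((uint64_t)ref_khz * new_freq) / ref_freq);
    }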
+ */ + smp_call_function_single(freq->cpu, bounce_off, NULL, 1); + } + return 0; +} + +static struct notifier_block kvmclock_cpufreq_notifier_block = { + .notifier_call = kvmclock_cpufreq_notifier +}; + int kvm_arch_init(void *opaque) { - int r; + int r, cpu; struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; if (kvm_x86_ops) { @@ -2620,6 +2758,15 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); + + for_each_possible_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + tsc_khz_ref = tsc_khz; + cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + return 0; out: @@ -2827,25 +2974,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) return 0; if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && - !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) + !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) return 0; return 1; } -void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, + u32 function, u32 index) { int i; - u32 function, index; - struct kvm_cpuid_entry2 *e, *best; + struct kvm_cpuid_entry2 *best = NULL; - function = kvm_register_read(vcpu, VCPU_REGS_RAX); - index = kvm_register_read(vcpu, VCPU_REGS_RCX); - kvm_register_write(vcpu, VCPU_REGS_RAX, 0); - kvm_register_write(vcpu, VCPU_REGS_RBX, 0); - kvm_register_write(vcpu, VCPU_REGS_RCX, 0); - kvm_register_write(vcpu, VCPU_REGS_RDX, 0); - best = NULL; for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { + struct kvm_cpuid_entry2 *e; + e = &vcpu->arch.cpuid_entries[i]; if (is_matching_cpuid_entry(e, function, index)) { if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) @@ -2860,6 +3002,21 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) if (!best || e->function > best->function) best = e; } + return best; +} + +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +{ + u32 function, index; + struct kvm_cpuid_entry2 *best; + + function = kvm_register_read(vcpu, VCPU_REGS_RAX); + index = kvm_register_read(vcpu, VCPU_REGS_RCX); + kvm_register_write(vcpu, VCPU_REGS_RAX, 0); + kvm_register_write(vcpu, VCPU_REGS_RBX, 0); + kvm_register_write(vcpu, VCPU_REGS_RCX, 0); + kvm_register_write(vcpu, VCPU_REGS_RDX, 0); + best = kvm_find_cpuid_entry(vcpu, function, index); if (best) { kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); @@ -2945,6 +3102,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->requests) { if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) __kvm_migrate_timers(vcpu); + if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) + kvm_write_guest_time(vcpu); if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) kvm_mmu_sync_roots(vcpu); if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) @@ -2979,9 +3138,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - if (vcpu->guest_debug.enabled) - kvm_x86_ops->guest_debug_pre(vcpu); - vcpu->guest_mode = 1; /* * Make sure that guest_mode assignment won't happen after @@ -3002,10 +3158,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_guest_enter(); + get_debugreg(vcpu->arch.host_dr6, 6); + get_debugreg(vcpu->arch.host_dr7, 7); + if (unlikely(vcpu->arch.switch_db_regs)) { + 
get_debugreg(vcpu->arch.host_db[0], 0); + get_debugreg(vcpu->arch.host_db[1], 1); + get_debugreg(vcpu->arch.host_db[2], 2); + get_debugreg(vcpu->arch.host_db[3], 3); + + set_debugreg(0, 7); + set_debugreg(vcpu->arch.eff_db[0], 0); + set_debugreg(vcpu->arch.eff_db[1], 1); + set_debugreg(vcpu->arch.eff_db[2], 2); + set_debugreg(vcpu->arch.eff_db[3], 3); + } KVMTRACE_0D(VMENTRY, vcpu, entryexit); kvm_x86_ops->run(vcpu, kvm_run); + if (unlikely(vcpu->arch.switch_db_regs)) { + set_debugreg(0, 7); + set_debugreg(vcpu->arch.host_db[0], 0); + set_debugreg(vcpu->arch.host_db[1], 1); + set_debugreg(vcpu->arch.host_db[2], 2); + set_debugreg(vcpu->arch.host_db[3], 3); + } + set_debugreg(vcpu->arch.host_dr6, 6); + set_debugreg(vcpu->arch.host_dr7, 7); + vcpu->guest_mode = 0; local_irq_enable(); @@ -3192,7 +3372,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) /* * Don't leak debug flags in case they were set for guest debugging */ - if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); vcpu_put(vcpu); @@ -3811,15 +3991,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { - int r; + int i, r; vcpu_load(vcpu); + if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { + for (i = 0; i < KVM_NR_DB_REGS; ++i) + vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; + vcpu->arch.switch_db_regs = + (dbg->arch.debugreg[7] & DR7_BP_EN_MASK); + } else { + for (i = 0; i < KVM_NR_DB_REGS; i++) + vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); + } + r = kvm_x86_ops->set_guest_debug(vcpu, dbg); + if (dbg->control & KVM_GUESTDBG_INJECT_DB) + kvm_queue_exception(vcpu, DB_VECTOR); + else if (dbg->control & KVM_GUESTDBG_INJECT_BP) + kvm_queue_exception(vcpu, BP_VECTOR); + vcpu_put(vcpu); return r; @@ -4007,6 +4204,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = false; + vcpu->arch.switch_db_regs = 0; + memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); + vcpu->arch.dr6 = DR6_FIXED_1; + vcpu->arch.dr7 = DR7_FIXED_1; + return kvm_x86_ops->vcpu_reset(vcpu); } @@ -4100,6 +4302,8 @@ struct kvm *kvm_arch_create_vm(void) /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + rdtscll(kvm->arch.vm_init_tsc); + return kvm; } diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index d174db7a3370..ca91749d2083 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -178,7 +178,7 @@ static u32 opcode_table[256] = { 0, ImplicitOps | Stack, 0, 0, ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, /* 0xC8 - 0xCF */ - 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0, /* 0xD0 - 0xD7 */ ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, @@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) } static int emulate_pop(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) + struct x86_emulate_ops *ops, + void *dest, int len) { struct 
decode_cache *c = &ctxt->decode; int rc; rc = ops->read_emulated(register_address(c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]), - &c->src.val, c->src.bytes, ctxt->vcpu); + dest, len, ctxt->vcpu); if (rc != 0) return rc; - register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); + register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); return rc; } @@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int rc; - c->src.bytes = c->dst.bytes; - rc = emulate_pop(ctxt, ops); + rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); if (rc != 0) return rc; - c->dst.val = c->src.val; return 0; } @@ -1279,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, return 0; } +static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + unsigned long cs; + + rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); + if (rc) + return rc; + if (c->op_bytes == 4) + c->eip = (u32)c->eip; + rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + if (rc) + return rc; + rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS); + return rc; +} + static inline int writeback(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -1467,11 +1485,9 @@ special_insn: break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - c->src.bytes = c->op_bytes; - rc = emulate_pop(ctxt, ops); + rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); if (rc != 0) goto done; - c->dst.val = c->src.val; break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) @@ -1738,6 +1754,11 @@ special_insn: mov: c->dst.val = c->src.val; break; + case 0xcb: /* ret far */ + rc = emulate_ret_far(ctxt, ops); + if (rc) + goto done; + break; case 0xd0 ... 
0xd1: /* Grp2 */ c->src.val = 1; emulate_grp2(ctxt); @@ -1908,11 +1929,16 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3 && c->modrm_rm == 1) { - rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc) - goto done; - kvm_emulate_hypercall(ctxt->vcpu); + if (c->modrm_mod == 3) { + switch (c->modrm_rm) { + case 1: + rc = kvm_fix_hypercall(ctxt->vcpu); + if (rc) + goto done; + break; + default: + goto cannot_emulate; + } } else { rc = read_descriptor(ctxt, ops, c->src.ptr, &size, &address, diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index 6100682b1da2..dd1a7a4a1cea 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -65,4 +65,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _XTENSA_SOCKET_H */ diff --git a/arch/xtensa/include/asm/swab.h b/arch/xtensa/include/asm/swab.h index f50b697eb601..226a39162310 100644 --- a/arch/xtensa/include/asm/swab.h +++ b/arch/xtensa/include/asm/swab.h @@ -11,7 +11,7 @@ #ifndef _XTENSA_SWAB_H #define _XTENSA_SWAB_H -#include <asm/types.h> +#include <linux/types.h> #include <linux/compiler.h> #define __SWAB_64_THRU_32__ diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 5fbcde59a92d..f3b66fba5b8f 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -99,7 +99,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %14s", irq_desc[i].chip->typename); seq_printf(p, " %s", action->name); |
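Closing note on the emulator changes: emulate_ret_far(), wired to opcode 0xCB via the opcode-table hunk, pops the return offset first and the CS selector second, truncating the offset for 32-bit operand size before CS is reloaded. A sketch of that ordering, with pop() and load_cs() as hypothetical stand-ins for emulate_pop() and kvm_load_segment_descriptor():

    #include <stdint.h>

    extern int pop(unsigned long *val, int len);
    extern int load_cs(uint16_t sel);

    static int ret_far(unsigned long *eip, int op_bytes)
    {
        unsigned long cs;
        int rc;

        rc = pop(eip, op_bytes);
        if (rc)
            return rc;
        if (op_bytes == 4)
            *eip = (uint32_t)*eip;  /* clear the upper half */
        rc = pop(&cs, op_bytes);
        if (rc)
            return rc;
        return load_cs((uint16_t)cs);
    }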