diff options
Diffstat (limited to 'arch')
169 files changed, 8343 insertions, 1633 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index bc1acdda7a5e..63e77e3944ce 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -145,27 +145,24 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, long __user *, basep) { int error; - struct file *file; + struct fd arg = fdget(fd); struct osf_dirent_callback buf; - error = -EBADF; - file = fget(fd); - if (!file) - goto out; + if (!arg.file) + return -EBADF; buf.dirent = dirent; buf.basep = basep; buf.count = count; buf.error = 0; - error = vfs_readdir(file, osf_filldir, &buf); + error = vfs_readdir(arg.file, osf_filldir, &buf); if (error >= 0) error = buf.error; if (count != buf.count) error = count - buf.count; - fput(file); - out: + fdput(arg); return error; } @@ -278,8 +275,8 @@ linux_to_osf_stat(struct kstat *lstat, struct osf_stat __user *osf_stat) tmp.st_dev = lstat->dev; tmp.st_mode = lstat->mode; tmp.st_nlink = lstat->nlink; - tmp.st_uid = lstat->uid; - tmp.st_gid = lstat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), lstat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), lstat->gid); tmp.st_rdev = lstat->rdev; tmp.st_ldev = lstat->rdev; tmp.st_size = lstat->size; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9d72ed67e432..7bab17ed2972 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -891,6 +891,7 @@ config ARCH_NOMADIK select COMMON_CLK select GENERIC_CLOCKEVENTS select PINCTRL + select PINCTRL_STN8815 select MIGHT_HAVE_CACHE_L2X0 select ARCH_REQUIRE_GPIOLIB help diff --git a/arch/arm/boot/dts/highbank.dts b/arch/arm/boot/dts/highbank.dts index 9fecf1ae777b..0c6fc34821f9 100644 --- a/arch/arm/boot/dts/highbank.dts +++ b/arch/arm/boot/dts/highbank.dts @@ -121,6 +121,10 @@ compatible = "calxeda,hb-ahci"; reg = <0xffe08000 0x10000>; interrupts = <0 83 4>; + calxeda,port-phys = <&combophy5 0 &combophy0 0 + &combophy0 1 &combophy0 2 + &combophy0 3>; + dma-coherent; }; sdhci@ffe0e000 { @@ -306,5 +310,19 @@ reg = <0xfff51000 0x1000>; interrupts = <0 80 4 0 81 4 0 82 4>; }; + + combophy0: combo-phy@fff58000 { + compatible = "calxeda,hb-combophy"; + #phy-cells = <1>; + reg = <0xfff58000 0x1000>; + phydev = <5>; + }; + + combophy5: combo-phy@fff5d000 { + compatible = "calxeda,hb-combophy"; + #phy-cells = <1>; + reg = <0xfff5d000 0x1000>; + phydev = <31>; + }; }; }; diff --git a/arch/arm/configs/kzm9g_defconfig b/arch/arm/configs/kzm9g_defconfig index 5d0c66708960..c88b57886e79 100644 --- a/arch/arm/configs/kzm9g_defconfig +++ b/arch/arm/configs/kzm9g_defconfig @@ -23,7 +23,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_ARCH_SHMOBILE=y -CONFIG_KEYBOARD_GPIO_POLLED=y CONFIG_ARCH_SH73A0=y CONFIG_MACH_KZM9G=y CONFIG_MEMORY_START=0x41000000 @@ -71,6 +70,7 @@ CONFIG_INPUT_SPARSEKMAP=y # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GPIO=y # CONFIG_INPUT_MOUSE is not set CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_ST1232=y diff --git a/arch/arm/configs/sam9_l9260_defconfig b/arch/arm/configs/sam9_l9260_defconfig index ecf2531523a1..b4384af1bea6 100644 --- a/arch/arm/configs/sam9_l9260_defconfig +++ b/arch/arm/configs/sam9_l9260_defconfig @@ -39,7 +39,7 @@ CONFIG_MTD_NAND=y CONFIG_MTD_NAND_ATMEL=y CONFIG_MTD_NAND_PLATFORM=y CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_BEB_RESERVE=3 +CONFIG_MTD_UBI_BEB_LIMIT=25 CONFIG_MTD_UBI_GLUEBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 05112380dc53..8dcd9c702d90 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -44,10 +44,9 @@ #define rmb() dsb() #define wmb() mb() #else -#include <asm/memory.h> -#define mb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) -#define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) -#define wmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) +#define mb() barrier() +#define rmb() barrier() +#define wmb() barrier() #endif #ifndef CONFIG_SMP diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 5c44dcb0987b..23004847bb05 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -13,6 +13,7 @@ #define DMA_ERROR_CODE (~0) extern struct dma_map_ops arm_dma_ops; +extern struct dma_map_ops arm_coherent_dma_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) { diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 5f6ddcc56452..73cf03aa981e 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -275,14 +275,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define virt_addr_valid(kaddr) ((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory) -/* - * Optional coherency support. Currently used only by selected - * Intel XSC3-based systems. - */ -#ifndef arch_is_coherent -#define arch_is_coherent() 0 -#endif - #endif #include <asm-generic/memory_model.h> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index aa4ffe6e5ecf..dea7a925c7e2 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -24,6 +24,7 @@ #include <linux/percpu.h> #include <linux/clockchips.h> #include <linux/completion.h> +#include <linux/cpufreq.h> #include <linux/atomic.h> #include <asm/smp.h> @@ -650,3 +651,56 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +#ifdef CONFIG_CPU_FREQ + +static DEFINE_PER_CPU(unsigned long, l_p_j_ref); +static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq); +static unsigned long global_l_p_j_ref; +static unsigned long global_l_p_j_ref_freq; + +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu = freq->cpu; + + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + if (!per_cpu(l_p_j_ref, cpu)) { + per_cpu(l_p_j_ref, cpu) = + per_cpu(cpu_data, cpu).loops_per_jiffy; + per_cpu(l_p_j_ref_freq, cpu) = freq->old; + if (!global_l_p_j_ref) { + global_l_p_j_ref = loops_per_jiffy; + global_l_p_j_ref_freq = freq->old; + } + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { + loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, + global_l_p_j_ref_freq, + freq->new); + per_cpu(cpu_data, cpu).loops_per_jiffy = + cpufreq_scale(per_cpu(l_p_j_ref, cpu), + per_cpu(l_p_j_ref_freq, cpu), + freq->new); + } + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +#endif diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index af1da34ccf9d..40e36a50304c 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -15,6 +15,7 @@ */ #include <linux/clk.h> #include <linux/clkdev.h> +#include <linux/dma-mapping.h> #include <linux/io.h> #include <linux/irq.h> #include <linux/irqdomain.h> @@ -23,6 +24,7 @@ #include <linux/of_platform.h> #include <linux/of_address.h> #include <linux/smp.h> +#include <linux/amba/bus.h> #include <asm/cacheflush.h> #include <asm/smp_plat.h> @@ -149,11 +151,61 @@ static void highbank_power_off(void) cpu_do_idle(); } +static int highbank_platform_notifier(struct notifier_block *nb, + unsigned long event, void *__dev) +{ + struct resource *res; + int reg = -1; + struct device *dev = __dev; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + if (of_device_is_compatible(dev->of_node, "calxeda,hb-ahci")) + reg = 0xc; + else if (of_device_is_compatible(dev->of_node, "calxeda,hb-sdhci")) + reg = 0x18; + else if (of_device_is_compatible(dev->of_node, "arm,pl330")) + reg = 0x20; + else if (of_device_is_compatible(dev->of_node, "calxeda,hb-xgmac")) { + res = platform_get_resource(to_platform_device(dev), + IORESOURCE_MEM, 0); + if (res) { + if (res->start == 0xfff50000) + reg = 0; + else if (res->start == 0xfff51000) + reg = 4; + } + } + + if (reg < 0) + return NOTIFY_DONE; + + if (of_property_read_bool(dev->of_node, "dma-coherent")) { + writel(0xff31, sregs_base + reg); + set_dma_ops(dev, &arm_coherent_dma_ops); + } else + writel(0, sregs_base + reg); + + return NOTIFY_OK; +} + +static struct notifier_block highbank_amba_nb = { + .notifier_call = highbank_platform_notifier, +}; + +static struct notifier_block highbank_platform_nb = { + .notifier_call = highbank_platform_notifier, +}; + static void __init highbank_init(void) { pm_power_off = highbank_power_off; highbank_pm_init(); + bus_register_notifier(&platform_bus_type, &highbank_platform_nb); + bus_register_notifier(&amba_bustype, &highbank_amba_nb); + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c index 92a00260d074..bfa1eab91f41 100644 --- a/arch/arm/mach-nomadik/board-nhk8815.c +++ b/arch/arm/mach-nomadik/board-nhk8815.c @@ -23,6 +23,7 @@ #include <linux/mtd/partitions.h> #include <linux/i2c.h> #include <linux/io.h> +#include <linux/pinctrl/machine.h> #include <asm/hardware/vic.h> #include <asm/sizes.h> #include <asm/mach-types.h> @@ -33,6 +34,7 @@ #include <plat/gpio-nomadik.h> #include <plat/mtu.h> +#include <plat/pincfg.h> #include <linux/platform_data/mtd-nomadik-nand.h> #include <mach/fsmc.h> @@ -290,8 +292,42 @@ static struct i2c_board_info __initdata nhk8815_i2c2_devices[] = { }, }; +static unsigned long out_low[] = { PIN_OUTPUT_LOW }; +static unsigned long out_high[] = { PIN_OUTPUT_HIGH }; +static unsigned long in_nopull[] = { PIN_INPUT_NOPULL }; +static unsigned long in_pullup[] = { PIN_INPUT_PULLUP }; + +static struct pinctrl_map __initdata nhk8815_pinmap[] = { + PIN_MAP_MUX_GROUP_DEFAULT("uart0", "pinctrl-stn8815", "u0_a_1", "u0"), + PIN_MAP_MUX_GROUP_DEFAULT("uart1", "pinctrl-stn8815", "u1_a_1", "u1"), + /* Hog in MMC/SD card mux */ + PIN_MAP_MUX_GROUP_HOG_DEFAULT("pinctrl-stn8815", "mmcsd_a_1", "mmcsd"), + /* MCCLK */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO8_B10", out_low), + /* MCCMD */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO9_A10", in_pullup), + /* MCCMDDIR */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO10_C11", out_high), + /* MCDAT3-0 */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO11_B11", in_pullup), + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO12_A11", in_pullup), + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO13_C12", in_pullup), + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO14_B12", in_pullup), + /* MCDAT0DIR */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO15_A12", out_high), + /* MCDAT31DIR */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO16_C13", out_high), + /* MCMSFBCLK */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO24_C15", in_pullup), + /* CD input GPIO */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO111_H21", in_nopull), + /* CD bias drive */ + PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-stn8815", "GPIO112_J21", out_low), +}; + static void __init nhk8815_platform_init(void) { + pinctrl_register_mappings(nhk8815_pinmap, ARRAY_SIZE(nhk8815_pinmap)); cpu8815_platform_init(); nhk8815_onenand_init(); platform_add_devices(nhk8815_platform_devices, diff --git a/arch/arm/mach-nomadik/cpu-8815.c b/arch/arm/mach-nomadik/cpu-8815.c index 6fd8e46567a4..b617eaed0ce5 100644 --- a/arch/arm/mach-nomadik/cpu-8815.c +++ b/arch/arm/mach-nomadik/cpu-8815.c @@ -83,6 +83,18 @@ void cpu8815_add_gpios(resource_size_t *base, int num, int irq, } } +static inline void +cpu8815_add_pinctrl(struct device *parent, const char *name) +{ + struct platform_device_info pdevinfo = { + .parent = parent, + .name = name, + .id = -1, + }; + + platform_device_register_full(&pdevinfo); +} + static int __init cpu8815_init(void) { struct nmk_gpio_platform_data pdata = { @@ -91,6 +103,7 @@ static int __init cpu8815_init(void) cpu8815_add_gpios(cpu8815_gpio_base, ARRAY_SIZE(cpu8815_gpio_base), IRQ_GPIO0, &pdata); + cpu8815_add_pinctrl(NULL, "pinctrl-stn8815"); amba_apb_device_add(NULL, "rng", NOMADIK_RNG_BASE, SZ_4K, 0, 0, NULL, 0); amba_apb_device_add(NULL, "rtc-pl031", NOMADIK_RTC_BASE, SZ_4K, IRQ_RTC_RTT, 0, NULL, 0); return 0; diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c index 9a154bad1984..5a406f794798 100644 --- a/arch/arm/mach-pxa/sharpsl_pm.c +++ b/arch/arm/mach-pxa/sharpsl_pm.c @@ -579,8 +579,8 @@ static int sharpsl_ac_check(void) static int sharpsl_pm_suspend(struct platform_device *pdev, pm_message_t state) { sharpsl_pm.flags |= SHARPSL_SUSPENDED; - flush_delayed_work_sync(&toggle_charger); - flush_delayed_work_sync(&sharpsl_bat); + flush_delayed_work(&toggle_charger); + flush_delayed_work(&sharpsl_bat); if (sharpsl_pm.charge_mode == CHRG_ON) sharpsl_pm.flags |= SHARPSL_DO_OFFLINE_CHRG; diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index 0df5ae6740c6..fe2c97c179d1 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -3,7 +3,7 @@ # # Common objects -obj-y := timer.o console.o clock.o common.o +obj-y := timer.o console.o clock.o # CPU objects obj-$(CONFIG_ARCH_SH7367) += setup-sh7367.o clock-sh7367.o intc-sh7367.o diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index bc3b5da59e25..790dc68c4312 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -1231,6 +1231,15 @@ static struct i2c_board_info i2c1_devices[] = { #define USCCR1 IOMEM(0xE6058144) static void __init ap4evb_init(void) { + struct pm_domain_device domain_devices[] = { + { "A4LC", &lcdc1_device, }, + { "A4LC", &lcdc_device, }, + { "A4MP", &fsi_device, }, + { "A3SP", &sh_mmcif_device, }, + { "A3SP", &sdhi0_device, }, + { "A3SP", &sdhi1_device, }, + { "A4R", &ceu_device, }, + }; u32 srcr4; struct clk *clk; @@ -1463,14 +1472,8 @@ static void __init ap4evb_init(void) platform_add_devices(ap4evb_devices, ARRAY_SIZE(ap4evb_devices)); - rmobile_add_device_to_domain(&sh7372_pd_a4lc, &lcdc1_device); - rmobile_add_device_to_domain(&sh7372_pd_a4lc, &lcdc_device); - rmobile_add_device_to_domain(&sh7372_pd_a4mp, &fsi_device); - - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &sh_mmcif_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &sdhi0_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &sdhi1_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &ceu_device); + rmobile_add_devices_to_domains(domain_devices, + ARRAY_SIZE(domain_devices)); hdmi_init_pm_clock(); fsi_init_pm_clock(); @@ -1485,6 +1488,6 @@ MACHINE_START(AP4EVB, "ap4evb") .init_irq = sh7372_init_irq, .handle_irq = shmobile_handle_irq_intc, .init_machine = ap4evb_init, - .init_late = shmobile_init_late, + .init_late = sh7372_pm_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index c6593d394273..2912eab3b967 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -1209,10 +1209,10 @@ static void __init eva_init(void) eva_clock_init(); - rmobile_add_device_to_domain(&r8a7740_pd_a4lc, &lcdc0_device); - rmobile_add_device_to_domain(&r8a7740_pd_a4lc, &hdmi_lcdc_device); + rmobile_add_device_to_domain("A4LC", &lcdc0_device); + rmobile_add_device_to_domain("A4LC", &hdmi_lcdc_device); if (usb) - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, usb); + rmobile_add_device_to_domain("A3SP", usb); } static void __init eva_earlytimer_init(void) diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c index 773a2b95a4e0..0a43f3189c21 100644 --- a/arch/arm/mach-shmobile/board-kzm9g.c +++ b/arch/arm/mach-shmobile/board-kzm9g.c @@ -482,12 +482,10 @@ static struct gpio_keys_button gpio_buttons[] = { static struct gpio_keys_platform_data gpio_key_info = { .buttons = gpio_buttons, .nbuttons = ARRAY_SIZE(gpio_buttons), - .poll_interval = 250, /* poling at this point */ }; static struct platform_device gpio_keys_device = { - /* gpio-pcf857x.c driver doesn't support gpio_to_irq() */ - .name = "gpio-keys-polled", + .name = "gpio-keys", .dev = { .platform_data = &gpio_key_info, }, @@ -550,6 +548,7 @@ static struct platform_device fsi_ak4648_device = { /* I2C */ static struct pcf857x_platform_data pcf8575_pdata = { .gpio_base = GPIO_PCF8575_BASE, + .irq = intcs_evt2irq(0x3260), /* IRQ19 */ }; static struct i2c_board_info i2c0_devices[] = { diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 62783b5d8813..0c27c810cf99 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -1412,6 +1412,22 @@ static struct i2c_board_info i2c1_devices[] = { #define USCCR1 IOMEM(0xE6058144) static void __init mackerel_init(void) { + struct pm_domain_device domain_devices[] = { + { "A4LC", &lcdc_device, }, + { "A4LC", &hdmi_lcdc_device, }, + { "A4LC", &meram_device, }, + { "A4MP", &fsi_device, }, + { "A3SP", &usbhs0_device, }, + { "A3SP", &usbhs1_device, }, + { "A3SP", &nand_flash_device, }, + { "A3SP", &sh_mmcif_device, }, + { "A3SP", &sdhi0_device, }, +#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) + { "A3SP", &sdhi1_device, }, +#endif + { "A3SP", &sdhi2_device, }, + { "A4R", &ceu_device, }, + }; u32 srcr4; struct clk *clk; @@ -1626,20 +1642,8 @@ static void __init mackerel_init(void) platform_add_devices(mackerel_devices, ARRAY_SIZE(mackerel_devices)); - rmobile_add_device_to_domain(&sh7372_pd_a4lc, &lcdc_device); - rmobile_add_device_to_domain(&sh7372_pd_a4lc, &hdmi_lcdc_device); - rmobile_add_device_to_domain(&sh7372_pd_a4lc, &meram_device); - rmobile_add_device_to_domain(&sh7372_pd_a4mp, &fsi_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &usbhs0_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &usbhs1_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &nand_flash_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &sh_mmcif_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &sdhi0_device); -#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &sdhi1_device); -#endif - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &sdhi2_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &ceu_device); + rmobile_add_devices_to_domains(domain_devices, + ARRAY_SIZE(domain_devices)); hdmi_init_pm_clock(); sh7372_pm_init(); @@ -1653,6 +1657,6 @@ MACHINE_START(MACKEREL, "mackerel") .init_irq = sh7372_init_irq, .handle_irq = shmobile_handle_irq_intc, .init_machine = mackerel_init, - .init_late = shmobile_init_late, + .init_late = sh7372_pm_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/common.c b/arch/arm/mach-shmobile/common.c deleted file mode 100644 index 608aba9d60d7..000000000000 --- a/arch/arm/mach-shmobile/common.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <mach/common.h> - -void __init shmobile_init_late(void) -{ - shmobile_suspend_init(); - shmobile_cpuidle_init(); -} diff --git a/arch/arm/mach-shmobile/cpuidle.c b/arch/arm/mach-shmobile/cpuidle.c index 7b541e911ab4..9e050268cde4 100644 --- a/arch/arm/mach-shmobile/cpuidle.c +++ b/arch/arm/mach-shmobile/cpuidle.c @@ -16,51 +16,38 @@ #include <asm/cpuidle.h> #include <asm/io.h> -static void shmobile_enter_wfi(void) +int shmobile_enter_wfi(struct cpuidle_device *dev, struct cpuidle_driver *drv, + int index) { cpu_do_idle(); -} - -void (*shmobile_cpuidle_modes[CPUIDLE_STATE_MAX])(void) = { - shmobile_enter_wfi, /* regular sleep mode */ -}; - -static int shmobile_cpuidle_enter(struct cpuidle_device *dev, - struct cpuidle_driver *drv, - int index) -{ - shmobile_cpuidle_modes[index](); - - return index; + return 0; } static struct cpuidle_device shmobile_cpuidle_dev; -static struct cpuidle_driver shmobile_cpuidle_driver = { +static struct cpuidle_driver shmobile_cpuidle_default_driver = { .name = "shmobile_cpuidle", .owner = THIS_MODULE, .en_core_tk_irqen = 1, .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[0].enter = shmobile_enter_wfi, .safe_state_index = 0, /* C1 */ .state_count = 1, }; -void (*shmobile_cpuidle_setup)(struct cpuidle_driver *drv); +static struct cpuidle_driver *cpuidle_drv = &shmobile_cpuidle_default_driver; + +void shmobile_cpuidle_set_driver(struct cpuidle_driver *drv) +{ + cpuidle_drv = drv; +} int shmobile_cpuidle_init(void) { struct cpuidle_device *dev = &shmobile_cpuidle_dev; - struct cpuidle_driver *drv = &shmobile_cpuidle_driver; - int i; - - for (i = 0; i < CPUIDLE_STATE_MAX; i++) - drv->states[i].enter = shmobile_cpuidle_enter; - - if (shmobile_cpuidle_setup) - shmobile_cpuidle_setup(drv); - cpuidle_register_driver(drv); + cpuidle_register_driver(cpuidle_drv); - dev->state_count = drv->state_count; + dev->state_count = cpuidle_drv->state_count; cpuidle_register_device(dev); return 0; diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index f80f9c549393..ed77ab8c9143 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -13,8 +13,10 @@ extern int shmobile_clk_init(void); extern void shmobile_handle_irq_intc(struct pt_regs *); extern struct platform_suspend_ops shmobile_suspend_ops; struct cpuidle_driver; -extern void (*shmobile_cpuidle_modes[])(void); -extern void (*shmobile_cpuidle_setup)(struct cpuidle_driver *drv); +struct cpuidle_device; +extern int shmobile_enter_wfi(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index); +extern void shmobile_cpuidle_set_driver(struct cpuidle_driver *drv); extern void sh7367_init_irq(void); extern void sh7367_map_io(void); @@ -75,8 +77,6 @@ extern void r8a7740_meram_workaround(void); extern void r8a7779_register_twd(void); -extern void shmobile_init_late(void); - #ifdef CONFIG_SUSPEND int shmobile_suspend_init(void); #else @@ -100,4 +100,10 @@ static inline int shmobile_cpu_is_dead(unsigned int cpu) { return 1; } extern void shmobile_smp_init_cpus(unsigned int ncores); +static inline void shmobile_init_late(void) +{ + shmobile_suspend_init(); + shmobile_cpuidle_init(); +} + #endif /* __ARCH_MACH_COMMON_H */ diff --git a/arch/arm/mach-shmobile/include/mach/pm-rmobile.h b/arch/arm/mach-shmobile/include/mach/pm-rmobile.h index 5a402840fe28..690553a06887 100644 --- a/arch/arm/mach-shmobile/include/mach/pm-rmobile.h +++ b/arch/arm/mach-shmobile/include/mach/pm-rmobile.h @@ -12,6 +12,8 @@ #include <linux/pm_domain.h> +#define DEFAULT_DEV_LATENCY_NS 250000 + struct platform_device; struct rmobile_pm_domain { @@ -29,16 +31,33 @@ struct rmobile_pm_domain *to_rmobile_pd(struct generic_pm_domain *d) return container_of(d, struct rmobile_pm_domain, genpd); } +struct pm_domain_device { + const char *domain_name; + struct platform_device *pdev; +}; + #ifdef CONFIG_PM -extern void rmobile_init_pm_domain(struct rmobile_pm_domain *rmobile_pd); -extern void rmobile_add_device_to_domain(struct rmobile_pm_domain *rmobile_pd, - struct platform_device *pdev); -extern void rmobile_pm_add_subdomain(struct rmobile_pm_domain *rmobile_pd, - struct rmobile_pm_domain *rmobile_sd); +extern void rmobile_init_domains(struct rmobile_pm_domain domains[], int num); +extern void rmobile_add_device_to_domain_td(const char *domain_name, + struct platform_device *pdev, + struct gpd_timing_data *td); + +static inline void rmobile_add_device_to_domain(const char *domain_name, + struct platform_device *pdev) +{ + rmobile_add_device_to_domain_td(domain_name, pdev, NULL); +} + +extern void rmobile_add_devices_to_domains(struct pm_domain_device data[], + int size); #else -#define rmobile_init_pm_domain(pd) do { } while (0) -#define rmobile_add_device_to_domain(pd, pdev) do { } while (0) -#define rmobile_pm_add_subdomain(pd, sd) do { } while (0) + +#define rmobile_init_domains(domains, num) do { } while (0) +#define rmobile_add_device_to_domain_td(name, pdev, td) do { } while (0) +#define rmobile_add_device_to_domain(name, pdev) do { } while (0) + +static inline void rmobile_add_devices_to_domains(struct pm_domain_device d[], + int size) {} #endif /* CONFIG_PM */ #endif /* PM_RMOBILE_H */ diff --git a/arch/arm/mach-shmobile/include/mach/r8a7740.h b/arch/arm/mach-shmobile/include/mach/r8a7740.h index 7143147780df..59d252f4cf97 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7740.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7740.h @@ -607,9 +607,9 @@ enum { }; #ifdef CONFIG_PM -extern struct rmobile_pm_domain r8a7740_pd_a4s; -extern struct rmobile_pm_domain r8a7740_pd_a3sp; -extern struct rmobile_pm_domain r8a7740_pd_a4lc; +extern void __init r8a7740_init_pm_domains(void); +#else +static inline void r8a7740_init_pm_domains(void) {} #endif /* CONFIG_PM */ #endif /* __ASM_R8A7740_H__ */ diff --git a/arch/arm/mach-shmobile/include/mach/r8a7779.h b/arch/arm/mach-shmobile/include/mach/r8a7779.h index f504c5e81b47..499f52d2a4a1 100644 --- a/arch/arm/mach-shmobile/include/mach/r8a7779.h +++ b/arch/arm/mach-shmobile/include/mach/r8a7779.h @@ -347,17 +347,9 @@ extern int r8a7779_sysc_power_down(struct r8a7779_pm_ch *r8a7779_ch); extern int r8a7779_sysc_power_up(struct r8a7779_pm_ch *r8a7779_ch); #ifdef CONFIG_PM -extern struct r8a7779_pm_domain r8a7779_sh4a; -extern struct r8a7779_pm_domain r8a7779_sgx; -extern struct r8a7779_pm_domain r8a7779_vdp1; -extern struct r8a7779_pm_domain r8a7779_impx3; - -extern void r8a7779_init_pm_domain(struct r8a7779_pm_domain *r8a7779_pd); -extern void r8a7779_add_device_to_domain(struct r8a7779_pm_domain *r8a7779_pd, - struct platform_device *pdev); +extern void __init r8a7779_init_pm_domains(void); #else -#define r8a7779_init_pm_domain(pd) do { } while (0) -#define r8a7779_add_device_to_domain(pd, pdev) do { } while (0) +static inline void r8a7779_init_pm_domains(void) {} #endif /* CONFIG_PM */ extern struct smp_operations r8a7779_smp_ops; diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index b59048e6d8fd..eb98b45c5089 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -478,21 +478,17 @@ extern struct clk sh7372_fsibck_clk; extern struct clk sh7372_fsidiva_clk; extern struct clk sh7372_fsidivb_clk; -#ifdef CONFIG_PM -extern struct rmobile_pm_domain sh7372_pd_a4lc; -extern struct rmobile_pm_domain sh7372_pd_a4mp; -extern struct rmobile_pm_domain sh7372_pd_d4; -extern struct rmobile_pm_domain sh7372_pd_a4r; -extern struct rmobile_pm_domain sh7372_pd_a3rv; -extern struct rmobile_pm_domain sh7372_pd_a3ri; -extern struct rmobile_pm_domain sh7372_pd_a4s; -extern struct rmobile_pm_domain sh7372_pd_a3sp; -extern struct rmobile_pm_domain sh7372_pd_a3sg; -#endif /* CONFIG_PM */ - extern void sh7372_intcs_suspend(void); extern void sh7372_intcs_resume(void); extern void sh7372_intca_suspend(void); extern void sh7372_intca_resume(void); +#ifdef CONFIG_PM +extern void __init sh7372_init_pm_domains(void); +#else +static inline void sh7372_init_pm_domains(void) {} +#endif + +extern void __init sh7372_pm_init_late(void); + #endif /* __ASM_SH7372_H__ */ diff --git a/arch/arm/mach-shmobile/pm-r8a7740.c b/arch/arm/mach-shmobile/pm-r8a7740.c index 893504d012a6..21e5316d2d88 100644 --- a/arch/arm/mach-shmobile/pm-r8a7740.c +++ b/arch/arm/mach-shmobile/pm-r8a7740.c @@ -21,14 +21,6 @@ static int r8a7740_pd_a4s_suspend(void) return -EBUSY; } -struct rmobile_pm_domain r8a7740_pd_a4s = { - .genpd.name = "A4S", - .bit_shift = 10, - .gov = &pm_domain_always_on_gov, - .no_debug = true, - .suspend = r8a7740_pd_a4s_suspend, -}; - static int r8a7740_pd_a3sp_suspend(void) { /* @@ -38,17 +30,31 @@ static int r8a7740_pd_a3sp_suspend(void) return console_suspend_enabled ? 0 : -EBUSY; } -struct rmobile_pm_domain r8a7740_pd_a3sp = { - .genpd.name = "A3SP", - .bit_shift = 11, - .gov = &pm_domain_always_on_gov, - .no_debug = true, - .suspend = r8a7740_pd_a3sp_suspend, +static struct rmobile_pm_domain r8a7740_pm_domains[] = { + { + .genpd.name = "A4S", + .bit_shift = 10, + .gov = &pm_domain_always_on_gov, + .no_debug = true, + .suspend = r8a7740_pd_a4s_suspend, + }, + { + .genpd.name = "A3SP", + .bit_shift = 11, + .gov = &pm_domain_always_on_gov, + .no_debug = true, + .suspend = r8a7740_pd_a3sp_suspend, + }, + { + .genpd.name = "A4LC", + .bit_shift = 1, + }, }; -struct rmobile_pm_domain r8a7740_pd_a4lc = { - .genpd.name = "A4LC", - .bit_shift = 1, -}; +void __init r8a7740_init_pm_domains(void) +{ + rmobile_init_domains(r8a7740_pm_domains, ARRAY_SIZE(r8a7740_pm_domains)); + pm_genpd_add_subdomain_names("A4S", "A3SP"); +} #endif /* CONFIG_PM */ diff --git a/arch/arm/mach-shmobile/pm-r8a7779.c b/arch/arm/mach-shmobile/pm-r8a7779.c index a18a4ae16d2b..d50a8e9b94a4 100644 --- a/arch/arm/mach-shmobile/pm-r8a7779.c +++ b/arch/arm/mach-shmobile/pm-r8a7779.c @@ -183,7 +183,7 @@ static bool pd_active_wakeup(struct device *dev) return true; } -void r8a7779_init_pm_domain(struct r8a7779_pm_domain *r8a7779_pd) +static void r8a7779_init_pm_domain(struct r8a7779_pm_domain *r8a7779_pd) { struct generic_pm_domain *genpd = &r8a7779_pd->genpd; @@ -199,43 +199,44 @@ void r8a7779_init_pm_domain(struct r8a7779_pm_domain *r8a7779_pd) pd_power_up(&r8a7779_pd->genpd); } -void r8a7779_add_device_to_domain(struct r8a7779_pm_domain *r8a7779_pd, - struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - - pm_genpd_add_device(&r8a7779_pd->genpd, dev); - if (pm_clk_no_clocks(dev)) - pm_clk_add(dev, NULL); -} - -struct r8a7779_pm_domain r8a7779_sh4a = { - .ch = { - .chan_offs = 0x80, /* PWRSR1 .. PWRER1 */ - .isr_bit = 16, /* SH4A */ - } -}; - -struct r8a7779_pm_domain r8a7779_sgx = { - .ch = { - .chan_offs = 0xc0, /* PWRSR2 .. PWRER2 */ - .isr_bit = 20, /* SGX */ - } +static struct r8a7779_pm_domain r8a7779_pm_domains[] = { + { + .genpd.name = "SH4A", + .ch = { + .chan_offs = 0x80, /* PWRSR1 .. PWRER1 */ + .isr_bit = 16, /* SH4A */ + }, + }, + { + .genpd.name = "SGX", + .ch = { + .chan_offs = 0xc0, /* PWRSR2 .. PWRER2 */ + .isr_bit = 20, /* SGX */ + }, + }, + { + .genpd.name = "VDP1", + .ch = { + .chan_offs = 0x100, /* PWRSR3 .. PWRER3 */ + .isr_bit = 21, /* VDP */ + }, + }, + { + .genpd.name = "IMPX3", + .ch = { + .chan_offs = 0x140, /* PWRSR4 .. PWRER4 */ + .isr_bit = 24, /* IMP */ + }, + }, }; -struct r8a7779_pm_domain r8a7779_vdp1 = { - .ch = { - .chan_offs = 0x100, /* PWRSR3 .. PWRER3 */ - .isr_bit = 21, /* VDP */ - } -}; +void __init r8a7779_init_pm_domains(void) +{ + int j; -struct r8a7779_pm_domain r8a7779_impx3 = { - .ch = { - .chan_offs = 0x140, /* PWRSR4 .. PWRER4 */ - .isr_bit = 24, /* IMP */ - } -}; + for (j = 0; j < ARRAY_SIZE(r8a7779_pm_domains); j++) + r8a7779_init_pm_domain(&r8a7779_pm_domains[j]); +} #endif /* CONFIG_PM */ diff --git a/arch/arm/mach-shmobile/pm-rmobile.c b/arch/arm/mach-shmobile/pm-rmobile.c index 32e177275e47..1fc05d9453d0 100644 --- a/arch/arm/mach-shmobile/pm-rmobile.c +++ b/arch/arm/mach-shmobile/pm-rmobile.c @@ -134,7 +134,7 @@ static int rmobile_pd_start_dev(struct device *dev) return ret; } -void rmobile_init_pm_domain(struct rmobile_pm_domain *rmobile_pd) +static void rmobile_init_pm_domain(struct rmobile_pm_domain *rmobile_pd) { struct generic_pm_domain *genpd = &rmobile_pd->genpd; struct dev_power_governor *gov = rmobile_pd->gov; @@ -149,19 +149,38 @@ void rmobile_init_pm_domain(struct rmobile_pm_domain *rmobile_pd) __rmobile_pd_power_up(rmobile_pd, false); } -void rmobile_add_device_to_domain(struct rmobile_pm_domain *rmobile_pd, - struct platform_device *pdev) +void rmobile_init_domains(struct rmobile_pm_domain domains[], int num) +{ + int j; + + for (j = 0; j < num; j++) + rmobile_init_pm_domain(&domains[j]); +} + +void rmobile_add_device_to_domain_td(const char *domain_name, + struct platform_device *pdev, + struct gpd_timing_data *td) { struct device *dev = &pdev->dev; - pm_genpd_add_device(&rmobile_pd->genpd, dev); + __pm_genpd_name_add_device(domain_name, dev, td); if (pm_clk_no_clocks(dev)) pm_clk_add(dev, NULL); } -void rmobile_pm_add_subdomain(struct rmobile_pm_domain *rmobile_pd, - struct rmobile_pm_domain *rmobile_sd) +void rmobile_add_devices_to_domains(struct pm_domain_device data[], + int size) { - pm_genpd_add_subdomain(&rmobile_pd->genpd, &rmobile_sd->genpd); + struct gpd_timing_data latencies = { + .stop_latency_ns = DEFAULT_DEV_LATENCY_NS, + .start_latency_ns = DEFAULT_DEV_LATENCY_NS, + .save_state_latency_ns = DEFAULT_DEV_LATENCY_NS, + .restore_state_latency_ns = DEFAULT_DEV_LATENCY_NS, + }; + int j; + + for (j = 0; j < size; j++) + rmobile_add_device_to_domain_td(data[j].domain_name, + data[j].pdev, &latencies); } #endif /* CONFIG_PM */ diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 162121842a2b..a0826a48dd08 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -21,6 +21,7 @@ #include <linux/irq.h> #include <linux/bitrev.h> #include <linux/console.h> +#include <asm/cpuidle.h> #include <asm/io.h> #include <asm/tlbflush.h> #include <asm/suspend.h> @@ -72,20 +73,7 @@ #ifdef CONFIG_PM -struct rmobile_pm_domain sh7372_pd_a4lc = { - .genpd.name = "A4LC", - .bit_shift = 1, -}; - -struct rmobile_pm_domain sh7372_pd_a4mp = { - .genpd.name = "A4MP", - .bit_shift = 2, -}; - -struct rmobile_pm_domain sh7372_pd_d4 = { - .genpd.name = "D4", - .bit_shift = 3, -}; +#define PM_DOMAIN_ON_OFF_LATENCY_NS 250000 static int sh7372_a4r_pd_suspend(void) { @@ -94,39 +82,25 @@ static int sh7372_a4r_pd_suspend(void) return 0; } -struct rmobile_pm_domain sh7372_pd_a4r = { - .genpd.name = "A4R", - .bit_shift = 5, - .suspend = sh7372_a4r_pd_suspend, - .resume = sh7372_intcs_resume, -}; +static bool a4s_suspend_ready; -struct rmobile_pm_domain sh7372_pd_a3rv = { - .genpd.name = "A3RV", - .bit_shift = 6, -}; - -struct rmobile_pm_domain sh7372_pd_a3ri = { - .genpd.name = "A3RI", - .bit_shift = 8, -}; - -static int sh7372_pd_a4s_suspend(void) +static int sh7372_a4s_pd_suspend(void) { /* * The A4S domain contains the CPU core and therefore it should - * only be turned off if the CPU is in use. + * only be turned off if the CPU is not in use. This may happen + * during system suspend, when SYSC is going to be used for generating + * resume signals and a4s_suspend_ready is set to let + * sh7372_enter_suspend() know that it can turn A4S off. */ + a4s_suspend_ready = true; return -EBUSY; } -struct rmobile_pm_domain sh7372_pd_a4s = { - .genpd.name = "A4S", - .bit_shift = 10, - .gov = &pm_domain_always_on_gov, - .no_debug = true, - .suspend = sh7372_pd_a4s_suspend, -}; +static void sh7372_a4s_pd_resume(void) +{ + a4s_suspend_ready = false; +} static int sh7372_a3sp_pd_suspend(void) { @@ -137,18 +111,80 @@ static int sh7372_a3sp_pd_suspend(void) return console_suspend_enabled ? 0 : -EBUSY; } -struct rmobile_pm_domain sh7372_pd_a3sp = { - .genpd.name = "A3SP", - .bit_shift = 11, - .gov = &pm_domain_always_on_gov, - .no_debug = true, - .suspend = sh7372_a3sp_pd_suspend, +static struct rmobile_pm_domain sh7372_pm_domains[] = { + { + .genpd.name = "A4LC", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 1, + }, + { + .genpd.name = "A4MP", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 2, + }, + { + .genpd.name = "D4", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 3, + }, + { + .genpd.name = "A4R", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 5, + .suspend = sh7372_a4r_pd_suspend, + .resume = sh7372_intcs_resume, + }, + { + .genpd.name = "A3RV", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 6, + }, + { + .genpd.name = "A3RI", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 8, + }, + { + .genpd.name = "A4S", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 10, + .gov = &pm_domain_always_on_gov, + .no_debug = true, + .suspend = sh7372_a4s_pd_suspend, + .resume = sh7372_a4s_pd_resume, + }, + { + .genpd.name = "A3SP", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 11, + .gov = &pm_domain_always_on_gov, + .no_debug = true, + .suspend = sh7372_a3sp_pd_suspend, + }, + { + .genpd.name = "A3SG", + .genpd.power_on_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .genpd.power_off_latency_ns = PM_DOMAIN_ON_OFF_LATENCY_NS, + .bit_shift = 13, + }, }; -struct rmobile_pm_domain sh7372_pd_a3sg = { - .genpd.name = "A3SG", - .bit_shift = 13, -}; +void __init sh7372_init_pm_domains(void) +{ + rmobile_init_domains(sh7372_pm_domains, ARRAY_SIZE(sh7372_pm_domains)); + pm_genpd_add_subdomain_names("A4LC", "A3RV"); + pm_genpd_add_subdomain_names("A4R", "A4LC"); + pm_genpd_add_subdomain_names("A4S", "A3SG"); + pm_genpd_add_subdomain_names("A4S", "A3SP"); +} #endif /* CONFIG_PM */ @@ -304,6 +340,21 @@ static void sh7372_enter_a3sm_common(int pllc0_on) sh7372_set_reset_vector(__pa(sh7372_resume_core_standby_sysc)); sh7372_enter_sysc(pllc0_on, 1 << 12); } + +static void sh7372_enter_a4s_common(int pllc0_on) +{ + sh7372_intca_suspend(); + sh7372_set_reset_vector(SMFRAM); + sh7372_enter_sysc(pllc0_on, 1 << 10); + sh7372_intca_resume(); +} + +static void sh7372_pm_setup_smfram(void) +{ + memcpy((void *)SMFRAM, sh7372_resume_core_standby_sysc, 0x100); +} +#else +static inline void sh7372_pm_setup_smfram(void) {} #endif /* CONFIG_SUSPEND || CONFIG_CPU_IDLE */ #ifdef CONFIG_CPU_IDLE @@ -313,7 +364,8 @@ static int sh7372_do_idle_core_standby(unsigned long unused) return 0; } -static void sh7372_enter_core_standby(void) +static int sh7372_enter_core_standby(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { sh7372_set_reset_vector(__pa(sh7372_resume_core_standby_sysc)); @@ -324,83 +376,102 @@ static void sh7372_enter_core_standby(void) /* disable reset vector translation */ __raw_writel(0, SBAR); + + return 1; } -static void sh7372_enter_a3sm_pll_on(void) +static int sh7372_enter_a3sm_pll_on(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { sh7372_enter_a3sm_common(1); + return 2; } -static void sh7372_enter_a3sm_pll_off(void) +static int sh7372_enter_a3sm_pll_off(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { sh7372_enter_a3sm_common(0); + return 3; } -static void sh7372_cpuidle_setup(struct cpuidle_driver *drv) +static int sh7372_enter_a4s(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { - struct cpuidle_state *state = &drv->states[drv->state_count]; - - snprintf(state->name, CPUIDLE_NAME_LEN, "C2"); - strncpy(state->desc, "Core Standby Mode", CPUIDLE_DESC_LEN); - state->exit_latency = 10; - state->target_residency = 20 + 10; - state->flags = CPUIDLE_FLAG_TIME_VALID; - shmobile_cpuidle_modes[drv->state_count] = sh7372_enter_core_standby; - drv->state_count++; - - state = &drv->states[drv->state_count]; - snprintf(state->name, CPUIDLE_NAME_LEN, "C3"); - strncpy(state->desc, "A3SM PLL ON", CPUIDLE_DESC_LEN); - state->exit_latency = 20; - state->target_residency = 30 + 20; - state->flags = CPUIDLE_FLAG_TIME_VALID; - shmobile_cpuidle_modes[drv->state_count] = sh7372_enter_a3sm_pll_on; - drv->state_count++; - - state = &drv->states[drv->state_count]; - snprintf(state->name, CPUIDLE_NAME_LEN, "C4"); - strncpy(state->desc, "A3SM PLL OFF", CPUIDLE_DESC_LEN); - state->exit_latency = 120; - state->target_residency = 30 + 120; - state->flags = CPUIDLE_FLAG_TIME_VALID; - shmobile_cpuidle_modes[drv->state_count] = sh7372_enter_a3sm_pll_off; - drv->state_count++; + unsigned long msk, msk2; + + if (!sh7372_sysc_valid(&msk, &msk2)) + return sh7372_enter_a3sm_pll_off(dev, drv, index); + + sh7372_setup_sysc(msk, msk2); + sh7372_enter_a4s_common(0); + return 4; } +static struct cpuidle_driver sh7372_cpuidle_driver = { + .name = "sh7372_cpuidle", + .owner = THIS_MODULE, + .en_core_tk_irqen = 1, + .state_count = 5, + .safe_state_index = 0, /* C1 */ + .states[0] = ARM_CPUIDLE_WFI_STATE, + .states[0].enter = shmobile_enter_wfi, + .states[1] = { + .name = "C2", + .desc = "Core Standby Mode", + .exit_latency = 10, + .target_residency = 20 + 10, + .flags = CPUIDLE_FLAG_TIME_VALID, + .enter = sh7372_enter_core_standby, + }, + .states[2] = { + .name = "C3", + .desc = "A3SM PLL ON", + .exit_latency = 20, + .target_residency = 30 + 20, + .flags = CPUIDLE_FLAG_TIME_VALID, + .enter = sh7372_enter_a3sm_pll_on, + }, + .states[3] = { + .name = "C4", + .desc = "A3SM PLL OFF", + .exit_latency = 120, + .target_residency = 30 + 120, + .flags = CPUIDLE_FLAG_TIME_VALID, + .enter = sh7372_enter_a3sm_pll_off, + }, + .states[4] = { + .name = "C5", + .desc = "A4S PLL OFF", + .exit_latency = 240, + .target_residency = 30 + 240, + .flags = CPUIDLE_FLAG_TIME_VALID, + .enter = sh7372_enter_a4s, + .disabled = true, + }, +}; + static void sh7372_cpuidle_init(void) { - shmobile_cpuidle_setup = sh7372_cpuidle_setup; + shmobile_cpuidle_set_driver(&sh7372_cpuidle_driver); } #else static void sh7372_cpuidle_init(void) {} #endif #ifdef CONFIG_SUSPEND -static void sh7372_enter_a4s_common(int pllc0_on) -{ - sh7372_intca_suspend(); - memcpy((void *)SMFRAM, sh7372_resume_core_standby_sysc, 0x100); - sh7372_set_reset_vector(SMFRAM); - sh7372_enter_sysc(pllc0_on, 1 << 10); - sh7372_intca_resume(); -} - static int sh7372_enter_suspend(suspend_state_t suspend_state) { unsigned long msk, msk2; /* check active clocks to determine potential wakeup sources */ - if (sh7372_sysc_valid(&msk, &msk2)) { - if (!console_suspend_enabled && - sh7372_pd_a4s.genpd.status == GPD_STATE_POWER_OFF) { - /* convert INTC mask/sense to SYSC mask/sense */ - sh7372_setup_sysc(msk, msk2); - - /* enter A4S sleep with PLLC0 off */ - pr_debug("entering A4S\n"); - sh7372_enter_a4s_common(0); - return 0; - } + if (sh7372_sysc_valid(&msk, &msk2) && a4s_suspend_ready) { + /* convert INTC mask/sense to SYSC mask/sense */ + sh7372_setup_sysc(msk, msk2); + + /* enter A4S sleep with PLLC0 off */ + pr_debug("entering A4S\n"); + sh7372_enter_a4s_common(0); + return 0; } /* default to enter A3SM sleep with PLLC0 off */ @@ -426,7 +497,7 @@ static int sh7372_pm_notifier_fn(struct notifier_block *notifier, * executed during system suspend and resume, respectively, so * that those functions don't crash while accessing the INTCS. */ - pm_genpd_poweron(&sh7372_pd_a4r.genpd); + pm_genpd_name_poweron("A4R"); break; case PM_POST_SUSPEND: pm_genpd_poweroff_unused(); @@ -455,6 +526,14 @@ void __init sh7372_pm_init(void) /* do not convert A3SM, A3SP, A3SG, A4R power down into A4S */ __raw_writel(0, PDNSEL); + sh7372_pm_setup_smfram(); + sh7372_suspend_init(); sh7372_cpuidle_init(); } + +void __init sh7372_pm_init_late(void) +{ + shmobile_init_late(); + pm_genpd_name_attach_cpuidle("A4S", 4); +} diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c index 78948a9dba0e..11bb1d984197 100644 --- a/arch/arm/mach-shmobile/setup-r8a7740.c +++ b/arch/arm/mach-shmobile/setup-r8a7740.c @@ -673,12 +673,7 @@ void __init r8a7740_add_standard_devices(void) r8a7740_i2c_workaround(&i2c0_device); r8a7740_i2c_workaround(&i2c1_device); - /* PM domain */ - rmobile_init_pm_domain(&r8a7740_pd_a4s); - rmobile_init_pm_domain(&r8a7740_pd_a3sp); - rmobile_init_pm_domain(&r8a7740_pd_a4lc); - - rmobile_pm_add_subdomain(&r8a7740_pd_a4s, &r8a7740_pd_a3sp); + r8a7740_init_pm_domains(); /* add devices */ platform_add_devices(r8a7740_early_devices, @@ -688,16 +683,16 @@ void __init r8a7740_add_standard_devices(void) /* add devices to PM domain */ - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scif0_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scif1_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scif2_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scif3_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scif4_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scif5_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scif6_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scif7_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &scifb_device); - rmobile_add_device_to_domain(&r8a7740_pd_a3sp, &i2c1_device); + rmobile_add_device_to_domain("A3SP", &scif0_device); + rmobile_add_device_to_domain("A3SP", &scif1_device); + rmobile_add_device_to_domain("A3SP", &scif2_device); + rmobile_add_device_to_domain("A3SP", &scif3_device); + rmobile_add_device_to_domain("A3SP", &scif4_device); + rmobile_add_device_to_domain("A3SP", &scif5_device); + rmobile_add_device_to_domain("A3SP", &scif6_device); + rmobile_add_device_to_domain("A3SP", &scif7_device); + rmobile_add_device_to_domain("A3SP", &scifb_device); + rmobile_add_device_to_domain("A3SP", &i2c1_device); } static void __init r8a7740_earlytimer_init(void) diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index e98e46f6cf55..2917668f0091 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -251,10 +251,7 @@ void __init r8a7779_add_standard_devices(void) #endif r8a7779_pm_init(); - r8a7779_init_pm_domain(&r8a7779_sh4a); - r8a7779_init_pm_domain(&r8a7779_sgx); - r8a7779_init_pm_domain(&r8a7779_vdp1); - r8a7779_init_pm_domain(&r8a7779_impx3); + r8a7779_init_pm_domains(); platform_add_devices(r8a7779_early_devices, ARRAY_SIZE(r8a7779_early_devices)); diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index 838a87be1d5c..a07954fbcd22 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -1001,21 +1001,34 @@ static struct platform_device *sh7372_late_devices[] __initdata = { void __init sh7372_add_standard_devices(void) { - rmobile_init_pm_domain(&sh7372_pd_a4lc); - rmobile_init_pm_domain(&sh7372_pd_a4mp); - rmobile_init_pm_domain(&sh7372_pd_d4); - rmobile_init_pm_domain(&sh7372_pd_a4r); - rmobile_init_pm_domain(&sh7372_pd_a3rv); - rmobile_init_pm_domain(&sh7372_pd_a3ri); - rmobile_init_pm_domain(&sh7372_pd_a4s); - rmobile_init_pm_domain(&sh7372_pd_a3sp); - rmobile_init_pm_domain(&sh7372_pd_a3sg); - - rmobile_pm_add_subdomain(&sh7372_pd_a4lc, &sh7372_pd_a3rv); - rmobile_pm_add_subdomain(&sh7372_pd_a4r, &sh7372_pd_a4lc); - - rmobile_pm_add_subdomain(&sh7372_pd_a4s, &sh7372_pd_a3sg); - rmobile_pm_add_subdomain(&sh7372_pd_a4s, &sh7372_pd_a3sp); + struct pm_domain_device domain_devices[] = { + { "A3RV", &vpu_device, }, + { "A4MP", &spu0_device, }, + { "A4MP", &spu1_device, }, + { "A3SP", &scif0_device, }, + { "A3SP", &scif1_device, }, + { "A3SP", &scif2_device, }, + { "A3SP", &scif3_device, }, + { "A3SP", &scif4_device, }, + { "A3SP", &scif5_device, }, + { "A3SP", &scif6_device, }, + { "A3SP", &iic1_device, }, + { "A3SP", &dma0_device, }, + { "A3SP", &dma1_device, }, + { "A3SP", &dma2_device, }, + { "A3SP", &usb_dma0_device, }, + { "A3SP", &usb_dma1_device, }, + { "A4R", &iic0_device, }, + { "A4R", &veu0_device, }, + { "A4R", &veu1_device, }, + { "A4R", &veu2_device, }, + { "A4R", &veu3_device, }, + { "A4R", &jpu_device, }, + { "A4R", &tmu00_device, }, + { "A4R", &tmu01_device, }, + }; + + sh7372_init_pm_domains(); platform_add_devices(sh7372_early_devices, ARRAY_SIZE(sh7372_early_devices)); @@ -1023,30 +1036,8 @@ void __init sh7372_add_standard_devices(void) platform_add_devices(sh7372_late_devices, ARRAY_SIZE(sh7372_late_devices)); - rmobile_add_device_to_domain(&sh7372_pd_a3rv, &vpu_device); - rmobile_add_device_to_domain(&sh7372_pd_a4mp, &spu0_device); - rmobile_add_device_to_domain(&sh7372_pd_a4mp, &spu1_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &scif0_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &scif1_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &scif2_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &scif3_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &scif4_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &scif5_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &scif6_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &iic1_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &dma0_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &dma1_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &dma2_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &usb_dma0_device); - rmobile_add_device_to_domain(&sh7372_pd_a3sp, &usb_dma1_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &iic0_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &veu0_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &veu1_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &veu2_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &veu3_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &jpu_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &tmu00_device); - rmobile_add_device_to_domain(&sh7372_pd_a4r, &tmu01_device); + rmobile_add_devices_to_domains(domain_devices, + ARRAY_SIZE(domain_devices)); } static void __init sh7372_earlytimer_init(void) diff --git a/arch/arm/mach-tegra/include/mach/kbc.h b/arch/arm/mach-tegra/include/mach/kbc.h deleted file mode 100644 index a13025612939..000000000000 --- a/arch/arm/mach-tegra/include/mach/kbc.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Platform definitions for tegra-kbc keyboard input driver - * - * Copyright (c) 2010-2011, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef ASMARM_ARCH_TEGRA_KBC_H -#define ASMARM_ARCH_TEGRA_KBC_H - -#include <linux/types.h> -#include <linux/input/matrix_keypad.h> - -#define KBC_MAX_GPIO 24 -#define KBC_MAX_KPENT 8 - -#define KBC_MAX_ROW 16 -#define KBC_MAX_COL 8 -#define KBC_MAX_KEY (KBC_MAX_ROW * KBC_MAX_COL) - -enum tegra_pin_type { - PIN_CFG_IGNORE, - PIN_CFG_COL, - PIN_CFG_ROW, -}; - -struct tegra_kbc_pin_cfg { - enum tegra_pin_type type; - unsigned char num; -}; - -struct tegra_kbc_wake_key { - u8 row:4; - u8 col:4; -}; - -struct tegra_kbc_platform_data { - unsigned int debounce_cnt; - unsigned int repeat_cnt; - - struct tegra_kbc_pin_cfg pin_cfg[KBC_MAX_GPIO]; - const struct matrix_keymap_data *keymap_data; - - u32 wakeup_key; - bool wakeup; - bool use_fn_map; - bool use_ghost_filter; -}; -#endif diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index ef6f602b7e48..b8efac4daed8 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -1557,9 +1557,6 @@ static struct u300_mux_hog u300_mux_hogs[] = { .dev = &uart0_device.dev, }, { - .dev = &pl022_device.dev, - }, - { .dev = &mmcsd_device.dev, }, }; diff --git a/arch/arm/mach-ux500/board-mop500-pins.c b/arch/arm/mach-ux500/board-mop500-pins.c index 32fd99204464..a267c6d30e37 100644 --- a/arch/arm/mach-ux500/board-mop500-pins.c +++ b/arch/arm/mach-ux500/board-mop500-pins.c @@ -30,16 +30,15 @@ static enum custom_pin_cfg_t pinsfor; #define BIAS(a,b) static unsigned long a[] = { b } BIAS(pd, PIN_PULL_DOWN); -BIAS(slpm_gpio_nopull, PIN_SLPM_GPIO|PIN_SLPM_INPUT_NOPULL); BIAS(in_nopull, PIN_INPUT_NOPULL); -BIAS(in_nopull_sleep_nowkup, PIN_INPUT_NOPULL|PIN_SLPM_WAKEUP_DISABLE); +BIAS(in_nopull_slpm_nowkup, PIN_INPUT_NOPULL|PIN_SLPM_WAKEUP_DISABLE); BIAS(in_pu, PIN_INPUT_PULLUP); BIAS(in_pd, PIN_INPUT_PULLDOWN); BIAS(in_pd_slpm_in_pu, PIN_INPUT_PULLDOWN|PIN_SLPM_INPUT_PULLUP); BIAS(in_pu_slpm_out_lo, PIN_INPUT_PULLUP|PIN_SLPM_OUTPUT_LOW); BIAS(out_hi, PIN_OUTPUT_HIGH); BIAS(out_lo, PIN_OUTPUT_LOW); -BIAS(out_lo_sleep_nowkup, PIN_OUTPUT_LOW|PIN_SLPM_WAKEUP_DISABLE); +BIAS(out_lo_slpm_nowkup, PIN_OUTPUT_LOW|PIN_SLPM_WAKEUP_DISABLE); /* These also force them into GPIO mode */ BIAS(gpio_in_pu, PIN_INPUT_PULLUP|PIN_GPIOMODE_ENABLED); BIAS(gpio_in_pd, PIN_INPUT_PULLDOWN|PIN_GPIOMODE_ENABLED); @@ -48,23 +47,32 @@ BIAS(gpio_in_pd_slpm_gpio_nopull, PIN_INPUT_PULLDOWN|PIN_GPIOMODE_ENABLED|PIN_SL BIAS(gpio_out_hi, PIN_OUTPUT_HIGH|PIN_GPIOMODE_ENABLED); BIAS(gpio_out_lo, PIN_OUTPUT_LOW|PIN_GPIOMODE_ENABLED); /* Sleep modes */ -BIAS(sleep_in_wkup_pdis, PIN_SLPM_DIR_INPUT|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); -BIAS(sleep_in_nopull_wkup, PIN_INPUT_NOPULL|PIN_SLPM_WAKEUP_ENABLE); -BIAS(sleep_out_hi_wkup_pdis, PIN_SLPM_OUTPUT_HIGH|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); -BIAS(sleep_out_lo_wkup, PIN_OUTPUT_LOW|PIN_SLPM_WAKEUP_ENABLE); -BIAS(sleep_out_wkup_pdis, PIN_SLPM_DIR_OUTPUT|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); +BIAS(slpm_in_wkup_pdis, PIN_SLEEPMODE_ENABLED|PIN_SLPM_DIR_INPUT|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); +BIAS(slpm_in_nopull_wkup, PIN_SLEEPMODE_ENABLED|PIN_SLPM_DIR_INPUT|PIN_SLPM_PULL_NONE|PIN_SLPM_WAKEUP_ENABLE); +BIAS(slpm_wkup_pdis, PIN_SLEEPMODE_ENABLED|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); +BIAS(slpm_out_hi_wkup_pdis, PIN_SLEEPMODE_ENABLED|PIN_SLPM_OUTPUT_HIGH|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); +BIAS(slpm_out_wkup_pdis, PIN_SLEEPMODE_ENABLED|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); +BIAS(slpm_out_lo_wkup, PIN_SLEEPMODE_ENABLED|PIN_SLPM_OUTPUT_LOW|PIN_SLPM_WAKEUP_ENABLE); +BIAS(slpm_out_lo_wkup_pdis, PIN_SLEEPMODE_ENABLED|PIN_SLPM_OUTPUT_LOW|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); +BIAS(slpm_in_nopull_wkup_pdis, PIN_SLEEPMODE_ENABLED|PIN_SLPM_INPUT_NOPULL|PIN_SLPM_WAKEUP_ENABLE|PIN_SLPM_PDIS_DISABLED); /* We use these to define hog settings that are always done on boot */ #define DB8500_MUX_HOG(group,func) \ PIN_MAP_MUX_GROUP_HOG_DEFAULT("pinctrl-db8500", group, func) #define DB8500_PIN_HOG(pin,conf) \ PIN_MAP_CONFIGS_PIN_HOG_DEFAULT("pinctrl-db8500", pin, conf) +#define DB8500_PIN_SLEEP(pin, conf, dev) \ + PIN_MAP_CONFIGS_PIN(dev, PINCTRL_STATE_SLEEP, "pinctrl-db8500", \ + pin, conf) /* These are default states associated with device and changed runtime */ #define DB8500_MUX(group,func,dev) \ PIN_MAP_MUX_GROUP_DEFAULT(dev, "pinctrl-db8500", group, func) #define DB8500_PIN(pin,conf,dev) \ PIN_MAP_CONFIGS_PIN_DEFAULT(dev, "pinctrl-db8500", pin, conf) +#define DB8500_PIN_SLEEP(pin, conf, dev) \ + PIN_MAP_CONFIGS_PIN(dev, PINCTRL_STATE_SLEEP, "pinctrl-db8500", \ + pin, conf) #define DB8500_PIN_SLEEP(pin,conf,dev) \ PIN_MAP_CONFIGS_PIN(dev, PINCTRL_STATE_SLEEP, "pinctrl-db8500", \ @@ -134,40 +142,47 @@ static struct pinctrl_map __initdata mop500_family_pinmap[] = { DB8500_PIN("GPIO2_AH4", in_pu, "uart0"), /* RXD */ DB8500_PIN("GPIO3_AH3", out_hi, "uart0"), /* TXD */ /* UART0 sleep state */ - DB8500_PIN_SLEEP("GPIO0_AJ5", sleep_in_wkup_pdis, "uart0"), - DB8500_PIN_SLEEP("GPIO1_AJ3", sleep_out_hi_wkup_pdis, "uart0"), - DB8500_PIN_SLEEP("GPIO2_AH4", sleep_in_wkup_pdis, "uart0"), - DB8500_PIN_SLEEP("GPIO3_AH3", sleep_out_wkup_pdis, "uart0"), + DB8500_PIN_SLEEP("GPIO0_AJ5", slpm_in_wkup_pdis, "uart0"), + DB8500_PIN_SLEEP("GPIO1_AJ3", slpm_out_hi_wkup_pdis, "uart0"), + DB8500_PIN_SLEEP("GPIO2_AH4", slpm_in_wkup_pdis, "uart0"), + DB8500_PIN_SLEEP("GPIO3_AH3", slpm_out_wkup_pdis, "uart0"), /* MSP1 for ALSA codec */ DB8500_MUX("msp1txrx_a_1", "msp1", "ux500-msp-i2s.1"), DB8500_MUX("msp1_a_1", "msp1", "ux500-msp-i2s.1"), - DB8500_PIN("GPIO33_AF2", out_lo_sleep_nowkup, "ux500-msp-i2s.1"), - DB8500_PIN("GPIO34_AE1", in_nopull_sleep_nowkup, "ux500-msp-i2s.1"), - DB8500_PIN("GPIO35_AE2", in_nopull_sleep_nowkup, "ux500-msp-i2s.1"), - DB8500_PIN("GPIO36_AG2", in_nopull_sleep_nowkup, "ux500-msp-i2s.1"), + DB8500_PIN("GPIO33_AF2", out_lo_slpm_nowkup, "ux500-msp-i2s.1"), + DB8500_PIN("GPIO34_AE1", in_nopull_slpm_nowkup, "ux500-msp-i2s.1"), + DB8500_PIN("GPIO35_AE2", in_nopull_slpm_nowkup, "ux500-msp-i2s.1"), + DB8500_PIN("GPIO36_AG2", in_nopull_slpm_nowkup, "ux500-msp-i2s.1"), /* MSP1 sleep state */ - DB8500_PIN_SLEEP("GPIO33_AF2", sleep_out_lo_wkup, "ux500-msp-i2s.1"), - DB8500_PIN_SLEEP("GPIO34_AE1", sleep_in_nopull_wkup, "ux500-msp-i2s.1"), - DB8500_PIN_SLEEP("GPIO35_AE2", sleep_in_nopull_wkup, "ux500-msp-i2s.1"), - DB8500_PIN_SLEEP("GPIO36_AG2", sleep_in_nopull_wkup, "ux500-msp-i2s.1"), + DB8500_PIN_SLEEP("GPIO33_AF2", slpm_out_lo_wkup, "ux500-msp-i2s.1"), + DB8500_PIN_SLEEP("GPIO34_AE1", slpm_in_nopull_wkup, "ux500-msp-i2s.1"), + DB8500_PIN_SLEEP("GPIO35_AE2", slpm_in_nopull_wkup, "ux500-msp-i2s.1"), + DB8500_PIN_SLEEP("GPIO36_AG2", slpm_in_nopull_wkup, "ux500-msp-i2s.1"), /* Mux in LCD data lines 8 thru 11 and LCDA CLK for MCDE TVOUT */ DB8500_MUX("lcd_d8_d11_a_1", "lcd", "mcde-tvout"), DB8500_MUX("lcdaclk_b_1", "lcda", "mcde-tvout"), /* Mux in LCD VSI1 and pull it up for MCDE HDMI output */ DB8500_MUX("lcdvsi1_a_1", "lcd", "av8100-hdmi"), - /* Mux in I2C blocks, put pins into GPIO in sleepmode no pull-up */ + /* Mux in i2c0 block, default state */ DB8500_MUX("i2c0_a_1", "i2c0", "nmk-i2c.0"), - DB8500_PIN("GPIO147_C15", slpm_gpio_nopull, "nmk-i2c.0"), - DB8500_PIN("GPIO148_B16", slpm_gpio_nopull, "nmk-i2c.0"), + /* i2c0 sleep state */ + DB8500_PIN_SLEEP("GPIO147_C15", slpm_in_nopull_wkup_pdis, "nmk-i2c.0"), /* SDA */ + DB8500_PIN_SLEEP("GPIO148_B16", slpm_in_nopull_wkup_pdis, "nmk-i2c.0"), /* SCL */ + /* Mux in i2c1 block, default state */ DB8500_MUX("i2c1_b_2", "i2c1", "nmk-i2c.1"), - DB8500_PIN("GPIO16_AD3", slpm_gpio_nopull, "nmk-i2c.1"), - DB8500_PIN("GPIO17_AD4", slpm_gpio_nopull, "nmk-i2c.1"), + /* i2c1 sleep state */ + DB8500_PIN_SLEEP("GPIO16_AD3", slpm_in_nopull_wkup_pdis, "nmk-i2c.1"), /* SDA */ + DB8500_PIN_SLEEP("GPIO17_AD4", slpm_in_nopull_wkup_pdis, "nmk-i2c.1"), /* SCL */ + /* Mux in i2c2 block, default state */ DB8500_MUX("i2c2_b_2", "i2c2", "nmk-i2c.2"), - DB8500_PIN("GPIO10_AF5", slpm_gpio_nopull, "nmk-i2c.2"), - DB8500_PIN("GPIO11_AG4", slpm_gpio_nopull, "nmk-i2c.2"), + /* i2c2 sleep state */ + DB8500_PIN_SLEEP("GPIO10_AF5", slpm_in_nopull_wkup_pdis, "nmk-i2c.2"), /* SDA */ + DB8500_PIN_SLEEP("GPIO11_AG4", slpm_in_nopull_wkup_pdis, "nmk-i2c.2"), /* SCL */ + /* Mux in i2c3 block, default state */ DB8500_MUX("i2c3_c_2", "i2c3", "nmk-i2c.3"), - DB8500_PIN("GPIO229_AG7", slpm_gpio_nopull, "nmk-i2c.3"), - DB8500_PIN("GPIO230_AF7", slpm_gpio_nopull, "nmk-i2c.3"), + /* i2c3 sleep state */ + DB8500_PIN_SLEEP("GPIO229_AG7", slpm_in_nopull_wkup_pdis, "nmk-i2c.3"), /* SDA */ + DB8500_PIN_SLEEP("GPIO230_AF7", slpm_in_nopull_wkup_pdis, "nmk-i2c.3"), /* SCL */ /* Mux in SDI0 (here called MC0) used for removable MMC/SD/SDIO cards */ DB8500_MUX("mc0_a_1", "mc0", "sdi0"), DB8500_PIN("GPIO18_AC2", out_hi, "sdi0"), /* CMDDIR */ @@ -219,11 +234,15 @@ static struct pinctrl_map __initdata mop500_family_pinmap[] = { DB8500_MUX("usb_a_1", "usb", "musb-ux500.0"), DB8500_PIN("GPIO257_AE29", out_hi, "musb-ux500.0"), /* STP */ /* Mux in SPI2 pins on the "other C1" altfunction */ - DB8500_MUX("spi2_oc1_1", "spi2", "spi2"), + DB8500_MUX("spi2_oc1_2", "spi2", "spi2"), DB8500_PIN("GPIO216_AG12", gpio_out_hi, "spi2"), /* FRM */ DB8500_PIN("GPIO218_AH11", in_pd, "spi2"), /* RXD */ DB8500_PIN("GPIO215_AH13", out_lo, "spi2"), /* TXD */ DB8500_PIN("GPIO217_AH12", out_lo, "spi2"), /* CLK */ + /* SPI2 sleep state */ + DB8500_PIN_SLEEP("GPIO218_AH11", slpm_in_wkup_pdis, "spi2"), /* RXD */ + DB8500_PIN_SLEEP("GPIO215_AH13", slpm_out_lo_wkup_pdis, "spi2"), /* TXD */ + DB8500_PIN_SLEEP("GPIO217_AH12", slpm_wkup_pdis, "spi2"), /* CLK */ }; /* @@ -410,7 +429,7 @@ static struct pinctrl_map __initdata u9500_pinmap[] = { DB8500_PIN_HOG("GPIO144_B13", gpio_in_pu), /* HSI */ DB8500_MUX_HOG("hsir_a_1", "hsi"), - DB8500_MUX_HOG("hsit_a_1", "hsi"), + DB8500_MUX_HOG("hsit_a_2", "hsi"), DB8500_PIN_HOG("GPIO219_AG10", in_pd), /* RX FLA0 */ DB8500_PIN_HOG("GPIO220_AH10", in_pd), /* RX DAT0 */ DB8500_PIN_HOG("GPIO221_AJ11", out_lo), /* RX RDY0 */ @@ -418,7 +437,7 @@ static struct pinctrl_map __initdata u9500_pinmap[] = { DB8500_PIN_HOG("GPIO223_AH9", out_lo), /* TX DAT0 */ DB8500_PIN_HOG("GPIO224_AG9", in_pd), /* TX RDY0 */ DB8500_PIN_HOG("GPIO225_AG8", in_pd), /* CAWAKE0 */ - DB8500_PIN_HOG("GPIO226_AF8", out_hi), /* ACWAKE0 */ + DB8500_PIN_HOG("GPIO226_AF8", gpio_out_hi), /* ACWAKE0 */ }; static struct pinctrl_map __initdata u8500_pinmap[] = { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 13f555d62491..477a2d23ddf1 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -73,11 +73,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(page, offset, size, dir); return pfn_to_dma(dev, page_to_pfn(page)) + offset; } +static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + /** * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -96,7 +103,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), handle & ~PAGE_MASK, size, dir); } @@ -106,8 +113,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(page, offset, size, dir); + __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_dma_sync_single_for_device(struct device *dev, @@ -115,8 +121,7 @@ static void arm_dma_sync_single_for_device(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(page, offset, size, dir); + __dma_page_cpu_to_dev(page, offset, size, dir); } static int arm_dma_set_mask(struct device *dev, u64 dma_mask); @@ -138,6 +143,22 @@ struct dma_map_ops arm_dma_ops = { }; EXPORT_SYMBOL(arm_dma_ops); +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs); + +struct dma_map_ops arm_coherent_dma_ops = { + .alloc = arm_coherent_dma_alloc, + .free = arm_coherent_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = arm_coherent_dma_map_page, + .map_sg = arm_dma_map_sg, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_coherent_dma_ops); + static u64 get_coherent_dma_mask(struct device *dev) { u64 mask = (u64)arm_dma_limit; @@ -586,7 +607,7 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, pgprot_t prot, const void *caller) + gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) { u64 mask = get_coherent_dma_mask(dev); struct page *page; @@ -619,7 +640,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) + if (is_coherent || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (gfp & GFP_ATOMIC) addr = __alloc_from_pool(size, &page); @@ -647,7 +668,20 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - return __dma_alloc(dev, size, handle, gfp, prot, + return __dma_alloc(dev, size, handle, gfp, prot, false, + __builtin_return_address(0)); +} + +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + + return __dma_alloc(dev, size, handle, gfp, prot, true, __builtin_return_address(0)); } @@ -684,8 +718,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, /* * Free a buffer as defined by the above mapping. */ -void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs) +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs, + bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); @@ -694,7 +729,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) { + if (is_coherent || nommu()) { __dma_free_buffer(page, size); } else if (__free_from_pool(cpu_addr, size)) { return; @@ -710,6 +745,18 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, } } +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); +} + +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); +} + int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, struct dma_attrs *attrs) @@ -1012,11 +1059,12 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t if (!pages[i]) goto error; - if (order) + if (order) { split_page(pages[i], order); - j = 1 << order; - while (--j) - pages[i + j] = pages[i] + j; + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } __dma_clear_buffer(pages[i], PAGE_SIZE << order); i += 1 << order; @@ -1303,7 +1351,8 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, */ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, size_t size, dma_addr_t *handle, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t iova, iova_base; @@ -1322,8 +1371,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, phys_addr_t phys = page_to_phys(sg_page(s)); unsigned int len = PAGE_ALIGN(s->offset + s->length); - if (!arch_is_coherent() && - !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); ret = iommu_map(mapping->domain, iova, phys, len, 0); @@ -1341,20 +1390,9 @@ fail: return ret; } -/** - * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA - * @dev: valid struct device pointer - * @sg: list of buffers - * @nents: number of buffers to map - * @dir: DMA transfer direction - * - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * The scatter gather list elements are merged together (if possible) and - * tagged with the appropriate dma address and length. They are obtained via - * sg_dma_{address,length}. - */ -int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) +static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct scatterlist *s = sg, *dma = sg, *start = sg; int i, count = 0; @@ -1370,7 +1408,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { if (__map_sg_chunk(dev, start, size, &dma->dma_address, - dir, attrs) < 0) + dir, attrs, is_coherent) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1383,7 +1421,8 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, } size += s->length; } - if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs) < 0) + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, + is_coherent) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1398,17 +1437,44 @@ bad_mapping: } /** - * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer * @sg: list of buffers - * @nents: number of buffers to unmap (same as was passed to dma_map_sg) - * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * @nents: number of buffers to map + * @dir: DMA transfer direction * - * Unmap a set of streaming mode DMA translations. Again, CPU access - * rules concerning calls here are the same as for dma_unmap_single(). + * Map a set of i/o coherent buffers described by scatterlist in streaming + * mode for DMA. The scatter gather list elements are merged together (if + * possible) and tagged with the appropriate dma address and length. They are + * obtained via sg_dma_{address,length}. */ -void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) +int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, false); +} + +static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct scatterlist *s; int i; @@ -1417,7 +1483,7 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, if (sg_dma_len(s)) __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); - if (!arch_is_coherent() && + if (!is_coherent && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); @@ -1425,6 +1491,38 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, } /** + * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); +} + +/** * arm_iommu_sync_sg_for_cpu * @dev: valid struct device pointer * @sg: list of buffers @@ -1438,8 +1536,7 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); } @@ -1457,22 +1554,21 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); } /** - * arm_iommu_map_page + * arm_coherent_iommu_map_page * @dev: valid struct device pointer * @page: page that buffer resides in * @offset: offset into page for start of buffer * @size: size of buffer to map * @dir: DMA transfer direction * - * IOMMU aware version of arm_dma_map_page() + * Coherent IOMMU aware version of arm_dma_map_page() */ -static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, +static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { @@ -1480,9 +1576,6 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, dma_addr_t dma_addr; int ret, len = PAGE_ALIGN(size + offset); - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) - __dma_page_cpu_to_dev(page, offset, size, dir); - dma_addr = __alloc_iova(mapping, len); if (dma_addr == DMA_ERROR_CODE) return dma_addr; @@ -1498,6 +1591,51 @@ fail: } /** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + + return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); +} + +/** + * arm_coherent_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Coherent IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +/** * arm_iommu_unmap_page * @dev: valid struct device pointer * @handle: DMA address of buffer @@ -1519,7 +1657,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, if (!iova) return; - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(page, offset, size, dir); iommu_unmap(mapping->domain, iova, len); @@ -1537,8 +1675,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev, if (!iova) return; - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(page, offset, size, dir); + __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_iommu_sync_single_for_device(struct device *dev, @@ -1572,6 +1709,19 @@ struct dma_map_ops iommu_ops = { .sync_sg_for_device = arm_iommu_sync_sg_for_device, }; +struct dma_map_ops iommu_coherent_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_coherent_iommu_map_page, + .unmap_page = arm_coherent_iommu_unmap_page, + + .map_sg = arm_coherent_iommu_map_sg, + .unmap_sg = arm_coherent_iommu_unmap_sg, +}; + /** * arm_iommu_create_mapping * @bus: pointer to the bus holding the client device (for IOMMU calls) @@ -1665,7 +1815,7 @@ int arm_iommu_attach_device(struct device *dev, dev->archdata.mapping = mapping; set_dma_ops(dev, &iommu_ops); - pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev)); + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); return 0; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 18144e6a3115..941dfb9e9a78 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -423,17 +423,6 @@ static void __init build_mem_type_table(void) vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; /* - * Enable CPU-specific coherency if supported. - * (Only available on XSC3 at the moment.) - */ - if (arch_is_coherent() && cpu_is_xsc3()) { - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; - } - /* * ARMv6 and above have extended page tables. */ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c index 5e13c3884aa4..42377ef9ea3d 100644 --- a/arch/arm/plat-omap/mailbox.c +++ b/arch/arm/plat-omap/mailbox.c @@ -310,7 +310,7 @@ static void omap_mbox_fini(struct omap_mbox *mbox) omap_mbox_disable_irq(mbox, IRQ_RX); free_irq(mbox->irq, mbox); tasklet_kill(&mbox->txq->tasklet); - flush_work_sync(&mbox->rxq->work); + flush_work(&mbox->rxq->work); mbox_queue_free(mbox->txq); mbox_queue_free(mbox->rxq); } diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index e92215428a37..72bd5ae50a89 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -138,11 +138,6 @@ config CRIS_MACH_ARTPEC3 endchoice -config ETRAX_VCS_SIM - bool "VCS Simulator" - help - Setup hardware to be run in the VCS simulator. - config ETRAX_ARCH_V10 bool default y if ETRAX100LX || ETRAX100LX_V2 diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c index b34438e026be..1b6ad6247204 100644 --- a/arch/cris/arch-v32/drivers/axisflashmap.c +++ b/arch/cris/arch-v32/drivers/axisflashmap.c @@ -329,7 +329,6 @@ static int __init init_axis_flash(void) } #endif -#ifndef CONFIG_ETRAX_VCS_SIM main_mtd = flash_probe(); if (main_mtd) printk(KERN_INFO "%s: 0x%08x bytes of NOR flash memory.\n", @@ -603,34 +602,7 @@ static int __init init_axis_flash(void) "partition %d\n", part); } } -#endif /* CONFIG_EXTRAX_VCS_SIM */ -#ifdef CONFIG_ETRAX_VCS_SIM - /* For simulator, always use a RAM partition. - * The rootfs will be found after the kernel in RAM, - * with romfs_start and romfs_end indicating location and size. - */ - struct mtd_info *mtd_ram; - - mtd_ram = kmalloc(sizeof(struct mtd_info), GFP_KERNEL); - if (!mtd_ram) { - panic("axisflashmap: Couldn't allocate memory for " - "mtd_info!\n"); - } - - printk(KERN_INFO "axisflashmap: Adding RAM partition for romfs, " - "at %u, size %u\n", - (unsigned) romfs_start, (unsigned) romfs_length); - - err = mtdram_init_device(mtd_ram, (void *)romfs_start, - romfs_length, "romfs"); - if (err) { - panic("axisflashmap: Could not initialize MTD RAM " - "device!\n"); - } -#endif /* CONFIG_EXTRAX_VCS_SIM */ - -#ifndef CONFIG_ETRAX_VCS_SIM if (aux_mtd) { aux_partition.size = aux_mtd->size; err = mtd_device_register(aux_mtd, &aux_partition, 1); @@ -639,7 +611,6 @@ static int __init init_axis_flash(void) "aux mtd device!\n"); } -#endif /* CONFIG_EXTRAX_VCS_SIM */ return err; } diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c index 5b1ee82f63c5..e3dfc72d0cfd 100644 --- a/arch/cris/arch-v32/drivers/pci/bios.c +++ b/arch/cris/arch-v32/drivers/pci/bios.c @@ -97,28 +97,3 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) pcibios_enable_irq(dev); return 0; } - -int pcibios_assign_resources(void) -{ - struct pci_dev *dev = NULL; - int idx; - struct resource *r; - - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - int class = dev->class >> 8; - - /* Don't touch classless devices and host bridges */ - if (!class || class == PCI_CLASS_BRIDGE_HOST) - continue; - - for(idx=0; idx<6; idx++) { - r = &dev->resource[idx]; - - if (!r->start && r->end) - pci_assign_resource(dev, idx); - } - } - return 0; -} - -EXPORT_SYMBOL(pcibios_assign_resources); diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S index 5d502b9ab56d..51e34165ece7 100644 --- a/arch/cris/arch-v32/kernel/head.S +++ b/arch/cris/arch-v32/kernel/head.S @@ -36,13 +36,6 @@ .global nand_boot .global swapper_pg_dir - ;; Dummy section to make it bootable with current VCS simulator -#ifdef CONFIG_ETRAX_VCS_SIM - .section ".boot", "ax" - ba tstart - nop -#endif - .text tstart: ;; This is the entry point of the kernel. The CPU is currently in @@ -75,17 +68,10 @@ secondary_cpu_entry: /* Entry point for secondary CPUs */ | REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 4) \ | REG_FIELD(mmu, rw_mm_kbase_hi, base_d, 5) \ | REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb), $r0 -#elif !defined(CONFIG_ETRAX_VCS_SIM) - move.d REG_FIELD(mmu, rw_mm_kbase_hi, base_e, 8) \ - | REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 4) \ - | REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb), $r0 #else - ;; Map the virtual DRAM to the RW eprom area at address 0. - ;; Also map 0xa for the hook calls, move.d REG_FIELD(mmu, rw_mm_kbase_hi, base_e, 8) \ | REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 4) \ - | REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb) \ - | REG_FIELD(mmu, rw_mm_kbase_hi, base_a, 0xa), $r0 + | REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb), $r0 #endif ;; Temporary map of 0x40 -> 0x40 and 0x00 -> 0x00. @@ -126,27 +112,6 @@ secondary_cpu_entry: /* Entry point for secondary CPUs */ | REG_STATE(mmu, rw_mm_cfg, seg_2, page) \ | REG_STATE(mmu, rw_mm_cfg, seg_1, page) \ | REG_STATE(mmu, rw_mm_cfg, seg_0, linear), $r2 -#elif !defined(CONFIG_ETRAX_VCS_SIM) - move.d REG_STATE(mmu, rw_mm_cfg, we, on) \ - | REG_STATE(mmu, rw_mm_cfg, acc, on) \ - | REG_STATE(mmu, rw_mm_cfg, ex, on) \ - | REG_STATE(mmu, rw_mm_cfg, inv, on) \ - | REG_STATE(mmu, rw_mm_cfg, seg_f, linear) \ - | REG_STATE(mmu, rw_mm_cfg, seg_e, linear) \ - | REG_STATE(mmu, rw_mm_cfg, seg_d, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_c, linear) \ - | REG_STATE(mmu, rw_mm_cfg, seg_b, linear) \ - | REG_STATE(mmu, rw_mm_cfg, seg_a, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_9, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_8, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_7, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_6, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_5, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_4, linear) \ - | REG_STATE(mmu, rw_mm_cfg, seg_3, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_2, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_1, page) \ - | REG_STATE(mmu, rw_mm_cfg, seg_0, linear), $r2 #else move.d REG_STATE(mmu, rw_mm_cfg, we, on) \ | REG_STATE(mmu, rw_mm_cfg, acc, on) \ @@ -157,7 +122,7 @@ secondary_cpu_entry: /* Entry point for secondary CPUs */ | REG_STATE(mmu, rw_mm_cfg, seg_d, page) \ | REG_STATE(mmu, rw_mm_cfg, seg_c, linear) \ | REG_STATE(mmu, rw_mm_cfg, seg_b, linear) \ - | REG_STATE(mmu, rw_mm_cfg, seg_a, linear) \ + | REG_STATE(mmu, rw_mm_cfg, seg_a, page) \ | REG_STATE(mmu, rw_mm_cfg, seg_9, page) \ | REG_STATE(mmu, rw_mm_cfg, seg_8, page) \ | REG_STATE(mmu, rw_mm_cfg, seg_7, page) \ @@ -226,7 +191,6 @@ master_cpu: move.d secondary_cpu_entry, $r1 move.d $r1, [$r0] #endif -#ifndef CONFIG_ETRAX_VCS_SIM ; Check if starting from DRAM (network->RAM boot or unpacked ; compressed kernel), or directly from flash. lapcq ., $r0 @@ -234,7 +198,6 @@ master_cpu: cmp.d 0x10000, $r0 ; Arbitrary, something above this code. blo _inflash0 nop -#endif jump _inram ; Jump to cached RAM. nop @@ -326,7 +289,6 @@ move_cramfs: move.d romfs_length, $r1 move.d $r0, [$r1] -#ifndef CONFIG_ETRAX_VCS_SIM ;; The kernel could have been unpacked to DRAM by the loader, but ;; the cramfs image could still be in the flash immediately ;; following the compressed kernel image. The loader passes the address @@ -335,10 +297,6 @@ move_cramfs: cmp.d 0x0ffffff8, $r9 bhs _no_romfs_in_flash ; R9 points outside the flash area. nop -#else - ba _no_romfs_in_flash - nop -#endif ;; cramfs rootfs might to be in flash. Check for it. move.d [$r9], $r0 ; cramfs_super.magic cmp.d CRAMFS_MAGIC, $r0 @@ -396,7 +354,6 @@ _no_romfs_in_flash: move.d romfs_length, $r3 move.d $r2, [$r3] ; store size at romfs_length -#ifndef CONFIG_ETRAX_VCS_SIM add.d $r2, $r0 ; copy from end and downwards add.d $r2, $r1 @@ -410,7 +367,6 @@ _no_romfs_in_flash: subq 1, $r2 bne 1b nop -#endif 4: ;; BSS move done. @@ -455,7 +411,6 @@ no_command_line: move.d etrax_irv, $r1 ; Set the exception base register and pointer. move.d $r0, [$r1] -#ifndef CONFIG_ETRAX_VCS_SIM ;; Clear the BSS region from _bss_start to _end. move.d __bss_start, $r0 move.d _end, $r1 @@ -463,15 +418,6 @@ no_command_line: cmp.d $r1, $r0 blo 1b nop -#endif - -#ifdef CONFIG_ETRAX_VCS_SIM - /* Set the watchdog timeout to something big. Will be removed when */ - /* watchdog can be disabled with command line option */ - move.d 0x7fffffff, $r10 - jsr CPU_WATCHDOG_TIMEOUT - nop -#endif ; Initialize registers to increase determinism move.d __bss_start, $r0 diff --git a/arch/cris/arch-v32/kernel/kgdb.c b/arch/cris/arch-v32/kernel/kgdb.c index 8c1d35cdf00a..b06813aeb120 100644 --- a/arch/cris/arch-v32/kernel/kgdb.c +++ b/arch/cris/arch-v32/kernel/kgdb.c @@ -381,23 +381,9 @@ static int read_register(char regno, unsigned int *valptr); /* Serial port, reads one character. ETRAX 100 specific. from debugport.c */ int getDebugChar(void); -#ifdef CONFIG_ETRAX_VCS_SIM -int getDebugChar(void) -{ - return socketread(); -} -#endif - /* Serial port, writes one character. ETRAX 100 specific. from debugport.c */ void putDebugChar(int val); -#ifdef CONFIG_ETRAX_VCS_SIM -void putDebugChar(int val) -{ - socketwrite((char *)&val, 1); -} -#endif - /* Returns the integer equivalent of a hexadecimal character. */ static int hex(char ch); diff --git a/arch/cris/arch-v32/mach-a3/Makefile b/arch/cris/arch-v32/mach-a3/Makefile index 41fa6a6893a9..d366e0891988 100644 --- a/arch/cris/arch-v32/mach-a3/Makefile +++ b/arch/cris/arch-v32/mach-a3/Makefile @@ -1,10 +1,8 @@ -# $Id: Makefile,v 1.3 2007/03/13 11:57:46 starvik Exp $ # # Makefile for the linux kernel. # obj-y := dma.o pinmux.o io.o arbiter.o -obj-$(CONFIG_ETRAX_VCS_SIM) += vcs_hook.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o clean: diff --git a/arch/cris/arch-v32/mach-a3/vcs_hook.c b/arch/cris/arch-v32/mach-a3/vcs_hook.c deleted file mode 100644 index 58b1a5469fd7..000000000000 --- a/arch/cris/arch-v32/mach-a3/vcs_hook.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Simulator hook mechanism - */ - -#include "vcs_hook.h" -#include <asm/io.h> -#include <stdarg.h> - -#define HOOK_TRIG_ADDR 0xb7000000 -#define HOOK_MEM_BASE_ADDR 0xce000000 - -static volatile unsigned *hook_base; - -#define HOOK_DATA(offset) hook_base[offset] -#define VHOOK_DATA(offset) hook_base[offset] -#define HOOK_TRIG(funcid) \ - do { \ - *((unsigned *) HOOK_TRIG_ADDR) = funcid; \ - } while (0) -#define HOOK_DATA_BYTE(offset) ((unsigned char *)hook_base)[offset] - -static void hook_init(void) -{ - static int first = 1; - if (first) { - first = 0; - hook_base = ioremap(HOOK_MEM_BASE_ADDR, 8192); - } -} - -static unsigned hook_trig(unsigned id) -{ - unsigned ret; - - /* preempt_disable(); */ - - /* Dummy read from mem to make sure data has propagated to memory - * before trigging */ - ret = *hook_base; - - /* trigger hook */ - HOOK_TRIG(id); - - /* wait for call to finish */ - while (VHOOK_DATA(0) > 0) ; - - /* extract return value */ - - ret = VHOOK_DATA(1); - - return ret; -} - -int hook_call(unsigned id, unsigned pcnt, ...) -{ - va_list ap; - int i; - unsigned ret; - - hook_init(); - - HOOK_DATA(0) = id; - - va_start(ap, pcnt); - for (i = 1; i <= pcnt; i++) - HOOK_DATA(i) = va_arg(ap, unsigned); - va_end(ap); - - ret = hook_trig(id); - - return ret; -} - -int hook_call_str(unsigned id, unsigned size, const char *str) -{ - int i; - unsigned ret; - - hook_init(); - - HOOK_DATA(0) = id; - HOOK_DATA(1) = size; - - for (i = 0; i < size; i++) - HOOK_DATA_BYTE(8 + i) = str[i]; - HOOK_DATA_BYTE(8 + i) = 0; - - ret = hook_trig(id); - - return ret; -} - -void print_str(const char *str) -{ - int i; - /* find null at end of string */ - for (i = 1; str[i]; i++) ; - hook_call(hook_print_str, i, str); -} - -void CPU_WATCHDOG_TIMEOUT(unsigned t) -{ -} diff --git a/arch/cris/arch-v32/mach-a3/vcs_hook.h b/arch/cris/arch-v32/mach-a3/vcs_hook.h deleted file mode 100644 index 8b73d0e8392d..000000000000 --- a/arch/cris/arch-v32/mach-a3/vcs_hook.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Simulator hook call mechanism - */ - -#ifndef __hook_h__ -#define __hook_h__ - -int hook_call(unsigned id, unsigned pcnt, ...); -int hook_call_str(unsigned id, unsigned size, const char *str); - -enum hook_ids { - hook_debug_on = 1, - hook_debug_off, - hook_stop_sim_ok, - hook_stop_sim_fail, - hook_alloc_shared, - hook_ptr_shared, - hook_free_shared, - hook_file2shared, - hook_cmp_shared, - hook_print_params, - hook_sim_time, - hook_stop_sim, - hook_kick_dog, - hook_dog_timeout, - hook_rand, - hook_srand, - hook_rand_range, - hook_print_str, - hook_print_hex, - hook_cmp_offset_shared, - hook_fill_random_shared, - hook_alloc_random_data, - hook_calloc_random_data, - hook_print_int, - hook_print_uint, - hook_fputc, - hook_init_fd, - hook_sbrk, - hook_print_context_descr, - hook_print_data_descr, - hook_print_group_descr, - hook_fill_shared, - hook_sl_srand, - hook_sl_rand_irange, - hook_sl_rand_urange, - hook_sl_sh_malloc_aligned, - hook_sl_sh_calloc_aligned, - hook_sl_sh_alloc_random_data, - hook_sl_sh_file2mem, - hook_sl_vera_mbox_handle, - hook_sl_vera_mbox_put, - hook_sl_vera_mbox_get, - hook_sl_system, - hook_sl_sh_hexdump -}; - -#endif diff --git a/arch/cris/arch-v32/mach-fs/Makefile b/arch/cris/arch-v32/mach-fs/Makefile index 41fa6a6893a9..d366e0891988 100644 --- a/arch/cris/arch-v32/mach-fs/Makefile +++ b/arch/cris/arch-v32/mach-fs/Makefile @@ -1,10 +1,8 @@ -# $Id: Makefile,v 1.3 2007/03/13 11:57:46 starvik Exp $ # # Makefile for the linux kernel. # obj-y := dma.o pinmux.o io.o arbiter.o -obj-$(CONFIG_ETRAX_VCS_SIM) += vcs_hook.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o clean: diff --git a/arch/cris/arch-v32/mach-fs/vcs_hook.c b/arch/cris/arch-v32/mach-fs/vcs_hook.c deleted file mode 100644 index b11594ae0cb6..000000000000 --- a/arch/cris/arch-v32/mach-fs/vcs_hook.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Call simulator hook. This is the part running in the - * simulated program. - */ - -#include "vcs_hook.h" -#include <stdarg.h> -#include <arch-v32/hwregs/reg_map.h> -#include <arch-v32/hwregs/intr_vect_defs.h> - -#define HOOK_TRIG_ADDR 0xb7000000 /* hook cvlog model reg address */ -#define HOOK_MEM_BASE_ADDR 0xa0000000 /* csp4 (shared mem) base addr */ - -#define HOOK_DATA(offset) ((unsigned *)HOOK_MEM_BASE_ADDR)[offset] -#define VHOOK_DATA(offset) ((volatile unsigned *)HOOK_MEM_BASE_ADDR)[offset] -#define HOOK_TRIG(funcid) \ - do { \ - *((unsigned *) HOOK_TRIG_ADDR) = funcid; \ - } while (0) -#define HOOK_DATA_BYTE(offset) ((unsigned char *)HOOK_MEM_BASE_ADDR)[offset] - -int hook_call(unsigned id, unsigned pcnt, ...) -{ - va_list ap; - unsigned i; - unsigned ret; -#ifdef USING_SOS - PREEMPT_OFF_SAVE(); -#endif - - /* pass parameters */ - HOOK_DATA(0) = id; - - /* Have to make hook_print_str a special case since we call with a - * parameter of byte type. Should perhaps be a separate - * hook_call. */ - - if (id == hook_print_str) { - int i; - char *str; - - HOOK_DATA(1) = pcnt; - - va_start(ap, pcnt); - str = (char *)va_arg(ap, unsigned); - - for (i = 0; i != pcnt; i++) - HOOK_DATA_BYTE(8 + i) = str[i]; - - HOOK_DATA_BYTE(8 + i) = 0; /* null byte */ - } else { - va_start(ap, pcnt); - for (i = 1; i <= pcnt; i++) - HOOK_DATA(i) = va_arg(ap, unsigned); - va_end(ap); - } - - /* read from mem to make sure data has propagated to memory before - * trigging */ - ret = *((volatile unsigned *)HOOK_MEM_BASE_ADDR); - - /* trigger hook */ - HOOK_TRIG(id); - - /* wait for call to finish */ - while (VHOOK_DATA(0) > 0) ; - - /* extract return value */ - - ret = VHOOK_DATA(1); - -#ifdef USING_SOS - PREEMPT_RESTORE(); -#endif - return ret; -} - -unsigned hook_buf(unsigned i) -{ - return (HOOK_DATA(i)); -} - -void print_str(const char *str) -{ - int i; - /* find null at end of string */ - for (i = 1; str[i]; i++) ; - hook_call(hook_print_str, i, str); -} - -void CPU_KICK_DOG(void) -{ - (void)hook_call(hook_kick_dog, 0); -} - -void CPU_WATCHDOG_TIMEOUT(unsigned t) -{ - (void)hook_call(hook_dog_timeout, 1, t); -} - diff --git a/arch/cris/arch-v32/mach-fs/vcs_hook.h b/arch/cris/arch-v32/mach-fs/vcs_hook.h deleted file mode 100644 index c000b9fece41..000000000000 --- a/arch/cris/arch-v32/mach-fs/vcs_hook.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Call simulator hook functions - */ - -#ifndef HOOK_H -#define HOOK_H - -int hook_call(unsigned id, unsigned pcnt, ...); - -enum hook_ids { - hook_debug_on = 1, - hook_debug_off, - hook_stop_sim_ok, - hook_stop_sim_fail, - hook_alloc_shared, - hook_ptr_shared, - hook_free_shared, - hook_file2shared, - hook_cmp_shared, - hook_print_params, - hook_sim_time, - hook_stop_sim, - hook_kick_dog, - hook_dog_timeout, - hook_rand, - hook_srand, - hook_rand_range, - hook_print_str, - hook_print_hex, - hook_cmp_offset_shared, - hook_fill_random_shared, - hook_alloc_random_data, - hook_calloc_random_data, - hook_print_int, - hook_print_uint, - hook_fputc, - hook_init_fd, - hook_sbrk - -}; - -#endif diff --git a/arch/cris/arch-v32/mm/init.c b/arch/cris/arch-v32/mm/init.c index 0768bc409ca8..3deca5253d91 100644 --- a/arch/cris/arch-v32/mm/init.c +++ b/arch/cris/arch-v32/mm/init.c @@ -73,11 +73,7 @@ void __init cris_mmu_init(void) #endif REG_STATE(mmu, rw_mm_cfg, seg_c, linear) | REG_STATE(mmu, rw_mm_cfg, seg_b, linear) | -#ifndef CONFIG_ETRAX_VCS_SIM REG_STATE(mmu, rw_mm_cfg, seg_a, page) | -#else - REG_STATE(mmu, rw_mm_cfg, seg_a, linear) | -#endif REG_STATE(mmu, rw_mm_cfg, seg_9, page) | REG_STATE(mmu, rw_mm_cfg, seg_8, page) | REG_STATE(mmu, rw_mm_cfg, seg_7, page) | @@ -100,11 +96,7 @@ void __init cris_mmu_init(void) #endif REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 0x4) | REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb) | -#ifndef CONFIG_ETRAX_VCS_SIM REG_FIELD(mmu, rw_mm_kbase_hi, base_a, 0x0) | -#else - REG_FIELD(mmu, rw_mm_kbase_hi, base_a, 0xa) | -#endif REG_FIELD(mmu, rw_mm_kbase_hi, base_9, 0x0) | REG_FIELD(mmu, rw_mm_kbase_hi, base_8, 0x0)); diff --git a/arch/cris/include/arch-v32/arch/page.h b/arch/cris/include/arch-v32/arch/page.h index 20f1b4806bfe..e5b5aab52de8 100644 --- a/arch/cris/include/arch-v32/arch/page.h +++ b/arch/cris/include/arch-v32/arch/page.h @@ -11,13 +11,8 @@ * selected bit it's possible to convert between KSEG_x and 0x40000000 where the * DRAM really resides. DRAM is virtually at 0xc. */ -#ifndef CONFIG_ETRAX_VCS_SIM #define __pa(x) ((unsigned long)(x) & 0x7fffffff) #define __va(x) ((void *)((unsigned long)(x) | 0x80000000)) -#else -#define __pa(x) ((unsigned long)(x) & 0x3fffffff) -#define __va(x) ((void *)((unsigned long)(x) | 0xc0000000)) -#endif #define VM_STACK_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE) diff --git a/arch/cris/include/arch-v32/arch/processor.h b/arch/cris/include/arch-v32/arch/processor.h index 9603c907fbc4..a024b7d32fed 100644 --- a/arch/cris/include/arch-v32/arch/processor.h +++ b/arch/cris/include/arch-v32/arch/processor.h @@ -21,13 +21,9 @@ struct thread_struct { /* * User-space process size. This is hardcoded into a few places, so don't - * changed it unless everything's clear! + * change it unless everything's clear! */ -#ifndef CONFIG_ETRAX_VCS_SIM #define TASK_SIZE (0xB0000000UL) -#else -#define TASK_SIZE (0xA0000000UL) -#endif /* CCS I=1, enable interrupts. */ #define INIT_THREAD { 0, 0, (1 << I_CCS_BITNR) } diff --git a/arch/cris/include/arch-v32/mach-fs/mach/startup.inc b/arch/cris/include/arch-v32/mach-fs/mach/startup.inc index dd1abbdcbc7a..96c3b0fb62c1 100644 --- a/arch/cris/include/arch-v32/mach-fs/mach/startup.inc +++ b/arch/cris/include/arch-v32/mach-fs/mach/startup.inc @@ -71,12 +71,6 @@ move.d REG_ADDR(bif_core, regi_bif_core, rw_grp4_cfg), $r0 move.d CONFIG_ETRAX_MEM_GRP4_CONFIG, $r1 move.d $r1, [$r0] -#ifdef CONFIG_ETRAX_VCS_SIM - ;; Set up minimal flash waitstates - move.d 0, $r10 - move.d REG_ADDR(bif_core, regi_bif_core, rw_grp1_cfg), $r11 - move.d $r10, [$r11] -#endif .endm #endif diff --git a/arch/cris/include/asm/pci.h b/arch/cris/include/asm/pci.h index 9f1cd56da28c..146da904cdd8 100644 --- a/arch/cris/include/asm/pci.h +++ b/arch/cris/include/asm/pci.h @@ -19,7 +19,6 @@ extern unsigned long pci_mem_start; void pcibios_config_init(void); struct pci_bus * pcibios_scan_root(int bus); -int pcibios_assign_resources(void); void pcibios_set_master(struct pci_dev *dev); void pcibios_penalize_isa_irq(int irq); diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h index 09d5f7fd9db1..3d52a5bbd857 100644 --- a/arch/ia64/include/asm/xen/interface.h +++ b/arch/ia64/include/asm/xen/interface.h @@ -67,6 +67,10 @@ #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that we could have one ABI that works for 32 and 64 bit + * guests. */ +typedef unsigned long xen_pfn_t; /* Guest handles for primitive C types. */ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); @@ -79,7 +83,6 @@ DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); -typedef unsigned long xen_pfn_t; DEFINE_GUEST_HANDLE(xen_pfn_t); #define PRI_xen_pfn "lx" #endif @@ -265,6 +268,8 @@ typedef struct xen_callback xen_callback_t; #endif /* !__ASSEMBLY__ */ +#include <asm/pvclock-abi.h> + /* Size of the shared_info area (this is not related to page size). */ #define XSI_SHIFT 14 #define XSI_SIZE (1 << XSI_SHIFT) diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 1c2e89406721..9392e021c93b 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -158,7 +158,8 @@ mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) ia64_mlogbuf_dump(); printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", - raw_smp_processor_id(), current->pid, current_uid(), + raw_smp_processor_id(), current->pid, + from_kuid(&init_user_ns, current_uid()), iip, ipsr, paddr, current->comm); spin_lock(&mca_bh_lock); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 3fa4bc536953..f388b4e18a37 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2306,7 +2306,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t * partially initialize the vma for the sampling buffer */ vma->vm_mm = mm; - vma->vm_file = filp; + vma->vm_file = get_file(filp); vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ @@ -2345,8 +2345,6 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t goto error; } - get_file(filp); - /* * now insert the vma in the vm list for the process, must be * done with mmap lock held @@ -2380,8 +2378,8 @@ static int pfm_bad_permissions(struct task_struct *task) { const struct cred *tcred; - uid_t uid = current_uid(); - gid_t gid = current_gid(); + kuid_t uid = current_uid(); + kgid_t gid = current_gid(); int ret; rcu_read_lock(); @@ -2389,20 +2387,20 @@ pfm_bad_permissions(struct task_struct *task) /* inspired by ptrace_attach() */ DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", - uid, - gid, - tcred->euid, - tcred->suid, - tcred->uid, - tcred->egid, - tcred->sgid)); - - ret = ((uid != tcred->euid) - || (uid != tcred->suid) - || (uid != tcred->uid) - || (gid != tcred->egid) - || (gid != tcred->sgid) - || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE); + from_kuid(&init_user_ns, uid), + from_kgid(&init_user_ns, gid), + from_kuid(&init_user_ns, tcred->euid), + from_kuid(&init_user_ns, tcred->suid), + from_kuid(&init_user_ns, tcred->uid), + from_kgid(&init_user_ns, tcred->egid), + from_kgid(&init_user_ns, tcred->sgid))); + + ret = ((!uid_eq(uid, tcred->euid)) + || (!uid_eq(uid, tcred->suid)) + || (!uid_eq(uid, tcred->uid)) + || (!gid_eq(gid, tcred->egid)) + || (!gid_eq(gid, tcred->sgid)) + || (!gid_eq(gid, tcred->gid))) && !capable(CAP_SYS_PTRACE); rcu_read_unlock(); return ret; @@ -4782,7 +4780,7 @@ recheck: asmlinkage long sys_perfmonctl (int fd, int cmd, void __user *arg, int count) { - struct file *file = NULL; + struct fd f = {NULL, 0}; pfm_context_t *ctx = NULL; unsigned long flags = 0UL; void *args_k = NULL; @@ -4879,17 +4877,17 @@ restart_args: ret = -EBADF; - file = fget(fd); - if (unlikely(file == NULL)) { + f = fdget(fd); + if (unlikely(f.file == NULL)) { DPRINT(("invalid fd %d\n", fd)); goto error_args; } - if (unlikely(PFM_IS_FILE(file) == 0)) { + if (unlikely(PFM_IS_FILE(f.file) == 0)) { DPRINT(("fd %d not related to perfmon\n", fd)); goto error_args; } - ctx = file->private_data; + ctx = f.file->private_data; if (unlikely(ctx == NULL)) { DPRINT(("no context for fd %d\n", fd)); goto error_args; @@ -4919,8 +4917,8 @@ abort_locked: if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; error_args: - if (file) - fput(file); + if (f.file) + fdput(f); kfree(args_k); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index a199be1fe619..37dd79511cbe 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -220,7 +220,7 @@ ia64_rt_sigreturn (struct sigscratch *scr) si.si_errno = 0; si.si_code = SI_KERNEL; si.si_pid = task_pid_vnr(current); - si.si_uid = current_uid(); + si.si_uid = from_kuid_munged(current_user_ns(), current_uid()); si.si_addr = sc; force_sig_info(SIGSEGV, &si, current); return retval; @@ -317,7 +317,7 @@ force_sigsegv_info (int sig, void __user *addr) si.si_errno = 0; si.si_code = SI_KERNEL; si.si_pid = task_pid_vnr(current); - si.si_uid = current_uid(); + si.si_uid = from_kuid_munged(current_user_ns(), current_uid()); si.si_addr = addr; force_sig_info(SIGSEGV, &si, current); return 0; diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index e93fdae10b23..90d3109c82f4 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -67,7 +67,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 66b26c1e848c..8f4f657fdbc6 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -67,7 +67,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 151332515980..4571d33903fe 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -65,7 +65,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 67bb6fc117f4..12f211733ba0 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -65,7 +65,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 3e35ce5fa467..215389a5407f 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -66,7 +66,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index ae81e2d190c3..cb9dfb30b674 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -61,7 +61,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 55d394edf633..8d5def4a31e0 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -80,7 +80,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index af773743ee11..e2af46f530c1 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -64,7 +64,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index cdb70d66e535..7c9402b2097f 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -65,7 +65,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 46bed78d0656..19d23db690a4 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -61,7 +61,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 86f7772bafbe..ca6c0b4cab77 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -62,7 +62,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 288261456e1f..c80941c7759e 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -62,7 +62,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig index 6cd5a519ce5c..80e012fa409c 100644 --- a/arch/mips/configs/ar7_defconfig +++ b/arch/mips/configs/ar7_defconfig @@ -56,7 +56,6 @@ CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m CONFIG_NETFILTER_XT_MATCH_MAC=m diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig index ad15fb10322b..b6fde2bb51b6 100644 --- a/arch/mips/configs/bcm47xx_defconfig +++ b/arch/mips/configs/bcm47xx_defconfig @@ -96,7 +96,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index d1606569b001..936ec5a5ed8d 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -87,7 +87,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 92a60aecad5c..0315ee37a20b 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -60,7 +60,6 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_COMMENT=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 5527abbb7dea..cd732e5b4fd5 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -86,7 +86,6 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig index 9c9a123016c0..636f82b89fd3 100644 --- a/arch/mips/configs/markeins_defconfig +++ b/arch/mips/configs/markeins_defconfig @@ -59,7 +59,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_COMMENT=m diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig index 28c6b276c216..84624b17b769 100644 --- a/arch/mips/configs/nlm_xlp_defconfig +++ b/arch/mips/configs/nlm_xlp_defconfig @@ -108,7 +108,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig index 138f698d7c00..44b473420d51 100644 --- a/arch/mips/configs/nlm_xlr_defconfig +++ b/arch/mips/configs/nlm_xlr_defconfig @@ -109,7 +109,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 2c0230e76d20..59d9d2fdcd48 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -68,7 +68,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_COMMENT=m diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index c71eb6c79897..6785de7bd2a0 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -109,33 +109,32 @@ Efault: int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned int count) { - struct file * file; + struct fd arg; struct hpux_dirent __user * lastdirent; struct getdents_callback buf; - int error = -EBADF; + int error; - file = fget(fd); - if (!file) - goto out; + arg = fdget(fd); + if (!arg.file) + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; buf.count = count; buf.error = 0; - error = vfs_readdir(file, filldir, &buf); + error = vfs_readdir(arg.file, filldir, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(file->f_pos, &lastdirent->d_off)) + if (put_user(arg.file->f_pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput(file); -out: + fdput(arg); return error; } diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index f8b394a76ac3..29767a8dfea5 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -55,7 +55,6 @@ CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index db27c82e0542..06b56245d78c 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -92,7 +92,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 7bd1763877ba..f55c27609fc6 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -66,7 +66,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c47f2becfbc3..be1cb6ea3a36 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -167,7 +167,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 559ae1ee6706..840838769853 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -189,7 +189,7 @@ SYSCALL_SPU(getcwd) SYSCALL_SPU(capget) SYSCALL_SPU(capset) COMPAT_SYS(sigaltstack) -SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile) +SYSX_SPU(sys_sendfile,compat_sys_sendfile_wrapper,sys_sendfile) SYSCALL(ni_syscall) SYSCALL(ni_syscall) PPC_SYS(vfork) @@ -229,7 +229,7 @@ COMPAT_SYS_SPU(sched_setaffinity) COMPAT_SYS_SPU(sched_getaffinity) SYSCALL(ni_syscall) SYSCALL(ni_syscall) -SYS32ONLY(sendfile64) +SYSX(sys_ni_syscall,compat_sys_sendfile64_wrapper,sys_sendfile64) COMPAT_SYS_SPU(io_setup) SYSCALL_SPU(io_destroy) COMPAT_SYS_SPU(io_getevents) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index bd377a368611..c683fa350add 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -419,6 +419,7 @@ #define __ARCH_WANT_COMPAT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_NEWFSTATAT +#define __ARCH_WANT_COMPAT_SYS_SENDFILE #endif /* diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0794a3017b1b..e144498bcddd 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1624,6 +1624,63 @@ static void __init prom_instantiate_rtas(void) #ifdef CONFIG_PPC64 /* + * Allocate room for and instantiate Stored Measurement Log (SML) + */ +static void __init prom_instantiate_sml(void) +{ + phandle ibmvtpm_node; + ihandle ibmvtpm_inst; + u32 entry = 0, size = 0; + u64 base; + + prom_debug("prom_instantiate_sml: start...\n"); + + ibmvtpm_node = call_prom("finddevice", 1, 1, ADDR("/ibm,vtpm")); + prom_debug("ibmvtpm_node: %x\n", ibmvtpm_node); + if (!PHANDLE_VALID(ibmvtpm_node)) + return; + + ibmvtpm_inst = call_prom("open", 1, 1, ADDR("/ibm,vtpm")); + if (!IHANDLE_VALID(ibmvtpm_inst)) { + prom_printf("opening vtpm package failed (%x)\n", ibmvtpm_inst); + return; + } + + if (call_prom_ret("call-method", 2, 2, &size, + ADDR("sml-get-handover-size"), + ibmvtpm_inst) != 0 || size == 0) { + prom_printf("SML get handover size failed\n"); + return; + } + + base = alloc_down(size, PAGE_SIZE, 0); + if (base == 0) + prom_panic("Could not allocate memory for sml\n"); + + prom_printf("instantiating sml at 0x%x...", base); + + if (call_prom_ret("call-method", 4, 2, &entry, + ADDR("sml-handover"), + ibmvtpm_inst, size, base) != 0 || entry == 0) { + prom_printf("SML handover failed\n"); + return; + } + prom_printf(" done\n"); + + reserve_mem(base, size); + + prom_setprop(ibmvtpm_node, "/ibm,vtpm", "linux,sml-base", + &base, sizeof(base)); + prom_setprop(ibmvtpm_node, "/ibm,vtpm", "linux,sml-size", + &size, sizeof(size)); + + prom_debug("sml base = 0x%x\n", base); + prom_debug("sml size = 0x%x\n", (long)size); + + prom_debug("prom_instantiate_sml: end...\n"); +} + +/* * Allocate room for and initialize TCE tables */ static void __init prom_initialize_tce_table(void) @@ -2916,6 +2973,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_instantiate_opal(); #endif +#ifdef CONFIG_PPC64 + /* instantiate sml */ + prom_instantiate_sml(); +#endif + /* * On non-powermacs, put all CPUs in spin-loops. * diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 81c570633ead..abd1112da54f 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -143,48 +143,17 @@ long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t pt * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) * and the register representation of a signed int (msr in 64-bit mode) is performed. */ -asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count) +asmlinkage long compat_sys_sendfile_wrapper(u32 out_fd, u32 in_fd, + compat_off_t __user *offset, u32 count) { - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - off_t __user *up; - - if (offset && get_user(of, offset)) - return -EFAULT; - - /* The __user pointer cast is valid because of the set_fs() */ - set_fs(KERNEL_DS); - up = offset ? (off_t __user *) &of : NULL; - ret = sys_sendfile((int)out_fd, (int)in_fd, up, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; + return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count); } -asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count) +asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd, + compat_loff_t __user *offset, u32 count) { - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - loff_t __user *up; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - /* The __user pointer cast is valid because of the set_fs() */ - set_fs(KERNEL_DS); - up = offset ? (loff_t __user *) &lof : NULL; - ret = sys_sendfile64(out_fd, in_fd, up, count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; + return sys_sendfile((int)out_fd, (int)in_fd, + (off_t __user *)offset, count); } long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 08ffcf52a856..e5f028b5794e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -470,7 +470,7 @@ bad_area_nosemaphore: if (is_exec && (error_code & DSISR_PROTFAULT)) printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected" " page (%lx) - exploit attempt? (uid: %d)\n", - address, current_uid()); + address, from_kuid(&init_user_ns, current_uid())); return SIGSEGV; diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 23bc9db4317e..82607d621aca 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -76,7 +76,7 @@ static void spu_gov_work(struct work_struct *work) static void spu_gov_init_work(struct spu_gov_info_struct *info) { int delay = usecs_to_jiffies(info->poll_int); - INIT_DELAYED_WORK_DEFERRABLE(&info->work, spu_gov_work); + INIT_DEFERRABLE_WORK(&info->work, spu_gov_work); schedule_delayed_work_on(info->policy->cpu, &info->work, delay); } diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 714bbfc3162c..db4e638cf408 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -69,8 +69,6 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, umode_t, mode, int, neighbor_fd) { long ret; - struct file *neighbor; - int fput_needed; struct spufs_calls *calls; calls = spufs_calls_get(); @@ -78,11 +76,11 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, return -ENOSYS; if (flags & SPU_CREATE_AFFINITY_SPU) { + struct fd neighbor = fdget(neighbor_fd); ret = -EBADF; - neighbor = fget_light(neighbor_fd, &fput_needed); - if (neighbor) { - ret = calls->create_thread(name, flags, mode, neighbor); - fput_light(neighbor, fput_needed); + if (neighbor.file) { + ret = calls->create_thread(name, flags, mode, neighbor.file); + fdput(neighbor); } } else ret = calls->create_thread(name, flags, mode, NULL); @@ -94,8 +92,7 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) { long ret; - struct file *filp; - int fput_needed; + struct fd arg; struct spufs_calls *calls; calls = spufs_calls_get(); @@ -103,10 +100,10 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) return -ENOSYS; ret = -EBADF; - filp = fget_light(fd, &fput_needed); - if (filp) { - ret = calls->spu_run(filp, unpc, ustatus); - fput_light(filp, fput_needed); + arg = fdget(fd); + if (arg.file) { + ret = calls->spu_run(arg.file, unpc, ustatus); + fdput(arg); } spufs_calls_put(calls); diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index c2c5b078ba80..657e3f233a64 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -106,6 +106,17 @@ static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) return total; } +static int match_context(const void *v, struct file *file, unsigned fd) +{ + struct spu_context *ctx; + if (file->f_op != &spufs_context_fops) + return 0; + ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; + if (ctx->flags & SPU_CREATE_NOSCHED) + return 0; + return fd + 1; +} + /* * The additional architecture-specific notes for Cell are various * context files in the spu context. @@ -115,29 +126,18 @@ static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) * internal functionality to dump them without needing to actually * open the files. */ +/* + * descriptor table is not shared, so files can't change or go away. + */ static struct spu_context *coredump_next_context(int *fd) { - struct fdtable *fdt = files_fdtable(current->files); struct file *file; - struct spu_context *ctx = NULL; - - for (; *fd < fdt->max_fds; (*fd)++) { - if (!fd_is_open(*fd, fdt)) - continue; - - file = fcheck(*fd); - - if (!file || file->f_op != &spufs_context_fops) - continue; - - ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; - if (ctx->flags & SPU_CREATE_NOSCHED) - continue; - - break; - } - - return ctx; + int n = iterate_fd(current->files, *fd, match_context, NULL); + if (!n) + return NULL; + *fd = n - 1; + file = fcheck(*fd); + return SPUFS_I(file->f_dentry->d_inode)->i_ctx; } int spufs_coredump_extra_notes_size(void) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 6767b437a103..06ea69bd387a 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -31,8 +31,8 @@ static struct dentry *hypfs_create_update_file(struct super_block *sb, struct dentry *dir); struct hypfs_sb_info { - uid_t uid; /* uid used for files and dirs */ - gid_t gid; /* gid used for files and dirs */ + kuid_t uid; /* uid used for files and dirs */ + kgid_t gid; /* gid used for files and dirs */ struct dentry *update_file; /* file to trigger update */ time_t last_update; /* last update time in secs since 1970 */ struct mutex lock; /* lock to protect update process */ @@ -72,8 +72,6 @@ static void hypfs_remove(struct dentry *dentry) struct dentry *parent; parent = dentry->d_parent; - if (!parent || !parent->d_inode) - return; mutex_lock(&parent->d_inode->i_mutex); if (hypfs_positive(dentry)) { if (S_ISDIR(dentry->d_inode->i_mode)) @@ -229,6 +227,8 @@ static int hypfs_parse_options(char *options, struct super_block *sb) { char *str; substring_t args[MAX_OPT_ARGS]; + kuid_t uid; + kgid_t gid; if (!options) return 0; @@ -243,12 +243,18 @@ static int hypfs_parse_options(char *options, struct super_block *sb) case opt_uid: if (match_int(&args[0], &option)) return -EINVAL; - hypfs_info->uid = option; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + hypfs_info->uid = uid; break; case opt_gid: if (match_int(&args[0], &option)) return -EINVAL; - hypfs_info->gid = option; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + hypfs_info->gid = gid; break; case opt_err: default: @@ -263,8 +269,8 @@ static int hypfs_show_options(struct seq_file *s, struct dentry *root) { struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info; - seq_printf(s, ",uid=%u", hypfs_info->uid); - seq_printf(s, ",gid=%u", hypfs_info->gid); + seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, hypfs_info->uid)); + seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, hypfs_info->gid)); return 0; } diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index f606d935f495..189963c90c6e 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -131,13 +131,19 @@ asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid) low2highuid(suid)); } -asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid) +asmlinkage long sys32_getresuid16(u16 __user *ruidp, u16 __user *euidp, u16 __user *suidp) { + const struct cred *cred = current_cred(); int retval; + u16 ruid, euid, suid; - if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && - !(retval = put_user(high2lowuid(current->cred->euid), euid))) - retval = put_user(high2lowuid(current->cred->suid), suid); + ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid)); + euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid)); + suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid)); + + if (!(retval = put_user(ruid, ruidp)) && + !(retval = put_user(euid, euidp))) + retval = put_user(suid, suidp); return retval; } @@ -148,13 +154,19 @@ asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid) low2highgid(sgid)); } -asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid) +asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __user *sgidp) { + const struct cred *cred = current_cred(); int retval; + u16 rgid, egid, sgid; + + rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid)); + egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid)); + sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid)); - if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && - !(retval = put_user(high2lowgid(current->cred->egid), egid))) - retval = put_user(high2lowgid(current->cred->sgid), sgid); + if (!(retval = put_user(rgid, rgidp)) && + !(retval = put_user(egid, egidp))) + retval = put_user(sgid, sgidp); return retval; } @@ -258,22 +270,22 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) asmlinkage long sys32_getuid16(void) { - return high2lowuid(current->cred->uid); + return high2lowuid(from_kuid_munged(current_user_ns(), current_uid())); } asmlinkage long sys32_geteuid16(void) { - return high2lowuid(current->cred->euid); + return high2lowuid(from_kuid_munged(current_user_ns(), current_euid())); } asmlinkage long sys32_getgid16(void) { - return high2lowgid(current->cred->gid); + return high2lowgid(from_kgid_munged(current_user_ns(), current_gid())); } asmlinkage long sys32_getegid16(void) { - return high2lowgid(current->cred->egid); + return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); } /* diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c index 637b79b09657..5bfb341cc5c4 100644 --- a/arch/sh/drivers/push-switch.c +++ b/arch/sh/drivers/push-switch.c @@ -107,7 +107,7 @@ static int switch_drv_remove(struct platform_device *pdev) device_remove_file(&pdev->dev, &dev_attr_switch); platform_set_drvdata(pdev, NULL); - flush_work_sync(&psw->work); + flush_work(&psw->work); del_timer_sync(&psw->debounce); free_irq(irq, pdev); diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild index 5cd01161fd00..675afa285ddb 100644 --- a/arch/sparc/Kbuild +++ b/arch/sparc/Kbuild @@ -6,3 +6,4 @@ obj-y += kernel/ obj-y += mm/ obj-y += math-emu/ obj-y += net/ +obj-y += crypto/ diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile new file mode 100644 index 000000000000..6ae1ad5e502b --- /dev/null +++ b/arch/sparc/crypto/Makefile @@ -0,0 +1,25 @@ +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o +obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o +obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o +obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o + +obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o +obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o +obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o + +obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o + +sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o +sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o +sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o +md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o + +aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o +des-sparc64-y := des_asm.o des_glue.o crop_devid.o +camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o + +crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S new file mode 100644 index 000000000000..23f6cbb910d3 --- /dev/null +++ b/arch/sparc/crypto/aes_asm.S @@ -0,0 +1,1535 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23(KEY_BASE + 6, T0, T1, I1) + +#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ + AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ + AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \ + AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \ + AES_EROUND23(KEY_BASE + 6, T2, T3, I3) + +#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) + +#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ + AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ + AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \ + AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \ + AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3) + + /* 10 rounds */ +#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) + +#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + + /* 12 rounds */ +#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + +#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + + /* 14 rounds */ +#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ + TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ + ldd [%o0 + 0xd0], %f56; \ + ldd [%o0 + 0xd8], %f58; \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ + ldd [%o0 + 0xe0], %f60; \ + ldd [%o0 + 0xe8], %f62; \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ + AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \ + AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \ + AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \ + AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \ + AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \ + AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \ + ldd [%o0 + 0x10], %f8; \ + ldd [%o0 + 0x18], %f10; \ + AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \ + AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \ + ldd [%o0 + 0x20], %f12; \ + ldd [%o0 + 0x28], %f14; + +#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01(KEY_BASE + 6, T0, T1, I0) + +#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ + AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ + AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \ + AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \ + AES_DROUND01(KEY_BASE + 6, T2, T3, I2) + +#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) + +#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ + AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ + AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \ + AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \ + AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2) + + /* 10 rounds */ +#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) + +#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + + /* 12 rounds */ +#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + +#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + + /* 14 rounds */ +#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ + TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ + ldd [%o0 + 0x18], %f56; \ + ldd [%o0 + 0x10], %f58; \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ + ldd [%o0 + 0x08], %f60; \ + ldd [%o0 + 0x00], %f62; \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ + AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \ + AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \ + AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \ + AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \ + AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \ + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \ + ldd [%o0 + 0xd8], %f8; \ + ldd [%o0 + 0xd0], %f10; \ + AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \ + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \ + ldd [%o0 + 0xc8], %f12; \ + ldd [%o0 + 0xc0], %f14; + + .align 32 +ENTRY(aes_sparc64_key_expand) + /* %o0=input_key, %o1=output_key, %o2=key_len */ + VISEntry + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + ld [%o0 + 0x0c], %f3 + + std %f0, [%o1 + 0x00] + std %f2, [%o1 + 0x08] + add %o1, 0x10, %o1 + + cmp %o2, 24 + bl 2f + nop + + be 1f + nop + + /* 256-bit key expansion */ + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + ld [%o0 + 0x18], %f6 + ld [%o0 + 0x1c], %f7 + + std %f4, [%o1 + 0x00] + std %f6, [%o1 + 0x08] + add %o1, 0x10, %o1 + + AES_KEXPAND1(0, 6, 0x0, 8) + AES_KEXPAND2(2, 8, 10) + AES_KEXPAND0(4, 10, 12) + AES_KEXPAND2(6, 12, 14) + AES_KEXPAND1(8, 14, 0x1, 16) + AES_KEXPAND2(10, 16, 18) + AES_KEXPAND0(12, 18, 20) + AES_KEXPAND2(14, 20, 22) + AES_KEXPAND1(16, 22, 0x2, 24) + AES_KEXPAND2(18, 24, 26) + AES_KEXPAND0(20, 26, 28) + AES_KEXPAND2(22, 28, 30) + AES_KEXPAND1(24, 30, 0x3, 32) + AES_KEXPAND2(26, 32, 34) + AES_KEXPAND0(28, 34, 36) + AES_KEXPAND2(30, 36, 38) + AES_KEXPAND1(32, 38, 0x4, 40) + AES_KEXPAND2(34, 40, 42) + AES_KEXPAND0(36, 42, 44) + AES_KEXPAND2(38, 44, 46) + AES_KEXPAND1(40, 46, 0x5, 48) + AES_KEXPAND2(42, 48, 50) + AES_KEXPAND0(44, 50, 52) + AES_KEXPAND2(46, 52, 54) + AES_KEXPAND1(48, 54, 0x6, 56) + AES_KEXPAND2(50, 56, 58) + + std %f8, [%o1 + 0x00] + std %f10, [%o1 + 0x08] + std %f12, [%o1 + 0x10] + std %f14, [%o1 + 0x18] + std %f16, [%o1 + 0x20] + std %f18, [%o1 + 0x28] + std %f20, [%o1 + 0x30] + std %f22, [%o1 + 0x38] + std %f24, [%o1 + 0x40] + std %f26, [%o1 + 0x48] + std %f28, [%o1 + 0x50] + std %f30, [%o1 + 0x58] + std %f32, [%o1 + 0x60] + std %f34, [%o1 + 0x68] + std %f36, [%o1 + 0x70] + std %f38, [%o1 + 0x78] + std %f40, [%o1 + 0x80] + std %f42, [%o1 + 0x88] + std %f44, [%o1 + 0x90] + std %f46, [%o1 + 0x98] + std %f48, [%o1 + 0xa0] + std %f50, [%o1 + 0xa8] + std %f52, [%o1 + 0xb0] + std %f54, [%o1 + 0xb8] + std %f56, [%o1 + 0xc0] + ba,pt %xcc, 80f + std %f58, [%o1 + 0xc8] + +1: + /* 192-bit key expansion */ + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + + std %f4, [%o1 + 0x00] + add %o1, 0x08, %o1 + + AES_KEXPAND1(0, 4, 0x0, 6) + AES_KEXPAND2(2, 6, 8) + AES_KEXPAND2(4, 8, 10) + AES_KEXPAND1(6, 10, 0x1, 12) + AES_KEXPAND2(8, 12, 14) + AES_KEXPAND2(10, 14, 16) + AES_KEXPAND1(12, 16, 0x2, 18) + AES_KEXPAND2(14, 18, 20) + AES_KEXPAND2(16, 20, 22) + AES_KEXPAND1(18, 22, 0x3, 24) + AES_KEXPAND2(20, 24, 26) + AES_KEXPAND2(22, 26, 28) + AES_KEXPAND1(24, 28, 0x4, 30) + AES_KEXPAND2(26, 30, 32) + AES_KEXPAND2(28, 32, 34) + AES_KEXPAND1(30, 34, 0x5, 36) + AES_KEXPAND2(32, 36, 38) + AES_KEXPAND2(34, 38, 40) + AES_KEXPAND1(36, 40, 0x6, 42) + AES_KEXPAND2(38, 42, 44) + AES_KEXPAND2(40, 44, 46) + AES_KEXPAND1(42, 46, 0x7, 48) + AES_KEXPAND2(44, 48, 50) + + std %f6, [%o1 + 0x00] + std %f8, [%o1 + 0x08] + std %f10, [%o1 + 0x10] + std %f12, [%o1 + 0x18] + std %f14, [%o1 + 0x20] + std %f16, [%o1 + 0x28] + std %f18, [%o1 + 0x30] + std %f20, [%o1 + 0x38] + std %f22, [%o1 + 0x40] + std %f24, [%o1 + 0x48] + std %f26, [%o1 + 0x50] + std %f28, [%o1 + 0x58] + std %f30, [%o1 + 0x60] + std %f32, [%o1 + 0x68] + std %f34, [%o1 + 0x70] + std %f36, [%o1 + 0x78] + std %f38, [%o1 + 0x80] + std %f40, [%o1 + 0x88] + std %f42, [%o1 + 0x90] + std %f44, [%o1 + 0x98] + std %f46, [%o1 + 0xa0] + std %f48, [%o1 + 0xa8] + ba,pt %xcc, 80f + std %f50, [%o1 + 0xb0] + +2: + /* 128-bit key expansion */ + AES_KEXPAND1(0, 2, 0x0, 4) + AES_KEXPAND2(2, 4, 6) + AES_KEXPAND1(4, 6, 0x1, 8) + AES_KEXPAND2(6, 8, 10) + AES_KEXPAND1(8, 10, 0x2, 12) + AES_KEXPAND2(10, 12, 14) + AES_KEXPAND1(12, 14, 0x3, 16) + AES_KEXPAND2(14, 16, 18) + AES_KEXPAND1(16, 18, 0x4, 20) + AES_KEXPAND2(18, 20, 22) + AES_KEXPAND1(20, 22, 0x5, 24) + AES_KEXPAND2(22, 24, 26) + AES_KEXPAND1(24, 26, 0x6, 28) + AES_KEXPAND2(26, 28, 30) + AES_KEXPAND1(28, 30, 0x7, 32) + AES_KEXPAND2(30, 32, 34) + AES_KEXPAND1(32, 34, 0x8, 36) + AES_KEXPAND2(34, 36, 38) + AES_KEXPAND1(36, 38, 0x9, 40) + AES_KEXPAND2(38, 40, 42) + + std %f4, [%o1 + 0x00] + std %f6, [%o1 + 0x08] + std %f8, [%o1 + 0x10] + std %f10, [%o1 + 0x18] + std %f12, [%o1 + 0x20] + std %f14, [%o1 + 0x28] + std %f16, [%o1 + 0x30] + std %f18, [%o1 + 0x38] + std %f20, [%o1 + 0x40] + std %f22, [%o1 + 0x48] + std %f24, [%o1 + 0x50] + std %f26, [%o1 + 0x58] + std %f28, [%o1 + 0x60] + std %f30, [%o1 + 0x68] + std %f32, [%o1 + 0x70] + std %f34, [%o1 + 0x78] + std %f36, [%o1 + 0x80] + std %f38, [%o1 + 0x88] + std %f40, [%o1 + 0x90] + std %f42, [%o1 + 0x98] +80: + retl + VISExit +ENDPROC(aes_sparc64_key_expand) + + .align 32 +ENTRY(aes_sparc64_encrypt_128) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + ENCRYPT_128(12, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_encrypt_128) + + .align 32 +ENTRY(aes_sparc64_encrypt_192) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 + + + ENCRYPT_128(12, 4, 6, 0, 2) + + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(aes_sparc64_encrypt_192) + + .align 32 +ENTRY(aes_sparc64_encrypt_256) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + ldd [%o0 + 0x10], %f8 + + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + + ldd [%o0 + 0x10], %f8 + + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 + + ENCRYPT_128(12, 4, 6, 0, 2) + + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(aes_sparc64_encrypt_256) + + .align 32 +ENTRY(aes_sparc64_decrypt_128) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xa0], %f8 + ldd [%o0 + 0xa8], %f10 + ldd [%o0 + 0x98], %f12 + ldd [%o0 + 0x90], %f14 + ldd [%o0 + 0x88], %f16 + ldd [%o0 + 0x80], %f18 + ldd [%o0 + 0x78], %f20 + ldd [%o0 + 0x70], %f22 + ldd [%o0 + 0x68], %f24 + ldd [%o0 + 0x60], %f26 + ldd [%o0 + 0x58], %f28 + ldd [%o0 + 0x50], %f30 + ldd [%o0 + 0x48], %f32 + ldd [%o0 + 0x40], %f34 + ldd [%o0 + 0x38], %f36 + ldd [%o0 + 0x30], %f38 + ldd [%o0 + 0x28], %f40 + ldd [%o0 + 0x20], %f42 + ldd [%o0 + 0x18], %f44 + ldd [%o0 + 0x10], %f46 + ldd [%o0 + 0x08], %f48 + ldd [%o0 + 0x00], %f50 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + DECRYPT_128(12, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_128) + + .align 32 +ENTRY(aes_sparc64_decrypt_192) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xc0], %f8 + ldd [%o0 + 0xc8], %f10 + ldd [%o0 + 0xb8], %f12 + ldd [%o0 + 0xb0], %f14 + ldd [%o0 + 0xa8], %f16 + ldd [%o0 + 0xa0], %f18 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + ldd [%o0 + 0x98], %f20 + ldd [%o0 + 0x90], %f22 + ldd [%o0 + 0x88], %f24 + ldd [%o0 + 0x80], %f26 + DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2) + ldd [%o0 + 0x78], %f28 + ldd [%o0 + 0x70], %f30 + ldd [%o0 + 0x68], %f32 + ldd [%o0 + 0x60], %f34 + ldd [%o0 + 0x58], %f36 + ldd [%o0 + 0x50], %f38 + ldd [%o0 + 0x48], %f40 + ldd [%o0 + 0x40], %f42 + ldd [%o0 + 0x38], %f44 + ldd [%o0 + 0x30], %f46 + ldd [%o0 + 0x28], %f48 + ldd [%o0 + 0x20], %f50 + ldd [%o0 + 0x18], %f52 + ldd [%o0 + 0x10], %f54 + ldd [%o0 + 0x08], %f56 + ldd [%o0 + 0x00], %f58 + DECRYPT_128(20, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_192) + + .align 32 +ENTRY(aes_sparc64_decrypt_256) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xe0], %f8 + ldd [%o0 + 0xe8], %f10 + ldd [%o0 + 0xd8], %f12 + ldd [%o0 + 0xd0], %f14 + ldd [%o0 + 0xc8], %f16 + fxor %f8, %f4, %f4 + ldd [%o0 + 0xc0], %f18 + fxor %f10, %f6, %f6 + ldd [%o0 + 0xb8], %f20 + AES_DROUND23(12, 4, 6, 2) + ldd [%o0 + 0xb0], %f22 + AES_DROUND01(14, 4, 6, 0) + ldd [%o0 + 0xa8], %f24 + AES_DROUND23(16, 0, 2, 6) + ldd [%o0 + 0xa0], %f26 + AES_DROUND01(18, 0, 2, 4) + ldd [%o0 + 0x98], %f12 + AES_DROUND23(20, 4, 6, 2) + ldd [%o0 + 0x90], %f14 + AES_DROUND01(22, 4, 6, 0) + ldd [%o0 + 0x88], %f16 + AES_DROUND23(24, 0, 2, 6) + ldd [%o0 + 0x80], %f18 + AES_DROUND01(26, 0, 2, 4) + ldd [%o0 + 0x78], %f20 + AES_DROUND23(12, 4, 6, 2) + ldd [%o0 + 0x70], %f22 + AES_DROUND01(14, 4, 6, 0) + ldd [%o0 + 0x68], %f24 + AES_DROUND23(16, 0, 2, 6) + ldd [%o0 + 0x60], %f26 + AES_DROUND01(18, 0, 2, 4) + ldd [%o0 + 0x58], %f28 + AES_DROUND23(20, 4, 6, 2) + ldd [%o0 + 0x50], %f30 + AES_DROUND01(22, 4, 6, 0) + ldd [%o0 + 0x48], %f32 + AES_DROUND23(24, 0, 2, 6) + ldd [%o0 + 0x40], %f34 + AES_DROUND01(26, 0, 2, 4) + ldd [%o0 + 0x38], %f36 + AES_DROUND23(28, 4, 6, 2) + ldd [%o0 + 0x30], %f38 + AES_DROUND01(30, 4, 6, 0) + ldd [%o0 + 0x28], %f40 + AES_DROUND23(32, 0, 2, 6) + ldd [%o0 + 0x20], %f42 + AES_DROUND01(34, 0, 2, 4) + ldd [%o0 + 0x18], %f44 + AES_DROUND23(36, 4, 6, 2) + ldd [%o0 + 0x10], %f46 + AES_DROUND01(38, 4, 6, 0) + ldd [%o0 + 0x08], %f48 + AES_DROUND23(40, 0, 2, 6) + ldd [%o0 + 0x00], %f50 + AES_DROUND01(42, 0, 2, 4) + AES_DROUND23(44, 4, 6, 2) + AES_DROUND01(46, 4, 6, 0) + AES_DROUND23_L(48, 0, 2, 6) + AES_DROUND01_L(50, 0, 2, 4) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_256) + + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_128) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + retl + ldd [%o0 + 0xa8], %f46 +ENDPROC(aes_sparc64_load_encrypt_keys_128) + + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_192) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + retl + ldd [%o0 + 0xc8], %f54 +ENDPROC(aes_sparc64_load_encrypt_keys_192) + + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_256) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + ldd [%o0 + 0xc8], %f54 + ldd [%o0 + 0xd0], %f56 + ldd [%o0 + 0xd8], %f58 + ldd [%o0 + 0xe0], %f60 + retl + ldd [%o0 + 0xe8], %f62 +ENDPROC(aes_sparc64_load_encrypt_keys_256) + + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_128) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x98], %f8 + ldd [%o0 + 0x90], %f10 + ldd [%o0 + 0x88], %f12 + ldd [%o0 + 0x80], %f14 + ldd [%o0 + 0x78], %f16 + ldd [%o0 + 0x70], %f18 + ldd [%o0 + 0x68], %f20 + ldd [%o0 + 0x60], %f22 + ldd [%o0 + 0x58], %f24 + ldd [%o0 + 0x50], %f26 + ldd [%o0 + 0x48], %f28 + ldd [%o0 + 0x40], %f30 + ldd [%o0 + 0x38], %f32 + ldd [%o0 + 0x30], %f34 + ldd [%o0 + 0x28], %f36 + ldd [%o0 + 0x20], %f38 + ldd [%o0 + 0x18], %f40 + ldd [%o0 + 0x10], %f42 + ldd [%o0 + 0x08], %f44 + retl + ldd [%o0 + 0x00], %f46 +ENDPROC(aes_sparc64_load_decrypt_keys_128) + + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_192) + /* %o0=key */ + VISEntry + ldd [%o0 + 0xb8], %f8 + ldd [%o0 + 0xb0], %f10 + ldd [%o0 + 0xa8], %f12 + ldd [%o0 + 0xa0], %f14 + ldd [%o0 + 0x98], %f16 + ldd [%o0 + 0x90], %f18 + ldd [%o0 + 0x88], %f20 + ldd [%o0 + 0x80], %f22 + ldd [%o0 + 0x78], %f24 + ldd [%o0 + 0x70], %f26 + ldd [%o0 + 0x68], %f28 + ldd [%o0 + 0x60], %f30 + ldd [%o0 + 0x58], %f32 + ldd [%o0 + 0x50], %f34 + ldd [%o0 + 0x48], %f36 + ldd [%o0 + 0x40], %f38 + ldd [%o0 + 0x38], %f40 + ldd [%o0 + 0x30], %f42 + ldd [%o0 + 0x28], %f44 + ldd [%o0 + 0x20], %f46 + ldd [%o0 + 0x18], %f48 + ldd [%o0 + 0x10], %f50 + ldd [%o0 + 0x08], %f52 + retl + ldd [%o0 + 0x00], %f54 +ENDPROC(aes_sparc64_load_decrypt_keys_192) + + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_256) + /* %o0=key */ + VISEntry + ldd [%o0 + 0xd8], %f8 + ldd [%o0 + 0xd0], %f10 + ldd [%o0 + 0xc8], %f12 + ldd [%o0 + 0xc0], %f14 + ldd [%o0 + 0xb8], %f16 + ldd [%o0 + 0xb0], %f18 + ldd [%o0 + 0xa8], %f20 + ldd [%o0 + 0xa0], %f22 + ldd [%o0 + 0x98], %f24 + ldd [%o0 + 0x90], %f26 + ldd [%o0 + 0x88], %f28 + ldd [%o0 + 0x80], %f30 + ldd [%o0 + 0x78], %f32 + ldd [%o0 + 0x70], %f34 + ldd [%o0 + 0x68], %f36 + ldd [%o0 + 0x60], %f38 + ldd [%o0 + 0x58], %f40 + ldd [%o0 + 0x50], %f42 + ldd [%o0 + 0x48], %f44 + ldd [%o0 + 0x40], %f46 + ldd [%o0 + 0x38], %f48 + ldd [%o0 + 0x30], %f50 + ldd [%o0 + 0x28], %f52 + ldd [%o0 + 0x20], %f54 + ldd [%o0 + 0x18], %f56 + ldd [%o0 + 0x10], %f58 + ldd [%o0 + 0x08], %f60 + retl + ldd [%o0 + 0x00], %f62 +ENDPROC(aes_sparc64_load_decrypt_keys_256) + + .align 32 +ENTRY(aes_sparc64_ecb_encrypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len */ + ldx [%o0 + 0x00], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + ENCRYPT_128(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_encrypt_128) + + .align 32 +ENTRY(aes_sparc64_ecb_encrypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len */ + ldx [%o0 + 0x00], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + ENCRYPT_192(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_encrypt_192) + + .align 32 +ENTRY(aes_sparc64_ecb_encrypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len */ + ldx [%o0 + 0x00], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + ENCRYPT_256_2(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f0, [%o2 + 0x10] + std %f2, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + ENCRYPT_256(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_encrypt_256) + + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_128) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ + ldx [%o0 - 0x10], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_128(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_decrypt_128) + + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_192) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ + ldx [%o0 - 0x10], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_192(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_decrypt_192) + + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_256) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ + ldx [%o0 - 0x10], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 + sub %o0, 0xf0, %o0 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + DECRYPT_256_2(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f0, [%o2 + 0x10] + std %f2, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_256(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_decrypt_256) + + .align 32 +ENTRY(aes_sparc64_cbc_encrypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + ENCRYPT_128(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_encrypt_128) + + .align 32 +ENTRY(aes_sparc64_cbc_encrypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + ENCRYPT_192(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_encrypt_192) + + .align 32 +ENTRY(aes_sparc64_cbc_encrypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + ENCRYPT_256(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_encrypt_256) + + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_128) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_128(8, 4, 6, 0, 2) + MOVXTOD_O0_F0 + MOVXTOD_O5_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o5 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_decrypt_128) + + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_192) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_192(8, 4, 6, 0, 2) + MOVXTOD_O0_F0 + MOVXTOD_O5_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o5 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_decrypt_192) + + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_256) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_256(8, 4, 6, 0, 2) + MOVXTOD_O0_F0 + MOVXTOD_O5_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o5 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_decrypt_256) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 + ldx [%o0 + 0x00], %g1 + be 10f + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_128(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: stx %g3, [%o4 + 0x00] + retl + stx %g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_128) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 + ldx [%o0 + 0x00], %g1 + be 10f + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_192(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: stx %g3, [%o4 + 0x00] + retl + stx %g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_192) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 + ldx [%o0 + 0x00], %g1 + be 10f + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_256_2(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop + ldd [%o0 + 0xd0], %f56 + ldd [%o0 + 0xd8], %f58 + ldd [%o0 + 0xe0], %f60 + ldd [%o0 + 0xe8], %f62 +10: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_256(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: stx %g3, [%o4 + 0x00] + retl + stx %g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_256) diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c new file mode 100644 index 000000000000..8f1c9980f637 --- /dev/null +++ b/arch/sparc/crypto/aes_glue.c @@ -0,0 +1,477 @@ +/* Glue code for AES encryption optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/aesni-intel_glue.c + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Adrian Hoban <adrian.hoban@intel.com> + * Gabriele Paoloni <gabriele.paoloni@intel.com> + * Tadeusz Struk (tadeusz.struk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Copyright (c) 2010, Intel Corporation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +struct aes_ops { + void (*encrypt)(const u64 *key, const u32 *input, u32 *output); + void (*decrypt)(const u64 *key, const u32 *input, u32 *output); + void (*load_encrypt_keys)(const u64 *key); + void (*load_decrypt_keys)(const u64 *key); + void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len); + void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len); + void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); + void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); + void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); +}; + +struct crypto_sparc64_aes_ctx { + struct aes_ops *ops; + u64 key[AES_MAX_KEYLENGTH / sizeof(u64)]; + u32 key_length; + u32 expanded_key_length; +}; + +extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input, + u32 *output); + +extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input, + u32 *output); + +extern void aes_sparc64_load_encrypt_keys_128(const u64 *key); +extern void aes_sparc64_load_encrypt_keys_192(const u64 *key); +extern void aes_sparc64_load_encrypt_keys_256(const u64 *key); + +extern void aes_sparc64_load_decrypt_keys_128(const u64 *key); +extern void aes_sparc64_load_decrypt_keys_192(const u64 *key); +extern void aes_sparc64_load_decrypt_keys_256(const u64 *key); + +extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len); + +extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len); + +extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); +extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); +extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +struct aes_ops aes128_ops = { + .encrypt = aes_sparc64_encrypt_128, + .decrypt = aes_sparc64_decrypt_128, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_128, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_128, + .ecb_encrypt = aes_sparc64_ecb_encrypt_128, + .ecb_decrypt = aes_sparc64_ecb_decrypt_128, + .cbc_encrypt = aes_sparc64_cbc_encrypt_128, + .cbc_decrypt = aes_sparc64_cbc_decrypt_128, + .ctr_crypt = aes_sparc64_ctr_crypt_128, +}; + +struct aes_ops aes192_ops = { + .encrypt = aes_sparc64_encrypt_192, + .decrypt = aes_sparc64_decrypt_192, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_192, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_192, + .ecb_encrypt = aes_sparc64_ecb_encrypt_192, + .ecb_decrypt = aes_sparc64_ecb_decrypt_192, + .cbc_encrypt = aes_sparc64_cbc_encrypt_192, + .cbc_decrypt = aes_sparc64_cbc_decrypt_192, + .ctr_crypt = aes_sparc64_ctr_crypt_192, +}; + +struct aes_ops aes256_ops = { + .encrypt = aes_sparc64_encrypt_256, + .decrypt = aes_sparc64_decrypt_256, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_256, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_256, + .ecb_encrypt = aes_sparc64_ecb_encrypt_256, + .ecb_decrypt = aes_sparc64_ecb_decrypt_256, + .cbc_encrypt = aes_sparc64_cbc_encrypt_256, + .cbc_decrypt = aes_sparc64_cbc_decrypt_256, + .ctr_crypt = aes_sparc64_ctr_crypt_256, +}; + +extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key, + unsigned int key_len); + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + switch (key_len) { + case AES_KEYSIZE_128: + ctx->expanded_key_length = 0xb0; + ctx->ops = &aes128_ops; + break; + + case AES_KEYSIZE_192: + ctx->expanded_key_length = 0xd0; + ctx->ops = &aes192_ops; + break; + + case AES_KEYSIZE_256: + ctx->expanded_key_length = 0xf0; + ctx->ops = &aes256_ops; + break; + + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len); + ctx->key_length = key_len; + + return 0; +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); +} + +#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->ecb_encrypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u64 *key_end; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_decrypt_keys(&ctx->key[0]); + key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->ecb_decrypt(key_end, + (const u64 *) walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, block_len); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->cbc_encrypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u64 *key_end; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_decrypt_keys(&ctx->key[0]); + key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->cbc_decrypt(key_end, + (const u64 *) walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + + return err; +} + +static int ctr_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->ctr_crypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "aes", + .cra_driver_name = "aes-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +} }; + +static bool __init sparc64_has_aes_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_AES)) + return false; + + return true; +} + +static int __init aes_sparc64_mod_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs); i++) + INIT_LIST_HEAD(&algs[i].cra_list); + + if (sparc64_has_aes_opcode()) { + pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); + return crypto_register_algs(algs, ARRAY_SIZE(algs)); + } + pr_info("sparc64 aes opcodes not available.\n"); + return -ENODEV; +} + +static void __exit aes_sparc64_mod_fini(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(aes_sparc64_mod_init); +module_exit(aes_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); + +MODULE_ALIAS("aes"); diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S new file mode 100644 index 000000000000..cc39553a4e43 --- /dev/null +++ b/arch/sparc/crypto/camellia_asm.S @@ -0,0 +1,563 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ + CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 2, I0, I1, I0) \ + CAMELLIA_F(KEY_BASE + 4, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 6, I0, I1, I0) \ + CAMELLIA_F(KEY_BASE + 8, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 10, I0, I1, I0) + +#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \ + CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ + CAMELLIA_FL(KEY_BASE + 12, I0, I0) \ + CAMELLIA_FLI(KEY_BASE + 14, I1, I1) + + .data + + .align 8 +SIGMA: .xword 0xA09E667F3BCC908B + .xword 0xB67AE8584CAA73B2 + .xword 0xC6EF372FE94F82BE + .xword 0x54FF53A5F1D36F1C + .xword 0x10E527FADE682D1D + .xword 0xB05688C2B3E6C1FD + + .text + + .align 32 +ENTRY(camellia_sparc64_key_expand) + /* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */ + VISEntry + ld [%o0 + 0x00], %f0 ! i0, k[0] + ld [%o0 + 0x04], %f1 ! i1, k[1] + ld [%o0 + 0x08], %f2 ! i2, k[2] + ld [%o0 + 0x0c], %f3 ! i3, k[3] + std %f0, [%o1 + 0x00] ! k[0, 1] + fsrc2 %f0, %f28 + std %f2, [%o1 + 0x08] ! k[2, 3] + cmp %o2, 16 + be 10f + fsrc2 %f2, %f30 + + ld [%o0 + 0x10], %f0 + ld [%o0 + 0x14], %f1 + std %f0, [%o1 + 0x20] ! k[8, 9] + cmp %o2, 24 + fone %f10 + be,a 1f + fxor %f10, %f0, %f2 + ld [%o0 + 0x18], %f2 + ld [%o0 + 0x1c], %f3 +1: + std %f2, [%o1 + 0x28] ! k[10, 11] + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + +10: + sethi %hi(SIGMA), %g3 + or %g3, %lo(SIGMA), %g3 + ldd [%g3 + 0x00], %f16 + ldd [%g3 + 0x08], %f18 + ldd [%g3 + 0x10], %f20 + ldd [%g3 + 0x18], %f22 + ldd [%g3 + 0x20], %f24 + ldd [%g3 + 0x28], %f26 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + CAMELLIA_F(20, 2, 0, 2) + CAMELLIA_F(22, 0, 2, 0) + +#define ROTL128(S01, S23, TMP1, TMP2, N) \ + srlx S01, (64 - N), TMP1; \ + sllx S01, N, S01; \ + srlx S23, (64 - N), TMP2; \ + sllx S23, N, S23; \ + or S01, TMP2, S01; \ + or S23, TMP1, S23 + + cmp %o2, 16 + bne 1f + nop + /* 128-bit key */ + std %f0, [%o1 + 0x10] ! k[ 4, 5] + std %f2, [%o1 + 0x18] ! k[ 6, 7] + MOVDTOX_F0_O4 + MOVDTOX_F2_O5 + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x30] ! k[12, 13] + stx %o5, [%o1 + 0x38] ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x40] ! k[16, 17] + stx %o5, [%o1 + 0x48] ! k[18, 19] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x60] ! k[24, 25] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x70] ! k[28, 29] + stx %o5, [%o1 + 0x78] ! k[30, 31] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xa0] ! k[40, 41] + stx %o5, [%o1 + 0xa8] ! k[42, 43] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xc0] ! k[48, 49] + stx %o5, [%o1 + 0xc8] ! k[50, 51] + + ldx [%o1 + 0x00], %o4 ! k[ 0, 1] + ldx [%o1 + 0x08], %o5 ! k[ 2, 3] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x20] ! k[ 8, 9] + stx %o5, [%o1 + 0x28] ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x50] ! k[20, 21] + stx %o5, [%o1 + 0x58] ! k[22, 23] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o5, [%o1 + 0x68] ! k[26, 27] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0x80] ! k[32, 33] + stx %o5, [%o1 + 0x88] ! k[34, 35] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0x90] ! k[36, 37] + stx %o5, [%o1 + 0x98] ! k[38, 39] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xb0] ! k[44, 45] + stx %o5, [%o1 + 0xb8] ! k[46, 47] + + ba,pt %xcc, 2f + mov (3 * 16 * 4), %o0 + +1: + /* 192-bit or 256-bit key */ + std %f0, [%o1 + 0x30] ! k[12, 13] + std %f2, [%o1 + 0x38] ! k[14, 15] + ldd [%o1 + 0x20], %f4 ! k[ 8, 9] + ldd [%o1 + 0x28], %f6 ! k[10, 11] + fxor %f0, %f4, %f0 + fxor %f2, %f6, %f2 + CAMELLIA_F(24, 2, 0, 2) + CAMELLIA_F(26, 0, 2, 0) + std %f0, [%o1 + 0x10] ! k[ 4, 5] + std %f2, [%o1 + 0x18] ! k[ 6, 7] + MOVDTOX_F0_O4 + MOVDTOX_F2_O5 + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x50] ! k[20, 21] + stx %o5, [%o1 + 0x58] ! k[22, 23] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0xa0] ! k[40, 41] + stx %o5, [%o1 + 0xa8] ! k[42, 43] + ROTL128(%o4, %o5, %g2, %g3, 51) + stx %o4, [%o1 + 0x100] ! k[64, 65] + stx %o5, [%o1 + 0x108] ! k[66, 67] + ldx [%o1 + 0x20], %o4 ! k[ 8, 9] + ldx [%o1 + 0x28], %o5 ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x20] ! k[ 8, 9] + stx %o5, [%o1 + 0x28] ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x40] ! k[16, 17] + stx %o5, [%o1 + 0x48] ! k[18, 19] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x90] ! k[36, 37] + stx %o5, [%o1 + 0x98] ! k[38, 39] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xd0] ! k[52, 53] + stx %o5, [%o1 + 0xd8] ! k[54, 55] + ldx [%o1 + 0x30], %o4 ! k[12, 13] + ldx [%o1 + 0x38], %o5 ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x30] ! k[12, 13] + stx %o5, [%o1 + 0x38] ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x70] ! k[28, 29] + stx %o5, [%o1 + 0x78] ! k[30, 31] + srlx %o4, 32, %g2 + srlx %o5, 32, %g3 + stw %o4, [%o1 + 0xc0] ! k[48] + stw %g3, [%o1 + 0xc4] ! k[49] + stw %o5, [%o1 + 0xc8] ! k[50] + stw %g2, [%o1 + 0xcc] ! k[51] + ROTL128(%o4, %o5, %g2, %g3, 49) + stx %o4, [%o1 + 0xe0] ! k[56, 57] + stx %o5, [%o1 + 0xe8] ! k[58, 59] + ldx [%o1 + 0x00], %o4 ! k[ 0, 1] + ldx [%o1 + 0x08], %o5 ! k[ 2, 3] + ROTL128(%o4, %o5, %g2, %g3, 45) + stx %o4, [%o1 + 0x60] ! k[24, 25] + stx %o5, [%o1 + 0x68] ! k[26, 27] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x80] ! k[32, 33] + stx %o5, [%o1 + 0x88] ! k[34, 35] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xb0] ! k[44, 45] + stx %o5, [%o1 + 0xb8] ! k[46, 47] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xf0] ! k[60, 61] + stx %o5, [%o1 + 0xf8] ! k[62, 63] + mov (4 * 16 * 4), %o0 +2: + add %o1, %o0, %o1 + ldd [%o1 + 0x00], %f0 + ldd [%o1 + 0x08], %f2 + std %f0, [%o3 + 0x00] + std %f2, [%o3 + 0x08] + add %o3, 0x10, %o3 +1: + sub %o1, (16 * 4), %o1 + ldd [%o1 + 0x38], %f0 + ldd [%o1 + 0x30], %f2 + ldd [%o1 + 0x28], %f4 + ldd [%o1 + 0x20], %f6 + ldd [%o1 + 0x18], %f8 + ldd [%o1 + 0x10], %f10 + std %f0, [%o3 + 0x00] + std %f2, [%o3 + 0x08] + std %f4, [%o3 + 0x10] + std %f6, [%o3 + 0x18] + std %f8, [%o3 + 0x20] + std %f10, [%o3 + 0x28] + + ldd [%o1 + 0x08], %f0 + ldd [%o1 + 0x00], %f2 + std %f0, [%o3 + 0x30] + std %f2, [%o3 + 0x38] + subcc %o0, (16 * 4), %o0 + bne,pt %icc, 1b + add %o3, (16 * 4), %o3 + + std %f2, [%o3 - 0x10] + std %f0, [%o3 - 0x08] + + retl + VISExit +ENDPROC(camellia_sparc64_key_expand) + + .align 32 +ENTRY(camellia_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output, %o3=key_len */ + VISEntry + + ld [%o1 + 0x00], %f0 + ld [%o1 + 0x04], %f1 + ld [%o1 + 0x08], %f2 + ld [%o1 + 0x0c], %f3 + + ldd [%o0 + 0x00], %f4 + ldd [%o0 + 0x08], %f6 + + cmp %o3, 16 + fxor %f4, %f0, %f0 + be 1f + fxor %f6, %f2, %f2 + + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + add %o0, 0x40, %o0 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + +1: + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + ldd [%o0 + 0xc8], %f54 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + + st %f2, [%o2 + 0x00] + st %f3, [%o2 + 0x04] + st %f0, [%o2 + 0x08] + st %f1, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(camellia_sparc64_crypt) + + .align 32 +ENTRY(camellia_sparc64_load_keys) + /* %o0=key, %o1=key_len */ + VISEntry + ldd [%o0 + 0x00], %f4 + ldd [%o0 + 0x08], %f6 + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + retl + ldd [%o0 + 0xc8], %f54 +ENDPROC(camellia_sparc64_load_keys) + + .align 32 +ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key */ +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + retl + nop +ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key */ +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f2 + fxor %f22, %f0, %f0 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + retl + nop +ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f60, %f0, %f0 + fxor %f62, %f2, %f2 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f60 + fxor %f54, %f0, %f62 + std %f60, [%o1 + 0x00] + std %f62, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f60, %f0, %f0 + fxor %f62, %f2, %f2 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f60 + fxor %f22, %f0, %f62 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + std %f60, [%o1 + 0x00] + std %f62, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f56 + ldd [%o0 + 0x08], %f58 + add %o0, 0x10, %o0 + fxor %f4, %f56, %f0 + fxor %f6, %f58, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + fxor %f60, %f2, %f2 + fxor %f62, %f0, %f0 + fsrc2 %f56, %f60 + fsrc2 %f58, %f62 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f56 + ldd [%o0 + 0x08], %f58 + add %o0, 0x10, %o0 + fxor %f4, %f56, %f0 + fxor %f6, %f58, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f2 + fxor %f22, %f0, %f0 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + fxor %f60, %f2, %f2 + fxor %f62, %f0, %f0 + fsrc2 %f56, %f60 + fsrc2 %f58, %f62 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds) diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c new file mode 100644 index 000000000000..42905c084299 --- /dev/null +++ b/arch/sparc/crypto/camellia_glue.c @@ -0,0 +1,322 @@ +/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes. + * + * Copyright (C) 2012 David S. Miller <davem@davemloft.net> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +#define CAMELLIA_MIN_KEY_SIZE 16 +#define CAMELLIA_MAX_KEY_SIZE 32 +#define CAMELLIA_BLOCK_SIZE 16 +#define CAMELLIA_TABLE_BYTE_LEN 272 + +struct camellia_sparc64_ctx { + u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + int key_len; +}; + +extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key, + unsigned int key_len, u64 *decrypt_key); + +static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, + unsigned int key_len) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u32 *in_key = (const u32 *) _in_key; + u32 *flags = &tfm->crt_flags; + + if (key_len != 16 && key_len != 24 && key_len != 32) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + ctx->key_len = key_len; + + camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0], + key_len, &ctx->decrypt_key[0]); + return 0; +} + +extern void camellia_sparc64_crypt(const u64 *key, const u32 *input, + u32 *output, unsigned int key_len); + +static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + + camellia_sparc64_crypt(&ctx->encrypt_key[0], + (const u32 *) src, + (u32 *) dst, ctx->key_len); +} + +static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + + camellia_sparc64_crypt(&ctx->decrypt_key[0], + (const u32 *) src, + (u32 *) dst, ctx->key_len); +} + +extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len); + +typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len, + const u64 *key); + +extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds; +extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds; + +#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1)) + +static int __ecb_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + ecb_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_ecb_crypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_ecb_crypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + key = &ctx->encrypt_key[0]; + else + key = &ctx->decrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, false); +} + +typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len, + const u64 *key, u64 *iv); + +extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds; + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + cbc_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_cbc_encrypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_cbc_encrypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + key = &ctx->encrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key, + (u64 *) walk.iv); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + cbc_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_cbc_decrypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_cbc_decrypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + key = &ctx->decrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key, + (u64 *) walk.iv); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "camellia", + .cra_driver_name = "camellia-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, + .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, + .cia_setkey = camellia_set_key, + .cia_encrypt = camellia_encrypt, + .cia_decrypt = camellia_decrypt + } + } +}, { + .cra_name = "ecb(camellia)", + .cra_driver_name = "ecb-camellia-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(camellia)", + .cra_driver_name = "cbc-camellia-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +} +}; + +static bool __init sparc64_has_camellia_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_CAMELLIA)) + return false; + + return true; +} + +static int __init camellia_sparc64_mod_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs); i++) + INIT_LIST_HEAD(&algs[i].cra_list); + + if (sparc64_has_camellia_opcode()) { + pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); + return crypto_register_algs(algs, ARRAY_SIZE(algs)); + } + pr_info("sparc64 camellia opcodes not available.\n"); + return -ENODEV; +} + +static void __exit camellia_sparc64_mod_fini(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(camellia_sparc64_mod_init); +module_exit(camellia_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); + +MODULE_ALIAS("aes"); diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S new file mode 100644 index 000000000000..2b1976e765b5 --- /dev/null +++ b/arch/sparc/crypto/crc32c_asm.S @@ -0,0 +1,20 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> +#include <asm/asi.h> + +#include "opcodes.h" + +ENTRY(crc32c_sparc64) + /* %o0=crc32p, %o1=data_ptr, %o2=len */ + VISEntryHalf + lda [%o0] ASI_PL, %f1 +1: ldd [%o1], %f2 + CRC32C(0,2,0) + subcc %o2, 8, %o2 + bne,pt %icc, 1b + add %o1, 0x8, %o1 + sta %f1, [%o0] ASI_PL + VISExitHalf +2: retl + nop +ENDPROC(crc32c_sparc64) diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c new file mode 100644 index 000000000000..0bd89cea8d8e --- /dev/null +++ b/arch/sparc/crypto/crc32c_glue.c @@ -0,0 +1,179 @@ +/* Glue code for CRC32C optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/crc32c-intel.c + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang <austin_zhang@linux.intel.com> + * Kent Liu <kent.liu@intel.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/crc32.h> + +#include <crypto/internal/hash.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. + */ +static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *(__le32 *)mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32c_sparc64_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); + +static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len) +{ + unsigned int asm_len; + + asm_len = len & ~7U; + if (asm_len) { + crc32c_sparc64(crcp, data, asm_len); + data += asm_len / 8; + len -= asm_len; + } + if (len) + *crcp = __crc32c_le(*crcp, (const unsigned char *) data, len); +} + +static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + crc32c_compute(crcp, (const u64 *) data, len); + + return 0; +} + +static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + u32 tmp = *crcp; + + crc32c_compute(&tmp, (const u64 *) data, len); + + *(__le32 *) out = ~cpu_to_le32(tmp); + return 0; +} + +static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *) out = ~cpu_to_le32p(crcp); + return 0; +} + +static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = ~0; + + return 0; +} + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +static struct shash_alg alg = { + .setkey = crc32c_sparc64_setkey, + .init = crc32c_sparc64_init, + .update = crc32c_sparc64_update, + .final = crc32c_sparc64_final, + .finup = crc32c_sparc64_finup, + .digest = crc32c_sparc64_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_init = crc32c_sparc64_cra_init, + } +}; + +static bool __init sparc64_has_crc32c_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_CRC32C)) + return false; + + return true; +} + +static int __init crc32c_sparc64_mod_init(void) +{ + if (sparc64_has_crc32c_opcode()) { + pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 crc32c opcode not available.\n"); + return -ENODEV; +} + +static void __exit crc32c_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32c_sparc64_mod_init); +module_exit(crc32c_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); + +MODULE_ALIAS("crc32c"); diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c new file mode 100644 index 000000000000..5f5724a0ae22 --- /dev/null +++ b/arch/sparc/crypto/crop_devid.c @@ -0,0 +1,14 @@ +#include <linux/module.h> +#include <linux/of_device.h> + +/* This is a dummy device table linked into all of the crypto + * opcode drivers. It serves to trigger the module autoloading + * mechanisms in userspace which scan the OF device tree and + * load any modules which have device table entries that + * match OF device nodes. + */ +static const struct of_device_id crypto_opcode_match[] = { + { .name = "cpu", .compatible = "sun4v", }, + {}, +}; +MODULE_DEVICE_TABLE(of, crypto_opcode_match); diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S new file mode 100644 index 000000000000..30b6e90b28b2 --- /dev/null +++ b/arch/sparc/crypto/des_asm.S @@ -0,0 +1,418 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + + .align 32 +ENTRY(des_sparc64_key_expand) + /* %o0=input_key, %o1=output_key */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + DES_KEXPAND(0, 0, 0) + DES_KEXPAND(0, 1, 2) + DES_KEXPAND(2, 3, 6) + DES_KEXPAND(2, 2, 4) + DES_KEXPAND(6, 3, 10) + DES_KEXPAND(6, 2, 8) + DES_KEXPAND(10, 3, 14) + DES_KEXPAND(10, 2, 12) + DES_KEXPAND(14, 1, 16) + DES_KEXPAND(16, 3, 20) + DES_KEXPAND(16, 2, 18) + DES_KEXPAND(20, 3, 24) + DES_KEXPAND(20, 2, 22) + DES_KEXPAND(24, 3, 28) + DES_KEXPAND(24, 2, 26) + DES_KEXPAND(28, 1, 30) + std %f0, [%o1 + 0x00] + std %f2, [%o1 + 0x08] + std %f4, [%o1 + 0x10] + std %f6, [%o1 + 0x18] + std %f8, [%o1 + 0x20] + std %f10, [%o1 + 0x28] + std %f12, [%o1 + 0x30] + std %f14, [%o1 + 0x38] + std %f16, [%o1 + 0x40] + std %f18, [%o1 + 0x48] + std %f20, [%o1 + 0x50] + std %f22, [%o1 + 0x58] + std %f24, [%o1 + 0x60] + std %f26, [%o1 + 0x68] + std %f28, [%o1 + 0x70] + std %f30, [%o1 + 0x78] + retl + VISExitHalf +ENDPROC(des_sparc64_key_expand) + + .align 32 +ENTRY(des_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ldd [%o1 + 0x00], %f32 + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o2 + 0x00] + retl + VISExit +ENDPROC(des_sparc64_crypt) + + .align 32 +ENTRY(des_sparc64_load_keys) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + retl + ldd [%o0 + 0x78], %f30 +ENDPROC(des_sparc64_load_keys) + + .align 32 +ENTRY(des_sparc64_ecb_crypt) + /* %o0=input, %o1=output, %o2=len */ +1: ldd [%o0 + 0x00], %f32 + add %o0, 0x08, %o0 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o1 + 0x00] + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + nop +ENDPROC(des_sparc64_ecb_crypt) + + .align 32 +ENTRY(des_sparc64_cbc_encrypt) + /* %o0=input, %o1=output, %o2=len, %o3=IV */ + ldd [%o3 + 0x00], %f32 +1: ldd [%o0 + 0x00], %f34 + fxor %f32, %f34, %f32 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o1 + 0x00] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + std %f32, [%o3 + 0x00] +ENDPROC(des_sparc64_cbc_encrypt) + + .align 32 +ENTRY(des_sparc64_cbc_decrypt) + /* %o0=input, %o1=output, %o2=len, %o3=IV */ + ldd [%o3 + 0x00], %f34 +1: ldd [%o0 + 0x00], %f36 + DES_IP(36, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + fxor %f32, %f34, %f32 + fsrc2 %f36, %f34 + std %f32, [%o1 + 0x00] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + std %f36, [%o3 + 0x00] +ENDPROC(des_sparc64_cbc_decrypt) + + .align 32 +ENTRY(des3_ede_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ldd [%o1 + 0x00], %f32 + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + ldd [%o0 + 0x80], %f0 + ldd [%o0 + 0x88], %f2 + DES_ROUND(4, 6, 32, 32) + ldd [%o0 + 0x90], %f4 + ldd [%o0 + 0x98], %f6 + DES_ROUND(8, 10, 32, 32) + ldd [%o0 + 0xa0], %f8 + ldd [%o0 + 0xa8], %f10 + DES_ROUND(12, 14, 32, 32) + ldd [%o0 + 0xb0], %f12 + ldd [%o0 + 0xb8], %f14 + DES_ROUND(16, 18, 32, 32) + ldd [%o0 + 0xc0], %f16 + ldd [%o0 + 0xc8], %f18 + DES_ROUND(20, 22, 32, 32) + ldd [%o0 + 0xd0], %f20 + ldd [%o0 + 0xd8], %f22 + DES_ROUND(24, 26, 32, 32) + ldd [%o0 + 0xe0], %f24 + ldd [%o0 + 0xe8], %f26 + DES_ROUND(28, 30, 32, 32) + ldd [%o0 + 0xf0], %f28 + ldd [%o0 + 0xf8], %f30 + DES_IIP(32, 32) + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + ldd [%o0 + 0x100], %f0 + ldd [%o0 + 0x108], %f2 + DES_ROUND(4, 6, 32, 32) + ldd [%o0 + 0x110], %f4 + ldd [%o0 + 0x118], %f6 + DES_ROUND(8, 10, 32, 32) + ldd [%o0 + 0x120], %f8 + ldd [%o0 + 0x128], %f10 + DES_ROUND(12, 14, 32, 32) + ldd [%o0 + 0x130], %f12 + ldd [%o0 + 0x138], %f14 + DES_ROUND(16, 18, 32, 32) + ldd [%o0 + 0x140], %f16 + ldd [%o0 + 0x148], %f18 + DES_ROUND(20, 22, 32, 32) + ldd [%o0 + 0x150], %f20 + ldd [%o0 + 0x158], %f22 + DES_ROUND(24, 26, 32, 32) + ldd [%o0 + 0x160], %f24 + ldd [%o0 + 0x168], %f26 + DES_ROUND(28, 30, 32, 32) + ldd [%o0 + 0x170], %f28 + ldd [%o0 + 0x178], %f30 + DES_IIP(32, 32) + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + + std %f32, [%o2 + 0x00] + retl + VISExit +ENDPROC(des3_ede_sparc64_crypt) + + .align 32 +ENTRY(des3_ede_sparc64_load_keys) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + ldd [%o0 + 0x80], %f32 + ldd [%o0 + 0x88], %f34 + ldd [%o0 + 0x90], %f36 + ldd [%o0 + 0x98], %f38 + ldd [%o0 + 0xa0], %f40 + ldd [%o0 + 0xa8], %f42 + ldd [%o0 + 0xb0], %f44 + ldd [%o0 + 0xb8], %f46 + ldd [%o0 + 0xc0], %f48 + ldd [%o0 + 0xc8], %f50 + ldd [%o0 + 0xd0], %f52 + ldd [%o0 + 0xd8], %f54 + ldd [%o0 + 0xe0], %f56 + retl + ldd [%o0 + 0xe8], %f58 +ENDPROC(des3_ede_sparc64_load_keys) + +#define DES3_LOOP_BODY(X) \ + DES_IP(X, X) \ + DES_ROUND(0, 2, X, X) \ + DES_ROUND(4, 6, X, X) \ + DES_ROUND(8, 10, X, X) \ + DES_ROUND(12, 14, X, X) \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0xf0], %f16; \ + ldd [%o0 + 0xf8], %f18; \ + DES_ROUND(20, 22, X, X) \ + ldd [%o0 + 0x100], %f20; \ + ldd [%o0 + 0x108], %f22; \ + DES_ROUND(24, 26, X, X) \ + ldd [%o0 + 0x110], %f24; \ + ldd [%o0 + 0x118], %f26; \ + DES_ROUND(28, 30, X, X) \ + ldd [%o0 + 0x120], %f28; \ + ldd [%o0 + 0x128], %f30; \ + DES_IIP(X, X) \ + DES_IP(X, X) \ + DES_ROUND(32, 34, X, X) \ + ldd [%o0 + 0x130], %f0; \ + ldd [%o0 + 0x138], %f2; \ + DES_ROUND(36, 38, X, X) \ + ldd [%o0 + 0x140], %f4; \ + ldd [%o0 + 0x148], %f6; \ + DES_ROUND(40, 42, X, X) \ + ldd [%o0 + 0x150], %f8; \ + ldd [%o0 + 0x158], %f10; \ + DES_ROUND(44, 46, X, X) \ + ldd [%o0 + 0x160], %f12; \ + ldd [%o0 + 0x168], %f14; \ + DES_ROUND(48, 50, X, X) \ + DES_ROUND(52, 54, X, X) \ + DES_ROUND(56, 58, X, X) \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0x170], %f16; \ + ldd [%o0 + 0x178], %f18; \ + DES_IIP(X, X) \ + DES_IP(X, X) \ + DES_ROUND(20, 22, X, X) \ + ldd [%o0 + 0x50], %f20; \ + ldd [%o0 + 0x58], %f22; \ + DES_ROUND(24, 26, X, X) \ + ldd [%o0 + 0x60], %f24; \ + ldd [%o0 + 0x68], %f26; \ + DES_ROUND(28, 30, X, X) \ + ldd [%o0 + 0x70], %f28; \ + ldd [%o0 + 0x78], %f30; \ + DES_ROUND(0, 2, X, X) \ + ldd [%o0 + 0x00], %f0; \ + ldd [%o0 + 0x08], %f2; \ + DES_ROUND(4, 6, X, X) \ + ldd [%o0 + 0x10], %f4; \ + ldd [%o0 + 0x18], %f6; \ + DES_ROUND(8, 10, X, X) \ + ldd [%o0 + 0x20], %f8; \ + ldd [%o0 + 0x28], %f10; \ + DES_ROUND(12, 14, X, X) \ + ldd [%o0 + 0x30], %f12; \ + ldd [%o0 + 0x38], %f14; \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0x40], %f16; \ + ldd [%o0 + 0x48], %f18; \ + DES_IIP(X, X) + + .align 32 +ENTRY(des3_ede_sparc64_ecb_crypt) + /* %o0=key, %o1=input, %o2=output, %o3=len */ +1: ldd [%o1 + 0x00], %f60 + DES3_LOOP_BODY(60) + std %f60, [%o2 + 0x00] + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + nop +ENDPROC(des3_ede_sparc64_ecb_crypt) + + .align 32 +ENTRY(des3_ede_sparc64_cbc_encrypt) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f60 +1: ldd [%o1 + 0x00], %f62 + fxor %f60, %f62, %f60 + DES3_LOOP_BODY(60) + std %f60, [%o2 + 0x00] + add %o1, 0x08, %o1 + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + std %f60, [%o4 + 0x00] +ENDPROC(des3_ede_sparc64_cbc_encrypt) + + .align 32 +ENTRY(des3_ede_sparc64_cbc_decrypt) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f62 +1: ldx [%o1 + 0x00], %g1 + MOVXTOD_G1_F60 + DES3_LOOP_BODY(60) + fxor %f62, %f60, %f60 + MOVXTOD_G1_F62 + std %f60, [%o2 + 0x00] + add %o1, 0x08, %o1 + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + stx %g1, [%o4 + 0x00] +ENDPROC(des3_ede_sparc64_cbc_decrypt) diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c new file mode 100644 index 000000000000..c4940c2d3073 --- /dev/null +++ b/arch/sparc/crypto/des_glue.c @@ -0,0 +1,529 @@ +/* Glue code for DES encryption optimized for sparc64 crypto opcodes. + * + * Copyright (C) 2012 David S. Miller <davem@davemloft.net> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> +#include <crypto/des.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +struct des_sparc64_ctx { + u64 encrypt_expkey[DES_EXPKEY_WORDS / 2]; + u64 decrypt_expkey[DES_EXPKEY_WORDS / 2]; +}; + +struct des3_ede_sparc64_ctx { + u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2]; + u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2]; +}; + +static void encrypt_to_decrypt(u64 *d, const u64 *e) +{ + const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1; + int i; + + for (i = 0; i < DES_EXPKEY_WORDS / 2; i++) + *d++ = *s--; +} + +extern void des_sparc64_key_expand(const u32 *input_key, u64 *key); + +static int des_set_key(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + u32 tmp[DES_EXPKEY_WORDS]; + int ret; + + /* Even though we have special instructions for key expansion, + * we call des_ekey() so that we don't have to write our own + * weak key detection code. + */ + ret = des_ekey(tmp, key); + if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]); + encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]); + + return 0; +} + +extern void des_sparc64_crypt(const u64 *key, const u64 *input, + u64 *output); + +static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->encrypt_expkey; + + des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->decrypt_expkey; + + des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +extern void des_sparc64_load_keys(const u64 *key); + +extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output, + unsigned int len); + +#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1)) + +static int __ecb_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + des_sparc64_load_keys(&ctx->encrypt_expkey[0]); + else + des_sparc64_load_keys(&ctx->decrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, false); +} + +extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output, + unsigned int len, u64 *iv); + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + des_sparc64_load_keys(&ctx->encrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, + unsigned int len, u64 *iv); + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + des_sparc64_load_keys(&ctx->decrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); + const u32 *K = (const u32 *)key; + u32 *flags = &tfm->crt_flags; + u64 k1[DES_EXPKEY_WORDS / 2]; + u64 k2[DES_EXPKEY_WORDS / 2]; + u64 k3[DES_EXPKEY_WORDS / 2]; + + if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || + !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + des_sparc64_key_expand((const u32 *)key, k1); + key += DES_KEY_SIZE; + des_sparc64_key_expand((const u32 *)key, k2); + key += DES_KEY_SIZE; + des_sparc64_key_expand((const u32 *)key, k3); + + memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1)); + encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]); + memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2], + &k3[0], sizeof(k3)); + + encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]); + memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2], + &k2[0], sizeof(k2)); + encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2], + &k1[0]); + + return 0; +} + +extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, + u64 *output); + +static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->encrypt_expkey; + + des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->decrypt_expkey; + + des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +extern void des3_ede_sparc64_load_keys(const u64 *key); + +extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len); + +static int __ecb3_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + K = &ctx->encrypt_expkey[0]; + else + K = &ctx->decrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_ecb_crypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb3_crypt(desc, dst, src, nbytes, true); +} + +static int ecb3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb3_crypt(desc, dst, src, nbytes, false); +} + +extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +static int cbc3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + K = &ctx->encrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_cbc_encrypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len, + (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +static int cbc3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + K = &ctx->decrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_cbc_decrypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len, + (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "des", + .cra_driver_name = "des-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES_KEY_SIZE, + .cia_max_keysize = DES_KEY_SIZE, + .cia_setkey = des_set_key, + .cia_encrypt = des_encrypt, + .cia_decrypt = des_decrypt + } + } +}, { + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_EDE_KEY_SIZE, + .cia_max_keysize = DES3_EDE_KEY_SIZE, + .cia_setkey = des3_ede_set_key, + .cia_encrypt = des3_ede_encrypt, + .cia_decrypt = des3_ede_decrypt + } + } +}, { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3_ede-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_set_key, + .encrypt = ecb3_encrypt, + .decrypt = ecb3_decrypt, + }, + }, +}, { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3_ede-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_set_key, + .encrypt = cbc3_encrypt, + .decrypt = cbc3_decrypt, + }, + }, +} }; + +static bool __init sparc64_has_des_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_DES)) + return false; + + return true; +} + +static int __init des_sparc64_mod_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs); i++) + INIT_LIST_HEAD(&algs[i].cra_list); + + if (sparc64_has_des_opcode()) { + pr_info("Using sparc64 des opcodes optimized DES implementation\n"); + return crypto_register_algs(algs, ARRAY_SIZE(algs)); + } + pr_info("sparc64 des opcodes not available.\n"); + return -ENODEV; +} + +static void __exit des_sparc64_mod_fini(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(des_sparc64_mod_init); +module_exit(des_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); + +MODULE_ALIAS("des"); diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S new file mode 100644 index 000000000000..3150404e602e --- /dev/null +++ b/arch/sparc/crypto/md5_asm.S @@ -0,0 +1,70 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(md5_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x08], %f2 + bne,pn %xcc, 10f + ld [%o0 + 0x0c], %f3 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + MD5 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + MD5 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(md5_sparc64_transform) diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c new file mode 100644 index 000000000000..603d723038ce --- /dev/null +++ b/arch/sparc/crypto/md5_glue.c @@ -0,0 +1,188 @@ +/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c + * and crypto/md5.c which are: + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/md5.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void md5_sparc64_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int md5_sparc64_init(struct shash_desc *desc) +{ + struct md5_state *mctx = shash_desc_ctx(desc); + + mctx->hash[0] = cpu_to_le32(0x67452301); + mctx->hash[1] = cpu_to_le32(0xefcdab89); + mctx->hash[2] = cpu_to_le32(0x98badcfe); + mctx->hash[3] = cpu_to_le32(0x10325476); + mctx->byte_count = 0; + + return 0; +} + +static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->byte_count += len; + if (partial) { + done = MD5_HMAC_BLOCK_SIZE - partial; + memcpy((u8 *)sctx->block + partial, data, done); + md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1); + } + if (len - done >= MD5_HMAC_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE; + + md5_sparc64_transform(sctx->hash, data + done, rounds); + done += rounds * MD5_HMAC_BLOCK_SIZE; + } + + memcpy(sctx->block, data + done, len - done); +} + +static int md5_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < MD5_HMAC_BLOCK_SIZE) { + sctx->byte_count += len; + memcpy((u8 *)sctx->block + partial, data, len); + } else + __md5_sparc64_update(sctx, data, len, partial); + + return 0; +} + +/* Add padding and return the message digest. */ +static int md5_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + u32 *dst = (u32 *)out; + __le64 bits; + static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_le64(sctx->byte_count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __md5_sparc64_update() */ + if (padlen <= 56) { + sctx->byte_count += padlen; + memcpy((u8 *)sctx->block + index, padding, padlen); + } else { + __md5_sparc64_update(sctx, padding, padlen, index); + } + __md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < MD5_HASH_WORDS; i++) + dst[i] = sctx->hash[i]; + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int md5_sparc64_export(struct shash_desc *desc, void *out) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int md5_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = MD5_DIGEST_SIZE, + .init = md5_sparc64_init, + .update = md5_sparc64_update, + .final = md5_sparc64_final, + .export = md5_sparc64_export, + .import = md5_sparc64_import, + .descsize = sizeof(struct md5_state), + .statesize = sizeof(struct md5_state), + .base = { + .cra_name = "md5", + .cra_driver_name= "md5-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_md5_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_MD5)) + return false; + + return true; +} + +static int __init md5_sparc64_mod_init(void) +{ + if (sparc64_has_md5_opcode()) { + pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 md5 opcode not available.\n"); + return -ENODEV; +} + +static void __exit md5_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(md5_sparc64_mod_init); +module_exit(md5_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); + +MODULE_ALIAS("md5"); diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h new file mode 100644 index 000000000000..19cbaea6976f --- /dev/null +++ b/arch/sparc/crypto/opcodes.h @@ -0,0 +1,99 @@ +#ifndef _OPCODES_H +#define _OPCODES_H + +#define SPARC_CR_OPCODE_PRIORITY 300 + +#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) + +#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) + +#define RS1(x) (FPD_ENCODE(x) << 14) +#define RS2(x) (FPD_ENCODE(x) << 0) +#define RS3(x) (FPD_ENCODE(x) << 9) +#define RD(x) (FPD_ENCODE(x) << 25) +#define IMM5_0(x) ((x) << 0) +#define IMM5_9(x) ((x) << 9) + +#define CRC32C(a,b,c) \ + .word (F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c)); + +#define MD5 \ + .word 0x81b02800; +#define SHA1 \ + .word 0x81b02820; +#define SHA256 \ + .word 0x81b02840; +#define SHA512 \ + .word 0x81b02860; + +#define AES_EROUND01(a,b,c,d) \ + .word (F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23(a,b,c,d) \ + .word (F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01(a,b,c,d) \ + .word (F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23(a,b,c,d) \ + .word (F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND01_L(a,b,c,d) \ + .word (F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23_L(a,b,c,d) \ + .word (F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01_L(a,b,c,d) \ + .word (F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23_L(a,b,c,d) \ + .word (F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_KEXPAND1(a,b,c,d) \ + .word (F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d)); +#define AES_KEXPAND0(a,b,c) \ + .word (F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c)); +#define AES_KEXPAND2(a,b,c) \ + .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c)); + +#define DES_IP(a,b) \ + .word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b)); +#define DES_IIP(a,b) \ + .word (F3F(2, 0x36, 0x135)|RS1(a)|RD(b)); +#define DES_KEXPAND(a,b,c) \ + .word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c)); +#define DES_ROUND(a,b,c,d) \ + .word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d)); + +#define CAMELLIA_F(a,b,c,d) \ + .word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define CAMELLIA_FL(a,b,c) \ + .word (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c)); +#define CAMELLIA_FLI(a,b,c) \ + .word (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c)); + +#define MOVDTOX_F0_O4 \ + .word 0x99b02200 +#define MOVDTOX_F2_O5 \ + .word 0x9bb02202 +#define MOVXTOD_G1_F60 \ + .word 0xbbb02301 +#define MOVXTOD_G1_F62 \ + .word 0xbfb02301 +#define MOVXTOD_G3_F4 \ + .word 0x89b02303; +#define MOVXTOD_G7_F6 \ + .word 0x8db02307; +#define MOVXTOD_G3_F0 \ + .word 0x81b02303; +#define MOVXTOD_G7_F2 \ + .word 0x85b02307; +#define MOVXTOD_O0_F0 \ + .word 0x81b02308; +#define MOVXTOD_O5_F0 \ + .word 0x81b0230d; +#define MOVXTOD_O5_F2 \ + .word 0x85b0230d; +#define MOVXTOD_O5_F4 \ + .word 0x89b0230d; +#define MOVXTOD_O5_F6 \ + .word 0x8db0230d; +#define MOVXTOD_G3_F60 \ + .word 0xbbb02303; +#define MOVXTOD_G7_F62 \ + .word 0xbfb02307; + +#endif /* _OPCODES_H */ diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S new file mode 100644 index 000000000000..219d10c5ae0e --- /dev/null +++ b/arch/sparc/crypto/sha1_asm.S @@ -0,0 +1,72 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha1_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x0c], %f3 + bne,pn %xcc, 10f + ld [%o0 + 0x10], %f4 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + SHA1 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + st %f4, [%o0 + 0x10] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + SHA1 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(sha1_sparc64_transform) diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c new file mode 100644 index 000000000000..2bbb20bee9f1 --- /dev/null +++ b/arch/sparc/crypto/sha1_glue.c @@ -0,0 +1,183 @@ +/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int sha1_sparc64_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + + return 0; +} + +static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->count += len; + if (partial) { + done = SHA1_BLOCK_SIZE - partial; + memcpy(sctx->buffer + partial, data, done); + sha1_sparc64_transform(sctx->state, sctx->buffer, 1); + } + if (len - done >= SHA1_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + + sha1_sparc64_transform(sctx->state, data + done, rounds); + done += rounds * SHA1_BLOCK_SIZE; + } + + memcpy(sctx->buffer, data + done, len - done); +} + +static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA1_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buffer + partial, data, len); + } else + __sha1_sparc64_update(sctx, data, len, partial); + + return 0; +} + +/* Add padding and return the message digest. */ +static int sha1_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA1_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __sha1_sparc64_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buffer + index, padding, padlen); + } else { + __sha1_sparc64_update(sctx, padding, padlen, index); + } + __sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha1_sparc64_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_sparc64_init, + .update = sha1_sparc64_update, + .final = sha1_sparc64_final, + .export = sha1_sparc64_export, + .import = sha1_sparc64_import, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha1_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA1)) + return false; + + return true; +} + +static int __init sha1_sparc64_mod_init(void) +{ + if (sparc64_has_sha1_opcode()) { + pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 sha1 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha1_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_sparc64_mod_init); +module_exit(sha1_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated"); + +MODULE_ALIAS("sha1"); diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S new file mode 100644 index 000000000000..b5f3d5826eb4 --- /dev/null +++ b/arch/sparc/crypto/sha256_asm.S @@ -0,0 +1,78 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha256_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + ld [%o0 + 0x0c], %f3 + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x18], %f6 + bne,pn %xcc, 10f + ld [%o0 + 0x1c], %f7 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + SHA256 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + st %f4, [%o0 + 0x10] + st %f5, [%o0 + 0x14] + st %f6, [%o0 + 0x18] + st %f7, [%o0 + 0x1c] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + SHA256 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(sha256_sparc64_transform) diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c new file mode 100644 index 000000000000..591e656bd891 --- /dev/null +++ b/arch/sparc/crypto/sha256_glue.c @@ -0,0 +1,241 @@ +/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon crypto/sha256_generic.c + * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int sha224_sparc64_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +static int sha256_sparc64_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->count += len; + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_sparc64_transform(sctx->state, sctx->buf, 1); + } + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_sparc64_transform(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); +} + +static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + } else + __sha256_sparc64_update(sctx, data, len, partial); + + return 0; +} + +static int sha256_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __sha256_sparc64_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_sparc64_update(sctx, padding, padlen, index); + } + __sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_sparc64_final(desc, D); + + memcpy(hash, D, SHA224_DIGEST_SIZE); + memset(D, 0, SHA256_DIGEST_SIZE); + + return 0; +} + +static int sha256_sparc64_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + +static int sha256_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + +static struct shash_alg sha256 = { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_sparc64_init, + .update = sha256_sparc64_update, + .final = sha256_sparc64_final, + .export = sha256_sparc64_export, + .import = sha256_sparc64_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name= "sha256-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha224 = { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_sparc64_init, + .update = sha256_sparc64_update, + .final = sha224_sparc64_final, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name= "sha224-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha256_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA256)) + return false; + + return true; +} + +static int __init sha256_sparc64_mod_init(void) +{ + if (sparc64_has_sha256_opcode()) { + int ret = crypto_register_shash(&sha224); + if (ret < 0) + return ret; + + ret = crypto_register_shash(&sha256); + if (ret < 0) { + crypto_unregister_shash(&sha224); + return ret; + } + + pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n"); + return 0; + } + pr_info("sparc64 sha256 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha256_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&sha224); + crypto_unregister_shash(&sha256); +} + +module_init(sha256_sparc64_mod_init); +module_exit(sha256_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated"); + +MODULE_ALIAS("sha224"); +MODULE_ALIAS("sha256"); diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S new file mode 100644 index 000000000000..54bfba713c0e --- /dev/null +++ b/arch/sparc/crypto/sha512_asm.S @@ -0,0 +1,102 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha512_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + andcc %o1, 0x7, %g0 + ldd [%o0 + 0x30], %f12 + bne,pn %xcc, 10f + ldd [%o0 + 0x38], %f14 + +1: + ldd [%o1 + 0x00], %f16 + ldd [%o1 + 0x08], %f18 + ldd [%o1 + 0x10], %f20 + ldd [%o1 + 0x18], %f22 + ldd [%o1 + 0x20], %f24 + ldd [%o1 + 0x28], %f26 + ldd [%o1 + 0x30], %f28 + ldd [%o1 + 0x38], %f30 + ldd [%o1 + 0x40], %f32 + ldd [%o1 + 0x48], %f34 + ldd [%o1 + 0x50], %f36 + ldd [%o1 + 0x58], %f38 + ldd [%o1 + 0x60], %f40 + ldd [%o1 + 0x68], %f42 + ldd [%o1 + 0x70], %f44 + ldd [%o1 + 0x78], %f46 + + SHA512 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x80, %o1 + +5: + std %f0, [%o0 + 0x00] + std %f2, [%o0 + 0x08] + std %f4, [%o0 + 0x10] + std %f6, [%o0 + 0x18] + std %f8, [%o0 + 0x20] + std %f10, [%o0 + 0x28] + std %f12, [%o0 + 0x30] + std %f14, [%o0 + 0x38] + retl + VISExit +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f18 +1: + ldd [%o1 + 0x08], %f20 + ldd [%o1 + 0x10], %f22 + ldd [%o1 + 0x18], %f24 + ldd [%o1 + 0x20], %f26 + ldd [%o1 + 0x28], %f28 + ldd [%o1 + 0x30], %f30 + ldd [%o1 + 0x38], %f32 + ldd [%o1 + 0x40], %f34 + ldd [%o1 + 0x48], %f36 + ldd [%o1 + 0x50], %f38 + ldd [%o1 + 0x58], %f40 + ldd [%o1 + 0x60], %f42 + ldd [%o1 + 0x68], %f44 + ldd [%o1 + 0x70], %f46 + ldd [%o1 + 0x78], %f48 + ldd [%o1 + 0x80], %f50 + + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + faligndata %f26, %f28, %f24 + faligndata %f28, %f30, %f26 + faligndata %f30, %f32, %f28 + faligndata %f32, %f34, %f30 + faligndata %f34, %f36, %f32 + faligndata %f36, %f38, %f34 + faligndata %f38, %f40, %f36 + faligndata %f40, %f42, %f38 + faligndata %f42, %f44, %f40 + faligndata %f44, %f46, %f42 + faligndata %f46, %f48, %f44 + faligndata %f48, %f50, %f46 + + SHA512 + + subcc %o2, 1, %o2 + fsrc2 %f50, %f18 + bne,pt %xcc, 1b + add %o1, 0x80, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(sha512_sparc64_transform) diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c new file mode 100644 index 000000000000..486f0a2b7001 --- /dev/null +++ b/arch/sparc/crypto/sha512_glue.c @@ -0,0 +1,226 @@ +/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon crypto/sha512_generic.c + * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data, + unsigned int rounds); + +static int sha512_sparc64_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA512_H0; + sctx->state[1] = SHA512_H1; + sctx->state[2] = SHA512_H2; + sctx->state[3] = SHA512_H3; + sctx->state[4] = SHA512_H4; + sctx->state[5] = SHA512_H5; + sctx->state[6] = SHA512_H6; + sctx->state[7] = SHA512_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha384_sparc64_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA384_H0; + sctx->state[1] = SHA384_H1; + sctx->state[2] = SHA384_H2; + sctx->state[3] = SHA384_H3; + sctx->state[4] = SHA384_H4; + sctx->state[5] = SHA384_H5; + sctx->state[6] = SHA384_H6; + sctx->state[7] = SHA384_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + if ((sctx->count[0] += len) < len) + sctx->count[1]++; + if (partial) { + done = SHA512_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha512_sparc64_transform(sctx->state, sctx->buf, 1); + } + if (len - done >= SHA512_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; + + sha512_sparc64_transform(sctx->state, data + done, rounds); + done += rounds * SHA512_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); +} + +static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA512_BLOCK_SIZE) { + if ((sctx->count[0] += len) < len) + sctx->count[1]++; + memcpy(sctx->buf + partial, data, len); + } else + __sha512_sparc64_update(sctx, data, len, partial); + + return 0; +} + +static int sha512_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be64 *dst = (__be64 *)out; + __be64 bits[2]; + static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; + + /* Save number of bits */ + bits[1] = cpu_to_be64(sctx->count[0] << 3); + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); + + /* Pad out to 112 mod 128 and append length */ + index = sctx->count[0] % SHA512_BLOCK_SIZE; + padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index); + + /* We need to fill a whole block for __sha512_sparc64_update() */ + if (padlen <= 112) { + if ((sctx->count[0] += padlen) < padlen) + sctx->count[1]++; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha512_sparc64_update(sctx, padding, padlen, index); + } + __sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be64(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[64]; + + sha512_sparc64_final(desc, D); + + memcpy(hash, D, 48); + memset(D, 0, 64); + + return 0; +} + +static struct shash_alg sha512 = { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_sparc64_init, + .update = sha512_sparc64_update, + .final = sha512_sparc64_final, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name= "sha512-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha384 = { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_sparc64_init, + .update = sha512_sparc64_update, + .final = sha384_sparc64_final, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name= "sha384-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha512_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA512)) + return false; + + return true; +} + +static int __init sha512_sparc64_mod_init(void) +{ + if (sparc64_has_sha512_opcode()) { + int ret = crypto_register_shash(&sha384); + if (ret < 0) + return ret; + + ret = crypto_register_shash(&sha512); + if (ret < 0) { + crypto_unregister_shash(&sha384); + return ret; + } + + pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n"); + return 0; + } + pr_info("sparc64 sha512 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha512_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&sha384); + crypto_unregister_shash(&sha512); +} + +module_init(sha512_sparc64_mod_init); +module_exit(sha512_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); + +MODULE_ALIAS("sha384"); +MODULE_ALIAS("sha512"); diff --git a/arch/sparc/include/asm/asi.h b/arch/sparc/include/asm/asi.h index 61ebe7411ceb..cc0006dc5d4a 100644 --- a/arch/sparc/include/asm/asi.h +++ b/arch/sparc/include/asm/asi.h @@ -141,7 +141,8 @@ /* SpitFire and later extended ASIs. The "(III)" marker designates * UltraSparc-III and later specific ASIs. The "(CMT)" marker designates * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific - * ASIs, "(4V)" designates SUN4V specific ASIs. + * ASIs, "(4V)" designates SUN4V specific ASIs. "(NG4)" designates SPARC-T4 + * and later ASIs. */ #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */ #define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */ @@ -243,6 +244,7 @@ #define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/ #define ASI_INTR_R 0x7f /* IRQ vector dispatch read */ #define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */ +#define ASI_PIC 0xb0 /* (NG4) PIC registers */ #define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */ #define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */ #define ASI_PST16_P 0xc2 /* Primary, 4 16-bit, partial */ diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 7df8b7f544d4..370ca1e71ffb 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -86,6 +86,15 @@ #define AV_SPARC_IMA 0x00400000 /* integer multiply-add */ #define AV_SPARC_ASI_CACHE_SPARING \ 0x00800000 /* cache sparing ASIs available */ +#define AV_SPARC_PAUSE 0x01000000 /* PAUSE available */ +#define AV_SPARC_CBCOND 0x02000000 /* CBCOND insns available */ + +/* Solaris decided to enumerate every single crypto instruction type + * in the AT_HWCAP bits. This is wasteful, since if crypto is present, + * you still need to look in the CFR register to see if the opcode is + * really available. So we simply advertise only "crypto" support. + */ +#define HWCAP_SPARC_CRYPTO 0x04000000 /* CRYPTO insns available */ #define CORE_DUMP_USE_REGSET diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 015a761eaa32..ca121f0fa3ec 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2934,6 +2934,16 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra, unsigned long len); #endif +#define HV_FAST_VT_GET_PERFREG 0x184 +#define HV_FAST_VT_SET_PERFREG 0x185 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_vt_get_perfreg(unsigned long reg_num, + unsigned long *reg_val); +extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, + unsigned long reg_val); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 @@ -2964,6 +2974,7 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra, #define HV_GRP_NIU 0x0204 #define HV_GRP_VF_CPU 0x0205 #define HV_GRP_KT_CPU 0x0209 +#define HV_GRP_VT_CPU 0x020c #define HV_GRP_DIAG 0x0300 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/mdesc.h b/arch/sparc/include/asm/mdesc.h index 9faa046713fb..139097f3a67b 100644 --- a/arch/sparc/include/asm/mdesc.h +++ b/arch/sparc/include/asm/mdesc.h @@ -73,6 +73,7 @@ extern void mdesc_register_notifier(struct mdesc_notifier_client *client); extern void mdesc_fill_in_cpu_data(cpumask_t *mask); extern void mdesc_populate_present_mask(cpumask_t *mask); +extern void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask); extern void sun4v_mdesc_init(void); diff --git a/arch/sparc/include/asm/oplib_32.h b/arch/sparc/include/asm/oplib_32.h index 27517879a6c2..c72f3045820c 100644 --- a/arch/sparc/include/asm/oplib_32.h +++ b/arch/sparc/include/asm/oplib_32.h @@ -94,7 +94,7 @@ extern int prom_getprev(void); extern void prom_console_write_buf(const char *buf, int len); /* Prom's internal routines, don't use in kernel/boot code. */ -extern void prom_printf(const char *fmt, ...); +extern __printf(1, 2) void prom_printf(const char *fmt, ...); extern void prom_write(const char *buf, unsigned int len); /* Multiprocessor operations... */ diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h index 97a90475c314..a12dbe3b7762 100644 --- a/arch/sparc/include/asm/oplib_64.h +++ b/arch/sparc/include/asm/oplib_64.h @@ -98,7 +98,7 @@ extern unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size); extern void prom_console_write_buf(const char *buf, int len); /* Prom's internal routines, don't use in kernel/boot code. */ -extern void prom_printf(const char *fmt, ...); +extern __printf(1, 2) void prom_printf(const char *fmt, ...); extern void prom_write(const char *buf, unsigned int len); /* Multiprocessor operations... */ diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h index 288d7beba051..942bb17f60cd 100644 --- a/arch/sparc/include/asm/pcr.h +++ b/arch/sparc/include/asm/pcr.h @@ -2,8 +2,13 @@ #define __PCR_H struct pcr_ops { - u64 (*read)(void); - void (*write)(u64); + u64 (*read_pcr)(unsigned long); + void (*write_pcr)(unsigned long, u64); + u64 (*read_pic)(unsigned long); + void (*write_pic)(unsigned long, u64); + u64 (*nmi_picl_value)(unsigned int nmi_hz); + u64 pcr_nmi_enable; + u64 pcr_nmi_disable; }; extern const struct pcr_ops *pcr_ops; @@ -27,21 +32,18 @@ extern void schedule_deferred_pcr_work(void); #define PCR_N2_SL1_SHIFT 27 #define PCR_N2_OV1 0x80000000 -extern unsigned int picl_shift; - -/* In order to commonize as much of the implementation as - * possible, we use PICH as our counter. Mostly this is - * to accommodate Niagara-1 which can only count insn cycles - * in PICH. - */ -static inline u64 picl_value(unsigned int nmi_hz) -{ - u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift); - - return ((u64)((0 - delta) & 0xffffffff)) << 32; -} - -extern u64 pcr_enable; +#define PCR_N4_OV 0x00000001 /* PIC overflow */ +#define PCR_N4_TOE 0x00000002 /* Trap On Event */ +#define PCR_N4_UTRACE 0x00000004 /* Trace user events */ +#define PCR_N4_STRACE 0x00000008 /* Trace supervisor events */ +#define PCR_N4_HTRACE 0x00000010 /* Trace hypervisor events */ +#define PCR_N4_MASK 0x000007e0 /* Event mask */ +#define PCR_N4_MASK_SHIFT 5 +#define PCR_N4_SL 0x0000f800 /* Event Select */ +#define PCR_N4_SL_SHIFT 11 +#define PCR_N4_PICNPT 0x00010000 /* PIC non-privileged trap */ +#define PCR_N4_PICNHT 0x00020000 /* PIC non-hypervisor trap */ +#define PCR_N4_NTC 0x00040000 /* Next-To-Commit wrap */ extern int pcr_arch_init(void); diff --git a/arch/sparc/include/asm/perfctr.h b/arch/sparc/include/asm/perfctr.h index 3332d2cba6c1..214feefa577c 100644 --- a/arch/sparc/include/asm/perfctr.h +++ b/arch/sparc/include/asm/perfctr.h @@ -54,11 +54,6 @@ enum perfctr_opcode { PERFCTR_GETPCR }; -/* I don't want the kernel's namespace to be polluted with this - * stuff when this file is included. --DaveM - */ -#ifndef __KERNEL__ - #define PRIV 0x00000001 #define SYS 0x00000002 #define USR 0x00000004 @@ -168,29 +163,4 @@ struct vcounter_struct { unsigned long long vcnt1; }; -#else /* !(__KERNEL__) */ - -#ifndef CONFIG_SPARC32 - -/* Performance counter register access. */ -#define read_pcr(__p) __asm__ __volatile__("rd %%pcr, %0" : "=r" (__p)) -#define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p)) -#define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p)) - -/* Blackbird errata workaround. See commentary in - * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() - * for more information. - */ -#define write_pic(__p) \ - __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \ - " nop\n\t" \ - ".align 64\n" \ - "99:wr %0, 0x0, %%pic\n\t" \ - "rd %%pic, %%g0" : : "r" (__p)) -#define reset_pic() write_pic(0) - -#endif /* !CONFIG_SPARC32 */ - -#endif /* !(__KERNEL__) */ - #endif /* !(PERF_COUNTER_API) */ diff --git a/arch/sparc/include/asm/pstate.h b/arch/sparc/include/asm/pstate.h index a26a53777bb0..4b6b998afd99 100644 --- a/arch/sparc/include/asm/pstate.h +++ b/arch/sparc/include/asm/pstate.h @@ -88,4 +88,18 @@ #define VERS_MAXTL _AC(0x000000000000ff00,UL) /* Max Trap Level. */ #define VERS_MAXWIN _AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/ +/* Compatability Feature Register (%asr26), SPARC-T4 and later */ +#define CFR_AES _AC(0x0000000000000001,UL) /* Supports AES opcodes */ +#define CFR_DES _AC(0x0000000000000002,UL) /* Supports DES opcodes */ +#define CFR_KASUMI _AC(0x0000000000000004,UL) /* Supports KASUMI opcodes */ +#define CFR_CAMELLIA _AC(0x0000000000000008,UL) /* Supports CAMELLIA opcodes*/ +#define CFR_MD5 _AC(0x0000000000000010,UL) /* Supports MD5 opcodes */ +#define CFR_SHA1 _AC(0x0000000000000020,UL) /* Supports SHA1 opcodes */ +#define CFR_SHA256 _AC(0x0000000000000040,UL) /* Supports SHA256 opcodes */ +#define CFR_SHA512 _AC(0x0000000000000080,UL) /* Supports SHA512 opcodes */ +#define CFR_MPMUL _AC(0x0000000000000100,UL) /* Supports MPMUL opcodes */ +#define CFR_MONTMUL _AC(0x0000000000000200,UL) /* Supports MONTMUL opcodes */ +#define CFR_MONTSQR _AC(0x0000000000000400,UL) /* Supports MONTSQR opcodes */ +#define CFR_CRC32C _AC(0x0000000000000800,UL) /* Supports CRC32C opcodes */ + #endif /* !(_SPARC64_PSTATE_H) */ diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index fb2693464807..d9a677c51926 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -447,6 +447,7 @@ #else #define __ARCH_WANT_COMPAT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND +#define __ARCH_WANT_COMPAT_SYS_SENDFILE #endif /* diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index b42ddbf9651e..ee5dcced2499 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -559,10 +559,10 @@ niagara_tlb_fixup: be,pt %xcc, niagara2_patch nop cmp %g1, SUN4V_CHIP_NIAGARA4 - be,pt %xcc, niagara2_patch + be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_NIAGARA5 - be,pt %xcc, niagara2_patch + be,pt %xcc, niagara4_patch nop call generic_patch_copyops @@ -573,6 +573,16 @@ niagara_tlb_fixup: nop ba,a,pt %xcc, 80f +niagara4_patch: + call niagara4_patch_copyops + nop + call niagara_patch_bzero + nop + call niagara4_patch_pageops + nop + + ba,a,pt %xcc, 80f + niagara2_patch: call niagara2_patch_copyops nop diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 8593672838fd..c0a2de0fd624 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -45,6 +45,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_NIU, }, { .group = HV_GRP_VF_CPU, }, { .group = HV_GRP_KT_CPU, }, + { .group = HV_GRP_VT_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, }; @@ -193,7 +194,7 @@ void __init sun4v_hvapi_init(void) bad: prom_printf("HVAPI: Cannot register API group " - "%lx with major(%u) minor(%u)\n", + "%lx with major(%lu) minor(%lu)\n", group, major, minor); prom_halt(); } diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index 58d60de4d65b..f3ab509b76a8 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set) retl nop ENDPROC(sun4v_reboot_data_set) + +ENTRY(sun4v_vt_get_perfreg) + mov %o1, %o4 + mov HV_FAST_VT_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_vt_get_perfreg) + +ENTRY(sun4v_vt_set_perfreg) + mov HV_FAST_VT_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_vt_set_perfreg) diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 79f310364849..0746e5e32b37 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -188,31 +188,26 @@ valid_addr_bitmap_patch: be,pn %xcc, kvmap_dtlb_longpath 2: sethi %hi(kpte_linear_bitmap), %g2 - or %g2, %lo(kpte_linear_bitmap), %g2 /* Get the 256MB physical address index. */ sllx %g4, 21, %g5 - mov 1, %g7 + or %g2, %lo(kpte_linear_bitmap), %g2 srlx %g5, 21 + 28, %g5 + and %g5, (32 - 1), %g7 - /* Don't try this at home kids... this depends upon srlx - * only taking the low 6 bits of the shift count in %g5. - */ - sllx %g7, %g5, %g7 - - /* Divide by 64 to get the offset into the bitmask. */ - srlx %g5, 6, %g5 + /* Divide by 32 to get the offset into the bitmask. */ + srlx %g5, 5, %g5 + add %g7, %g7, %g7 sllx %g5, 3, %g5 - /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */ + /* kern_linear_pte_xor[(mask >> shift) & 3)] */ ldx [%g2 + %g5], %g2 - andcc %g2, %g7, %g0 + srlx %g2, %g7, %g7 sethi %hi(kern_linear_pte_xor), %g5 + and %g7, 3, %g7 or %g5, %lo(kern_linear_pte_xor), %g5 - bne,a,pt %xcc, 1f - add %g5, 8, %g5 - -1: ldx [%g5], %g2 + sllx %g7, 3, %g7 + ldx [%g5 + %g7], %g2 .globl kvmap_linear_patch kvmap_linear_patch: diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 6dc796280589..831c001604e8 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask) mdesc_iterate_over_cpus(record_one_cpu, NULL, mask); } +static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) +{ + const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL); + unsigned long *pgsz_mask = arg; + u64 val; + + val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | + HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); + if (pgsz_prop) + val = *pgsz_prop; + + if (!*pgsz_mask) + *pgsz_mask = val; + else + *pgsz_mask &= val; + return NULL; +} + +void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask) +{ + *pgsz_mask = 0; + mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask); +} + static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) { const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index eb1c1f010a47..6479256fd5a4 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -22,7 +22,6 @@ #include <asm/perf_event.h> #include <asm/ptrace.h> #include <asm/pcr.h> -#include <asm/perfctr.h> #include "kstack.h" @@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) touched = 1; else - pcr_ops->write(PCR_PIC_PRIV); + pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); sum = local_cpu_data().irq0_irqs; if (__get_cpu_var(nmi_touch)) { @@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) __this_cpu_write(alert_counter, 0); } if (__get_cpu_var(wd_enabled)) { - write_pic(picl_value(nmi_hz)); - pcr_ops->write(pcr_enable); + pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); + pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); } restore_hardirq_stack(orig_sp); @@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { - pcr_ops->write(PCR_PIC_PRIV); + pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __get_cpu_var(wd_enabled) = 0; atomic_dec(&nmi_active); } @@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused) __get_cpu_var(wd_enabled) = 1; atomic_inc(&nmi_active); - pcr_ops->write(PCR_PIC_PRIV); - write_pic(picl_value(nmi_hz)); + pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); + pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); - pcr_ops->write(pcr_enable); + pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); } static void nmi_adjust_hz_one(void *unused) @@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused) if (!__get_cpu_var(wd_enabled)) return; - pcr_ops->write(PCR_PIC_PRIV); - write_pic(picl_value(nmi_hz)); + pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); + pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); - pcr_ops->write(pcr_enable); + pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); } void nmi_adjust_hz(unsigned int new_hz) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 7661e84a05a0..051b69caeffd 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm) printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n", vdma[0], vdma[1]); return -EINVAL; - }; + } dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL); num_tsb_entries = vdma[1] / IO_PAGE_SIZE; diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 0ce0dd2332aa..269af58497aa 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -13,23 +13,14 @@ #include <asm/pil.h> #include <asm/pcr.h> #include <asm/nmi.h> +#include <asm/asi.h> #include <asm/spitfire.h> -#include <asm/perfctr.h> /* This code is shared between various users of the performance * counters. Users will be oprofile, pseudo-NMI watchdog, and the * perf_event support layer. */ -#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) -#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ - PCR_N2_TOE_OV1 | \ - (2 << PCR_N2_SL1_SHIFT) | \ - (0xff << PCR_N2_MASK1_SHIFT)) - -u64 pcr_enable; -unsigned int picl_shift; - /* Performance counter interrupts run unmasked at PIL level 15. * Therefore we can't do things like wakeups and other work * that expects IRQ disabling to be adhered to in locking etc. @@ -60,39 +51,144 @@ void arch_irq_work_raise(void) const struct pcr_ops *pcr_ops; EXPORT_SYMBOL_GPL(pcr_ops); -static u64 direct_pcr_read(void) +static u64 direct_pcr_read(unsigned long reg_num) { u64 val; - read_pcr(val); + WARN_ON_ONCE(reg_num != 0); + __asm__ __volatile__("rd %%pcr, %0" : "=r" (val)); return val; } -static void direct_pcr_write(u64 val) +static void direct_pcr_write(unsigned long reg_num, u64 val) +{ + WARN_ON_ONCE(reg_num != 0); + __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val)); +} + +static u64 direct_pic_read(unsigned long reg_num) { - write_pcr(val); + u64 val; + + WARN_ON_ONCE(reg_num != 0); + __asm__ __volatile__("rd %%pic, %0" : "=r" (val)); + return val; +} + +static void direct_pic_write(unsigned long reg_num, u64 val) +{ + WARN_ON_ONCE(reg_num != 0); + + /* Blackbird errata workaround. See commentary in + * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() + * for more information. + */ + __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" + " nop\n\t" + ".align 64\n" + "99:wr %0, 0x0, %%pic\n\t" + "rd %%pic, %%g0" : : "r" (val)); +} + +static u64 direct_picl_value(unsigned int nmi_hz) +{ + u32 delta = local_cpu_data().clock_tick / nmi_hz; + + return ((u64)((0 - delta) & 0xffffffff)) << 32; } static const struct pcr_ops direct_pcr_ops = { - .read = direct_pcr_read, - .write = direct_pcr_write, + .read_pcr = direct_pcr_read, + .write_pcr = direct_pcr_write, + .read_pic = direct_pic_read, + .write_pic = direct_pic_write, + .nmi_picl_value = direct_picl_value, + .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE), + .pcr_nmi_disable = PCR_PIC_PRIV, }; -static void n2_pcr_write(u64 val) +static void n2_pcr_write(unsigned long reg_num, u64 val) { unsigned long ret; + WARN_ON_ONCE(reg_num != 0); if (val & PCR_N2_HTRACE) { ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); if (ret != HV_EOK) - write_pcr(val); + direct_pcr_write(reg_num, val); } else - write_pcr(val); + direct_pcr_write(reg_num, val); +} + +static u64 n2_picl_value(unsigned int nmi_hz) +{ + u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2); + + return ((u64)((0 - delta) & 0xffffffff)) << 32; } static const struct pcr_ops n2_pcr_ops = { - .read = direct_pcr_read, - .write = n2_pcr_write, + .read_pcr = direct_pcr_read, + .write_pcr = n2_pcr_write, + .read_pic = direct_pic_read, + .write_pic = direct_pic_write, + .nmi_picl_value = n2_picl_value, + .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | + PCR_N2_TOE_OV1 | + (2 << PCR_N2_SL1_SHIFT) | + (0xff << PCR_N2_MASK1_SHIFT)), + .pcr_nmi_disable = PCR_PIC_PRIV, +}; + +static u64 n4_pcr_read(unsigned long reg_num) +{ + unsigned long val; + + (void) sun4v_vt_get_perfreg(reg_num, &val); + + return val; +} + +static void n4_pcr_write(unsigned long reg_num, u64 val) +{ + (void) sun4v_vt_set_perfreg(reg_num, val); +} + +static u64 n4_pic_read(unsigned long reg_num) +{ + unsigned long val; + + __asm__ __volatile__("ldxa [%1] %2, %0" + : "=r" (val) + : "r" (reg_num * 0x8UL), "i" (ASI_PIC)); + + return val; +} + +static void n4_pic_write(unsigned long reg_num, u64 val) +{ + __asm__ __volatile__("stxa %0, [%1] %2" + : /* no outputs */ + : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC)); +} + +static u64 n4_picl_value(unsigned int nmi_hz) +{ + u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2); + + return ((u64)((0 - delta) & 0xffffffff)); +} + +static const struct pcr_ops n4_pcr_ops = { + .read_pcr = n4_pcr_read, + .write_pcr = n4_pcr_write, + .read_pic = n4_pic_read, + .write_pic = n4_pic_write, + .nmi_picl_value = n4_picl_value, + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | + PCR_N4_UTRACE | PCR_N4_TOE | + (26 << PCR_N4_SL_SHIFT)), + .pcr_nmi_disable = PCR_N4_PICNPT, }; static unsigned long perf_hsvc_group; @@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_KT_CPU; break; + case SUN4V_CHIP_NIAGARA4: + perf_hsvc_group = HV_GRP_VT_CPU; + break; + default: return -ENODEV; } @@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void) sun4v_hvapi_unregister(perf_hsvc_group); } +static int __init setup_sun4v_pcr_ops(void) +{ + int ret = 0; + + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: + case SUN4V_CHIP_NIAGARA3: + pcr_ops = &n2_pcr_ops; + break; + + case SUN4V_CHIP_NIAGARA4: + pcr_ops = &n4_pcr_ops; + break; + + default: + ret = -ENODEV; + break; + } + + return ret; +} + int __init pcr_arch_init(void) { int err = register_perf_hsvc(); @@ -148,15 +271,14 @@ int __init pcr_arch_init(void) switch (tlb_type) { case hypervisor: - pcr_ops = &n2_pcr_ops; - pcr_enable = PCR_N2_ENABLE; - picl_shift = 2; + err = setup_sun4v_pcr_ops(); + if (err) + goto out_unregister; break; case cheetah: case cheetah_plus: pcr_ops = &direct_pcr_ops; - pcr_enable = PCR_SUN4U_ENABLE; break; case spitfire: diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 5713957dcb8a..e48651dace1b 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -25,36 +25,48 @@ #include <linux/atomic.h> #include <asm/nmi.h> #include <asm/pcr.h> -#include <asm/perfctr.h> #include <asm/cacheflush.h> #include "kernel.h" #include "kstack.h" -/* Sparc64 chips have two performance counters, 32-bits each, with - * overflow interrupts generated on transition from 0xffffffff to 0. - * The counters are accessed in one go using a 64-bit register. +/* Two classes of sparc64 chips currently exist. All of which have + * 32-bit counters which can generate overflow interrupts on the + * transition from 0xffffffff to 0. * - * Both counters are controlled using a single control register. The - * only way to stop all sampling is to clear all of the context (user, - * supervisor, hypervisor) sampling enable bits. But these bits apply - * to both counters, thus the two counters can't be enabled/disabled - * individually. + * All chips upto and including SPARC-T3 have two performance + * counters. The two 32-bit counters are accessed in one go using a + * single 64-bit register. * - * The control register has two event fields, one for each of the two - * counters. It's thus nearly impossible to have one counter going - * while keeping the other one stopped. Therefore it is possible to - * get overflow interrupts for counters not currently "in use" and - * that condition must be checked in the overflow interrupt handler. + * On these older chips both counters are controlled using a single + * control register. The only way to stop all sampling is to clear + * all of the context (user, supervisor, hypervisor) sampling enable + * bits. But these bits apply to both counters, thus the two counters + * can't be enabled/disabled individually. + * + * Furthermore, the control register on these older chips have two + * event fields, one for each of the two counters. It's thus nearly + * impossible to have one counter going while keeping the other one + * stopped. Therefore it is possible to get overflow interrupts for + * counters not currently "in use" and that condition must be checked + * in the overflow interrupt handler. * * So we use a hack, in that we program inactive counters with the * "sw_count0" and "sw_count1" events. These count how many times * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an * unusual way to encode a NOP and therefore will not trigger in * normal code. + * + * Starting with SPARC-T4 we have one control register per counter. + * And the counters are stored in individual registers. The registers + * for the counters are 64-bit but only a 32-bit counter is + * implemented. The event selections on SPARC-T4 lack any + * restrictions, therefore we can elide all of the complicated + * conflict resolution code we have for SPARC-T3 and earlier chips. */ -#define MAX_HWEVENTS 2 +#define MAX_HWEVENTS 4 +#define MAX_PCRS 4 #define MAX_PERIOD ((1UL << 32) - 1) #define PIC_UPPER_INDEX 0 @@ -90,8 +102,8 @@ struct cpu_hw_events { */ int current_idx[MAX_HWEVENTS]; - /* Software copy of %pcr register on this cpu. */ - u64 pcr; + /* Software copy of %pcr register(s) on this cpu. */ + u64 pcr[MAX_HWEVENTS]; /* Enabled/disable state. */ int enabled; @@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; /* An event map describes the characteristics of a performance * counter event. In particular it gives the encoding as well as * a mask telling which counters the event can be measured on. + * + * The mask is unused on SPARC-T4 and later. */ struct perf_event_map { u16 encoding; @@ -142,15 +156,53 @@ struct sparc_pmu { const struct perf_event_map *(*event_map)(int); const cache_map_t *cache_map; int max_events; + u32 (*read_pmc)(int); + void (*write_pmc)(int, u64); int upper_shift; int lower_shift; int event_mask; + int user_bit; + int priv_bit; int hv_bit; int irq_bit; int upper_nop; int lower_nop; + unsigned int flags; +#define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001 +#define SPARC_PMU_HAS_CONFLICTS 0x00000002 + int max_hw_events; + int num_pcrs; + int num_pic_regs; }; +static u32 sparc_default_read_pmc(int idx) +{ + u64 val; + + val = pcr_ops->read_pic(0); + if (idx == PIC_UPPER_INDEX) + val >>= 32; + + return val & 0xffffffff; +} + +static void sparc_default_write_pmc(int idx, u64 val) +{ + u64 shift, mask, pic; + + shift = 0; + if (idx == PIC_UPPER_INDEX) + shift = 32; + + mask = ((u64) 0xffffffff) << shift; + val <<= shift; + + pic = pcr_ops->read_pic(0); + pic &= ~mask; + pic |= val; + pcr_ops->write_pic(0, pic); +} + static const struct perf_event_map ultra3_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, @@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = { .event_map = ultra3_event_map, .cache_map = &ultra3_cache_map, .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), + .read_pmc = sparc_default_read_pmc, + .write_pmc = sparc_default_write_pmc, .upper_shift = 11, .lower_shift = 4, .event_mask = 0x3f, + .user_bit = PCR_UTRACE, + .priv_bit = PCR_STRACE, .upper_nop = 0x1c, .lower_nop = 0x14, + .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | + SPARC_PMU_HAS_CONFLICTS), + .max_hw_events = 2, + .num_pcrs = 1, + .num_pic_regs = 1, }; /* Niagara1 is very limited. The upper PIC is hard-locked to count @@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = { .event_map = niagara1_event_map, .cache_map = &niagara1_cache_map, .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), + .read_pmc = sparc_default_read_pmc, + .write_pmc = sparc_default_write_pmc, .upper_shift = 0, .lower_shift = 4, .event_mask = 0x7, + .user_bit = PCR_UTRACE, + .priv_bit = PCR_STRACE, .upper_nop = 0x0, .lower_nop = 0x0, + .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | + SPARC_PMU_HAS_CONFLICTS), + .max_hw_events = 2, + .num_pcrs = 1, + .num_pic_regs = 1, }; static const struct perf_event_map niagara2_perfmon_event_map[] = { @@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = { .event_map = niagara2_event_map, .cache_map = &niagara2_cache_map, .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), + .read_pmc = sparc_default_read_pmc, + .write_pmc = sparc_default_write_pmc, .upper_shift = 19, .lower_shift = 6, .event_mask = 0xfff, - .hv_bit = 0x8, + .user_bit = PCR_UTRACE, + .priv_bit = PCR_STRACE, + .hv_bit = PCR_N2_HTRACE, .irq_bit = 0x30, .upper_nop = 0x220, .lower_nop = 0x220, + .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | + SPARC_PMU_HAS_CONFLICTS), + .max_hw_events = 2, + .num_pcrs = 1, + .num_pic_regs = 1, +}; + +static const struct perf_event_map niagara4_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) }, + [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 }, + [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 }, + [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f }, +}; + +static const struct perf_event_map *niagara4_event_map(int event_id) +{ + return &niagara4_perfmon_event_map[event_id]; +} + +static const cache_map_t niagara4_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, + [C(RESULT_MISS)] = { (16 << 6) | 0x07 }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, + [C(RESULT_MISS)] = { (16 << 6) | 0x07 }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f }, + [C(RESULT_MISS)] = { (11 << 6) | 0x03 }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { (17 << 6) | 0x3f }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { (6 << 6) | 0x3f }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + +static u32 sparc_vt_read_pmc(int idx) +{ + u64 val = pcr_ops->read_pic(idx); + + return val & 0xffffffff; +} + +static void sparc_vt_write_pmc(int idx, u64 val) +{ + u64 pcr; + + /* There seems to be an internal latch on the overflow event + * on SPARC-T4 that prevents it from triggering unless you + * update the PIC exactly as we do here. The requirement + * seems to be that you have to turn off event counting in the + * PCR around the PIC update. + * + * For example, after the following sequence: + * + * 1) set PIC to -1 + * 2) enable event counting and overflow reporting in PCR + * 3) overflow triggers, softint 15 handler invoked + * 4) clear OV bit in PCR + * 5) write PIC to -1 + * + * a subsequent overflow event will not trigger. This + * sequence works on SPARC-T3 and previous chips. + */ + pcr = pcr_ops->read_pcr(idx); + pcr_ops->write_pcr(idx, PCR_N4_PICNPT); + + pcr_ops->write_pic(idx, val & 0xffffffff); + + pcr_ops->write_pcr(idx, pcr); +} + +static const struct sparc_pmu niagara4_pmu = { + .event_map = niagara4_event_map, + .cache_map = &niagara4_cache_map, + .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), + .read_pmc = sparc_vt_read_pmc, + .write_pmc = sparc_vt_write_pmc, + .upper_shift = 5, + .lower_shift = 5, + .event_mask = 0x7ff, + .user_bit = PCR_N4_UTRACE, + .priv_bit = PCR_N4_STRACE, + + /* We explicitly don't support hypervisor tracing. The T4 + * generates the overflow event for precise events via a trap + * which will not be generated (ie. it's completely lost) if + * we happen to be in the hypervisor when the event triggers. + * Essentially, the overflow event reporting is completely + * unusable when you have hypervisor mode tracing enabled. + */ + .hv_bit = 0, + + .irq_bit = PCR_N4_TOE, + .upper_nop = 0, + .lower_nop = 0, + .flags = 0, + .max_hw_events = 4, + .num_pcrs = 4, + .num_pic_regs = 4, }; static const struct sparc_pmu *sparc_pmu __read_mostly; @@ -558,55 +818,35 @@ static u64 nop_for_index(int idx) static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 val, mask = mask_for_index(idx); + int pcr_index = 0; - val = cpuc->pcr; + if (sparc_pmu->num_pcrs > 1) + pcr_index = idx; + + val = cpuc->pcr[pcr_index]; val &= ~mask; val |= hwc->config; - cpuc->pcr = val; + cpuc->pcr[pcr_index] = val; - pcr_ops->write(cpuc->pcr); + pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); } static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 mask = mask_for_index(idx); u64 nop = nop_for_index(idx); + int pcr_index = 0; u64 val; - val = cpuc->pcr; + if (sparc_pmu->num_pcrs > 1) + pcr_index = idx; + + val = cpuc->pcr[pcr_index]; val &= ~mask; val |= nop; - cpuc->pcr = val; + cpuc->pcr[pcr_index] = val; - pcr_ops->write(cpuc->pcr); -} - -static u32 read_pmc(int idx) -{ - u64 val; - - read_pic(val); - if (idx == PIC_UPPER_INDEX) - val >>= 32; - - return val & 0xffffffff; -} - -static void write_pmc(int idx, u64 val) -{ - u64 shift, mask, pic; - - shift = 0; - if (idx == PIC_UPPER_INDEX) - shift = 32; - - mask = ((u64) 0xffffffff) << shift; - val <<= shift; - - read_pic(pic); - pic &= ~mask; - pic |= val; - write_pic(pic); + pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); } static u64 sparc_perf_event_update(struct perf_event *event, @@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event, again: prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = read_pmc(idx); + new_raw_count = sparc_pmu->read_pmc(idx); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, (u64)-left); - write_pmc(idx, (u64)(-left) & 0xffffffff); + sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff); perf_event_update_userpage(event); return ret; } -/* If performance event entries have been added, move existing - * events around (if necessary) and then assign new entries to - * counters. - */ -static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) +static void read_in_all_counters(struct cpu_hw_events *cpuc) { int i; - if (!cpuc->n_added) - goto out; - - /* Read in the counters which are moving. */ for (i = 0; i < cpuc->n_events; i++) { struct perf_event *cp = cpuc->event[i]; @@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) cpuc->current_idx[i] = PIC_NO_INDEX; } } +} + +/* On this PMU all PICs are programmed using a single PCR. Calculate + * the combined control register value. + * + * For such chips we require that all of the events have the same + * configuration, so just fetch the settings from the first entry. + */ +static void calculate_single_pcr(struct cpu_hw_events *cpuc) +{ + int i; + + if (!cpuc->n_added) + goto out; /* Assign to counters all unassigned events. */ for (i = 0; i < cpuc->n_events; i++) { @@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) cpuc->current_idx[i] = idx; enc = perf_event_get_enc(cpuc->events[i]); - pcr &= ~mask_for_index(idx); + cpuc->pcr[0] &= ~mask_for_index(idx); if (hwc->state & PERF_HES_STOPPED) - pcr |= nop_for_index(idx); + cpuc->pcr[0] |= nop_for_index(idx); else - pcr |= event_encoding(enc, idx); + cpuc->pcr[0] |= event_encoding(enc, idx); } out: - return pcr; + cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; +} + +/* On this PMU each PIC has it's own PCR control register. */ +static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) +{ + int i; + + if (!cpuc->n_added) + goto out; + + for (i = 0; i < cpuc->n_events; i++) { + struct perf_event *cp = cpuc->event[i]; + struct hw_perf_event *hwc = &cp->hw; + int idx = hwc->idx; + u64 enc; + + if (cpuc->current_idx[i] != PIC_NO_INDEX) + continue; + + sparc_perf_event_set_period(cp, hwc, idx); + cpuc->current_idx[i] = idx; + + enc = perf_event_get_enc(cpuc->events[i]); + cpuc->pcr[idx] &= ~mask_for_index(idx); + if (hwc->state & PERF_HES_STOPPED) + cpuc->pcr[idx] |= nop_for_index(idx); + else + cpuc->pcr[idx] |= event_encoding(enc, idx); + } +out: + for (i = 0; i < cpuc->n_events; i++) { + struct perf_event *cp = cpuc->event[i]; + int idx = cp->hw.idx; + + cpuc->pcr[idx] |= cp->hw.config_base; + } +} + +/* If performance event entries have been added, move existing events + * around (if necessary) and then assign new entries to counters. + */ +static void update_pcrs_for_enable(struct cpu_hw_events *cpuc) +{ + if (cpuc->n_added) + read_in_all_counters(cpuc); + + if (sparc_pmu->num_pcrs == 1) { + calculate_single_pcr(cpuc); + } else { + calculate_multiple_pcrs(cpuc); + } } static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - u64 pcr; + int i; if (cpuc->enabled) return; @@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu) cpuc->enabled = 1; barrier(); - pcr = cpuc->pcr; - if (!cpuc->n_events) { - pcr = 0; - } else { - pcr = maybe_change_configuration(cpuc, pcr); - - /* We require that all of the events have the same - * configuration, so just fetch the settings from the - * first entry. - */ - cpuc->pcr = pcr | cpuc->event[0]->hw.config_base; - } + if (cpuc->n_events) + update_pcrs_for_enable(cpuc); - pcr_ops->write(cpuc->pcr); + for (i = 0; i < sparc_pmu->num_pcrs; i++) + pcr_ops->write_pcr(i, cpuc->pcr[i]); } static void sparc_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - u64 val; + int i; if (!cpuc->enabled) return; @@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu) cpuc->enabled = 0; cpuc->n_added = 0; - val = cpuc->pcr; - val &= ~(PCR_UTRACE | PCR_STRACE | - sparc_pmu->hv_bit | sparc_pmu->irq_bit); - cpuc->pcr = val; + for (i = 0; i < sparc_pmu->num_pcrs; i++) { + u64 val = cpuc->pcr[i]; - pcr_ops->write(cpuc->pcr); + val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit | + sparc_pmu->hv_bit | sparc_pmu->irq_bit); + cpuc->pcr[i] = val; + pcr_ops->write_pcr(i, cpuc->pcr[i]); + } } static int active_event_index(struct cpu_hw_events *cpuc, @@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex); static void perf_stop_nmi_watchdog(void *unused) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; stop_nmi_watchdog(NULL); - cpuc->pcr = pcr_ops->read(); + for (i = 0; i < sparc_pmu->num_pcrs; i++) + cpuc->pcr[i] = pcr_ops->read_pcr(i); } void perf_event_grab_pmc(void) @@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts, if (!n_ev) return 0; - if (n_ev > MAX_HWEVENTS) + if (n_ev > sparc_pmu->max_hw_events) return -1; + if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) { + int i; + + for (i = 0; i < n_ev; i++) + evts[i]->hw.idx = i; + return 0; + } + msk0 = perf_event_get_msk(events[0]); if (n_ev == 1) { if (msk0 & PIC_LOWER) @@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new) struct perf_event *event; int i, n, first; + if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME)) + return 0; + n = n_prev + n_new; if (n <= 1) return 0; @@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - if (n0 >= MAX_HWEVENTS) + if (n0 >= sparc_pmu->max_hw_events) goto out; cpuc->event[n0] = event; @@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event) /* We save the enable bits in the config_base. */ hwc->config_base = sparc_pmu->irq_bit; if (!attr->exclude_user) - hwc->config_base |= PCR_UTRACE; + hwc->config_base |= sparc_pmu->user_bit; if (!attr->exclude_kernel) - hwc->config_base |= PCR_STRACE; + hwc->config_base |= sparc_pmu->priv_bit; if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; n = 0; if (event->group_leader != event) { n = collect_events(event->group_leader, - MAX_HWEVENTS - 1, + sparc_pmu->max_hw_events - 1, evts, events, current_idx_dmy); if (n < 0) return -EINVAL; @@ -1254,8 +1557,7 @@ static struct pmu pmu = { void perf_event_print_debug(void) { unsigned long flags; - u64 pcr, pic; - int cpu; + int cpu, i; if (!sparc_pmu) return; @@ -1264,12 +1566,13 @@ void perf_event_print_debug(void) cpu = smp_processor_id(); - pcr = pcr_ops->read(); - read_pic(pic); - pr_info("\n"); - pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", - cpu, pcr, pic); + for (i = 0; i < sparc_pmu->num_pcrs; i++) + pr_info("CPU#%d: PCR%d[%016llx]\n", + cpu, i, pcr_ops->read_pcr(i)); + for (i = 0; i < sparc_pmu->num_pic_regs; i++) + pr_info("CPU#%d: PIC%d[%016llx]\n", + cpu, i, pcr_ops->read_pic(i)); local_irq_restore(flags); } @@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, * Do this before we peek at the counters to determine * overflow so we don't lose any events. */ - if (sparc_pmu->irq_bit) - pcr_ops->write(cpuc->pcr); + if (sparc_pmu->irq_bit && + sparc_pmu->num_pcrs == 1) + pcr_ops->write_pcr(0, cpuc->pcr[0]); for (i = 0; i < cpuc->n_events; i++) { struct perf_event *event = cpuc->event[i]; @@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct hw_perf_event *hwc; u64 val; + if (sparc_pmu->irq_bit && + sparc_pmu->num_pcrs > 1) + pcr_ops->write_pcr(idx, cpuc->pcr[idx]); + hwc = &event->hw; val = sparc_perf_event_update(event, hwc, idx); if (val & (1ULL << 31)) @@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara2_pmu; return true; } + if (!strcmp(sparc_pmu_type, "niagara4")) { + sparc_pmu = &niagara4_pmu; + return true; + } return false; } diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index 340c5b976d28..d397d7fc5c28 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -37,7 +37,7 @@ void * __init prom_early_alloc(unsigned long size) void *ret; if (!paddr) { - prom_printf("prom_early_alloc(%lu) failed\n"); + prom_printf("prom_early_alloc(%lu) failed\n", size); prom_halt(); } diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 1414d16712b2..0800e71d8a88 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -340,7 +340,12 @@ static const char *hwcaps[] = { */ "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", - "ima", "cspare", + "ima", "cspare", "pause", "cbcond", +}; + +static const char *crypto_hwcaps[] = { + "aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256", + "sha512", "mpmul", "montmul", "montsqr", "crc32c", }; void cpucap_info(struct seq_file *m) @@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m) printed++; } } + if (caps & HWCAP_SPARC_CRYPTO) { + unsigned long cfr; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { + unsigned long bit = 1UL << i; + if (cfr & bit) { + seq_printf(m, "%s%s", + printed ? "," : "", crypto_hwcaps[i]); + printed++; + } + } + } seq_putc(m, '\n'); } +static void __init report_one_hwcap(int *printed, const char *name) +{ + if ((*printed) == 0) + printk(KERN_INFO "CPU CAPS: ["); + printk(KERN_CONT "%s%s", + (*printed) ? "," : "", name); + if (++(*printed) == 8) { + printk(KERN_CONT "]\n"); + *printed = 0; + } +} + +static void __init report_crypto_hwcaps(int *printed) +{ + unsigned long cfr; + int i; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + + for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { + unsigned long bit = 1UL << i; + if (cfr & bit) + report_one_hwcap(printed, crypto_hwcaps[i]); + } +} + static void __init report_hwcaps(unsigned long caps) { int i, printed = 0; - printk(KERN_INFO "CPU CAPS: ["); for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { unsigned long bit = 1UL << i; - if (caps & bit) { - printk(KERN_CONT "%s%s", - printed ? "," : "", hwcaps[i]); - if (++printed == 8) { - printk(KERN_CONT "]\n"); - printk(KERN_INFO "CPU CAPS: ["); - printed = 0; - } - } + if (caps & bit) + report_one_hwcap(&printed, hwcaps[i]); } - printk(KERN_CONT "]\n"); + if (caps & HWCAP_SPARC_CRYPTO) + report_crypto_hwcaps(&printed); + if (printed != 0) + printk(KERN_CONT "]\n"); } static unsigned long __init mdesc_cpu_hwcap_list(void) @@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void) break; } } + for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { + if (!strcmp(prop, crypto_hwcaps[i])) + caps |= HWCAP_SPARC_CRYPTO; + } plen = strlen(prop) + 1; prop += plen; diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index d97f3eb72e06..44025f4ba41f 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -90,7 +90,7 @@ SIGN1(sys32_mkdir, sys_mkdir, %o1) SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) SIGN1(sys32_sysfs, compat_sys_sysfs, %o0) SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) -SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1) +SIGN2(sys32_sendfile64, sys_sendfile, %o0, %o1) SIGN1(sys32_prctl, sys_prctl, %o0) SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0) SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2) diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index f7392336961f..d862499eb01c 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -506,52 +506,6 @@ long compat_sys_fadvise64_64(int fd, advice); } -asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, - compat_off_t __user *offset, - compat_size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, - offset ? (off_t __user *) &of : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; -} - -asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, - compat_size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile64(out_fd, in_fd, - offset ? (loff_t __user *) &lof : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; -} - /* This is just a version for 32-bit applications which does * not force O_LARGEFILE on. */ diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 3b05e6697710..fa1f1d375ffc 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -850,7 +850,7 @@ void __init cheetah_ecache_flush_init(void) ecache_flush_physbase = find_ecache_flush_span(ecache_flush_size); if (ecache_flush_physbase == ~0UL) { - prom_printf("cheetah_ecache_flush_init: Cannot find %d byte " + prom_printf("cheetah_ecache_flush_init: Cannot find %ld byte " "contiguous physical memory.\n", ecache_flush_size); prom_halt(); diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index dff4096f3dec..30f6ab51c551 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -32,6 +32,9 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o lib-$(CONFIG_SPARC64) += NG2patch.o +lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o +lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 03eadf66b0d3..2c20ad63ddbf 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -14,7 +14,7 @@ #define FPRS_FEF 0x04 #ifdef MEMCPY_DEBUG #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ - clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; + clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs #else #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs @@ -182,13 +182,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ cmp %g2, 0 tne %xcc, 5 PREAMBLE - mov %o0, GLOBAL_SPARE + mov %o0, %o3 cmp %o2, 0 be,pn %XCC, 85f - or %o0, %o1, %o3 + or %o0, %o1, GLOBAL_SPARE cmp %o2, 16 blu,a,pn %XCC, 80f - or %o3, %o2, %o3 + or GLOBAL_SPARE, %o2, GLOBAL_SPARE /* 2 blocks (128 bytes) is the minimum we can do the block * copy with. We need to ensure that we'll iterate at least @@ -202,7 +202,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ */ cmp %o2, (4 * 64) blu,pt %XCC, 75f - andcc %o3, 0x7, %g0 + andcc GLOBAL_SPARE, 0x7, %g0 /* %o0: dst * %o1: src @@ -404,13 +404,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * over. If anything is left, we copy it one byte at a time. */ brz,pt %o2, 85f - sub %o0, %o1, %o3 + sub %o0, %o1, GLOBAL_SPARE ba,a,pt %XCC, 90f .align 64 75: /* 16 < len <= 64 */ bne,pn %XCC, 75f - sub %o0, %o1, %o3 + sub %o0, %o1, GLOBAL_SPARE 72: andn %o2, 0xf, %o4 @@ -420,9 +420,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o1, 0x08, %o1 EX_LD(LOAD(ldx, %o1, %g1)) sub %o1, 0x08, %o1 - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 @@ -430,14 +430,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop sub %o2, 0x8, %o2 EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -454,11 +454,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %g1, 1, %g1 EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) bgu,pt %icc, 1b add %o1, 1, %o1 -2: add %o1, %o3, %o0 +2: add %o1, GLOBAL_SPARE, %o0 andcc %o1, 0x7, %g1 bne,pt %icc, 8f sll %g1, 3, %g1 @@ -468,16 +468,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop ba,a,pt %xcc, 73b -8: mov 64, %o3 +8: mov 64, GLOBAL_SPARE andn %o1, 0x7, %o1 EX_LD(LOAD(ldx, %o1, %g2)) - sub %o3, %g1, %o3 + sub GLOBAL_SPARE, %g1, GLOBAL_SPARE andn %o2, 0x7, %o4 sllx %g2, %g1, %g2 1: add %o1, 0x8, %o1 EX_LD(LOAD(ldx, %o1, %g3)) subcc %o4, 0x8, %o4 - srlx %g3, %o3, %o5 + srlx %g3, GLOBAL_SPARE, %o5 or %o5, %g2, %o5 EX_ST(STORE(stx, %o5, %o0)) add %o0, 0x8, %o0 @@ -489,32 +489,32 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pn %icc, 85f add %o1, %g1, %o1 ba,pt %xcc, 90f - sub %o0, %o1, %o3 + sub %o0, %o1, GLOBAL_SPARE .align 64 80: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 + andcc GLOBAL_SPARE, 0x3, %g0 bne,pn %XCC, 90f - sub %o0, %o1, %o3 + sub %o0, %o1, GLOBAL_SPARE 1: subcc %o2, 4, %o2 EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 1b add %o1, 4, %o1 85: retl - mov EX_RETVAL(GLOBAL_SPARE), %o0 + mov EX_RETVAL(%o3), %o0 .align 32 90: subcc %o2, 1, %o2 EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 90b add %o1, 1, %o1 retl - mov EX_RETVAL(GLOBAL_SPARE), %o0 + mov EX_RETVAL(%o3), %o0 .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S new file mode 100644 index 000000000000..fd9f903ffa32 --- /dev/null +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -0,0 +1,30 @@ +/* NG4copy_from_user.S: Niagara-4 optimized copy from userspace. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#define EX_LD(x) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __retl_one_asi;\ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME NG4copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, ___copy_in_user; \ + nop +#endif + +#include "NG4memcpy.S" diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S new file mode 100644 index 000000000000..f30ec10bbcac --- /dev/null +++ b/arch/sparc/lib/NG4copy_page.S @@ -0,0 +1,57 @@ +/* NG4copy_page.S: Niagara-4 optimized copy page. + * + * Copyright (C) 2012 (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/page.h> + + .text + .align 32 + + .register %g2, #scratch + .register %g3, #scratch + + .globl NG4copy_user_page +NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ + prefetch [%o1 + 0x000], #n_reads_strong + prefetch [%o1 + 0x040], #n_reads_strong + prefetch [%o1 + 0x080], #n_reads_strong + prefetch [%o1 + 0x0c0], #n_reads_strong + set PAGE_SIZE, %g7 + prefetch [%o1 + 0x100], #n_reads_strong + prefetch [%o1 + 0x140], #n_reads_strong + prefetch [%o1 + 0x180], #n_reads_strong + prefetch [%o1 + 0x1c0], #n_reads_strong +1: + ldx [%o1 + 0x00], %o2 + subcc %g7, 0x40, %g7 + ldx [%o1 + 0x08], %o3 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + ldx [%o1 + 0x20], %g1 + stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x28], %g2 + stxa %o3, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x30], %g3 + stxa %o4, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x38], %o2 + add %o1, 0x40, %o1 + stxa %o5, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + stxa %g1, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + stxa %g2, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + stxa %g3, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + bne,pt %icc, 1b + prefetch [%o1 + 0x200], #n_reads_strong + retl + membar #StoreLoad | #StoreStore + .size NG4copy_user_page,.-NG4copy_user_page diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S new file mode 100644 index 000000000000..9744c4540a8d --- /dev/null +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -0,0 +1,39 @@ +/* NG4copy_to_user.S: Niagara-4 optimized copy to userspace. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#define EX_ST(x) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __retl_one_asi;\ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME NG4copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. + */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, ___copy_in_user; \ + nop +#endif + +#include "NG4memcpy.S" diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S new file mode 100644 index 000000000000..9cf2ee01cee3 --- /dev/null +++ b/arch/sparc/lib/NG4memcpy.S @@ -0,0 +1,360 @@ +/* NG4memcpy.S: Niagara-4 optimized memcpy. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#ifdef __KERNEL__ +#include <asm/visasm.h> +#include <asm/asi.h> +#define GLOBAL_SPARE %g7 +#else +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define FPRS_FEF 0x04 + +/* On T4 it is very expensive to access ASRs like %fprs and + * %asi, avoiding a read or a write can save ~50 cycles. + */ +#define FPU_ENTER \ + rd %fprs, %o5; \ + andcc %o5, FPRS_FEF, %g0; \ + be,a,pn %icc, 999f; \ + wr %g0, FPRS_FEF, %fprs; \ + 999: + +#ifdef MEMCPY_DEBUG +#define VISEntryHalf FPU_ENTER; \ + clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#else +#define VISEntryHalf FPU_ENTER +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#endif + +#define GLOBAL_SPARE %g5 +#endif + +#ifndef STORE_ASI +#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P +#else +#define STORE_ASI 0x80 /* ASI_P */ +#endif +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#define LOAD(type,addr,dest) type [addr], dest +#endif + +#ifndef STORE +#ifndef MEMCPY_DEBUG +#define STORE(type,src,addr) type src, [addr] +#else +#define STORE(type,src,addr) type##a src, [addr] %asi +#endif +#endif + +#ifndef STORE_INIT +#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME NG4memcpy +#endif +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 64 + + .globl FUNC_NAME + .type FUNC_NAME,#function +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ +#ifdef MEMCPY_DEBUG + wr %g0, 0x80, %asi +#endif + srlx %o2, 31, %g2 + cmp %g2, 0 + tne %XCC, 5 + PREAMBLE + mov %o0, %o3 + brz,pn %o2, .Lexit + cmp %o2, 3 + ble,pn %icc, .Ltiny + cmp %o2, 19 + ble,pn %icc, .Lsmall + or %o0, %o1, %g2 + cmp %o2, 128 + bl,pn %icc, .Lmedium + nop + +.Llarge:/* len >= 0x80 */ + /* First get dest 8 byte aligned. */ + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, 51f + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + add %o1, 1, %o1 + subcc %g1, 1, %g1 + add %o0, 1, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g2, %o0 - 0x01)) + +51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) + LOAD(prefetch, %o1 + 0x080, #n_reads_strong) + LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) + LOAD(prefetch, %o1 + 0x100, #n_reads_strong) + LOAD(prefetch, %o1 + 0x140, #n_reads_strong) + LOAD(prefetch, %o1 + 0x180, #n_reads_strong) + LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) + LOAD(prefetch, %o1 + 0x200, #n_reads_strong) + + /* Check if we can use the straight fully aligned + * loop, or we require the alignaddr/faligndata variant. + */ + andcc %o1, 0x7, %o5 + bne,pn %icc, .Llarge_src_unaligned + sub %g0, %o0, %g1 + + /* Legitimize the use of initializing stores by getting dest + * to be 64-byte aligned. + */ + and %g1, 0x3f, %g1 + brz,pt %g1, .Llarge_aligned + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) + add %o1, 8, %o1 + subcc %g1, 8, %g1 + add %o0, 8, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stx, %g2, %o0 - 0x08)) + +.Llarge_aligned: + /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ + andn %o2, 0x3f, %o4 + sub %o2, %o4, %o2 + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + add %o1, 0x40, %o1 + EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + subcc %o4, 0x40, %o4 + EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) + EX_ST(STORE_INIT(%g1, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g2, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) + EX_ST(STORE_INIT(%g3, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) + EX_ST(STORE_INIT(%o5, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g2, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g3, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + add %o0, 0x08, %o0 + bne,pt %icc, 1b + LOAD(prefetch, %o1 + 0x200, #n_reads_strong) + + membar #StoreLoad | #StoreStore + + brz,pn %o2, .Lexit + cmp %o2, 19 + ble,pn %icc, .Lsmall_unaligned + nop + ba,a,pt %icc, .Lmedium_noprefetch + +.Lexit: retl + mov EX_RETVAL(%o3), %o0 + +.Llarge_src_unaligned: + andn %o2, 0x3f, %o4 + sub %o2, %o4, %o2 + VISEntryHalf + alignaddr %o1, %g0, %g1 + add %o1, %o4, %o1 + EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) +1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) + subcc %o4, 0x40, %o4 + EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) + EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) + EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) + EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) + EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) + EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) + faligndata %f0, %f2, %f16 + EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) + faligndata %f2, %f4, %f18 + add %g1, 0x40, %g1 + faligndata %f4, %f6, %f20 + faligndata %f6, %f8, %f22 + faligndata %f8, %f10, %f24 + faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 + EX_ST(STORE(std, %f16, %o0 + 0x00)) + EX_ST(STORE(std, %f18, %o0 + 0x08)) + EX_ST(STORE(std, %f20, %o0 + 0x10)) + EX_ST(STORE(std, %f22, %o0 + 0x18)) + EX_ST(STORE(std, %f24, %o0 + 0x20)) + EX_ST(STORE(std, %f26, %o0 + 0x28)) + EX_ST(STORE(std, %f28, %o0 + 0x30)) + EX_ST(STORE(std, %f30, %o0 + 0x38)) + add %o0, 0x40, %o0 + bne,pt %icc, 1b + LOAD(prefetch, %g1 + 0x200, #n_reads_strong) + VISExitHalf + + brz,pn %o2, .Lexit + cmp %o2, 19 + ble,pn %icc, .Lsmall_unaligned + nop + ba,a,pt %icc, .Lmedium_unaligned + +.Lmedium: + LOAD(prefetch, %o1 + 0x40, #n_reads_strong) + andcc %g2, 0x7, %g0 + bne,pn %icc, .Lmedium_unaligned + nop +.Lmedium_noprefetch: + andncc %o2, 0x20 - 1, %o5 + be,pn %icc, 2f + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) + add %o1, 0x20, %o1 + subcc %o5, 0x20, %o5 + EX_ST(STORE(stx, %g1, %o0 + 0x00)) + EX_ST(STORE(stx, %g2, %o0 + 0x08)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) + EX_ST(STORE(stx, %o4, %o0 + 0x18)) + bne,pt %icc, 1b + add %o0, 0x20, %o0 +2: andcc %o2, 0x18, %o5 + be,pt %icc, 3f + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + add %o1, 0x08, %o1 + add %o0, 0x08, %o0 + subcc %o5, 0x08, %o5 + bne,pt %icc, 1b + EX_ST(STORE(stx, %g1, %o0 - 0x08)) +3: brz,pt %o2, .Lexit + cmp %o2, 0x04 + bl,pn %icc, .Ltiny + nop + EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + add %o1, 0x04, %o1 + add %o0, 0x04, %o0 + subcc %o2, 0x04, %o2 + bne,pn %icc, .Ltiny + EX_ST(STORE(stw, %g1, %o0 - 0x04)) + ba,a,pt %icc, .Lexit +.Lmedium_unaligned: + /* First get dest 8 byte aligned. */ + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, 2f + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + add %o1, 1, %o1 + subcc %g1, 1, %g1 + add %o0, 1, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g2, %o0 - 0x01)) +2: + and %o1, 0x7, %g1 + brz,pn %g1, .Lmedium_noprefetch + sll %g1, 3, %g1 + mov 64, %g2 + sub %g2, %g1, %g2 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + sllx %o4, %g1, %o4 + andn %o2, 0x08 - 1, %o5 + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) + add %o1, 0x08, %o1 + subcc %o5, 0x08, %o5 + srlx %g3, %g2, GLOBAL_SPARE + or GLOBAL_SPARE, %o4, GLOBAL_SPARE + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + add %o0, 0x08, %o0 + bne,pt %icc, 1b + sllx %g3, %g1, %o4 + srl %g1, 3, %g1 + add %o1, %g1, %o1 + brz,pn %o2, .Lexit + nop + ba,pt %icc, .Lsmall_unaligned + +.Ltiny: + EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + subcc %o2, 1, %o2 + be,pn %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x00)) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + subcc %o2, 1, %o2 + be,pn %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x01)) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + ba,pt %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x02)) + +.Lsmall: + andcc %g2, 0x3, %g0 + bne,pn %icc, .Lsmall_unaligned + andn %o2, 0x4 - 1, %o5 + sub %o2, %o5, %o2 +1: + EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + add %o1, 0x04, %o1 + subcc %o5, 0x04, %o5 + add %o0, 0x04, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stw, %g1, %o0 - 0x04)) + brz,pt %o2, .Lexit + nop + ba,a,pt %icc, .Ltiny + +.Lsmall_unaligned: +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + add %o1, 1, %o1 + add %o0, 1, %o0 + subcc %o2, 1, %o2 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g1, %o0 - 0x01)) + ba,a,pt %icc, .Lexit + .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S new file mode 100644 index 000000000000..c21c34c61dda --- /dev/null +++ b/arch/sparc/lib/NG4patch.S @@ -0,0 +1,43 @@ +/* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant. + * + * Copyright (C) 2012 David S. Miller <davem@davemloft.net> + */ + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara4_patch_copyops + .type niagara4_patch_copyops,#function +niagara4_patch_copyops: + NG_DO_PATCH(memcpy, NG4memcpy) + NG_DO_PATCH(___copy_from_user, NG4copy_from_user) + NG_DO_PATCH(___copy_to_user, NG4copy_to_user) + retl + nop + .size niagara4_patch_copyops,.-niagara4_patch_copyops + + .globl niagara4_patch_pageops + .type niagara4_patch_pageops,#function +niagara4_patch_pageops: + NG_DO_PATCH(copy_user_page, NG4copy_user_page) + NG_DO_PATCH(_clear_page, NGclear_page) + NG_DO_PATCH(clear_user_page, NGclear_user_page) + retl + nop + .size niagara4_patch_pageops,.-niagara4_patch_pageops diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index b9e790b9c6b8..423d46e2258b 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S @@ -59,6 +59,8 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ restore .align 32 + .globl NGclear_page + .globl NGclear_user_page NGclear_page: /* %o0=dest */ NGclear_user_page: /* %o0=dest, %o1=vaddr */ rd %asi, %g3 diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 3b31218cafc6..ee31b884c61b 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -134,6 +134,10 @@ EXPORT_SYMBOL(copy_user_page); void VISenter(void); EXPORT_SYMBOL(VISenter); +/* CRYPTO code needs this */ +void VISenterhalf(void); +EXPORT_SYMBOL(VISenterhalf); + extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, unsigned long *); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index d58edf5fefdb..7a9b788c6ced 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -51,22 +51,40 @@ #include "init_64.h" -unsigned long kern_linear_pte_xor[2] __read_mostly; +unsigned long kern_linear_pte_xor[4] __read_mostly; -/* A bitmap, one bit for every 256MB of physical memory. If the bit - * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else - * if set we should use a 256MB page (via kern_linear_pte_xor[1]). +/* A bitmap, two bits for every 256MB of physical memory. These two + * bits determine what page size we use for kernel linear + * translations. They form an index into kern_linear_pte_xor[]. The + * value in the indexed slot is XOR'd with the TLB miss virtual + * address to form the resulting TTE. The mapping is: + * + * 0 ==> 4MB + * 1 ==> 256MB + * 2 ==> 2GB + * 3 ==> 16GB + * + * All sun4v chips support 256MB pages. Only SPARC-T4 and later + * support 2GB pages, and hopefully future cpus will support the 16GB + * pages as well. For slots 2 and 3, we encode a 256MB TTE xor there + * if these larger page sizes are not supported by the cpu. + * + * It would be nice to determine this from the machine description + * 'cpu' properties, but we need to have this table setup before the + * MDESC is initialized. */ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; #ifndef CONFIG_DEBUG_PAGEALLOC -/* A special kernel TSB for 4MB and 256MB linear mappings. - * Space is allocated for this right after the trap table - * in arch/sparc64/kernel/head.S +/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. + * Space is allocated for this right after the trap table in + * arch/sparc64/kernel/head.S */ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; #endif +static unsigned long cpu_pgsz_mask; + #define MAX_BANKS 32 static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata; @@ -101,7 +119,8 @@ static void __init read_obp_memory(const char *property, ret = prom_getproperty(node, property, (char *) regs, prop_size); if (ret == -1) { - prom_printf("Couldn't get %s property from /memory.\n"); + prom_printf("Couldn't get %s property from /memory.\n", + property); prom_halt(); } @@ -403,6 +422,12 @@ EXPORT_SYMBOL(flush_icache_range); void mmu_info(struct seq_file *m) { + static const char *pgsz_strings[] = { + "8K", "64K", "512K", "4MB", "32MB", + "256MB", "2GB", "16GB", + }; + int i, printed; + if (tlb_type == cheetah) seq_printf(m, "MMU Type\t: Cheetah\n"); else if (tlb_type == cheetah_plus) @@ -414,6 +439,17 @@ void mmu_info(struct seq_file *m) else seq_printf(m, "MMU Type\t: ???\n"); + seq_printf(m, "MMU PGSZs\t: "); + printed = 0; + for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) { + if (cpu_pgsz_mask & (1UL << i)) { + seq_printf(m, "%s%s", + printed ? "," : "", pgsz_strings[i]); + printed++; + } + } + seq_putc(m, '\n'); + #ifdef CONFIG_DEBUG_DCFLUSH seq_printf(m, "DCPageFlushes\t: %d\n", atomic_read(&dcpage_flushes)); @@ -462,7 +498,7 @@ static void __init read_obp_translations(void) prom_halt(); } if (unlikely(n > sizeof(prom_trans))) { - prom_printf("prom_mappings: Size %Zd is too big.\n", n); + prom_printf("prom_mappings: Size %d is too big.\n", n); prom_halt(); } @@ -524,7 +560,7 @@ static void __init hypervisor_tlb_lock(unsigned long vaddr, unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu); if (ret != 0) { - prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: " + prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: " "errors with %lx\n", vaddr, 0, pte, mmu, ret); prom_halt(); } @@ -1358,32 +1394,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, extern unsigned int kvmap_linear_patch[1]; #endif /* CONFIG_DEBUG_PAGEALLOC */ -static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) +static void __init kpte_set_val(unsigned long index, unsigned long val) { - const unsigned long shift_256MB = 28; - const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL); - const unsigned long size_256MB = (1UL << shift_256MB); + unsigned long *ptr = kpte_linear_bitmap; - while (start < end) { - long remains; + val <<= ((index % (BITS_PER_LONG / 2)) * 2); + ptr += (index / (BITS_PER_LONG / 2)); - remains = end - start; - if (remains < size_256MB) - break; + *ptr |= val; +} - if (start & mask_256MB) { - start = (start + size_256MB) & ~mask_256MB; - continue; - } +static const unsigned long kpte_shift_min = 28; /* 256MB */ +static const unsigned long kpte_shift_max = 34; /* 16GB */ +static const unsigned long kpte_shift_incr = 3; + +static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, + unsigned long shift) +{ + unsigned long size = (1UL << shift); + unsigned long mask = (size - 1UL); + unsigned long remains = end - start; + unsigned long val; + + if (remains < size || (start & mask)) + return start; + + /* VAL maps: + * + * shift 28 --> kern_linear_pte_xor index 1 + * shift 31 --> kern_linear_pte_xor index 2 + * shift 34 --> kern_linear_pte_xor index 3 + */ + val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; + + remains &= ~mask; + if (shift != kpte_shift_max) + remains = size; - while (remains >= size_256MB) { - unsigned long index = start >> shift_256MB; + while (remains) { + unsigned long index = start >> kpte_shift_min; - __set_bit(index, kpte_linear_bitmap); + kpte_set_val(index, val); - start += size_256MB; - remains -= size_256MB; + start += 1UL << kpte_shift_min; + remains -= 1UL << kpte_shift_min; + } + + return start; +} + +static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) +{ + unsigned long smallest_size, smallest_mask; + unsigned long s; + + smallest_size = (1UL << kpte_shift_min); + smallest_mask = (smallest_size - 1UL); + + while (start < end) { + unsigned long orig_start = start; + + for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { + start = kpte_mark_using_shift(start, end, s); + + if (start != orig_start) + break; } + + if (start == orig_start) + start = (start + smallest_size) & ~smallest_mask; } } @@ -1577,13 +1656,16 @@ static void __init sun4v_ktsb_init(void) ktsb_descr[0].resv = 0; #ifndef CONFIG_DEBUG_PAGEALLOC - /* Second KTSB for 4MB/256MB mappings. */ + /* Second KTSB for 4MB/256MB/2GB/16GB mappings. */ ktsb_pa = (kern_base + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; - ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB | - HV_PGSZ_MASK_256MB); + ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB | + HV_PGSZ_MASK_256MB | + HV_PGSZ_MASK_2GB | + HV_PGSZ_MASK_16GB) & + cpu_pgsz_mask); ktsb_descr[1].assoc = 1; ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; ktsb_descr[1].ctx_idx = 0; @@ -1606,6 +1688,47 @@ void __cpuinit sun4v_ktsb_register(void) } } +static void __init sun4u_linear_pte_xor_finalize(void) +{ +#ifndef CONFIG_DEBUG_PAGEALLOC + /* This is where we would add Panther support for + * 32MB and 256MB pages. + */ +#endif +} + +static void __init sun4v_linear_pte_xor_finalize(void) +{ +#ifndef CONFIG_DEBUG_PAGEALLOC + if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { + kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ + 0xfffff80000000000UL; + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; + } + + if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { + kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ + 0xfffff80000000000UL; + kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; + } + + if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { + kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ + 0xfffff80000000000UL; + kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; + } +#endif +} + /* paging_init() sets up the page tables */ static unsigned long last_valid_pfn; @@ -1665,10 +1788,8 @@ void __init paging_init(void) ktsb_phys_patch(); } - if (tlb_type == hypervisor) { + if (tlb_type == hypervisor) sun4v_patch_tlb_handlers(); - sun4v_ktsb_init(); - } /* Find available physical memory... * @@ -1727,9 +1848,6 @@ void __init paging_init(void) __flush_tlb_all(); - if (tlb_type == hypervisor) - sun4v_ktsb_register(); - prom_build_devicetree(); of_populate_present_mask(); #ifndef CONFIG_SMP @@ -1742,8 +1860,36 @@ void __init paging_init(void) #ifndef CONFIG_SMP mdesc_fill_in_cpu_data(cpu_all_mask); #endif + mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask); + + sun4v_linear_pte_xor_finalize(); + + sun4v_ktsb_init(); + sun4v_ktsb_register(); + } else { + unsigned long impl, ver; + + cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | + HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); + + __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); + impl = ((ver >> 32) & 0xffff); + if (impl == PANTHER_IMPL) + cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB | + HV_PGSZ_MASK_256MB); + + sun4u_linear_pte_xor_finalize(); } + /* Flush the TLBs and the 4M TSB so that the updated linear + * pte XOR settings are realized for all mappings. + */ + __flush_tlb_all(); +#ifndef CONFIG_DEBUG_PAGEALLOC + memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); +#endif + __flush_tlb_all(); + /* Setup bootmem... */ last_valid_pfn = end_pfn = bootmem_init(phys_base); @@ -2110,6 +2256,7 @@ static void __init sun4u_pgprot_init(void) { unsigned long page_none, page_shared, page_copy, page_readonly; unsigned long page_exec_bit; + int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | _PAGE_CACHE_4U | _PAGE_P_4U | @@ -2137,8 +2284,8 @@ static void __init sun4u_pgprot_init(void) kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | _PAGE_W_4U); - /* XXX Should use 256MB on Panther. XXX */ - kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; + for (i = 1; i < 4; i++) + kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; _PAGE_SZBITS = _PAGE_SZBITS_4U; _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | @@ -2164,6 +2311,7 @@ static void __init sun4v_pgprot_init(void) { unsigned long page_none, page_shared, page_copy, page_readonly; unsigned long page_exec_bit; + int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | _PAGE_CACHE_4V | _PAGE_P_4V | @@ -2185,15 +2333,8 @@ static void __init sun4v_pgprot_init(void) kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); -#ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^ - 0xfffff80000000000UL; -#else - kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ - 0xfffff80000000000UL; -#endif - kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + for (i = 1; i < 4; i++) + kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | __ACCESS_BITS_4V | _PAGE_E_4V); diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 3e1ac8b96cae..0661aa606dec 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -8,12 +8,12 @@ #define MAX_PHYS_ADDRESS (1UL << 41UL) #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) #define KPTE_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8) + ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) #define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) #define VALID_ADDR_BITMAP_BYTES \ ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) -extern unsigned long kern_linear_pte_xor[2]; +extern unsigned long kern_linear_pte_xor[4]; extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; extern unsigned int sparc64_highest_unlocked_tlb_ent; extern unsigned long sparc64_kern_pri_context; diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index a8a58cad9d2b..0f4f7191fbba 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -90,8 +90,8 @@ static void __init sbus_iommu_init(struct platform_device *op) it to us. */ tmp = __get_free_pages(GFP_KERNEL, IOMMU_ORDER); if (!tmp) { - prom_printf("Unable to allocate iommu table [0x%08x]\n", - IOMMU_NPTES*sizeof(iopte_t)); + prom_printf("Unable to allocate iommu table [0x%lx]\n", + IOMMU_NPTES * sizeof(iopte_t)); prom_halt(); } iommu->page_table = (iopte_t *)tmp; diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index e9073e9501b3..28368701ef79 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -464,8 +464,12 @@ void bpf_jit_compile(struct sk_filter *fp) emit_alu_K(OR, K); break; case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ + case BPF_S_ALU_XOR_X: emit_alu_X(XOR); break; + case BPF_S_ALU_XOR_K: /* A ^= K */ + emit_alu_K(XOR, K); + break; case BPF_S_ALU_LSH_X: /* A <<= X */ emit_alu_X(SLL); break; diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 0270620a1692..8c5eff6d6df5 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -134,7 +134,6 @@ CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index c11de27a9bcb..e7a3dfcbcda7 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -132,7 +132,6 @@ CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index c17de0db6736..9efeb6da48bc 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -21,6 +21,9 @@ #include <linux/un.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/file.h> #include <asm/uaccess.h> #include <asm/switch_to.h> @@ -118,90 +121,38 @@ void mconsole_log(struct mc_request *req) mconsole_reply(req, "", 0, 0); } -/* This is a more convoluted version of mconsole_proc, which has some stability - * problems; however, we need it fixed, because it is expected that UML users - * mount HPPFS instead of procfs on /proc. And we want mconsole_proc to still - * show the real procfs content, not the ones from hppfs.*/ -#if 0 void mconsole_proc(struct mc_request *req) { struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; - struct file *file; - int n; - char *ptr = req->request.data, *buf; - mm_segment_t old_fs = get_fs(); - - ptr += strlen("proc"); - ptr = skip_spaces(ptr); - - file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); - if (IS_ERR(file)) { - mconsole_reply(req, "Failed to open file", 1, 0); - goto out; - } - - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (buf == NULL) { - mconsole_reply(req, "Failed to allocate buffer", 1, 0); - goto out_fput; - } - - if (file->f_op->read) { - do { - loff_t pos; - set_fs(KERNEL_DS); - n = vfs_read(file, buf, PAGE_SIZE - 1, &pos); - file_pos_write(file, pos); - set_fs(old_fs); - if (n >= 0) { - buf[n] = '\0'; - mconsole_reply(req, buf, 0, (n > 0)); - } - else { - mconsole_reply(req, "Read of file failed", - 1, 0); - goto out_free; - } - } while (n > 0); - } - else mconsole_reply(req, "", 0, 0); - - out_free: - kfree(buf); - out_fput: - fput(file); - out: ; -} -#endif - -void mconsole_proc(struct mc_request *req) -{ - char path[64]; char *buf; int len; - int fd; + struct file *file; int first_chunk = 1; char *ptr = req->request.data; ptr += strlen("proc"); ptr = skip_spaces(ptr); - snprintf(path, sizeof(path), "/proc/%s", ptr); - fd = sys_open(path, 0, 0); - if (fd < 0) { + file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); + if (IS_ERR(file)) { mconsole_reply(req, "Failed to open file", 1, 0); - printk(KERN_ERR "open %s: %d\n",path,fd); + printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file)); goto out; } buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (buf == NULL) { mconsole_reply(req, "Failed to allocate buffer", 1, 0); - goto out_close; + goto out_fput; } - for (;;) { - len = sys_read(fd, buf, PAGE_SIZE-1); + do { + loff_t pos; + mm_segment_t old_fs = get_fs(); + set_fs(KERNEL_DS); + len = vfs_read(file, buf, PAGE_SIZE - 1, &pos); + set_fs(old_fs); + file->f_pos = pos; if (len < 0) { mconsole_reply(req, "Read of file failed", 1, 0); goto out_free; @@ -211,22 +162,14 @@ void mconsole_proc(struct mc_request *req) mconsole_reply(req, "\n", 0, 1); first_chunk = 0; } - if (len == PAGE_SIZE-1) { - buf[len] = '\0'; - mconsole_reply(req, buf, 0, 1); - } else { - buf[len] = '\0'; - mconsole_reply(req, buf, 0, 0); - break; - } - } - + buf[len] = '\0'; + mconsole_reply(req, buf, 0, (len > 0)); + } while (len > 0); out_free: kfree(buf); - out_close: - sys_close(fd); - out: - /* nothing */; + out_fput: + fput(file); + out: ; } #define UML_MCONSOLE_HELPTEXT \ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 957ec87385af..fbee9714d9ab 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -248,6 +248,9 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 80502a2bb789..1707cfa928fb 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -47,6 +47,10 @@ #endif #ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that on ARM we can have one ABI that works for 32 and 64 + * bit guests. */ +typedef unsigned long xen_pfn_t; /* Guest handles for primitive C types. */ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); @@ -57,6 +61,7 @@ DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); +DEFINE_GUEST_HANDLE(xen_pfn_t); #endif #ifndef HYPERVISOR_VIRT_START @@ -121,6 +126,8 @@ struct arch_shared_info { #include <asm/xen/interface_64.h> #endif +#include <asm/pvclock-abi.h> + #ifndef __ASSEMBLY__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 1be1ab7d6a41..ee52fcac6f72 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -5,10 +5,12 @@ extern int xen_swiotlb; extern int __init pci_xen_swiotlb_detect(void); extern void __init pci_xen_swiotlb_init(void); +extern int pci_xen_swiotlb_init_late(void); #else #define xen_swiotlb (0) static inline int __init pci_xen_swiotlb_detect(void) { return 0; } static inline void __init pci_xen_swiotlb_init(void) { } +static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } #endif #endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 33643a8bcbbb..520d2bd0b9c5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -280,6 +280,31 @@ void bpf_jit_compile(struct sk_filter *fp) } EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + seen |= SEEN_XREG; + EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ + if (pc_ret0 > 0) { + /* addrs[pc_ret0 - 1] is start address of target + * (addrs[i] - 6) is the address following this jmp + * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long) + */ + EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - + (addrs[i] - 6)); + } else { + EMIT_COND_JMP(X86_JNE, 2 + 5); + CLEAR_A(); + EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */ + } + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT2(0xf7, 0xf3); /* div %ebx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ + EMIT2(0xf7, 0xf1); /* div %ecx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ EMIT(K, 4); @@ -310,9 +335,18 @@ void bpf_jit_compile(struct sk_filter *fp) EMIT1_off32(0x0d, K); /* or imm32,%eax */ break; case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ + case BPF_S_ALU_XOR_X: seen |= SEEN_XREG; EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ break; + case BPF_S_ALU_XOR_K: /* A ^= K; */ + if (K == 0) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */ + else + EMIT1_off32(0x35, K); /* xor imm32,%eax */ + break; case BPF_S_ALU_LSH_X: /* A <<= X; */ seen |= SEEN_XREG; EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index ec57bd3818a4..7005ced5d1ad 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -6,8 +6,9 @@ #include <xen/xen.h> #include <xen/interface/physdev.h> +#include "xen-ops.h" -unsigned int xen_io_apic_read(unsigned apic, unsigned reg) +static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) { struct physdev_apic apic_op; int ret; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1fbe75a95f15..2d932c351f91 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -80,6 +80,8 @@ #include "smp.h" #include "multicalls.h" +#include <xen/events.h> + EXPORT_SYMBOL_GPL(hypercall_page); DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); @@ -1288,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; int rc; - pgd_t *pgd; if (!xen_start_info) return; @@ -1380,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void) acpi_numa = -1; #endif - pgd = (pgd_t *)xen_start_info->pt_base; - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; @@ -1390,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void) early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); @@ -1441,11 +1440,19 @@ asmlinkage void __init xen_start_kernel(void) const struct dom0_vga_console_info *info = (void *)((char *)xen_start_info + xen_start_info->console.dom0.info_off); + struct xen_platform_op op = { + .cmd = XENPF_firmware_info, + .interface_version = XENPF_INTERFACE_VERSION, + .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, + }; xen_init_vga(info, xen_start_info->console.dom0.info_size); xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; + if (HYPERVISOR_dom0_op(&op) == 0) + boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; + xen_init_apic(); /* Make sure ACS will be enabled */ diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 7a769b7526cb..5a16824cc2b3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -84,6 +84,7 @@ */ DEFINE_SPINLOCK(xen_reservation_lock); +#ifdef CONFIG_X86_32 /* * Identity map, in addition to plain kernel map. This needs to be * large enough to allocate page table pages to allocate the rest. @@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock); */ #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); - +#endif #ifdef CONFIG_X86_64 /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; @@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm) static void xen_post_allocator_init(void); -static void __init xen_pagetable_init(void) -{ - paging_init(); - xen_setup_shared_info(); - xen_post_allocator_init(); -} - static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) { /* reserve the range used */ @@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) } } +#ifdef CONFIG_X86_64 +static void __init xen_cleanhighmap(unsigned long vaddr, + unsigned long vaddr_end) +{ + unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; + pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr); + + /* NOTE: The loop is more greedy than the cleanup_highmap variant. + * We include the PMD passed in on _both_ boundaries. */ + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + pmd++, vaddr += PMD_SIZE) { + if (pmd_none(*pmd)) + continue; + if (vaddr < (unsigned long) _text || vaddr > kernel_end) + set_pmd(pmd, __pmd(0)); + } + /* In case we did something silly, we should crash in this function + * instead of somewhere later and be confusing. */ + xen_mc_flush(); +} +#endif +static void __init xen_pagetable_init(void) +{ +#ifdef CONFIG_X86_64 + unsigned long size; + unsigned long addr; +#endif + paging_init(); + xen_setup_shared_info(); +#ifdef CONFIG_X86_64 + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long new_mfn_list; + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + + /* On 32-bit, we get zero so this never gets executed. */ + new_mfn_list = xen_revector_p2m_tree(); + if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { + /* using __ka address and sticking INVALID_P2M_ENTRY! */ + memset((void *)xen_start_info->mfn_list, 0xff, size); + + /* We should be in __ka space. */ + BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); + addr = xen_start_info->mfn_list; + /* We roundup to the PMD, which means that if anybody at this stage is + * using the __ka address of xen_start_info or xen_start_info->shared_info + * they are in going to crash. Fortunatly we have already revectored + * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ + size = roundup(size, PMD_SIZE); + xen_cleanhighmap(addr, addr + size); + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + memblock_free(__pa(xen_start_info->mfn_list), size); + /* And revector! Bye bye old array */ + xen_start_info->mfn_list = new_mfn_list; + } else + goto skip; + } + /* At this stage, cleanup_highmap has already cleaned __ka space + * from _brk_limit way up to the max_pfn_mapped (which is the end of + * the ramdisk). We continue on, erasing PMD entries that point to page + * tables - do note that they are accessible at this stage via __va. + * For good measure we also round up to the PMD - which means that if + * anybody is using __ka address to the initial boot-stack - and try + * to use it - they are going to crash. The xen_start_info has been + * taken care of already in xen_setup_kernel_pagetable. */ + addr = xen_start_info->pt_base; + size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); + + xen_cleanhighmap(addr, addr + size); + xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); +#ifdef DEBUG + /* This is superflous and is not neccessary, but you know what + * lets do it. The MODULES_VADDR -> MODULES_END should be clear of + * anything at this stage. */ + xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); +#endif +skip: +#endif + xen_post_allocator_init(); +} static void xen_write_cr2(unsigned long cr2) { this_cpu_read(xen_vcpu)->arch.cr2 = cr2; @@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot) if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) BUG(); } - +#ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; @@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } - +#endif void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; @@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v) for (i = 0; i < PTRS_PER_PTE; i++) pte[i] = xen_make_pte(pte[i].pte); } - +static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, + unsigned long addr) +{ + if (*pt_base == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_base)++; + } + if (*pt_end == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_end)--; + } +} /* * Set up the initial kernel pagetable. * @@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. */ -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; + unsigned long addr[3]; + unsigned long pt_base, pt_end; + unsigned i; /* max_pfn_mapped is the last pfn mapped in the initial memory * mappings. Considering that on Xen after the kernel mappings we @@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, * set max_pfn_mapped to the last real pfn mapped. */ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); + pt_end = pt_base + xen_start_info->nr_pt_frames; + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ + /* L4[272] -> level3_ident_pgt + * L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_level4_pgt); + + /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt + * L3_i[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); + /* We get [511][511] and have Xen's version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - + addr[0] = (unsigned long)pgd; + addr[1] = (unsigned long)l3; + addr[2] = (unsigned long)l2; + /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: + * Both L4[272][0] and L4[511][511] have entries that point to the same + * L2 (PMD) tables. Meaning that if you modify it in __va space + * it will be also modified in the __ka space! (But if you just + * modify the PMD table to point to other PTE's or none, then you + * are OK - which is what cleanup_highmap does) */ + copy_page(level2_ident_pgt, l2); + /* Graft it onto L4[511][511] */ + copy_page(level2_kernel_pgt, l2); + + /* Get [511][510] and graft that in level2_fixmap_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); + copy_page(level2_fixmap_pgt, l2); + /* Note that we don't do anything with level1_fixmap_pgt which + * we don't need. */ /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); @@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - /* Switch over */ - pgd = init_level4_pgt; - /* * At this stage there can be no user pgd, and no page * structure to attach it to, so make sure we just set kernel * pgd. */ xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); + __xen_write_cr3(true, __pa(init_level4_pgt)); xen_mc_issue(PARAVIRT_LAZY_CPU); - memblock_reserve(__pa(xen_start_info->pt_base), - xen_start_info->nr_pt_frames * PAGE_SIZE); + /* We can't that easily rip out L3 and L2, as the Xen pagetables are + * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for + * the initial domain. For guests using the toolstack, they are in: + * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only + * rip out the [L4] (pgd), but for guests we shave off three pages. + */ + for (i = 0; i < ARRAY_SIZE(addr); i++) + check_pt_base(&pt_base, &pt_end, addr[i]); - return pgd; + /* Our (by three pages) smaller Xen pagetable that we are using */ + memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); + /* Revector the xen_start_info */ + xen_start_info = (struct start_info *)__va(__pa(xen_start_info)); } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); @@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) */ swapper_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - memcpy(swapper_kernel_pmd, initial_kernel_pmd, - sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(swapper_kernel_pmd, initial_kernel_pmd); swapper_pg_dir[KERNEL_PGD_BOUNDARY] = __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); @@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) pv_mmu_ops.write_cr3 = &xen_write_cr3; } -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; @@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(initial_kernel_pmd, kernel_pmd); xen_map_identity_early(initial_kernel_pmd, max_pfn); - memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + copy_page(initial_page_table, pgd); initial_page_table[KERNEL_PGD_BOUNDARY] = __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); @@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); - - return initial_page_table; } #endif /* CONFIG_X86_64 */ @@ -2333,6 +2446,9 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long range; int err = 0; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EINVAL; + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == @@ -2351,8 +2467,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (err) goto out; - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) + err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); + if (err < 0) goto out; nr -= batch; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 72213da605f5..95fb2aa5927e 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -22,7 +22,7 @@ * * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to - * 512 and 1024 entries respectively. + * 512 and 1024 entries respectively. * * In short, these structures contain the Machine Frame Number (MFN) of the PFN. * @@ -139,11 +139,11 @@ * / | ~0, ~0, .... | * | \---------------/ * | - * p2m_missing p2m_missing - * /------------------\ /------------\ - * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | - * | [p2m_mid_missing]+---->| ..., ~0 | - * \------------------/ \------------/ + * p2m_mid_missing p2m_missing + * /-----------------\ /------------\ + * | [p2m_missing] +---->| ~0, ~0, ~0 | + * | [p2m_missing] +---->| ..., ~0 | + * \-----------------/ \------------/ * * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ @@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void) m2p_override_init(); } +#ifdef CONFIG_X86_64 +#include <linux/bootmem.h> +unsigned long __init xen_revector_p2m_tree(void) +{ + unsigned long va_start; + unsigned long va_end; + unsigned long pfn; + unsigned long pfn_free = 0; + unsigned long *mfn_list = NULL; + unsigned long size; + + va_start = xen_start_info->mfn_list; + /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), + * so make sure it is rounded up to that */ + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + va_end = va_start + size; + + /* If we were revectored already, don't do it again. */ + if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) + return 0; + + mfn_list = alloc_bootmem_align(size, PAGE_SIZE); + if (!mfn_list) { + pr_warn("Could not allocate space for a new P2M tree!\n"); + return xen_start_info->mfn_list; + } + /* Fill it out with INVALID_P2M_ENTRY value */ + memset(mfn_list, 0xFF, size); + + for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx; + unsigned long *mid_p; + + if (!p2m_top[topidx]) + continue; + + if (p2m_top[topidx] == p2m_mid_missing) + continue; + + mididx = p2m_mid_index(pfn); + mid_p = p2m_top[topidx][mididx]; + if (!mid_p) + continue; + if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) + continue; + + if ((unsigned long)mid_p == INVALID_P2M_ENTRY) + continue; + + /* The old va. Rebase it on mfn_list */ + if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { + unsigned long *new; + + if (pfn_free > (size / sizeof(unsigned long))) { + WARN(1, "Only allocated for %ld pages, but we want %ld!\n", + size / sizeof(unsigned long), pfn_free); + return 0; + } + new = &mfn_list[pfn_free]; + + copy_page(new, mid_p); + p2m_top[topidx][mididx] = &mfn_list[pfn_free]; + p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]); + + pfn_free += P2M_PER_PAGE; + } + /* This should be the leafs allocated for identity from _brk. */ + } + return (unsigned long)mfn_list; + +} +#else +unsigned long __init xen_revector_p2m_tree(void) +{ + return 0; +} +#endif unsigned long get_phys_to_machine(unsigned long pfn) { unsigned topidx, mididx, idx; @@ -430,7 +508,7 @@ static void free_p2m_page(void *p) free_page((unsigned long)p); } -/* +/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 967633ad98c4..969570491c39 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -8,6 +8,14 @@ #include <xen/xen.h> #include <asm/iommu_table.h> + +#include <asm/xen/swiotlb-xen.h> +#ifdef CONFIG_X86_64 +#include <asm/iommu.h> +#include <asm/dma.h> +#endif +#include <linux/export.h> + int xen_swiotlb __read_mostly; static struct dma_map_ops xen_swiotlb_dma_ops = { @@ -34,34 +42,64 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { int __init pci_xen_swiotlb_detect(void) { + if (!xen_pv_domain()) + return 0; + /* If running as PV guest, either iommu=soft, or swiotlb=force will * activate this IOMMU. If running as PV privileged, activate it * irregardless. */ - if ((xen_initial_domain() || swiotlb || swiotlb_force) && - (xen_pv_domain())) + if ((xen_initial_domain() || swiotlb || swiotlb_force)) xen_swiotlb = 1; /* If we are running under Xen, we MUST disable the native SWIOTLB. * Don't worry about swiotlb_force flag activating the native, as * the 'swiotlb' flag is the only one turning it on. */ - if (xen_pv_domain()) - swiotlb = 0; + swiotlb = 0; +#ifdef CONFIG_X86_64 + /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 + * (so no iommu=X command line over-writes). + * Considering that PV guests do not want the *native SWIOTLB* but + * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here. + */ + if (max_pfn > MAX_DMA32_PFN) + no_iommu = 1; +#endif return xen_swiotlb; } void __init pci_xen_swiotlb_init(void) { if (xen_swiotlb) { - xen_swiotlb_init(1); + xen_swiotlb_init(1, true /* early */); dma_ops = &xen_swiotlb_dma_ops; /* Make sure ACS will be enabled */ pci_request_acs(); } } + +int pci_xen_swiotlb_init_late(void) +{ + int rc; + + if (xen_swiotlb) + return 0; + + rc = xen_swiotlb_init(1, false /* late */); + if (rc) + return rc; + + dma_ops = &xen_swiotlb_dma_ops; + /* Make sure ACS will be enabled */ + pci_request_acs(); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); + IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, - 0, + NULL, pci_xen_swiotlb_init, - 0); + NULL); diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index ffcf2615640b..0a7852483ffe 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -24,6 +24,7 @@ #include <linux/module.h> #include <xen/platform_pci.h> +#include "xen-ops.h" #define XEN_PLATFORM_ERR_MAGIC -1 #define XEN_PLATFORM_ERR_PROTOCOL -2 diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e2d62d697b5d..8971a26d21ab 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -432,6 +432,24 @@ char * __init xen_memory_setup(void) * - mfn_list * - xen_start_info * See comment above "struct start_info" in <xen/interface/xen.h> + * We tried to make the the memblock_reserve more selective so + * that it would be clear what region is reserved. Sadly we ran + * in the problem wherein on a 64-bit hypervisor with a 32-bit + * initial domain, the pt_base has the cr3 value which is not + * neccessarily where the pagetable starts! As Jan put it: " + * Actually, the adjustment turns out to be correct: The page + * tables for a 32-on-64 dom0 get allocated in the order "first L1", + * "first L2", "first L3", so the offset to the page table base is + * indeed 2. When reading xen/include/public/xen.h's comment + * very strictly, this is not a violation (since there nothing is said + * that the first thing in the page table space is pointed to by + * pt_base; I admit that this seems to be implied though, namely + * do I think that it is implied that the page table space is the + * range [pt_base, pt_base + nt_pt_frames), whereas that + * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames), + * which - without a priori knowledge - the kernel would have + * difficulty to figure out)." - so lets just fall back to the + * easy way and reserve the whole region. */ memblock_reserve(__pa(xen_start_info->mfn_list), xen_start_info->pt_base - xen_start_info->mfn_list); diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index 1cd7f4d11e29..6722e3733f02 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) info->u.text_mode_3.font_height; break; + case XEN_VGATYPE_EFI_LFB: case XEN_VGATYPE_VESA_LFB: if (size < offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps)) @@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) screen_info->blue_pos = info->u.vesa_lfb.blue_pos; screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; + + if (info->video_type == XEN_VGATYPE_EFI_LFB) { + screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; + break; + } + if (size >= offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps) + sizeof(info->u.vesa_lfb.gbl_caps)) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index aaa7291c9259..7faed5869e5b 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -28,9 +28,61 @@ ENTRY(startup_xen) __FINIT .pushsection .text - .align PAGE_SIZE + .balign PAGE_SIZE ENTRY(hypercall_page) - .skip PAGE_SIZE +#define NEXT_HYPERCALL(x) \ + ENTRY(xen_hypercall_##x) \ + .skip 32 + +NEXT_HYPERCALL(set_trap_table) +NEXT_HYPERCALL(mmu_update) +NEXT_HYPERCALL(set_gdt) +NEXT_HYPERCALL(stack_switch) +NEXT_HYPERCALL(set_callbacks) +NEXT_HYPERCALL(fpu_taskswitch) +NEXT_HYPERCALL(sched_op_compat) +NEXT_HYPERCALL(platform_op) +NEXT_HYPERCALL(set_debugreg) +NEXT_HYPERCALL(get_debugreg) +NEXT_HYPERCALL(update_descriptor) +NEXT_HYPERCALL(ni) +NEXT_HYPERCALL(memory_op) +NEXT_HYPERCALL(multicall) +NEXT_HYPERCALL(update_va_mapping) +NEXT_HYPERCALL(set_timer_op) +NEXT_HYPERCALL(event_channel_op_compat) +NEXT_HYPERCALL(xen_version) +NEXT_HYPERCALL(console_io) +NEXT_HYPERCALL(physdev_op_compat) +NEXT_HYPERCALL(grant_table_op) +NEXT_HYPERCALL(vm_assist) +NEXT_HYPERCALL(update_va_mapping_otherdomain) +NEXT_HYPERCALL(iret) +NEXT_HYPERCALL(vcpu_op) +NEXT_HYPERCALL(set_segment_base) +NEXT_HYPERCALL(mmuext_op) +NEXT_HYPERCALL(xsm_op) +NEXT_HYPERCALL(nmi_op) +NEXT_HYPERCALL(sched_op) +NEXT_HYPERCALL(callback_op) +NEXT_HYPERCALL(xenoprof_op) +NEXT_HYPERCALL(event_channel_op) +NEXT_HYPERCALL(physdev_op) +NEXT_HYPERCALL(hvm_op) +NEXT_HYPERCALL(sysctl) +NEXT_HYPERCALL(domctl) +NEXT_HYPERCALL(kexec_op) +NEXT_HYPERCALL(tmem_op) /* 38 */ +ENTRY(xen_hypercall_rsvr) + .skip 320 +NEXT_HYPERCALL(mca) /* 48 */ +NEXT_HYPERCALL(arch_1) +NEXT_HYPERCALL(arch_2) +NEXT_HYPERCALL(arch_3) +NEXT_HYPERCALL(arch_4) +NEXT_HYPERCALL(arch_5) +NEXT_HYPERCALL(arch_6) + .balign PAGE_SIZE .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c150154..bb5a8105ea86 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); -pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_reserve_top(void); extern unsigned long xen_max_p2m_pfn; @@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); +unsigned long __init xen_revector_p2m_tree(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); |