diff options
Diffstat (limited to 'arch')
971 files changed, 20603 insertions, 15069 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index a4429bcd609e..8d2ae24b9f4a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -365,6 +365,9 @@ config HAVE_IRQ_TIME_ACCOUNTING config HAVE_ARCH_TRANSPARENT_HUGEPAGE bool +config HAVE_ARCH_SOFT_DIRTY + bool + config HAVE_MOD_ARCH_SPECIFIC bool help diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h index c5b5d6bac9ed..14ce27bccd24 100644 --- a/arch/alpha/include/asm/mmzone.h +++ b/arch/alpha/include/asm/mmzone.h @@ -71,8 +71,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) - #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) #define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> 32)) #define pte_pfn(pte) (pte_val(pte) >> 32) diff --git a/arch/alpha/include/uapi/asm/fcntl.h b/arch/alpha/include/uapi/asm/fcntl.h index 6d9e805f18a7..dfdadb0b4bef 100644 --- a/arch/alpha/include/uapi/asm/fcntl.h +++ b/arch/alpha/include/uapi/asm/fcntl.h @@ -32,6 +32,7 @@ #define O_SYNC (__O_SYNC|O_DSYNC) #define O_PATH 040000000 +#define O_TMPFILE 0100000000 #define F_GETLK 7 #define F_SETLK 8 diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index eee6ea76bdaf..4885825e498d 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -81,4 +81,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 1d4aabfcf9a1..837c0fa58317 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -238,8 +238,8 @@ nautilus_init_pci(void) if (pci_mem < memtop) memtop = pci_mem; if (memtop > alpha_mv.min_mem_address) { - free_reserved_area((unsigned long)__va(alpha_mv.min_mem_address), - (unsigned long)__va(memtop), 0, NULL); + free_reserved_area(__va(alpha_mv.min_mem_address), + __va(memtop), -1, NULL); printk("nautilus_init_pci: %ldk freed\n", (memtop - alpha_mv.min_mem_address) >> 10); } diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 0ba85ee4a466..a1bea91df56a 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -276,56 +276,25 @@ srm_paging_stop (void) } #endif -#ifndef CONFIG_DISCONTIGMEM -static void __init -printk_memory_info(void) -{ - unsigned long codesize, reservedpages, datasize, initsize, tmp; - extern int page_is_ram(unsigned long) __init; - - /* printk all informations */ - reservedpages = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - /* - * Only count reserved RAM pages - */ - if (page_is_ram(tmp) && PageReserved(mem_map+tmp)) - reservedpages++; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_data; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, %luk data, %luk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); -} - void __init mem_init(void) { - max_mapnr = num_physpages = max_low_pfn; - totalram_pages += free_all_bootmem(); + set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - - printk_memory_info(); + free_all_bootmem(); + mem_init_print_info(NULL); } -#endif /* CONFIG_DISCONTIGMEM */ void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 33885048fa36..d543d71c28b4 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -129,8 +129,6 @@ setup_memory_node(int nid, void *kernel_end) if (node_max_pfn > max_low_pfn) max_pfn = max_low_pfn = node_max_pfn; - num_physpages += node_max_pfn - node_min_pfn; - #if 0 /* we'll try this one again in a little while */ /* Cute trick to make sure our local node data is on local memory */ node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT)); @@ -321,41 +319,3 @@ void __init paging_init(void) /* Initialize the kernel's ZERO_PGE. */ memset((void *)ZERO_PGE, 0, PAGE_SIZE); } - -void __init mem_init(void) -{ - unsigned long codesize, reservedpages, datasize, initsize, pfn; - extern int page_is_ram(unsigned long) __init; - unsigned long nid, i; - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - - reservedpages = 0; - for_each_online_node(nid) { - /* - * This will free up the bootmem, ie, slot 0 memory - */ - totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); - - pfn = NODE_DATA(nid)->node_start_pfn; - for (i = 0; i < node_spanned_pages(nid); i++, pfn++) - if (page_is_ram(pfn) && - PageReserved(nid_page_nr(nid, i))) - reservedpages++; - } - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_data; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, " - "%luk data, %luk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); -#if 0 - mem_stress(); -#endif -} diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5917099470ea..68fcbb2d59e2 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -34,6 +34,7 @@ config ARC select OF select OF_EARLY_FLATTREE select PERF_USE_VMALLOC + select HAVE_DEBUG_STACKOVERFLOW config SCHED_OMIT_FRAME_POINTER def_bool y @@ -184,6 +185,7 @@ config ARC_CACHE_PAGES config ARC_CACHE_VIPT_ALIASING bool "Support VIPT Aliasing D$" + depends on ARC_HAS_DCACHE default n endif #ARC_CACHE @@ -361,13 +363,6 @@ config ARC_MISALIGN_ACCESS Use ONLY-IF-ABS-NECESSARY as it will be very slow and also can hide potential bugs in code -config ARC_STACK_NONEXEC - bool "Make stack non-executable" - default n - help - To disable the execute permissions of stack/heap of processes - which are enabled by default. - config HZ int "Timer Frequency" default 100 diff --git a/arch/arc/Kconfig.debug b/arch/arc/Kconfig.debug index 962c6099659e..a7fc0da25650 100644 --- a/arch/arc/Kconfig.debug +++ b/arch/arc/Kconfig.debug @@ -15,13 +15,6 @@ config EARLY_PRINTK with klogd/syslogd or the X server. You should normally N here, unless you want to debug such a crash. -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - help - This option will cause messages to be printed if free stack space - drops below a certain limit. - config 16KSTACKS bool "Use 16Kb for kernel stacks instead of 8Kb" help diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 183397fd289e..8c0b1aa56f7e 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -9,25 +9,27 @@ UTS_MACHINE := arc ifeq ($(CROSS_COMPILE),) -CROSS_COMPILE := arc-elf32- +CROSS_COMPILE := arc-linux-uclibc- endif KBUILD_DEFCONFIG := fpga_defconfig cflags-y += -mA7 -fno-common -pipe -fno-builtin -D__linux__ -LINUXINCLUDE += -include ${src}/arch/arc/include/asm/defines.h - ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using # any kernel headers, and missing the r25 global register -# Can't do unconditionally (like above) because of recursive include issues +# Can't do unconditionally because of recursive include issues # due to <linux/thread_info.h> LINUXINCLUDE += -include ${src}/arch/arc/include/asm/current.h endif -atleast_gcc44 := $(call cc-ifversion, -gt, 0402, y) +upto_gcc42 := $(call cc-ifversion, -le, 0402, y) +upto_gcc44 := $(call cc-ifversion, -le, 0404, y) +atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) +atleast_gcc48 := $(call cc-ifversion, -ge, 0408, y) + cflags-$(atleast_gcc44) += -fsection-anchors cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock @@ -35,6 +37,11 @@ cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape cflags-$(CONFIG_ARC_HAS_RTSC) += -mrtsc cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables +# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok +ifeq ($(atleast_gcc48),y) +cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 +endif + ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 cflags-y += -O3 @@ -48,11 +55,10 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB -# STAR 9000518362: +# STAR 9000518362: (fixed with binutils shipping with gcc 4.8) # arc-linux-uclibc-ld (buildroot) or arceb-elf32-ld (EZChip) don't accept -# --build-id w/o "-marclinux". -# Default arc-elf32-ld is OK -ldflags-y += -marclinux +# --build-id w/o "-marclinux". Default arc-elf32-ld is OK +ldflags-$(upto_gcc44) += -marclinux ARC_LIBGCC := -mA7 cflags-$(CONFIG_ARC_HAS_HW_MPY) += -multcost=16 @@ -66,8 +72,8 @@ ifndef CONFIG_ARC_HAS_HW_MPY # With gcc 4.4.7, -mno-mpy is enough to make any other related adjustments, # e.g. increased cost of MPY. With gcc 4.2.1 this had to be explicitly hinted - ARC_LIBGCC := -marc600 - ifneq ($(atleast_gcc44),y) + ifeq ($(upto_gcc42),y) + ARC_LIBGCC := -marc600 cflags-y += -multcost=30 endif endif diff --git a/arch/arc/boot/dts/abilis_tb100.dtsi b/arch/arc/boot/dts/abilis_tb100.dtsi index 941ad118a7e7..d9f8249aa66e 100644 --- a/arch/arc/boot/dts/abilis_tb100.dtsi +++ b/arch/arc/boot/dts/abilis_tb100.dtsi @@ -21,10 +21,6 @@ /include/ "abilis_tb10x.dtsi" -/* interrupt specifiers - * -------------------- - * 0: rising, 1: low, 2: high, 3: falling, - */ / { clock-frequency = <500000000>; /* 500 MHZ */ @@ -173,7 +169,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF140000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -185,7 +181,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF141000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -197,7 +193,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF142000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -209,7 +205,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF143000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -221,7 +217,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF144000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -233,7 +229,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF145000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -245,7 +241,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF146000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -257,7 +253,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF147000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -269,7 +265,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF148000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -281,7 +277,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF149000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -293,7 +289,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF14A000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -305,7 +301,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF14B000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -317,7 +313,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF14C000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -329,7 +325,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF14D000 0x1000>; gpio-controller; #gpio-cells = <1>; diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts index 0fa0d4abe795..ebc313a9f5b2 100644 --- a/arch/arc/boot/dts/abilis_tb100_dvk.dts +++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts @@ -45,19 +45,19 @@ }; i2c0: i2c@FF120000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; i2c1: i2c@FF121000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; i2c2: i2c@FF122000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; i2c3: i2c@FF123000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; i2c4: i2c@FF124000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; leds { diff --git a/arch/arc/boot/dts/abilis_tb101.dtsi b/arch/arc/boot/dts/abilis_tb101.dtsi index fd25c212049f..da8ca7941e67 100644 --- a/arch/arc/boot/dts/abilis_tb101.dtsi +++ b/arch/arc/boot/dts/abilis_tb101.dtsi @@ -21,10 +21,6 @@ /include/ "abilis_tb10x.dtsi" -/* interrupt specifiers - * -------------------- - * 0: rising, 1: low, 2: high, 3: falling, - */ / { clock-frequency = <500000000>; /* 500 MHZ */ @@ -182,7 +178,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF140000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -194,7 +190,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF141000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -206,7 +202,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF142000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -218,7 +214,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF143000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -230,7 +226,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF144000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -242,7 +238,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF145000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -254,7 +250,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF146000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -266,7 +262,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF147000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -278,7 +274,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF148000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -290,7 +286,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF149000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -302,7 +298,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF14A000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -314,7 +310,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF14B000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -326,7 +322,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF14C000 0x1000>; gpio-controller; #gpio-cells = <1>; @@ -338,7 +334,7 @@ interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; - interrupts = <27 1>; + interrupts = <27 2>; reg = <0xFF14D000 0x1000>; gpio-controller; #gpio-cells = <1>; diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts index a4d80ce283ae..b204657993aa 100644 --- a/arch/arc/boot/dts/abilis_tb101_dvk.dts +++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts @@ -45,19 +45,19 @@ }; i2c0: i2c@FF120000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; i2c1: i2c@FF121000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; i2c2: i2c@FF122000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; i2c3: i2c@FF123000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; i2c4: i2c@FF124000 { - sda-hold-time = <432>; + i2c-sda-hold-time-ns = <432>; }; leds { diff --git a/arch/arc/boot/dts/abilis_tb10x.dtsi b/arch/arc/boot/dts/abilis_tb10x.dtsi index b97e3051ba4b..edf56f4749e1 100644 --- a/arch/arc/boot/dts/abilis_tb10x.dtsi +++ b/arch/arc/boot/dts/abilis_tb10x.dtsi @@ -19,10 +19,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* interrupt specifiers - * -------------------- - * 0: rising, 1: low, 2: high, 3: falling, - */ / { compatible = "abilis,arc-tb10x"; @@ -78,7 +74,7 @@ #interrupt-cells = <1>; }; tb10x_ictl: pic@fe002000 { - compatible = "abilis,tb10x_ictl"; + compatible = "abilis,tb10x-ictl"; reg = <0xFE002000 0x20>; interrupt-controller; #interrupt-cells = <2>; @@ -91,7 +87,7 @@ compatible = "snps,dw-apb-uart"; reg = <0xFF100000 0x100>; clock-frequency = <166666666>; - interrupts = <25 1>; + interrupts = <25 8>; reg-shift = <2>; reg-io-width = <4>; interrupt-parent = <&tb10x_ictl>; @@ -100,7 +96,7 @@ compatible = "snps,dwmac-3.70a","snps,dwmac"; reg = <0xFE100000 0x1058>; interrupt-parent = <&tb10x_ictl>; - interrupts = <6 1>; + interrupts = <6 8>; interrupt-names = "macirq"; clocks = <&ahb_clk>; clock-names = "stmmaceth"; @@ -109,7 +105,7 @@ compatible = "snps,dma-spear1340"; reg = <0xFE000000 0x400>; interrupt-parent = <&tb10x_ictl>; - interrupts = <14 1>; + interrupts = <14 8>; dma-channels = <6>; dma-requests = <0>; dma-masters = <1>; @@ -128,7 +124,7 @@ compatible = "snps,designware-i2c"; reg = <0xFF120000 0x1000>; interrupt-parent = <&tb10x_ictl>; - interrupts = <12 1>; + interrupts = <12 8>; clocks = <&ahb_clk>; }; i2c1: i2c@FF121000 { @@ -137,7 +133,7 @@ compatible = "snps,designware-i2c"; reg = <0xFF121000 0x1000>; interrupt-parent = <&tb10x_ictl>; - interrupts = <12 1>; + interrupts = <12 8>; clocks = <&ahb_clk>; }; i2c2: i2c@FF122000 { @@ -146,7 +142,7 @@ compatible = "snps,designware-i2c"; reg = <0xFF122000 0x1000>; interrupt-parent = <&tb10x_ictl>; - interrupts = <12 1>; + interrupts = <12 8>; clocks = <&ahb_clk>; }; i2c3: i2c@FF123000 { @@ -155,7 +151,7 @@ compatible = "snps,designware-i2c"; reg = <0xFF123000 0x1000>; interrupt-parent = <&tb10x_ictl>; - interrupts = <12 1>; + interrupts = <12 8>; clocks = <&ahb_clk>; }; i2c4: i2c@FF124000 { @@ -164,7 +160,7 @@ compatible = "snps,designware-i2c"; reg = <0xFF124000 0x1000>; interrupt-parent = <&tb10x_ictl>; - interrupts = <12 1>; + interrupts = <12 8>; clocks = <&ahb_clk>; }; @@ -176,7 +172,7 @@ num-cs = <1>; reg = <0xFE010000 0x20>; interrupt-parent = <&tb10x_ictl>; - interrupts = <26 1>; + interrupts = <26 8>; clocks = <&ahb_clk>; }; spi1: spi@0xFE011000 { @@ -187,7 +183,7 @@ num-cs = <2>; reg = <0xFE011000 0x20>; interrupt-parent = <&tb10x_ictl>; - interrupts = <10 1>; + interrupts = <10 8>; clocks = <&ahb_clk>; }; @@ -195,7 +191,7 @@ compatible = "abilis,tb100-tsm"; reg = <0xff316000 0x400>; interrupt-parent = <&tb10x_ictl>; - interrupts = <17 1>; + interrupts = <17 8>; output-clkdiv = <4>; global-packet-delay = <0x21>; port-packet-delay = <0>; @@ -213,7 +209,7 @@ "cpuctrl", "a6it_int_force"; interrupt-parent = <&tb10x_ictl>; - interrupts = <20 1>, <19 1>; + interrupts = <20 2>, <19 2>; interrupt-names = "cmd_irq", "event_irq"; }; tb10x_mdsc0: tb10x-mdscr@FF300000 { @@ -239,7 +235,7 @@ compatible = "abilis,tb100-wfb"; reg = <0xff319000 0x1000>; interrupt-parent = <&tb10x_ictl>; - interrupts = <16 1>; + interrupts = <16 8>; }; }; }; diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts index bae4f936cb03..4fb2d6f655bd 100644 --- a/arch/arc/boot/dts/angel4.dts +++ b/arch/arc/boot/dts/angel4.dts @@ -51,5 +51,21 @@ current-speed = <115200>; status = "okay"; }; + + ethernet@c0fc2000 { + compatible = "snps,arc-emac"; + reg = <0xc0fc2000 0x3c>; + interrupts = <6>; + mac-address = [ 00 11 22 33 44 55 ]; + clock-frequency = <80000000>; + max-speed = <100>; + phy = <&phy0>; + + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@0 { + reg = <1>; + }; + }; }; }; diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig index 95350be6ef6f..4ca50f1f8d05 100644 --- a/arch/arc/configs/fpga_defconfig +++ b/arch/arc/configs/fpga_defconfig @@ -1,4 +1,4 @@ -CONFIG_CROSS_COMPILE="arc-elf32-" +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set @@ -38,6 +38,9 @@ CONFIG_INET=y # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +CONFIG_ARC_EMAC=y +CONFIG_LXT_PHY=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 446c96c24eff..451af30914f6 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -1,4 +1,4 @@ -CONFIG_CROSS_COMPILE="arc-elf32-" +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 4fa5cd9f2202..6be6492442d6 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -1,4 +1,4 @@ -CONFIG_CROSS_COMPILE="arc-elf32-" +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="tb10x" CONFIG_SYSVIPC=y diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 1b907c465666..355cb470c2a4 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -20,7 +20,6 @@ #define ARC_REG_PERIBASE_BCR 0x69 #define ARC_REG_FP_BCR 0x6B /* Single-Precision FPU */ #define ARC_REG_DPFP_BCR 0x6C /* Dbl Precision FPU */ -#define ARC_REG_MMU_BCR 0x6f #define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ #define ARC_REG_TIMERS_BCR 0x75 #define ARC_REG_ICCM_BCR 0x78 @@ -34,22 +33,12 @@ #define ARC_REG_D_UNCACH_BCR 0x6A /* status32 Bits Positions */ -#define STATUS_H_BIT 0 /* CPU Halted */ -#define STATUS_E1_BIT 1 /* Int 1 enable */ -#define STATUS_E2_BIT 2 /* Int 2 enable */ -#define STATUS_A1_BIT 3 /* Int 1 active */ -#define STATUS_A2_BIT 4 /* Int 2 active */ #define STATUS_AE_BIT 5 /* Exception active */ #define STATUS_DE_BIT 6 /* PC is in delay slot */ #define STATUS_U_BIT 7 /* User/Kernel mode */ #define STATUS_L_BIT 12 /* Loop inhibit */ /* These masks correspond to the status word(STATUS_32) bits */ -#define STATUS_H_MASK (1<<STATUS_H_BIT) -#define STATUS_E1_MASK (1<<STATUS_E1_BIT) -#define STATUS_E2_MASK (1<<STATUS_E2_BIT) -#define STATUS_A1_MASK (1<<STATUS_A1_BIT) -#define STATUS_A2_MASK (1<<STATUS_A2_BIT) #define STATUS_AE_MASK (1<<STATUS_AE_BIT) #define STATUS_DE_MASK (1<<STATUS_DE_BIT) #define STATUS_U_MASK (1<<STATUS_U_BIT) @@ -71,6 +60,7 @@ #define ECR_V_ITLB_MISS 0x21 #define ECR_V_DTLB_MISS 0x22 #define ECR_V_PROTV 0x23 +#define ECR_V_TRAP 0x25 /* Protection Violation Exception Cause Code Values */ #define ECR_C_PROTV_INST_FETCH 0x00 @@ -79,94 +69,23 @@ #define ECR_C_PROTV_XCHG 0x03 #define ECR_C_PROTV_MISALIG_DATA 0x04 +#define ECR_C_BIT_PROTV_MISALIG_DATA 10 + +/* Machine Check Cause Code Values */ +#define ECR_C_MCHK_DUP_TLB 0x01 + /* DTLB Miss Exception Cause Code Values */ #define ECR_C_BIT_DTLB_LD_MISS 8 #define ECR_C_BIT_DTLB_ST_MISS 9 +/* Dummy ECR values for Interrupts */ +#define event_IRQ1 0x0031abcd +#define event_IRQ2 0x0032abcd /* Auxiliary registers */ #define AUX_IDENTITY 4 #define AUX_INTR_VEC_BASE 0x25 -#define AUX_IRQ_LEV 0x200 /* IRQ Priority: L1 or L2 */ -#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */ -#define AUX_IRQ_LV12 0x43 /* interrupt level register */ - -#define AUX_IENABLE 0x40c -#define AUX_ITRIGGER 0x40d -#define AUX_IPULSE 0x415 - -/* Timer related Aux registers */ -#define ARC_REG_TIMER0_LIMIT 0x23 /* timer 0 limit */ -#define ARC_REG_TIMER0_CTRL 0x22 /* timer 0 control */ -#define ARC_REG_TIMER0_CNT 0x21 /* timer 0 count */ -#define ARC_REG_TIMER1_LIMIT 0x102 /* timer 1 limit */ -#define ARC_REG_TIMER1_CTRL 0x101 /* timer 1 control */ -#define ARC_REG_TIMER1_CNT 0x100 /* timer 1 count */ - -#define TIMER_CTRL_IE (1 << 0) /* Interupt when Count reachs limit */ -#define TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */ - -/* MMU Management regs */ -#define ARC_REG_TLBPD0 0x405 -#define ARC_REG_TLBPD1 0x406 -#define ARC_REG_TLBINDEX 0x407 -#define ARC_REG_TLBCOMMAND 0x408 -#define ARC_REG_PID 0x409 -#define ARC_REG_SCRATCH_DATA0 0x418 - -/* Bits in MMU PID register */ -#define MMU_ENABLE (1 << 31) /* Enable MMU for process */ - -/* Error code if probe fails */ -#define TLB_LKUP_ERR 0x80000000 - -/* TLB Commands */ -#define TLBWrite 0x1 -#define TLBRead 0x2 -#define TLBGetIndex 0x3 -#define TLBProbe 0x4 - -#if (CONFIG_ARC_MMU_VER >= 2) -#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ -#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ -#else -#undef TLBWriteNI /* These cmds don't exist on older MMU */ -#undef TLBIVUTLB -#endif -/* Instruction cache related Auxiliary registers */ -#define ARC_REG_IC_BCR 0x77 /* Build Config reg */ -#define ARC_REG_IC_IVIC 0x10 -#define ARC_REG_IC_CTRL 0x11 -#define ARC_REG_IC_IVIL 0x19 -#if (CONFIG_ARC_MMU_VER > 2) -#define ARC_REG_IC_PTAG 0x1E -#endif - -/* Bit val in IC_CTRL */ -#define IC_CTRL_CACHE_DISABLE 0x1 - -/* Data cache related Auxiliary registers */ -#define ARC_REG_DC_BCR 0x72 -#define ARC_REG_DC_IVDC 0x47 -#define ARC_REG_DC_CTRL 0x48 -#define ARC_REG_DC_IVDL 0x4A -#define ARC_REG_DC_FLSH 0x4B -#define ARC_REG_DC_FLDL 0x4C -#if (CONFIG_ARC_MMU_VER > 2) -#define ARC_REG_DC_PTAG 0x5C -#endif - -/* Bit val in DC_CTRL */ -#define DC_CTRL_INV_MODE_FLUSH 0x40 -#define DC_CTRL_FLUSH_STATUS 0x100 - -/* MMU Management regs */ -#define ARC_REG_PID 0x409 -#define ARC_REG_SCRATCH_DATA0 0x418 - -/* Bits in MMU PID register */ -#define MMU_ENABLE (1 << 31) /* Enable MMU for process */ /* * Floating Pt Registers @@ -293,24 +212,6 @@ struct bcr_identity { #endif }; -struct bcr_mmu_1_2 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; -#else - unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; -#endif -}; - -struct bcr_mmu_3 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, - u_itlb:4, u_dtlb:4; -#else - unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, - ways:4, ver:8; -#endif -}; - #define EXTN_SWAP_VALID 0x1 #define EXTN_NORM_VALID 0x2 #define EXTN_MINMAX_VALID 0x2 @@ -343,14 +244,6 @@ struct bcr_extn_xymem { #endif }; -struct bcr_cache { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; -#else - unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; -#endif -}; - struct bcr_perip { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int start:8, pad2:8, sz:8, pad:8; @@ -403,7 +296,7 @@ struct cpuinfo_arc_mmu { }; struct cpuinfo_arc_cache { - unsigned int has_aliasing, sz, line_len, assoc, ver; + unsigned int sz, line_len, assoc, ver; }; struct cpuinfo_arc_ccm { diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index 2ad8f9b1c54b..5b18e94c6678 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -18,9 +18,8 @@ struct task_struct; void show_regs(struct pt_regs *regs); void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs); void show_kernel_fault_diag(const char *str, struct pt_regs *regs, - unsigned long address, unsigned long cause_reg); -void die(const char *str, struct pt_regs *regs, unsigned long address, - unsigned long cause_reg); + unsigned long address); +void die(const char *str, struct pt_regs *regs, unsigned long address); #define BUG() do { \ dump_stack(); \ diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index d5555fe4742a..5802849a6cae 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -18,21 +18,19 @@ #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define ARC_ICACHE_WAYS 2 -#define ARC_DCACHE_WAYS 4 - -/* Helpers */ +/* For a rare case where customers have differently config I/D */ #define ARC_ICACHE_LINE_LEN L1_CACHE_BYTES #define ARC_DCACHE_LINE_LEN L1_CACHE_BYTES #define ICACHE_LINE_MASK (~(ARC_ICACHE_LINE_LEN - 1)) #define DCACHE_LINE_MASK (~(ARC_DCACHE_LINE_LEN - 1)) -#if ARC_ICACHE_LINE_LEN != ARC_DCACHE_LINE_LEN -#error "Need to fix some code as I/D cache lines not same" -#else -#define is_not_cache_aligned(p) ((unsigned long)p & (~DCACHE_LINE_MASK)) -#endif +/* + * ARC700 doesn't cache any access in top 256M. + * Ideal for wiring memory mapped peripherals as we don't need to do + * explicit uncached accesses (LD.di/ST.di) hence more portable drivers + */ +#define ARC_UNCACHED_ADDR_SPACE 0xc0000000 #ifndef __ASSEMBLY__ @@ -57,16 +55,10 @@ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES -/* - * ARC700 doesn't cache any access in top 256M. - * Ideal for wiring memory mapped peripherals as we don't need to do - * explicit uncached accesses (LD.di/ST.di) hence more portable drivers - */ -#define ARC_UNCACHED_ADDR_SPACE 0xc0000000 - extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); extern void __init read_decode_cache_bcr(void); -#endif + +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_CACHE_H */ diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index ef62682e8d95..6abc4972bc93 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -81,16 +81,19 @@ void flush_anon_page(struct vm_area_struct *vma, #endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */ /* + * A new pagecache page has PG_arch_1 clear - thus dcache dirty by default + * This works around some PIO based drivers which don't call flush_dcache_page + * to record that they dirtied the dcache + */ +#define PG_dc_clean PG_arch_1 + +/* * Simple wrapper over config option * Bootup code ensures that hardware matches kernel configuration */ static inline int cache_is_vipt_aliasing(void) { -#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING - return 1; -#else - return 0; -#endif + return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); } #define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & 1) diff --git a/arch/arc/include/asm/defines.h b/arch/arc/include/asm/defines.h deleted file mode 100644 index 6097bb439cc5..000000000000 --- a/arch/arc/include/asm/defines.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __ARC_ASM_DEFINES_H__ -#define __ARC_ASM_DEFINES_H__ - -#if defined(CONFIG_ARC_MMU_V1) -#define CONFIG_ARC_MMU_VER 1 -#elif defined(CONFIG_ARC_MMU_V2) -#define CONFIG_ARC_MMU_VER 2 -#elif defined(CONFIG_ARC_MMU_V3) -#define CONFIG_ARC_MMU_VER 3 -#endif - -#ifdef CONFIG_ARC_HAS_LLSC -#define __CONFIG_ARC_HAS_LLSC_VAL 1 -#else -#define __CONFIG_ARC_HAS_LLSC_VAL 0 -#endif - -#ifdef CONFIG_ARC_HAS_SWAPE -#define __CONFIG_ARC_HAS_SWAPE_VAL 1 -#else -#define __CONFIG_ARC_HAS_SWAPE_VAL 0 -#endif - -#ifdef CONFIG_ARC_HAS_RTSC -#define __CONFIG_ARC_HAS_RTSC_VAL 1 -#else -#define __CONFIG_ARC_HAS_RTSC_VAL 0 -#endif - -#ifdef CONFIG_ARC_MMU_SASID -#define __CONFIG_ARC_MMU_SASID_VAL 1 -#else -#define __CONFIG_ARC_MMU_SASID_VAL 0 -#endif - -#ifdef CONFIG_ARC_HAS_ICACHE -#define __CONFIG_ARC_HAS_ICACHE 1 -#else -#define __CONFIG_ARC_HAS_ICACHE 0 -#endif - -#ifdef CONFIG_ARC_HAS_DCACHE -#define __CONFIG_ARC_HAS_DCACHE 1 -#else -#define __CONFIG_ARC_HAS_DCACHE 0 -#endif - -#endif /* __ARC_ASM_DEFINES_H__ */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index eb2ae53187d9..8943c028d4bb 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -50,194 +50,177 @@ * Eff Addr for load = [reg2] */ +.macro PUSH reg + st.a \reg, [sp, -4] +.endm + +.macro PUSHAX aux + lr r9, [\aux] + PUSH r9 +.endm + +.macro POP reg + ld.ab \reg, [sp, 4] +.endm + +.macro POPAX aux + POP r9 + sr r9, [\aux] +.endm + /*-------------------------------------------------------------- - * Save caller saved registers (scratch registers) ( r0 - r12 ) - * Registers are pushed / popped in the order defined in struct ptregs - * in asm/ptrace.h + * Helpers to save/restore Scratch Regs: + * used by Interrupt/Exception Prologue/Epilogue *-------------------------------------------------------------*/ -.macro SAVE_CALLER_SAVED - st.a r0, [sp, -4] - st.a r1, [sp, -4] - st.a r2, [sp, -4] - st.a r3, [sp, -4] - st.a r4, [sp, -4] - st.a r5, [sp, -4] - st.a r6, [sp, -4] - st.a r7, [sp, -4] - st.a r8, [sp, -4] - st.a r9, [sp, -4] - st.a r10, [sp, -4] - st.a r11, [sp, -4] - st.a r12, [sp, -4] +.macro SAVE_R0_TO_R12 + PUSH r0 + PUSH r1 + PUSH r2 + PUSH r3 + PUSH r4 + PUSH r5 + PUSH r6 + PUSH r7 + PUSH r8 + PUSH r9 + PUSH r10 + PUSH r11 + PUSH r12 +.endm + +.macro RESTORE_R12_TO_R0 + POP r12 + POP r11 + POP r10 + POP r9 + POP r8 + POP r7 + POP r6 + POP r5 + POP r4 + POP r3 + POP r2 + POP r1 + POP r0 + +#ifdef CONFIG_ARC_CURR_IN_REG + ld r25, [sp, 12] +#endif .endm /*-------------------------------------------------------------- - * Restore caller saved registers (scratch registers) + * Helpers to save/restore callee-saved regs: + * used by several macros below *-------------------------------------------------------------*/ -.macro RESTORE_CALLER_SAVED - ld.ab r12, [sp, 4] - ld.ab r11, [sp, 4] - ld.ab r10, [sp, 4] - ld.ab r9, [sp, 4] - ld.ab r8, [sp, 4] - ld.ab r7, [sp, 4] - ld.ab r6, [sp, 4] - ld.ab r5, [sp, 4] - ld.ab r4, [sp, 4] - ld.ab r3, [sp, 4] - ld.ab r2, [sp, 4] - ld.ab r1, [sp, 4] - ld.ab r0, [sp, 4] +.macro SAVE_R13_TO_R24 + PUSH r13 + PUSH r14 + PUSH r15 + PUSH r16 + PUSH r17 + PUSH r18 + PUSH r19 + PUSH r20 + PUSH r21 + PUSH r22 + PUSH r23 + PUSH r24 +.endm + +.macro RESTORE_R24_TO_R13 + POP r24 + POP r23 + POP r22 + POP r21 + POP r20 + POP r19 + POP r18 + POP r17 + POP r16 + POP r15 + POP r14 + POP r13 .endm +#define OFF_USER_R25_FROM_R24 (SZ_CALLEE_REGS + SZ_PT_REGS - 8)/4 /*-------------------------------------------------------------- - * Save callee saved registers (non scratch registers) ( r13 - r25 ) - * on kernel stack. - * User mode callee regs need to be saved in case of - * -fork and friends for replicating from parent to child - * -before going into do_signal( ) for ptrace/core-dump - * Special case handling is required for r25 in case it is used by kernel - * for caching task ptr. Low level exception/ISR save user mode r25 - * into task->thread.user_r25. So it needs to be retrieved from there and - * saved into kernel stack with rest of callee reg-file + * Collect User Mode callee regs as struct callee_regs - needed by + * fork/do_signal/unaligned-access-emulation. + * (By default only scratch regs are saved on entry to kernel) + * + * Special handling for r25 if used for caching Task Pointer. + * It would have been saved in task->thread.user_r25 already, but to keep + * the interface same it is copied into regular r25 placeholder in + * struct callee_regs. *-------------------------------------------------------------*/ .macro SAVE_CALLEE_SAVED_USER - st.a r13, [sp, -4] - st.a r14, [sp, -4] - st.a r15, [sp, -4] - st.a r16, [sp, -4] - st.a r17, [sp, -4] - st.a r18, [sp, -4] - st.a r19, [sp, -4] - st.a r20, [sp, -4] - st.a r21, [sp, -4] - st.a r22, [sp, -4] - st.a r23, [sp, -4] - st.a r24, [sp, -4] + + SAVE_R13_TO_R24 #ifdef CONFIG_ARC_CURR_IN_REG ; Retrieve orig r25 and save it on stack - ld r12, [r25, TASK_THREAD + THREAD_USER_R25] + ld.as r12, [sp, OFF_USER_R25_FROM_R24] st.a r12, [sp, -4] #else - st.a r25, [sp, -4] + PUSH r25 #endif - /* move up by 1 word to "create" callee_regs->"stack_place_holder" */ - sub sp, sp, 4 .endm /*-------------------------------------------------------------- - * Save callee saved registers (non scratch registers) ( r13 - r25 ) - * kernel mode callee regs needed to be saved in case of context switch - * If r25 is used for caching task pointer then that need not be saved - * as it can be re-created from current task global + * Save kernel Mode callee regs at the time of Contect Switch. + * + * Special handling for r25 if used for caching Task Pointer. + * Kernel simply skips saving it since it will be loaded with + * incoming task pointer anyways *-------------------------------------------------------------*/ .macro SAVE_CALLEE_SAVED_KERNEL - st.a r13, [sp, -4] - st.a r14, [sp, -4] - st.a r15, [sp, -4] - st.a r16, [sp, -4] - st.a r17, [sp, -4] - st.a r18, [sp, -4] - st.a r19, [sp, -4] - st.a r20, [sp, -4] - st.a r21, [sp, -4] - st.a r22, [sp, -4] - st.a r23, [sp, -4] - st.a r24, [sp, -4] + + SAVE_R13_TO_R24 + #ifdef CONFIG_ARC_CURR_IN_REG - sub sp, sp, 8 -#else - st.a r25, [sp, -4] sub sp, sp, 4 +#else + PUSH r25 #endif .endm /*-------------------------------------------------------------- - * RESTORE_CALLEE_SAVED_KERNEL: - * Loads callee (non scratch) Reg File by popping from Kernel mode stack. - * This is reverse of SAVE_CALLEE_SAVED, - * - * NOTE: - * Ideally this shd only be called in switch_to for loading - * switched-IN task's CALLEE Reg File. - * For all other cases RESTORE_CALLEE_SAVED_FAST must be used - * which simply pops the stack w/o touching regs. + * Opposite of SAVE_CALLEE_SAVED_KERNEL *-------------------------------------------------------------*/ .macro RESTORE_CALLEE_SAVED_KERNEL - #ifdef CONFIG_ARC_CURR_IN_REG - add sp, sp, 8 /* skip callee_reg gutter and user r25 placeholder */ + add sp, sp, 4 /* skip usual r25 placeholder */ #else - add sp, sp, 4 /* skip "callee_regs->stack_place_holder" */ - ld.ab r25, [sp, 4] + POP r25 #endif - - ld.ab r24, [sp, 4] - ld.ab r23, [sp, 4] - ld.ab r22, [sp, 4] - ld.ab r21, [sp, 4] - ld.ab r20, [sp, 4] - ld.ab r19, [sp, 4] - ld.ab r18, [sp, 4] - ld.ab r17, [sp, 4] - ld.ab r16, [sp, 4] - ld.ab r15, [sp, 4] - ld.ab r14, [sp, 4] - ld.ab r13, [sp, 4] - + RESTORE_R24_TO_R13 .endm /*-------------------------------------------------------------- - * RESTORE_CALLEE_SAVED_USER: - * This is called after do_signal where tracer might have changed callee regs - * thus we need to restore the reg file. - * Special case handling is required for r25 in case it is used by kernel - * for caching task ptr. Ptrace would have modified on-kernel-stack value of - * r25, which needs to be shoved back into task->thread.user_r25 where from - * Low level exception/ISR return code will retrieve to populate with rest of - * callee reg-file. + * Opposite of SAVE_CALLEE_SAVED_USER + * + * ptrace tracer or unaligned-access fixup might have changed a user mode + * callee reg which is saved back to usual r25 storage location *-------------------------------------------------------------*/ .macro RESTORE_CALLEE_SAVED_USER - add sp, sp, 4 /* skip "callee_regs->stack_place_holder" */ - #ifdef CONFIG_ARC_CURR_IN_REG ld.ab r12, [sp, 4] - st r12, [r25, TASK_THREAD + THREAD_USER_R25] + st.as r12, [sp, OFF_USER_R25_FROM_R24] #else - ld.ab r25, [sp, 4] + POP r25 #endif - - ld.ab r24, [sp, 4] - ld.ab r23, [sp, 4] - ld.ab r22, [sp, 4] - ld.ab r21, [sp, 4] - ld.ab r20, [sp, 4] - ld.ab r19, [sp, 4] - ld.ab r18, [sp, 4] - ld.ab r17, [sp, 4] - ld.ab r16, [sp, 4] - ld.ab r15, [sp, 4] - ld.ab r14, [sp, 4] - ld.ab r13, [sp, 4] + RESTORE_R24_TO_R13 .endm /*-------------------------------------------------------------- * Super FAST Restore callee saved regs by simply re-adjusting SP *-------------------------------------------------------------*/ .macro DISCARD_CALLEE_SAVED_USER - add sp, sp, 14 * 4 -.endm - -/*-------------------------------------------------------------- - * Restore User mode r25 saved in task_struct->thread.user_r25 - *-------------------------------------------------------------*/ -.macro RESTORE_USER_R25 - ld r25, [r25, TASK_THREAD + THREAD_USER_R25] + add sp, sp, SZ_CALLEE_REGS .endm /*------------------------------------------------------------- @@ -252,7 +235,7 @@ ld \out, [\tsk, TASK_THREAD_INFO] /* Go to end of page where stack begins (grows upwards) */ - add2 \out, \out, (THREAD_SIZE - 4)/4 /* one word GUTTER */ + add2 \out, \out, (THREAD_SIZE)/4 .endm @@ -305,33 +288,28 @@ * safe-keeping not really needed, but it keeps the epilogue code * (SP restore) simpler/uniform. */ - b.d 77f - - st.a sp, [sp, -12] ; Make room for orig_r0 and orig_r8 + b.d 66f + mov r9, sp 88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */ GET_CURR_TASK_ON_CPU r9 -#ifdef CONFIG_ARC_CURR_IN_REG - - /* If current task pointer cached in r25, time to - * -safekeep USER r25 in task->thread_struct->user_r25 - * -load r25 with current task ptr - */ - st.as r25, [r9, (TASK_THREAD + THREAD_USER_R25)/4] - mov r25, r9 -#endif - /* With current tsk in r9, get it's kernel mode stack base */ GET_TSK_STACK_BASE r9, r9 -#ifdef PT_REGS_CANARY - st 0xabcdabcd, [r9, 0] +66: +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * Treat r25 as scratch reg, save it on stack first + * Load it with current task pointer + */ + st r25, [r9, -4] + GET_CURR_TASK_ON_CPU r25 #endif /* Save Pre Intr/Exception User SP on kernel stack */ - st.a sp, [r9, -12] ; Make room for orig_r0 and orig_r8 + st.a sp, [r9, -16] ; Make room for orig_r0, ECR, user_r25 /* CAUTION: * SP should be set at the very end when we are done with everything @@ -342,7 +320,7 @@ /* set SP to point to kernel mode stack */ mov sp, r9 -77: /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */ + /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */ .endm @@ -369,7 +347,7 @@ * @reg [OUT] &thread_info of "current" */ .macro GET_CURR_THR_INFO_FROM_SP reg - and \reg, sp, ~(THREAD_SIZE - 1) + bic \reg, sp, (THREAD_SIZE - 1) .endm /* @@ -413,62 +391,25 @@ * Note that syscalls are implemented via TRAP which is also a exception * from CPU's point of view *-------------------------------------------------------------*/ -.macro SAVE_ALL_EXCEPTION marker +.macro SAVE_ALL_SYS - st \marker, [sp, 8] /* orig_r8 */ + lr r9, [ecr] + st r9, [sp, 8] /* ECR */ st r0, [sp, 4] /* orig_r0, needed only for sys calls */ /* Restore r9 used to code the early prologue */ EXCPN_PROLOG_RESTORE_REG r9 - SAVE_CALLER_SAVED - st.a r26, [sp, -4] /* gp */ - st.a fp, [sp, -4] - st.a blink, [sp, -4] - lr r9, [eret] - st.a r9, [sp, -4] - lr r9, [erstatus] - st.a r9, [sp, -4] - st.a lp_count, [sp, -4] - lr r9, [lp_end] - st.a r9, [sp, -4] - lr r9, [lp_start] - st.a r9, [sp, -4] - lr r9, [erbta] - st.a r9, [sp, -4] - -#ifdef PT_REGS_CANARY - mov r9, 0xdeadbeef - st r9, [sp, -4] -#endif - - /* move up by 1 word to "create" pt_regs->"stack_place_holder" */ - sub sp, sp, 4 -.endm - -/*-------------------------------------------------------------- - * Save scratch regs for exceptions - *-------------------------------------------------------------*/ -.macro SAVE_ALL_SYS - SAVE_ALL_EXCEPTION orig_r8_IS_EXCPN -.endm - -/*-------------------------------------------------------------- - * Save scratch regs for sys calls - *-------------------------------------------------------------*/ -.macro SAVE_ALL_TRAP - /* - * Setup pt_regs->orig_r8. - * Encode syscall number (r8) in upper short word of event type (r9) - * N.B. #1: This is already endian safe (see ptrace.h) - * #2: Only r9 can be used as scratch as it is already clobbered - * and it's contents are no longer needed by the latter part - * of exception prologue - */ - lsl r9, r8, 16 - or r9, r9, orig_r8_IS_SCALL - - SAVE_ALL_EXCEPTION r9 + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSHAX eret + PUSHAX erstatus + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX erbta .endm /*-------------------------------------------------------------- @@ -483,28 +424,22 @@ * by hardware and that is not good. *-------------------------------------------------------------*/ .macro RESTORE_ALL_SYS + POPAX erbta + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed - add sp, sp, 4 /* hop over unused "pt_regs->stack_place_holder" */ - - ld.ab r9, [sp, 4] - sr r9, [erbta] - ld.ab r9, [sp, 4] - sr r9, [lp_start] - ld.ab r9, [sp, 4] - sr r9, [lp_end] - ld.ab r9, [sp, 4] - mov lp_count, r9 - ld.ab r9, [sp, 4] - sr r9, [erstatus] - ld.ab r9, [sp, 4] - sr r9, [eret] - ld.ab blink, [sp, 4] - ld.ab fp, [sp, 4] - ld.ab r26, [sp, 4] /* gp */ - RESTORE_CALLER_SAVED + POPAX erstatus + POPAX eret + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 ld sp, [sp] /* restore original sp */ - /* orig_r0 and orig_r8 skipped automatically */ + /* orig_r0, ECR, user_r25 skipped automatically */ .endm @@ -513,9 +448,7 @@ *-------------------------------------------------------------*/ .macro SAVE_ALL_INT1 - /* restore original r9 , saved in int1_saved_reg - * It will be saved on stack in macro: SAVE_CALLER_SAVED - */ + /* restore original r9 to be saved as part of reg-file */ #ifdef CONFIG_SMP lr r9, [ARC_REG_SCRATCH_DATA0] #else @@ -523,29 +456,19 @@ #endif /* now we are ready to save the remaining context :) */ - st orig_r8_IS_IRQ1, [sp, 8] /* Event Type */ + st event_IRQ1, [sp, 8] /* Dummy ECR */ st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ - SAVE_CALLER_SAVED - st.a r26, [sp, -4] /* gp */ - st.a fp, [sp, -4] - st.a blink, [sp, -4] - st.a ilink1, [sp, -4] - lr r9, [status32_l1] - st.a r9, [sp, -4] - st.a lp_count, [sp, -4] - lr r9, [lp_end] - st.a r9, [sp, -4] - lr r9, [lp_start] - st.a r9, [sp, -4] - lr r9, [bta_l1] - st.a r9, [sp, -4] - -#ifdef PT_REGS_CANARY - mov r9, 0xdeadbee1 - st r9, [sp, -4] -#endif - /* move up by 1 word to "create" pt_regs->"stack_place_holder" */ - sub sp, sp, 4 + + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSH ilink1 + PUSHAX status32_l1 + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX bta_l1 .endm .macro SAVE_ALL_INT2 @@ -558,30 +481,19 @@ ld r9, [@int2_saved_reg] /* now we are ready to save the remaining context :) */ - st orig_r8_IS_IRQ2, [sp, 8] /* Event Type */ + st event_IRQ2, [sp, 8] /* Dummy ECR */ st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ - SAVE_CALLER_SAVED - st.a r26, [sp, -4] /* gp */ - st.a fp, [sp, -4] - st.a blink, [sp, -4] - st.a ilink2, [sp, -4] - lr r9, [status32_l2] - st.a r9, [sp, -4] - st.a lp_count, [sp, -4] - lr r9, [lp_end] - st.a r9, [sp, -4] - lr r9, [lp_start] - st.a r9, [sp, -4] - lr r9, [bta_l2] - st.a r9, [sp, -4] - -#ifdef PT_REGS_CANARY - mov r9, 0xdeadbee2 - st r9, [sp, -4] -#endif - /* move up by 1 word to "create" pt_regs->"stack_place_holder" */ - sub sp, sp, 4 + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSH ilink2 + PUSHAX status32_l2 + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX bta_l2 .endm /*-------------------------------------------------------------- @@ -595,52 +507,41 @@ *-------------------------------------------------------------*/ .macro RESTORE_ALL_INT1 - add sp, sp, 4 /* hop over unused "pt_regs->stack_place_holder" */ - - ld.ab r9, [sp, 4] /* Actual reg file */ - sr r9, [bta_l1] - ld.ab r9, [sp, 4] - sr r9, [lp_start] - ld.ab r9, [sp, 4] - sr r9, [lp_end] - ld.ab r9, [sp, 4] - mov lp_count, r9 - ld.ab r9, [sp, 4] - sr r9, [status32_l1] - ld.ab r9, [sp, 4] - mov ilink1, r9 - ld.ab blink, [sp, 4] - ld.ab fp, [sp, 4] - ld.ab r26, [sp, 4] /* gp */ - RESTORE_CALLER_SAVED + POPAX bta_l1 + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX status32_l1 + POP ilink1 + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 ld sp, [sp] /* restore original sp */ - /* orig_r0 and orig_r8 skipped automatically */ + /* orig_r0, ECR, user_r25 skipped automatically */ .endm .macro RESTORE_ALL_INT2 - add sp, sp, 4 /* hop over unused "pt_regs->stack_place_holder" */ - - ld.ab r9, [sp, 4] - sr r9, [bta_l2] - ld.ab r9, [sp, 4] - sr r9, [lp_start] - ld.ab r9, [sp, 4] - sr r9, [lp_end] - ld.ab r9, [sp, 4] - mov lp_count, r9 - ld.ab r9, [sp, 4] - sr r9, [status32_l2] - ld.ab r9, [sp, 4] - mov ilink2, r9 - ld.ab blink, [sp, 4] - ld.ab fp, [sp, 4] - ld.ab r26, [sp, 4] /* gp */ - RESTORE_CALLER_SAVED + POPAX bta_l2 + POPAX lp_start + POPAX lp_end - ld sp, [sp] /* restore original sp */ - /* orig_r0 and orig_r8 skipped automatically */ + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + POPAX status32_l2 + POP ilink2 + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ .endm diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index 57898a17eb82..c0a72105ee0b 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -21,6 +21,6 @@ extern void __init arc_init_IRQ(void); extern int __init get_hw_config_num_irq(void); -void __cpuinit arc_local_timer_setup(unsigned int cpu); +void arc_local_timer_setup(unsigned int cpu); #endif diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index eac071668201..d99f79bcf865 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -19,6 +19,26 @@ #include <asm/arcregs.h> +/* status32 Reg bits related to Interrupt Handling */ +#define STATUS_E1_BIT 1 /* Int 1 enable */ +#define STATUS_E2_BIT 2 /* Int 2 enable */ +#define STATUS_A1_BIT 3 /* Int 1 active */ +#define STATUS_A2_BIT 4 /* Int 2 active */ + +#define STATUS_E1_MASK (1<<STATUS_E1_BIT) +#define STATUS_E2_MASK (1<<STATUS_E2_BIT) +#define STATUS_A1_MASK (1<<STATUS_A1_BIT) +#define STATUS_A2_MASK (1<<STATUS_A2_BIT) + +/* Other Interrupt Handling related Aux regs */ +#define AUX_IRQ_LEV 0x200 /* IRQ Priority: L1 or L2 */ +#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */ +#define AUX_IRQ_LV12 0x43 /* interrupt level register */ + +#define AUX_IENABLE 0x40c +#define AUX_ITRIGGER 0x40d +#define AUX_IPULSE 0x415 + #ifndef __ASSEMBLY__ /****************************************************************** diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h index 4930957ca3d3..b65fca7ffeb5 100644 --- a/arch/arc/include/asm/kgdb.h +++ b/arch/arc/include/asm/kgdb.h @@ -31,7 +31,7 @@ static inline void arch_kgdb_breakpoint(void) __asm__ __volatile__ ("trap_s 0x4\n"); } -extern void kgdb_trap(struct pt_regs *regs, int param); +extern void kgdb_trap(struct pt_regs *regs); enum arc700_linux_regnums { _R0 = 0, @@ -53,7 +53,7 @@ enum arc700_linux_regnums { }; #else -#define kgdb_trap(regs, param) +#define kgdb_trap(regs) #endif #endif /* __ARC_KGDB_H__ */ diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h index 4d9c211fce70..944dbedb38b5 100644 --- a/arch/arc/include/asm/kprobes.h +++ b/arch/arc/include/asm/kprobes.h @@ -50,11 +50,9 @@ struct kprobe_ctlblk { int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause); void kretprobe_trampoline(void); -void trap_is_kprobe(unsigned long cause, unsigned long address, - struct pt_regs *regs); +void trap_is_kprobe(unsigned long address, struct pt_regs *regs); #else -static void trap_is_kprobe(unsigned long cause, unsigned long address, - struct pt_regs *regs) +static void trap_is_kprobe(unsigned long address, struct pt_regs *regs) { } #endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 56b02320f1a9..7c03fe61759c 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -9,6 +9,40 @@ #ifndef _ASM_ARC_MMU_H #define _ASM_ARC_MMU_H +#if defined(CONFIG_ARC_MMU_V1) +#define CONFIG_ARC_MMU_VER 1 +#elif defined(CONFIG_ARC_MMU_V2) +#define CONFIG_ARC_MMU_VER 2 +#elif defined(CONFIG_ARC_MMU_V3) +#define CONFIG_ARC_MMU_VER 3 +#endif + +/* MMU Management regs */ +#define ARC_REG_MMU_BCR 0x06f +#define ARC_REG_TLBPD0 0x405 +#define ARC_REG_TLBPD1 0x406 +#define ARC_REG_TLBINDEX 0x407 +#define ARC_REG_TLBCOMMAND 0x408 +#define ARC_REG_PID 0x409 +#define ARC_REG_SCRATCH_DATA0 0x418 + +/* Bits in MMU PID register */ +#define MMU_ENABLE (1 << 31) /* Enable MMU for process */ + +/* Error code if probe fails */ +#define TLB_LKUP_ERR 0x80000000 + +/* TLB Commands */ +#define TLBWrite 0x1 +#define TLBRead 0x2 +#define TLBGetIndex 0x3 +#define TLBProbe 0x4 + +#if (CONFIG_ARC_MMU_VER >= 2) +#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ +#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ +#endif + #ifndef __ASSEMBLY__ typedef struct { @@ -18,6 +52,16 @@ typedef struct { #endif } mm_context_t; +#ifdef CONFIG_ARC_DBG_TLB_PARANOIA +void tlb_paranoid_check(unsigned int pid_sw, unsigned long address); +#else +#define tlb_paranoid_check(a, b) #endif +void arc_mmu_init(void); +extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); +void __init read_decode_mmu_bcr(void); + +#endif /* !__ASSEMBLY__ */ + #endif diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index ab84bf131fe1..9c8aa41e45c2 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -96,13 +96,8 @@ typedef unsigned long pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) -/* Default Permissions for page, used in mmap.c */ -#ifdef CONFIG_ARC_STACK_NONEXEC +/* Default Permissions for stack/heaps pages (Non Executable) */ #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE) -#else -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#endif #define WANT_PAGE_VIRTUAL 1 diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index c110ac87d22b..4749a0eee1cf 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -135,6 +135,12 @@ /* ioremap */ #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) +/* Masks for actual TLB "PD"s */ +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) +#define PTE_BITS_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE | \ + _PAGE_U_EXECUTE | _PAGE_U_WRITE | _PAGE_U_READ | \ + _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ) + /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) * diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 5f26b2c1cba0..15334ab66b56 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -19,6 +19,7 @@ #ifndef __ASSEMBLY__ #include <asm/arcregs.h> /* for STATUS_E1_MASK et all */ +#include <asm/ptrace.h> /* Arch specific stuff which needs to be saved per task. * However these items are not so important so as to earn a place in @@ -28,10 +29,6 @@ struct thread_struct { unsigned long ksp; /* kernel mode stack pointer */ unsigned long callee_reg; /* pointer to callee regs */ unsigned long fault_address; /* dbls as brkpt holder as well */ - unsigned long cause_code; /* Exception Cause Code (ECR) */ -#ifdef CONFIG_ARC_CURR_IN_REG - unsigned long user_r25; -#endif #ifdef CONFIG_ARC_FPU_SAVE_RESTORE struct arc_fpu fpu; #endif @@ -50,7 +47,7 @@ struct task_struct; unsigned long thread_saved_pc(struct task_struct *t); #define task_pt_regs(p) \ - ((struct pt_regs *)(THREAD_SIZE - 4 + (void *)task_stack_page(p)) - 1) + ((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1) /* Free all resources held by a thread. */ #define release_thread(thread) do { } while (0) @@ -75,11 +72,15 @@ unsigned long thread_saved_pc(struct task_struct *t); /* * Where abouts of Task's sp, fp, blink when it was last seen in kernel mode. - * These can't be derived from pt_regs as that would give correp user-mode val + * Look in process.c for details of kernel stack layout */ #define KSTK_ESP(tsk) (tsk->thread.ksp) -#define KSTK_BLINK(tsk) (*((unsigned int *)((KSTK_ESP(tsk)) + (13+1+1)*4))) -#define KSTK_FP(tsk) (*((unsigned int *)((KSTK_ESP(tsk)) + (13+1)*4))) + +#define KSTK_REG(tsk, off) (*((unsigned int *)(KSTK_ESP(tsk) + \ + sizeof(struct callee_regs) + off))) + +#define KSTK_BLINK(tsk) KSTK_REG(tsk, 4) +#define KSTK_FP(tsk) KSTK_REG(tsk, 0) /* * Do necessary setup to start up a newly executed thread. diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 6179de7e07c2..c9938e7a7dbd 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -17,12 +17,6 @@ /* THE pt_regs: Defines how regs are saved during entry into kernel */ struct pt_regs { - /* - * 1 word gutter after reg-file has been saved - * Technically not needed, Since SP always points to a "full" location - * (vs. "empty"). But pt_regs is shared with tools.... - */ - long res; /* Real registers */ long bta; /* bta_l1, bta_l2, erbta */ @@ -50,22 +44,32 @@ struct pt_regs { long sp; /* user/kernel sp depending on where we came from */ long orig_r0; - /*to distinguish bet excp, syscall, irq */ + /* + * To distinguish bet excp, syscall, irq + * For traps and exceptions, Exception Cause Register. + * ECR: <00> <VV> <CC> <PP> + * Last word used by Linux for extra state mgmt (syscall-restart) + * For interrupts, use artificial ECR values to note current prio-level + */ union { + struct { #ifdef CONFIG_CPU_BIG_ENDIAN - /* so that assembly code is same for LE/BE */ - unsigned long orig_r8:16, event:16; + unsigned long state:8, ecr_vec:8, + ecr_cause:8, ecr_param:8; #else - unsigned long event:16, orig_r8:16; + unsigned long ecr_param:8, ecr_cause:8, + ecr_vec:8, state:8; #endif - long orig_r8_word; + }; + unsigned long event; }; + + long user_r25; }; /* Callee saved registers - need to be saved only when you are scheduled out */ struct callee_regs { - long res; /* Again this is not needed */ long r25; long r24; long r23; @@ -99,18 +103,20 @@ struct callee_regs { /* return 1 if PC in delay slot */ #define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK) -#define in_syscall(regs) (regs->event & orig_r8_IS_SCALL) -#define in_brkpt_trap(regs) (regs->event & orig_r8_IS_BRKPT) +#define in_syscall(regs) ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param) +#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param) + +#define STATE_SCALL_RESTARTED 0x01 -#define syscall_wont_restart(regs) (regs->event |= orig_r8_IS_SCALL_RESTARTED) -#define syscall_restartable(regs) !(regs->event & orig_r8_IS_SCALL_RESTARTED) +#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED) +#define syscall_restartable(reg) !(reg->state & STATE_SCALL_RESTARTED) #define current_pt_regs() \ ({ \ /* open-coded current_thread_info() */ \ register unsigned long sp asm ("sp"); \ unsigned long pg_start = (sp & ~(THREAD_SIZE - 1)); \ - (struct pt_regs *)(pg_start + THREAD_SIZE - 4) - 1; \ + (struct pt_regs *)(pg_start + THREAD_SIZE) - 1; \ }) static inline long regs_return_value(struct pt_regs *regs) @@ -120,11 +126,4 @@ static inline long regs_return_value(struct pt_regs *regs) #endif /* !__ASSEMBLY__ */ -#define orig_r8_IS_SCALL 0x0001 -#define orig_r8_IS_SCALL_RESTARTED 0x0002 -#define orig_r8_IS_BRKPT 0x0004 -#define orig_r8_IS_EXCPN 0x0008 -#define orig_r8_IS_IRQ1 0x0010 -#define orig_r8_IS_IRQ2 0x0020 - #endif /* __ASM_PTRACE_H */ diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h index 33ab3048e9b2..29de09804306 100644 --- a/arch/arc/include/asm/syscall.h +++ b/arch/arc/include/asm/syscall.h @@ -18,7 +18,7 @@ static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { if (user_mode(regs) && in_syscall(regs)) - return regs->orig_r8; + return regs->r8; else return -1; } @@ -26,8 +26,7 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs) static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - /* XXX: I can't fathom how pt_regs->r8 will be clobbered ? */ - regs->r8 = regs->orig_r8; + regs->r0 = regs->orig_r0; } static inline long diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h index a5ff961b1efc..8a1ec96012ae 100644 --- a/arch/arc/include/asm/tlb-mmu1.h +++ b/arch/arc/include/asm/tlb-mmu1.h @@ -9,9 +9,9 @@ #ifndef __ASM_TLB_MMU_V1_H__ #define __ASM_TLB_MMU_V1_H__ -#if defined(__ASSEMBLY__) && defined(CONFIG_ARC_MMU_VER == 1) +#include <asm/mmu.h> -#include <asm/tlb.h> +#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1) .macro TLB_WRITE_HEURISTICS diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h index cb0c708ca665..a9db5f62aaf3 100644 --- a/arch/arc/include/asm/tlb.h +++ b/arch/arc/include/asm/tlb.h @@ -9,18 +9,6 @@ #ifndef _ASM_ARC_TLB_H #define _ASM_ARC_TLB_H -#ifdef __KERNEL__ - -#include <asm/pgtable.h> - -/* Masks for actual TLB "PD"s */ -#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) -#define PTE_BITS_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE | \ - _PAGE_U_EXECUTE | _PAGE_U_WRITE | _PAGE_U_READ | \ - _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ) - -#ifndef __ASSEMBLY__ - #define tlb_flush(tlb) \ do { \ if (tlb->fullmm) \ @@ -56,18 +44,4 @@ do { \ #include <linux/pagemap.h> #include <asm-generic/tlb.h> -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA -void tlb_paranoid_check(unsigned int pid_sw, unsigned long address); -#else -#define tlb_paranoid_check(a, b) -#endif - -void arc_mmu_init(void); -extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); -void __init read_decode_mmu_bcr(void); - -#endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - #endif /* _ASM_ARC_TLB_H */ diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h index 5dbe63f17b66..60702f3751d2 100644 --- a/arch/arc/include/asm/unaligned.h +++ b/arch/arc/include/asm/unaligned.h @@ -16,11 +16,11 @@ #ifdef CONFIG_ARC_MISALIGN_ACCESS int misaligned_fixup(unsigned long address, struct pt_regs *regs, - unsigned long cause, struct callee_regs *cregs); + struct callee_regs *cregs); #else static inline int misaligned_fixup(unsigned long address, struct pt_regs *regs, - unsigned long cause, struct callee_regs *cregs) + struct callee_regs *cregs) { return 0; } diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 30333cec0fef..2618cc13ba75 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -20,28 +20,31 @@ * * This is to decouple pt_regs from user-space ABI, to be able to change it * w/o affecting the ABI. - * Although the layout (initial padding) is similar to pt_regs to have some - * optimizations when copying pt_regs to/from user_regs_struct. + * + * The intermediate pad,pad2 are relics of initial layout based on pt_regs + * for optimizations when copying pt_regs to/from user_regs_struct. + * We no longer need them, but can't be changed as they are part of ABI now. * * Also, sigcontext only care about the scratch regs as that is what we really - * save/restore for signal handling. + * save/restore for signal handling. However gdb also uses the same struct + * hence callee regs need to be in there too. */ struct user_regs_struct { + long pad; struct { - long pad; long bta, lp_start, lp_end, lp_count; long status32, ret, blink, fp, gp; long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; long sp; } scratch; + long pad2; struct { - long pad; long r25, r24, r23, r22, r21, r20; long r19, r18, r17, r16, r15, r14, r13; } callee; long efa; /* break pt addr, for break points in delay slots */ - long stop_pc; /* give dbg stop_pc directly after checking orig_r8 */ + long stop_pc; /* give dbg stop_pc after ensuring brkpt trap */ }; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c index 7dcda7025241..6c3aa0edb9b5 100644 --- a/arch/arc/kernel/asm-offsets.c +++ b/arch/arc/kernel/asm-offsets.c @@ -24,9 +24,6 @@ int main(void) DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp)); DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg)); -#ifdef CONFIG_ARC_CURR_IN_REG - DEFINE(THREAD_USER_R25, offsetof(struct thread_struct, user_r25)); -#endif DEFINE(THREAD_FAULT_ADDR, offsetof(struct thread_struct, fault_address)); @@ -49,7 +46,7 @@ int main(void) BLANK(); DEFINE(PT_status32, offsetof(struct pt_regs, status32)); - DEFINE(PT_orig_r8, offsetof(struct pt_regs, orig_r8_word)); + DEFINE(PT_event, offsetof(struct pt_regs, event)); DEFINE(PT_sp, offsetof(struct pt_regs, sp)); DEFINE(PT_r0, offsetof(struct pt_regs, r0)); DEFINE(PT_r1, offsetof(struct pt_regs, r1)); @@ -60,5 +57,7 @@ int main(void) DEFINE(PT_r6, offsetof(struct pt_regs, r6)); DEFINE(PT_r7, offsetof(struct pt_regs, r7)); + DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs)); + DEFINE(SZ_PT_REGS, sizeof(struct pt_regs)); return 0; } diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index 60844dac6132..34410eb1a308 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -23,10 +23,6 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) unsigned int tmp; unsigned int prev = (unsigned int)prev_task; unsigned int next = (unsigned int)next_task; - int num_words_to_skip = 1; -#ifdef CONFIG_ARC_CURR_IN_REG - num_words_to_skip++; -#endif __asm__ __volatile__( /* FP/BLINK save generated by gcc (standard function prologue */ @@ -44,8 +40,9 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) "st.a r24, [sp, -4] \n\t" #ifndef CONFIG_ARC_CURR_IN_REG "st.a r25, [sp, -4] \n\t" +#else + "sub sp, sp, 4 \n\t" /* usual r25 placeholder */ #endif - "sub sp, sp, %4 \n\t" /* create gutter at top */ /* set ksp of outgoing task in tsk->thread.ksp */ "st.as sp, [%3, %1] \n\t" @@ -76,10 +73,10 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) /* start loading it's CALLEE reg file */ - "add sp, sp, %4 \n\t" /* skip gutter at top */ - #ifndef CONFIG_ARC_CURR_IN_REG "ld.ab r25, [sp, 4] \n\t" +#else + "add sp, sp, 4 \n\t" #endif "ld.ab r24, [sp, 4] \n\t" "ld.ab r23, [sp, 4] \n\t" @@ -100,8 +97,7 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) /* FP/BLINK restore generated by gcc (standard func epilogue */ : "=r"(tmp) - : "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev), - "n"(num_words_to_skip * 4) + : "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev) : "blink" ); diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 0c6d664d4a83..1d7165156e17 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -142,7 +142,7 @@ VECTOR reserved ; Reserved Exceptions .endr #include <linux/linkage.h> /* ARC_{EXTRY,EXIT} */ -#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,TRAP...} */ +#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,SYS...} */ #include <asm/errno.h> #include <asm/arcregs.h> #include <asm/irqflags.h> @@ -274,10 +274,8 @@ ARC_ENTRY instr_service SWITCH_TO_KERNEL_STK SAVE_ALL_SYS - lr r0, [ecr] - lr r1, [efa] - - mov r2, sp + lr r0, [efa] + mov r1, sp FAKE_RET_FROM_EXCPN r9 @@ -298,9 +296,8 @@ ARC_ENTRY mem_service SWITCH_TO_KERNEL_STK SAVE_ALL_SYS - lr r0, [ecr] - lr r1, [efa] - mov r2, sp + lr r0, [efa] + mov r1, sp bl do_memory_error b ret_from_exception ARC_EXIT mem_service @@ -317,11 +314,14 @@ ARC_ENTRY EV_MachineCheck SWITCH_TO_KERNEL_STK SAVE_ALL_SYS - lr r0, [ecr] - lr r1, [efa] - mov r2, sp + lr r2, [ecr] + lr r0, [efa] + mov r1, sp + + lsr r3, r2, 8 + bmsk r3, r3, 7 + brne r3, ECR_C_MCHK_DUP_TLB, 1f - brne r0, 0x200100, 1f bl do_tlb_overlap_fault b ret_from_exception @@ -355,8 +355,8 @@ ARC_ENTRY EV_TLBProtV ; ecr and efa were not saved in case an Intr sneaks in ; after fake rtie ; - lr r3, [ecr] - lr r4, [efa] + lr r2, [ecr] + lr r1, [efa] ; Faulting Data address ; --------(4) Return from CPU Exception Mode --------- ; Fake a rtie, but rtie to next label @@ -368,31 +368,25 @@ ARC_ENTRY EV_TLBProtV ;------ (5) Type of Protection Violation? ---------- ; ; ProtV Hardware Exception is triggered for Access Faults of 2 types - ; -Access Violaton (WRITE to READ ONLY Page) - for linux COW - ; -Unaligned Access (READ/WRITE on odd boundary) + ; -Access Violaton : 00_23_(00|01|02|03)_00 + ; x r w r+w + ; -Unaligned Access : 00_23_04_00 ; - cmp r3, 0x230400 ; Misaligned data access ? - beq 4f + bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f ;========= (6a) Access Violation Processing ======== - cmp r3, 0x230100 - mov r1, 0x0 ; if LD exception ? write = 0 - mov.ne r1, 0x1 ; else write = 1 - - mov r2, r4 ; faulting address mov r0, sp ; pt_regs bl do_page_fault b ret_from_exception ;========== (6b) Non aligned access ============ 4: - mov r0, r3 ; cause code - mov r1, r4 ; faulting address - mov r2, sp ; pt_regs + mov r0, r1 + mov r1, sp ; pt_regs #ifdef CONFIG_ARC_MISALIGN_ACCESS SAVE_CALLEE_SAVED_USER - mov r3, sp ; callee_regs + mov r2, sp ; callee_regs bl do_misaligned_access @@ -419,9 +413,8 @@ ARC_ENTRY EV_PrivilegeV SWITCH_TO_KERNEL_STK SAVE_ALL_SYS - lr r0, [ecr] - lr r1, [efa] - mov r2, sp + lr r0, [efa] + mov r1, sp FAKE_RET_FROM_EXCPN r9 @@ -440,9 +433,8 @@ ARC_ENTRY EV_Extension SWITCH_TO_KERNEL_STK SAVE_ALL_SYS - lr r0, [ecr] - lr r1, [efa] - mov r2, sp + lr r0, [efa] + mov r1, sp bl do_extension_fault b ret_from_exception ARC_EXIT EV_Extension @@ -498,11 +490,8 @@ tracesys_exit: trap_with_param: ; stop_pc info by gdb needs this info - stw orig_r8_IS_BRKPT, [sp, PT_orig_r8] - - mov r0, r12 - lr r1, [efa] - mov r2, sp + lr r0, [efa] + mov r1, sp ; Now that we have read EFA, its safe to do "fake" rtie ; and get out of CPU exception mode @@ -544,11 +533,11 @@ ARC_ENTRY EV_Trap lr r9, [erstatus] SWITCH_TO_KERNEL_STK - SAVE_ALL_TRAP + SAVE_ALL_SYS ;------- (4) What caused the Trap -------------- lr r12, [ecr] - and.f 0, r12, ECR_PARAM_MASK + bmsk.f 0, r12, 7 bnz trap_with_param ; ======= (5a) Trap is due to System Call ======== @@ -589,11 +578,7 @@ ARC_ENTRY ret_from_exception ; Pre-{IRQ,Trap,Exception} K/U mode from pt_regs->status32 ld r8, [sp, PT_status32] ; returning to User/Kernel Mode -#ifdef CONFIG_PREEMPT bbit0 r8, STATUS_U_BIT, resume_kernel_mode -#else - bbit0 r8, STATUS_U_BIT, restore_regs -#endif ; Before returning to User mode check-for-and-complete any pending work ; such as rescheduling/signal-delivery etc. @@ -653,10 +638,10 @@ resume_user_mode_begin: b resume_user_mode_begin ; unconditionally back to U mode ret chks ; for single exit point from this block -#ifdef CONFIG_PREEMPT - resume_kernel_mode: +#ifdef CONFIG_PREEMPT + ; Can't preempt if preemption disabled GET_CURR_THR_INFO_FROM_SP r10 ld r8, [r10, THREAD_INFO_PREEMPT_COUNT] @@ -687,17 +672,6 @@ restore_regs : ; XXX can this be optimised out IRQ_DISABLE_SAVE r9, r10 ;@r10 has prisitine (pre-disable) copy -#ifdef CONFIG_ARC_CURR_IN_REG - ; Restore User R25 - ; Earlier this used to be only for returning to user mode - ; However with 2 levels of IRQ this can also happen even if - ; in kernel mode - ld r9, [sp, PT_sp] - brhs r9, VMALLOC_START, 8f - RESTORE_USER_R25 -8: -#endif - ; Restore REG File. In case multiple Events outstanding, ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None ; Note that we use realtime STATUS32 (not pt_regs->status32) to @@ -714,28 +688,33 @@ not_exception: #ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + ; Level 2 interrupt return Path - from hardware standpoint bbit0 r10, STATUS_A2_BIT, not_level2_interrupt ;------------------------------------------------------------------ + ; However the context returning might not have taken L2 intr itself + ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret + ; Special considerations needed for the context which took L2 intr + + ld r9, [sp, PT_event] ; Ensure this is L2 intr context + brne r9, event_IRQ2, 149f + + ;------------------------------------------------------------------ ; if L2 IRQ interrupted a L1 ISR, we'd disbaled preemption earlier ; so that sched doesnt move to new task, causing L1 to be delayed ; undeterministically. Now that we've achieved that, lets reset ; things to what they were, before returning from L2 context ;---------------------------------------------------------------- - ldw r9, [sp, PT_orig_r8] ; get orig_r8 to make sure it is - brne r9, orig_r8_IS_IRQ2, 149f ; infact a L2 ISR ret path - ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal - ; A1 is set in status32_l2 ; decrement thread_info->preempt_count (re-enable preemption) GET_CURR_THR_INFO_FROM_SP r10 ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] ; paranoid check, given A1 was active when A2 happened, preempt count - ; must not be 0 beccause we would have incremented it. + ; must not be 0 because we would have incremented it. ; If this does happen we simply HALT as it means a BUG !!! cmp r9, 0 bnz 2f diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 006dec3fc353..2a913f85a747 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -27,6 +27,8 @@ stext: ; Don't clobber r0-r4 yet. It might have bootloader provided info ;------------------------------------------------------------------- + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + #ifdef CONFIG_SMP ; Only Boot (Master) proceeds. Others wait in platform dependent way ; IDENTITY Reg [ 3 2 1 0 ] diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 8115fa531575..305b3f866aa7 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -28,25 +28,17 @@ * -Disable all IRQs (on CPU side) * -Optionally, setup the High priority Interrupts as Level 2 IRQs */ -void __cpuinit arc_init_IRQ(void) +void arc_init_IRQ(void) { int level_mask = 0; - write_aux_reg(AUX_INTR_VEC_BASE, _int_vec_base_lds); - /* Disable all IRQs: enable them as devices request */ write_aux_reg(AUX_IENABLE, 0); /* setup any high priority Interrupts (Level2 in ARCompact jargon) */ -#ifdef CONFIG_ARC_IRQ3_LV2 - level_mask |= (1 << 3); -#endif -#ifdef CONFIG_ARC_IRQ5_LV2 - level_mask |= (1 << 5); -#endif -#ifdef CONFIG_ARC_IRQ6_LV2 - level_mask |= (1 << 6); -#endif + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3; + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; if (level_mask) { pr_info("Level-2 interrupts bitset %x\n", level_mask); diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index 52bdc83c1495..a7698fb14818 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -169,7 +169,7 @@ int kgdb_arch_init(void) return 0; } -void kgdb_trap(struct pt_regs *regs, int param) +void kgdb_trap(struct pt_regs *regs) { /* trap_s 3 is used for breakpoints that overwrite existing * instructions, while trap_s 4 is used for compiled breakpoints. @@ -181,7 +181,7 @@ void kgdb_trap(struct pt_regs *regs, int param) * with trap_s 4 (compiled) breakpoints, continuation needs to * start after the breakpoint. */ - if (param == 3) + if (regs->ecr_param == 3) instruction_pointer(regs) -= BREAK_INSTR_SIZE; kgdb_handle_exception(1, SIGTRAP, 0, regs); diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c index 5a7b80e2d883..72f97822784a 100644 --- a/arch/arc/kernel/kprobes.c +++ b/arch/arc/kernel/kprobes.c @@ -517,8 +517,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) return 0; } -void trap_is_kprobe(unsigned long cause, unsigned long address, - struct pt_regs *regs) +void trap_is_kprobe(unsigned long address, struct pt_regs *regs) { - notify_die(DIE_TRAP, "kprobe_trap", regs, address, cause, SIGTRAP); + notify_die(DIE_TRAP, "kprobe_trap", regs, address, 0, SIGTRAP); } diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index cad66851e0c4..07a3a968fe49 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -55,10 +55,8 @@ asmlinkage void ret_from_fork(void); * | ... | * | unused | * | | - * ------------------ <==== top of Stack (thread.ksp) - * | UNUSED 1 word| * ------------------ - * | r25 | + * | r25 | <==== top of Stack (thread.ksp) * ~ ~ * | --to-- | (CALLEE Regs of user mode) * | r13 | @@ -76,7 +74,10 @@ asmlinkage void ret_from_fork(void); * | --to-- | (scratch Regs of user mode) * | r0 | * ------------------ - * | UNUSED 1 word| + * | SP | + * | orig_r0 | + * | event/ECR | + * | user_r25 | * ------------------ <===== END of PAGE */ int copy_thread(unsigned long clone_flags, diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index c6a81c58d0f3..333238564b67 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -40,7 +40,15 @@ static int genregs_get(struct task_struct *target, offsetof(struct user_regs_struct, LOC), \ offsetof(struct user_regs_struct, LOC) + 4); +#define REG_O_ZERO(LOC) \ + if (!ret) \ + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, \ + offsetof(struct user_regs_struct, LOC), \ + offsetof(struct user_regs_struct, LOC) + 4); + + REG_O_ZERO(pad); REG_O_CHUNK(scratch, callee, ptregs); + REG_O_ZERO(pad2); REG_O_CHUNK(callee, efa, cregs); REG_O_CHUNK(efa, stop_pc, &target->thread.fault_address); @@ -88,8 +96,10 @@ static int genregs_set(struct task_struct *target, offsetof(struct user_regs_struct, LOC), \ offsetof(struct user_regs_struct, LOC) + 4); - /* TBD: disallow updates to STATUS32, orig_r8 etc*/ - REG_IN_CHUNK(scratch, callee, ptregs); /* pt_regs[bta..orig_r8] */ + REG_IGNORE_ONE(pad); + /* TBD: disallow updates to STATUS32 etc*/ + REG_IN_CHUNK(scratch, pad2, ptregs); /* pt_regs[bta..sp] */ + REG_IGNORE_ONE(pad2); REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */ REG_IGNORE_ONE(efa); /* efa update invalid */ REG_IN_ONE(stop_pc, &ptregs->ret); /* stop_pc: PC update */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index b2b3731dd1e9..6b083454d039 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -31,14 +31,14 @@ int running_on_hw = 1; /* vs. on ISS */ char __initdata command_line[COMMAND_LINE_SIZE]; -struct machine_desc *machine_desc __cpuinitdata; +struct machine_desc *machine_desc; struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; -void __cpuinit read_arc_build_cfg_regs(void) +void read_arc_build_cfg_regs(void) { struct bcr_perip uncached_space; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; @@ -182,7 +182,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) FIX_PTR(cpu); #define IS_AVAIL1(var, str) ((var) ? str : "") #define IS_AVAIL2(var, str) ((var == 0x2) ? str : "") -#define IS_USED(var) ((var) ? "(in-use)" : "(not used)") +#define IS_USED(cfg) (IS_ENABLED(cfg) ? "(in-use)" : "(not used)") n += scnprintf(buf + n, len - n, "Extn [700-Base]\t: %s %s %s %s %s %s\n", @@ -202,9 +202,9 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) if (cpu->core.family == 0x34) { n += scnprintf(buf + n, len - n, "Extn [700-4.10]\t: LLOCK/SCOND %s, SWAPE %s, RTSC %s\n", - IS_USED(__CONFIG_ARC_HAS_LLSC_VAL), - IS_USED(__CONFIG_ARC_HAS_SWAPE_VAL), - IS_USED(__CONFIG_ARC_HAS_RTSC_VAL)); + IS_USED(CONFIG_ARC_HAS_LLSC), + IS_USED(CONFIG_ARC_HAS_SWAPE), + IS_USED(CONFIG_ARC_HAS_RTSC)); } n += scnprintf(buf + n, len - n, "Extn [CCM]\t: %s", @@ -237,7 +237,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) return buf; } -void __cpuinit arc_chk_ccms(void) +void arc_chk_ccms(void) { #if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM) struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; @@ -272,7 +272,7 @@ void __cpuinit arc_chk_ccms(void) * hardware has dedicated regs which need to be saved/restored on ctx-sw * (Single Precision uses core regs), thus kernel is kind of oblivious to it */ -void __cpuinit arc_chk_fpu(void) +void arc_chk_fpu(void) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; @@ -293,7 +293,7 @@ void __cpuinit arc_chk_fpu(void) * such as only for boot CPU etc */ -void __cpuinit setup_processor(void) +void setup_processor(void) { char str[512]; int cpu_id = smp_processor_id(); diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 5c7fd603d216..bca3052c956d 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -117,7 +117,7 @@ const char *arc_platform_smp_cpuinfo(void) * Called from asm stub in head.S * "current"/R25 already setup by low level boot code */ -void __cpuinit start_kernel_secondary(void) +void start_kernel_secondary(void) { struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); @@ -154,7 +154,7 @@ void __cpuinit start_kernel_secondary(void) * * Essential requirements being where to run from (PC) and stack (SP) */ -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) +int __cpu_up(unsigned int cpu, struct task_struct *idle) { unsigned long wait_till; diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index ca0207b9d5b6..f8b7d880304d 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -79,7 +79,7 @@ static void seed_unwind_frame_info(struct task_struct *tsk, * assembly code */ frame_info->regs.r27 = 0; - frame_info->regs.r28 += 64; + frame_info->regs.r28 += 60; frame_info->call_frame = 0; } else { diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 09f4309aa2c0..0e51e69cf30d 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -44,13 +44,24 @@ #include <asm/clk.h> #include <asm/mach_desc.h> +/* Timer related Aux registers */ +#define ARC_REG_TIMER0_LIMIT 0x23 /* timer 0 limit */ +#define ARC_REG_TIMER0_CTRL 0x22 /* timer 0 control */ +#define ARC_REG_TIMER0_CNT 0x21 /* timer 0 count */ +#define ARC_REG_TIMER1_LIMIT 0x102 /* timer 1 limit */ +#define ARC_REG_TIMER1_CTRL 0x101 /* timer 1 control */ +#define ARC_REG_TIMER1_CNT 0x100 /* timer 1 count */ + +#define TIMER_CTRL_IE (1 << 0) /* Interupt when Count reachs limit */ +#define TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */ + #define ARC_TIMER_MAX 0xFFFFFFFF /********** Clock Source Device *********/ #ifdef CONFIG_ARC_HAS_RTSC -int __cpuinit arc_counter_setup(void) +int arc_counter_setup(void) { /* RTSC insn taps into cpu clk, needs no setup */ @@ -105,7 +116,7 @@ static bool is_usable_as_clocksource(void) /* * set 32bit TIMER1 to keep counting monotonically and wraparound */ -int __cpuinit arc_counter_setup(void) +int arc_counter_setup(void) { write_aux_reg(ARC_REG_TIMER1_LIMIT, ARC_TIMER_MAX); write_aux_reg(ARC_REG_TIMER1_CNT, 0); @@ -212,7 +223,7 @@ static struct irqaction arc_timer_irq = { * Setup the local event timer for @cpu * N.B. weak so that some exotic ARC SoCs can completely override it */ -void __attribute__((weak)) __cpuinit arc_local_timer_setup(unsigned int cpu) +void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu) { struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu); diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 0471d9c9dd54..e21692d2fdab 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -28,10 +28,9 @@ void __init trap_init(void) return; } -void die(const char *str, struct pt_regs *regs, unsigned long address, - unsigned long cause_reg) +void die(const char *str, struct pt_regs *regs, unsigned long address) { - show_kernel_fault_diag(str, regs, address, cause_reg); + show_kernel_fault_diag(str, regs, address); /* DEAD END */ __asm__("flag 1"); @@ -42,14 +41,13 @@ void die(const char *str, struct pt_regs *regs, unsigned long address, * -for user faults enqueues requested signal * -for kernel, chk if due to copy_(to|from)_user, otherwise die() */ -static noinline int handle_exception(unsigned long cause, char *str, - struct pt_regs *regs, siginfo_t *info) +static noinline int +handle_exception(const char *str, struct pt_regs *regs, siginfo_t *info) { if (user_mode(regs)) { struct task_struct *tsk = current; tsk->thread.fault_address = (__force unsigned int)info->si_addr; - tsk->thread.cause_code = cause; force_sig_info(info->si_signo, info, tsk); @@ -58,14 +56,14 @@ static noinline int handle_exception(unsigned long cause, char *str, if (fixup_exception(regs)) return 0; - die(str, regs, (unsigned long)info->si_addr, cause); + die(str, regs, (unsigned long)info->si_addr); } return 1; } #define DO_ERROR_INFO(signr, str, name, sicode) \ -int name(unsigned long cause, unsigned long address, struct pt_regs *regs) \ +int name(unsigned long address, struct pt_regs *regs) \ { \ siginfo_t info = { \ .si_signo = signr, \ @@ -73,7 +71,7 @@ int name(unsigned long cause, unsigned long address, struct pt_regs *regs) \ .si_code = sicode, \ .si_addr = (void __user *)address, \ }; \ - return handle_exception(cause, str, regs, &info);\ + return handle_exception(str, regs, &info);\ } /* @@ -90,11 +88,11 @@ DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) /* * Entry Point for Misaligned Data access Exception, for emulating in software */ -int do_misaligned_access(unsigned long cause, unsigned long address, - struct pt_regs *regs, struct callee_regs *cregs) +int do_misaligned_access(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs) { - if (misaligned_fixup(address, regs, cause, cregs) != 0) - return do_misaligned_error(cause, address, regs); + if (misaligned_fixup(address, regs, cregs) != 0) + return do_misaligned_error(address, regs); return 0; } @@ -104,10 +102,9 @@ int do_misaligned_access(unsigned long cause, unsigned long address, * Entry point for miscll errors such as Nested Exceptions * -Duplicate TLB entry is handled seperately though */ -void do_machine_check_fault(unsigned long cause, unsigned long address, - struct pt_regs *regs) +void do_machine_check_fault(unsigned long address, struct pt_regs *regs) { - die("Machine Check Exception", regs, address, cause); + die("Machine Check Exception", regs, address); } @@ -120,23 +117,22 @@ void do_machine_check_fault(unsigned long cause, unsigned long address, * -1 used for software breakpointing (gdb) * -2 used by kprobes */ -void do_non_swi_trap(unsigned long cause, unsigned long address, - struct pt_regs *regs) +void do_non_swi_trap(unsigned long address, struct pt_regs *regs) { - unsigned int param = cause & 0xff; + unsigned int param = regs->ecr_param; switch (param) { case 1: - trap_is_brkpt(cause, address, regs); + trap_is_brkpt(address, regs); break; case 2: - trap_is_kprobe(param, address, regs); + trap_is_kprobe(address, regs); break; case 3: case 4: - kgdb_trap(regs, param); + kgdb_trap(regs); break; default: @@ -149,14 +145,14 @@ void do_non_swi_trap(unsigned long cause, unsigned long address, * -For a corner case, ARC kprobes implementation resorts to using * this exception, hence the check */ -void do_insterror_or_kprobe(unsigned long cause, - unsigned long address, - struct pt_regs *regs) +void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs) { + int rc; + /* Check if this exception is caused by kprobes */ - if (notify_die(DIE_IERR, "kprobe_ierr", regs, address, - cause, SIGILL) == NOTIFY_STOP) + rc = notify_die(DIE_IERR, "kprobe_ierr", regs, address, 0, SIGILL); + if (rc == NOTIFY_STOP) return; - insterror_is_error(cause, address, regs); + insterror_is_error(address, regs); } diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 11c301b81c92..73a7450ee622 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -101,7 +101,7 @@ static void show_faulting_vma(unsigned long address, char *buf) if (file) { struct path *path = &file->f_path; nm = d_path(path, buf, PAGE_SIZE - 1); - inode = vma->vm_file->f_path.dentry->d_inode; + inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; } @@ -117,23 +117,22 @@ static void show_faulting_vma(unsigned long address, char *buf) static void show_ecr_verbose(struct pt_regs *regs) { - unsigned int vec, cause_code, cause_reg; + unsigned int vec, cause_code; unsigned long address; - cause_reg = current->thread.cause_code; - pr_info("\n[ECR ]: 0x%08x => ", cause_reg); + pr_info("\n[ECR ]: 0x%08lx => ", regs->event); /* For Data fault, this is data address not instruction addr */ address = current->thread.fault_address; - vec = cause_reg >> 16; - cause_code = (cause_reg >> 8) & 0xFF; + vec = regs->ecr_vec; + cause_code = regs->ecr_cause; /* For DTLB Miss or ProtV, display the memory involved too */ if (vec == ECR_V_DTLB_MISS) { - pr_cont("Invalid %s 0x%08lx by insn @ 0x%08lx\n", - (cause_code == 0x01) ? "Read From" : - ((cause_code == 0x02) ? "Write to" : "EX"), + pr_cont("Invalid %s @ 0x%08lx by insn @ 0x%08lx\n", + (cause_code == 0x01) ? "Read" : + ((cause_code == 0x02) ? "Write" : "EX"), address, regs->ret); } else if (vec == ECR_V_ITLB_MISS) { pr_cont("Insn could not be fetched\n"); @@ -144,14 +143,12 @@ static void show_ecr_verbose(struct pt_regs *regs) } else if (vec == ECR_V_PROTV) { if (cause_code == ECR_C_PROTV_INST_FETCH) pr_cont("Execute from Non-exec Page\n"); - else if (cause_code == ECR_C_PROTV_LOAD) - pr_cont("Read from Non-readable Page\n"); - else if (cause_code == ECR_C_PROTV_STORE) - pr_cont("Write to Non-writable Page\n"); - else if (cause_code == ECR_C_PROTV_XCHG) - pr_cont("Data exchange protection violation\n"); else if (cause_code == ECR_C_PROTV_MISALIG_DATA) pr_cont("Misaligned r/w from 0x%08lx\n", address); + else + pr_cont("%s access not allowed on page\n", + (cause_code == 0x01) ? "Read" : + ((cause_code == 0x02) ? "Write" : "EX")); } else if (vec == ECR_V_INSN_ERR) { pr_cont("Illegal Insn\n"); } else { @@ -176,8 +173,7 @@ void show_regs(struct pt_regs *regs) print_task_path_n_nm(tsk, buf); show_regs_print_info(KERN_INFO); - if (current->thread.cause_code) - show_ecr_verbose(regs); + show_ecr_verbose(regs); pr_info("[EFA ]: 0x%08lx\n[BLINK ]: %pS\n[ERET ]: %pS\n", current->thread.fault_address, @@ -213,10 +209,9 @@ void show_regs(struct pt_regs *regs) } void show_kernel_fault_diag(const char *str, struct pt_regs *regs, - unsigned long address, unsigned long cause_reg) + unsigned long address) { current->thread.fault_address = address; - current->thread.cause_code = cause_reg; /* Caller and Callee regs */ show_regs(regs); diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index 4cd81633febd..c0f832f595d3 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -187,7 +187,7 @@ fault: state->fault = 1; * Returns 0 if successfully handled, 1 if some error happened */ int misaligned_fixup(unsigned long address, struct pt_regs *regs, - unsigned long cause, struct callee_regs *cregs) + struct callee_regs *cregs) { struct disasm_state state; char buf[TASK_COMM_LEN]; diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index a8d02223da44..e550b117ec4f 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -289,6 +289,8 @@ static void __init setup_unwind_table(struct unwind_table *table, * instead of the initial loc addr * return; */ + WARN(1, "unwinder: FDE->initial_location NULL %p\n", + (const u8 *)(fde + 1) + *fde); } ++n; } diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index d3c92f52d444..2555f5886af6 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -125,6 +125,11 @@ SECTIONS *(.debug_frame) __end_unwind = .; } + /* + * gcc 4.8 generates this for -fasynchonous-unwind-tables, + * while we still use the .debug_frame based unwinder + */ + /DISCARD/ : { *(.eh_frame) } #else /DISCARD/ : { *(.debug_frame) } #endif @@ -142,15 +147,18 @@ SECTIONS *(.arcextmap.*) } +#ifndef CONFIG_DEBUG_INFO /* open-coded because we need .debug_frame seperately for unwinding */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - .debug_info 0 : { *(.debug_info) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } + /DISCARD/ : { *(.debug_aranges) } + /DISCARD/ : { *(.debug_pubnames) } + /DISCARD/ : { *(.debug_info) } + /DISCARD/ : { *(.debug_abbrev) } + /DISCARD/ : { *(.debug_line) } + /DISCARD/ : { *(.debug_str) } + /DISCARD/ : { *(.debug_loc) } + /DISCARD/ : { *(.debug_macinfo) } + /DISCARD/ : { *(.debug_ranges) } +#endif #ifdef CONFIG_ARC_HAS_DCCM . = CONFIG_ARC_DCCM_BASE; diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index aedce1905441..f415d851b765 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -73,6 +73,33 @@ #include <asm/cachectl.h> #include <asm/setup.h> +/* Instruction cache related Auxiliary registers */ +#define ARC_REG_IC_BCR 0x77 /* Build Config reg */ +#define ARC_REG_IC_IVIC 0x10 +#define ARC_REG_IC_CTRL 0x11 +#define ARC_REG_IC_IVIL 0x19 +#if (CONFIG_ARC_MMU_VER > 2) +#define ARC_REG_IC_PTAG 0x1E +#endif + +/* Bit val in IC_CTRL */ +#define IC_CTRL_CACHE_DISABLE 0x1 + +/* Data cache related Auxiliary registers */ +#define ARC_REG_DC_BCR 0x72 /* Build Config reg */ +#define ARC_REG_DC_IVDC 0x47 +#define ARC_REG_DC_CTRL 0x48 +#define ARC_REG_DC_IVDL 0x4A +#define ARC_REG_DC_FLSH 0x4B +#define ARC_REG_DC_FLDL 0x4C +#if (CONFIG_ARC_MMU_VER > 2) +#define ARC_REG_DC_PTAG 0x5C +#endif + +/* Bit val in DC_CTRL */ +#define DC_CTRL_INV_MODE_FLUSH 0x40 +#define DC_CTRL_FLUSH_STATUS 0x100 + char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; @@ -89,8 +116,10 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) enb ? "" : "DISABLED (kernel-build)"); \ } - PR_CACHE(&cpuinfo_arc700[c].icache, __CONFIG_ARC_HAS_ICACHE, "I-Cache"); - PR_CACHE(&cpuinfo_arc700[c].dcache, __CONFIG_ARC_HAS_DCACHE, "D-Cache"); + PR_CACHE(&cpuinfo_arc700[c].icache, IS_ENABLED(CONFIG_ARC_HAS_ICACHE), + "I-Cache"); + PR_CACHE(&cpuinfo_arc700[c].dcache, IS_ENABLED(CONFIG_ARC_HAS_DCACHE), + "D-Cache"); return buf; } @@ -100,17 +129,23 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) * the cpuinfo structure for later use. * No Validation done here, simply read/convert the BCRs */ -void __cpuinit read_decode_cache_bcr(void) +void read_decode_cache_bcr(void) { - struct bcr_cache ibcr, dbcr; struct cpuinfo_arc_cache *p_ic, *p_dc; unsigned int cpu = smp_processor_id(); + struct bcr_cache { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif + } ibcr, dbcr; p_ic = &cpuinfo_arc700[cpu].icache; READ_BCR(ARC_REG_IC_BCR, ibcr); - if (ibcr.config == 0x3) - p_ic->assoc = 2; + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ p_ic->line_len = 8 << ibcr.line_len; p_ic->sz = 0x200 << ibcr.sz; p_ic->ver = ibcr.ver; @@ -118,8 +153,8 @@ void __cpuinit read_decode_cache_bcr(void) p_dc = &cpuinfo_arc700[cpu].dcache; READ_BCR(ARC_REG_DC_BCR, dbcr); - if (dbcr.config == 0x2) - p_dc->assoc = 4; + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ p_dc->line_len = 16 << dbcr.line_len; p_dc->sz = 0x200 << dbcr.sz; p_dc->ver = dbcr.ver; @@ -132,14 +167,12 @@ void __cpuinit read_decode_cache_bcr(void) * 3. Enable the Caches, setup default flush mode for D-Cache * 3. Calculate the SHMLBA used by user space */ -void __cpuinit arc_cache_init(void) +void arc_cache_init(void) { - unsigned int temp; unsigned int cpu = smp_processor_id(); struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - int way_pg_ratio = way_pg_ratio; - int dcache_does_alias; + unsigned int dcache_does_alias, temp; char str[256]; printk(arc_cache_mumbojumbo(0, str, sizeof(str))); @@ -149,20 +182,11 @@ void __cpuinit arc_cache_init(void) #ifdef CONFIG_ARC_HAS_ICACHE /* 1. Confirm some of I-cache params which Linux assumes */ - if ((ic->assoc != ARC_ICACHE_WAYS) || - (ic->line_len != ARC_ICACHE_LINE_LEN)) { + if (ic->line_len != ARC_ICACHE_LINE_LEN) panic("Cache H/W doesn't match kernel Config"); - } -#if (CONFIG_ARC_MMU_VER > 2) - if (ic->ver != 3) { - if (running_on_hw) - panic("Cache ver doesn't match MMU ver\n"); - - /* For ISS - suggest the toggles to use */ - pr_err("Use -prop=icache_version=3,-prop=dcache_version=3\n"); - } -#endif + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver doesn't match MMU ver\n"); #endif /* Enable/disable I-Cache */ @@ -181,14 +205,12 @@ chk_dc: return; #ifdef CONFIG_ARC_HAS_DCACHE - if ((dc->assoc != ARC_DCACHE_WAYS) || - (dc->line_len != ARC_DCACHE_LINE_LEN)) { + if (dc->line_len != ARC_DCACHE_LINE_LEN) panic("Cache H/W doesn't match kernel Config"); - } - - dcache_does_alias = (dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE; /* check for D-Cache aliasing */ + dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE; + if (dcache_does_alias && !cache_is_vipt_aliasing()) panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); else if (!dcache_does_alias && cache_is_vipt_aliasing()) @@ -239,11 +261,9 @@ static inline void wait_for_flush(void) */ static inline void __dc_entire_op(const int cacheop) { - unsigned long flags, tmp = tmp; + unsigned int tmp = tmp; int aux; - local_irq_save(flags); - if (cacheop == OP_FLUSH_N_INV) { /* Dcache provides 2 cmd: FLUSH or INV * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE @@ -267,8 +287,6 @@ static inline void __dc_entire_op(const int cacheop) /* Switch back the DISCARD ONLY Invalidate mode */ if (cacheop == OP_FLUSH_N_INV) write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH); - - local_irq_restore(flags); } /* @@ -459,8 +477,15 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, local_irq_restore(flags); } +static inline void __ic_entire_inv(void) +{ + write_aux_reg(ARC_REG_IC_IVIC, 1); + read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ +} + #else +#define __ic_entire_inv() #define __ic_line_inv_vaddr(pstart, vstart, sz) #endif /* CONFIG_ARC_HAS_ICACHE */ @@ -487,7 +512,7 @@ void flush_dcache_page(struct page *page) struct address_space *mapping; if (!cache_is_vipt_aliasing()) { - set_bit(PG_arch_1, &page->flags); + clear_bit(PG_dc_clean, &page->flags); return; } @@ -501,7 +526,7 @@ void flush_dcache_page(struct page *page) * Make a note that K-mapping is dirty */ if (!mapping_mapped(mapping)) { - set_bit(PG_arch_1, &page->flags); + clear_bit(PG_dc_clean, &page->flags); } else if (page_mapped(page)) { /* kernel reading from page with U-mapping */ @@ -629,26 +654,13 @@ void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr) __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } -void flush_icache_all(void) -{ - unsigned long flags; - - local_irq_save(flags); - - write_aux_reg(ARC_REG_IC_IVIC, 1); - - /* lr will not complete till the icache inv operation is not over */ - read_aux_reg(ARC_REG_IC_CTRL); - local_irq_restore(flags); -} - noinline void flush_cache_all(void) { unsigned long flags; local_irq_save(flags); - flush_icache_all(); + __ic_entire_inv(); __dc_entire_op(OP_FLUSH_N_INV); local_irq_restore(flags); @@ -667,7 +679,12 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, { unsigned int paddr = pfn << PAGE_SHIFT; - __sync_icache_dcache(paddr, u_vaddr, PAGE_SIZE); + u_vaddr &= PAGE_MASK; + + ___flush_dcache_page(paddr, u_vaddr); + + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, u_vaddr); } void flush_cache_range(struct vm_area_struct *vma, unsigned long start, @@ -717,7 +734,7 @@ void copy_user_highpage(struct page *to, struct page *from, * non copied user pages (e.g. read faults which wire in pagecache page * directly). */ - set_bit(PG_arch_1, &to->flags); + clear_bit(PG_dc_clean, &to->flags); /* * if SRC was already usermapped and non-congruent to kernel mapping @@ -725,15 +742,16 @@ void copy_user_highpage(struct page *to, struct page *from, */ if (clean_src_k_mappings) { __flush_dcache_page(kfrom, kfrom); + set_bit(PG_dc_clean, &from->flags); } else { - set_bit(PG_arch_1, &from->flags); + clear_bit(PG_dc_clean, &from->flags); } } void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) { clear_page(to); - set_bit(PG_arch_1, &page->flags); + clear_bit(PG_dc_clean, &page->flags); } diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 689ffd86d5e9..0fd1f0d515ff 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -15,6 +15,7 @@ #include <linux/uaccess.h> #include <linux/kdebug.h> #include <asm/pgalloc.h> +#include <asm/mmu.h> static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) { @@ -51,14 +52,14 @@ bad_area: return 1; } -void do_page_fault(struct pt_regs *regs, int write, unsigned long address, - unsigned long cause_code) +void do_page_fault(struct pt_regs *regs, unsigned long address) { struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; siginfo_t info; int fault, ret; + int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | (write ? FAULT_FLAG_WRITE : 0); @@ -109,7 +110,8 @@ good_area: /* Handle protection violation, execute on heap or stack */ - if (cause_code == ((ECR_V_PROTV << 16) | ECR_C_PROTV_INST_FETCH)) + if ((regs->ecr_vec == ECR_V_PROTV) && + (regs->ecr_cause == ECR_C_PROTV_INST_FETCH)) goto bad_area; if (write) { @@ -176,7 +178,6 @@ bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.fault_address = address; - tsk->thread.cause_code = cause_code; info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ @@ -197,7 +198,7 @@ no_context: if (fixup_exception(regs)) return; - die("Oops", regs, address, cause_code); + die("Oops", regs, address); out_of_memory: if (is_global_init(tsk)) { @@ -206,8 +207,10 @@ out_of_memory: } up_read(&mm->mmap_sem); - if (user_mode(regs)) - do_group_exit(SIGKILL); /* This will never return */ + if (user_mode(regs)) { + pagefault_out_of_memory(); + return; + } goto no_context; @@ -218,7 +221,6 @@ do_sigbus: goto no_context; tsk->thread.fault_address = address; - tsk->thread.cause_code = cause_code; info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 4a177365b2c4..a08ce7185423 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -74,7 +74,7 @@ void __init setup_arch_memory(void) /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ max_low_pfn = max_pfn = PFN_DOWN(end_mem); - max_mapnr = num_physpages = max_low_pfn - min_low_pfn; + max_mapnr = max_low_pfn - min_low_pfn; /*------------- reserve kernel image -----------------------*/ memblock_reserve(CONFIG_LINUX_LINK_BASE, @@ -84,7 +84,7 @@ void __init setup_arch_memory(void) /*-------------- node setup --------------------------------*/ memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = num_physpages; + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; /* * We can't use the helper free_area_init(zones[]) because it uses @@ -106,39 +106,9 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { - int codesize, datasize, initsize, reserved_pages, free_pages; - int tmp; - high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz); - - totalram_pages = free_all_bootmem(); - - /* count all reserved pages [kernel code/data/mem_map..] */ - reserved_pages = 0; - for (tmp = 0; tmp < max_mapnr; tmp++) - if (PageReserved(mem_map + tmp)) - reserved_pages++; - - /* XXX: nr_free_pages() is equivalent */ - free_pages = max_mapnr - reserved_pages; - - /* - * For the purpose of display below, split the "reserve mem" - * kernel code/data is already shown explicitly, - * Show any other reservations (mem_map[ ] et al) - */ - reserved_pages -= (((unsigned int)_end - CONFIG_LINUX_LINK_BASE) >> - PAGE_SHIFT); - - codesize = _etext - _text; - datasize = _end - _etext; - initsize = __init_end - __init_begin; - - pr_info("Memory Available: %dM / %ldM (%dK code, %dK data, %dK init, %dK reserv)\n", - PAGES_TO_MB(free_pages), - TO_MB(arc_mem_sz), - TO_KB(codesize), TO_KB(datasize), TO_KB(initsize), - PAGES_TO_KB(reserved_pages)); + free_all_bootmem(); + mem_init_print_info(NULL); } /* @@ -146,13 +116,13 @@ void __init mem_init(void) */ void __init_refok free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index fe1c5a073afe..7957dc4e4d4a 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -55,7 +55,7 @@ #include <asm/arcregs.h> #include <asm/setup.h> #include <asm/mmu_context.h> -#include <asm/tlb.h> +#include <asm/mmu.h> /* Need for ARC MMU v2 * @@ -97,6 +97,7 @@ * J-TLB entry got evicted/replaced. */ + /* A copy of the ASID from the PID reg is kept in asid_cache */ int asid_cache = FIRST_ASID; @@ -432,9 +433,14 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); + if (page == ZERO_PAGE(0)) { + return; + } + /* * Exec page : Independent of aliasing/page-color considerations, * since icache doesn't snoop dcache on ARC, any dirty @@ -446,9 +452,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, */ if ((vma->vm_flags & VM_EXEC) || addr_not_cache_congruent(paddr, vaddr)) { - struct page *page = pfn_to_page(pte_pfn(*ptep)); - int dirty = test_and_clear_bit(PG_arch_1, &page->flags); + int dirty = !test_and_set_bit(PG_dc_clean, &page->flags); if (dirty) { /* wback + inv dcache lines */ __flush_dcache_page(paddr, paddr); @@ -464,12 +469,27 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs */ -void __cpuinit read_decode_mmu_bcr(void) +void read_decode_mmu_bcr(void) { - unsigned int tmp; - struct bcr_mmu_1_2 *mmu2; /* encoded MMU2 attr */ - struct bcr_mmu_3 *mmu3; /* encoded MMU3 attr */ struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int tmp; + struct bcr_mmu_1_2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; +#else + unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; +#endif + } *mmu2; + + struct bcr_mmu_3 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, + u_itlb:4, u_dtlb:4; +#else + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, + ways:4, ver:8; +#endif + } *mmu3; tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); @@ -505,12 +525,12 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n", p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - __CONFIG_ARC_MMU_SASID_VAL ? "SASID" : ""); + IS_ENABLED(CONFIG_ARC_MMU_SASID) ? "SASID" : ""); return buf; } -void __cpuinit arc_mmu_init(void) +void arc_mmu_init(void) { char str[256]; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 3357d26ffe54..5c5bb23001b0 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -39,7 +39,7 @@ #include <linux/linkage.h> #include <asm/entry.h> -#include <asm/tlb.h> +#include <asm/mmu.h> #include <asm/pgtable.h> #include <asm/arcregs.h> #include <asm/cache.h> @@ -147,9 +147,9 @@ ex_saved_reg1: #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT and.f 0, r0, _PAGE_PRESENT bz 1f - ld r2, [num_pte_not_present] - add r2, r2, 1 - st r2, [num_pte_not_present] + ld r3, [num_pte_not_present] + add r3, r3, 1 + st r3, [num_pte_not_present] 1: #endif @@ -271,22 +271,22 @@ ARC_ENTRY EV_TLBMissI #endif ;---------------------------------------------------------------- - ; Get the PTE corresponding to V-addr accessed + ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA LOAD_FAULT_PTE ;---------------------------------------------------------------- ; VERIFY_PTE: Check if PTE permissions approp for executing code cmp_s r2, VMALLOC_START - mov.lo r2, (_PAGE_PRESENT | _PAGE_U_READ | _PAGE_U_EXECUTE) - mov.hs r2, (_PAGE_PRESENT | _PAGE_K_READ | _PAGE_K_EXECUTE) + mov.lo r2, (_PAGE_PRESENT | _PAGE_U_EXECUTE) + mov.hs r2, (_PAGE_PRESENT | _PAGE_K_EXECUTE) and r3, r0, r2 ; Mask out NON Flag bits from PTE xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) bnz do_slow_path_pf ; Let Linux VM know that the page was accessed - or r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; set Accessed Bit - st_s r0, [r1] ; Write back PTE + or r0, r0, _PAGE_ACCESSED ; set Accessed Bit + st_s r0, [r1] ; Write back PTE CONV_PTE_TO_TLB COMMIT_ENTRY_TO_MMU @@ -311,7 +311,7 @@ ARC_ENTRY EV_TLBMissD ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed - ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE + ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA LOAD_FAULT_PTE ;---------------------------------------------------------------- @@ -345,7 +345,7 @@ ARC_ENTRY EV_TLBMissD ;---------------------------------------------------------------- ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty lr r3, [ecr] - or r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; Accessed bit always + or r0, r0, _PAGE_ACCESSED ; Accessed bit always btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well st_s r0, [r1] ; Write back PTE @@ -381,18 +381,7 @@ do_slow_path_pf: ; ------- setup args for Linux Page fault Hanlder --------- mov_s r0, sp - lr r2, [efa] - lr r3, [ecr] - - ; Both st and ex imply WRITE access of some sort, hence do_page_fault( ) - ; invoked with write=1 for DTLB-st/ex Miss and write=0 for ITLB miss or - ; DTLB-ld Miss - ; DTLB Miss Cause code is ld = 0x01 , st = 0x02, ex = 0x03 - ; Following code uses that fact that st/ex have one bit in common - - btst_s r3, ECR_C_BIT_DTLB_ST_MISS - mov.z r1, 0 - mov.nz r1, 1 + lr r1, [efa] ; We don't want exceptions to be disabled while the fault is handled. ; Now that we have saved the context we return from exception hence diff --git a/arch/arc/plat-arcfpga/include/plat/irq.h b/arch/arc/plat-arcfpga/include/plat/irq.h index 41e335670f60..6adbc53c3a5b 100644 --- a/arch/arc/plat-arcfpga/include/plat/irq.h +++ b/arch/arc/plat-arcfpga/include/plat/irq.h @@ -16,8 +16,6 @@ #define UART1_IRQ 10 #define UART2_IRQ 11 -#define VMAC_IRQ 6 - #define IDE_IRQ 13 #define PCI_IRQ 14 #define PS2_IRQ 15 diff --git a/arch/arc/plat-arcfpga/include/plat/memmap.h b/arch/arc/plat-arcfpga/include/plat/memmap.h index 1663f3388085..5c78e6135a1f 100644 --- a/arch/arc/plat-arcfpga/include/plat/memmap.h +++ b/arch/arc/plat-arcfpga/include/plat/memmap.h @@ -15,8 +15,6 @@ #define UART0_BASE 0xC0FC1000 #define UART1_BASE 0xC0FC1100 -#define VMAC_REG_BASEADDR 0xC0FC2000 - #define IDE_CONTROLLER_BASE 0xC0FC9000 #define AHB_PCI_HOST_BRG_BASE 0xC0FD0000 diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-arcfpga/platform.c index b3700c064c06..d71f3c3bcf24 100644 --- a/arch/arc/plat-arcfpga/platform.c +++ b/arch/arc/plat-arcfpga/platform.c @@ -77,6 +77,7 @@ static void __init setup_bvci_lat_unit(void) /*----------------------- Platform Devices -----------------------------*/ +#if IS_ENABLED(CONFIG_SERIAL_ARC) static unsigned long arc_uart_info[] = { 0, /* uart->is_emulated (runtime @running_on_hw) */ 0, /* uart->port.uartclk */ @@ -115,7 +116,7 @@ static struct platform_device arc_uart0_dev = { static struct platform_device *fpga_early_devs[] __initdata = { &arc_uart0_dev, }; -#endif +#endif /* CONFIG_SERIAL_ARC_CONSOLE */ static void arc_fpga_serial_init(void) { @@ -152,8 +153,13 @@ static void arc_fpga_serial_init(void) * otherwise the early console never gets a chance to run. */ add_preferred_console("ttyARC", 0, "115200"); -#endif +#endif /* CONFIG_SERIAL_ARC_CONSOLE */ +} +#else /* !IS_ENABLED(CONFIG_SERIAL_ARC) */ +static void arc_fpga_serial_init(void) +{ } +#endif static void __init plat_fpga_early_init(void) { @@ -169,7 +175,7 @@ static void __init plat_fpga_early_init(void) } static struct of_dev_auxdata plat_auxdata_lookup[] __initdata = { -#if defined(CONFIG_SERIAL_ARC) || defined(CONFIG_SERIAL_ARC_MODULE) +#if IS_ENABLED(CONFIG_SERIAL_ARC) OF_DEV_AUXDATA("snps,arc-uart", UART0_BASE, "arc-uart", arc_uart_info), #endif {} diff --git a/arch/arc/plat-tb10x/Kconfig b/arch/arc/plat-tb10x/Kconfig index 1d3452100f1f..1ab386bb5da8 100644 --- a/arch/arc/plat-tb10x/Kconfig +++ b/arch/arc/plat-tb10x/Kconfig @@ -22,6 +22,7 @@ menuconfig ARC_PLAT_TB10X select PINCTRL select PINMUX select ARCH_REQUIRE_GPIOLIB + select TB10X_IRQC help Support for platforms based on the TB10x home media gateway SOC by Abilis Systems. TB10x is based on the ARC700 CPU architecture. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index de7049bdea85..0ac9be677ebb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,7 @@ config ARM select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP + select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_IDLE_POLL_SETUP select GENERIC_STRNCPY_FROM_USER @@ -40,6 +41,7 @@ config ARM select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ @@ -175,6 +177,9 @@ config ARCH_HAS_CPUFREQ and that the relevant menu configurations are displayed for it. +config ARCH_HAS_BANDGAP + bool + config GENERIC_HWEIGHT bool default y @@ -1450,7 +1455,7 @@ config SMP depends on CPU_V6K || CPU_V7 depends on GENERIC_CLOCKEVENTS depends on HAVE_SMP - depends on MMU + depends on MMU || ARM_MPU select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have @@ -1471,7 +1476,7 @@ config SMP config SMP_ON_UP bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)" - depends on SMP && !XIP_KERNEL + depends on SMP && !XIP_KERNEL && MMU default y help SMP kernels contain instructions which fail on non-SMP processors. @@ -1744,6 +1749,14 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + depends on ARM_LPAE + +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + depends on ARM_LPAE + source "mm/Kconfig" config FORCE_MAX_ZONEORDER diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index c859495da480..aed66d5df7f1 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -50,3 +50,15 @@ config REMAP_VECTORS_TO_RAM Otherwise, say 'y' here. In this case, the kernel will require external support to redirect the hardware exception vectors to the writable versions located at DRAM_BASE. + +config ARM_MPU + bool 'Use the ARM v7 PMSA Compliant MPU' + depends on CPU_V7 + default y + help + Some ARM systems without an MMU have instead a Memory Protection + Unit (MPU) that defines the type and permissions for regions of + memory. + + If your CPU has an MPU then you should choose 'y' here unless you + know that you do not want to use the MPU. diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 77c1411c9a91..5b7be8d975b5 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -550,6 +550,13 @@ choice of the tiles using the RS1 memory map, including all new A-class core tiles, FPGA-based SMMs and software models. + config DEBUG_VEXPRESS_UART0_CRX + bool "Use PL011 UART0 at 0xb0090000 (Cortex-R compliant tiles)" + depends on ARCH_VEXPRESS && !MMU + help + This option selects UART0 at 0xb0090000. This is appropriate for + Cortex-R series tiles and SMMs, such as Cortex-R5 and Cortex-R7 + config DEBUG_VT8500_UART0 bool "Use UART0 on VIA/Wondermedia SoCs" depends on ARCH_VT8500 @@ -789,7 +796,8 @@ config DEBUG_LL_INCLUDE default "debug/u300.S" if DEBUG_U300_UART default "debug/ux500.S" if DEBUG_UX500_UART default "debug/vexpress.S" if DEBUG_VEXPRESS_UART0_DETECT || \ - DEBUG_VEXPRESS_UART0_CA9 || DEBUG_VEXPRESS_UART0_RS1 + DEBUG_VEXPRESS_UART0_CA9 || DEBUG_VEXPRESS_UART0_RS1 || \ + DEBUG_VEXPRESS_UART0_CRX default "debug/vt8500.S" if DEBUG_VT8500_UART0 default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1 default "mach/debug-macro.S" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 72caf82a8280..c0ac0f5e5e5c 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -59,38 +59,44 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7M) :=-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) +arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m +arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) +arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k) +arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k) endif -arch-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) -arch-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 -march=armv4t -arch-$(CONFIG_CPU_32v4) :=-D__LINUX_ARM_ARCH__=4 -march=armv4 -arch-$(CONFIG_CPU_32v3) :=-D__LINUX_ARM_ARCH__=3 -march=armv3 +arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) +arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t +arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4 +arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3 + +# Evaluate arch cc-option calls now +arch-y := $(arch-y) # This selects how we optimise for the processor. -tune-$(CONFIG_CPU_ARM7TDMI) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM720T) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM740T) :=-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM9TDMI) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM940T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM946E) :=$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi) -tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 -tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 -tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_XSC3) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_FEROCEON) :=$(call cc-option,-mtune=marvell-f,-mtune=xscale) -tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) -tune-$(CONFIG_CPU_V6K) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) +tune-$(CONFIG_CPU_ARM7TDMI) =-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi) +tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi +tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110 +tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100 +tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale +tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale +tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale) +tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) +tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) + +# Evaluate tune cc-option calls now +tune-y := $(tune-y) ifeq ($(CONFIG_AEABI),y) CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork @@ -295,9 +301,10 @@ zImage Image xipImage bootpImage uImage: vmlinux zinstall uinstall install: vmlinux $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ -%.dtb: scripts +%.dtb: | scripts $(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@ +PHONY += dtbs dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) dtbs diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index f79a08efe000..47279aa96a6a 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -6,6 +6,7 @@ piggy.gzip piggy.lzo piggy.lzma piggy.xzkern +piggy.lz4 vmlinux vmlinux.lds diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 120b83bfde20..7ac1610252ba 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -27,7 +27,7 @@ OBJS += misc.o decompress.o ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y) OBJS += debug.o endif -FONTC = $(srctree)/drivers/video/console/font_acorn_8x8.c +FONTC = $(srctree)/lib/fonts/font_acorn_8x8.c # string library code (-Os is enforced to keep it much smaller) OBJS += string.o @@ -91,6 +91,7 @@ suffix_$(CONFIG_KERNEL_GZIP) = gzip suffix_$(CONFIG_KERNEL_LZO) = lzo suffix_$(CONFIG_KERNEL_LZMA) = lzma suffix_$(CONFIG_KERNEL_XZ) = xzkern +suffix_$(CONFIG_KERNEL_LZ4) = lz4 # Borrowed libfdt files for the ATAG compatibility mode @@ -115,7 +116,7 @@ targets := vmlinux vmlinux.lds \ font.o font.c head.o misc.o $(OBJS) # Make sure files are removed during clean -extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern \ +extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \ lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs) \ hyp-stub.S diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index aabc02a68482..d1153c8a765a 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -53,6 +53,17 @@ static const void *getprop(const void *fdt, const char *node_path, return fdt_getprop(fdt, offset, property, len); } +static uint32_t get_cell_size(const void *fdt) +{ + int len; + uint32_t cell_size = 1; + const uint32_t *size_len = getprop(fdt, "/", "#size-cells", &len); + + if (size_len) + cell_size = fdt32_to_cpu(*size_len); + return cell_size; +} + static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline) { char cmdline[COMMAND_LINE_SIZE]; @@ -95,9 +106,11 @@ static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline) int atags_to_fdt(void *atag_list, void *fdt, int total_space) { struct tag *atag = atag_list; - uint32_t mem_reg_property[2 * NR_BANKS]; + /* In the case of 64 bits memory size, need to reserve 2 cells for + * address and size for each bank */ + uint32_t mem_reg_property[2 * 2 * NR_BANKS]; int memcount = 0; - int ret; + int ret, memsize; /* make sure we've got an aligned pointer */ if ((u32)atag_list & 0x3) @@ -137,8 +150,25 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) continue; if (!atag->u.mem.size) continue; - mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start); - mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size); + memsize = get_cell_size(fdt); + + if (memsize == 2) { + /* if memsize is 2, that means that + * each data needs 2 cells of 32 bits, + * so the data are 64 bits */ + uint64_t *mem_reg_prop64 = + (uint64_t *)mem_reg_property; + mem_reg_prop64[memcount++] = + cpu_to_fdt64(atag->u.mem.start); + mem_reg_prop64[memcount++] = + cpu_to_fdt64(atag->u.mem.size); + } else { + mem_reg_property[memcount++] = + cpu_to_fdt32(atag->u.mem.start); + mem_reg_property[memcount++] = + cpu_to_fdt32(atag->u.mem.size); + } + } else if (atag->hdr.tag == ATAG_INITRD2) { uint32_t initrd_start, initrd_size; initrd_start = atag->u.initrd.start; @@ -150,8 +180,10 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) } } - if (memcount) - setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount); + if (memcount) { + setprop(fdt, "/memory", "reg", mem_reg_property, + 4 * memcount * memsize); + } return fdt_pack(fdt); } diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index 24b0475cb8bf..bd245d34952d 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -51,6 +51,10 @@ extern char * strstr(const char * s1, const char *s2); #include "../../../../lib/decompress_unxz.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { return decompress(input, len, NULL, NULL, output, NULL, error); diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 032a8d987148..75189f13cf54 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -142,7 +142,6 @@ start: mov r7, r1 @ save architecture ID mov r8, r2 @ save atags pointer -#ifndef __ARM_ARCH_2__ /* * Booting from Angel - need to enter SVC mode and disable * FIQs/IRQs (numeric definitions from angel arm.h source). @@ -158,10 +157,6 @@ not_angel: safe_svcmode_maskall r0 msr spsr_cxsf, r9 @ Save the CPU boot mode in @ SPSR -#else - teqp pc, #0x0c000003 @ turn off interrupts -#endif - /* * Note that some cache flushing and other stuff may * be needed here - is there an Angel SWI call for this? @@ -183,7 +178,19 @@ not_angel: ldr r4, =zreladdr #endif - bl cache_on + /* + * Set up a page table only if it won't overwrite ourself. + * That means r4 < pc && r4 - 16k page directory > &_end. + * Given that r4 > &_end is most unfrequent, we add a rough + * additional 1MB of room for a possible appended DTB. + */ + mov r0, pc + cmp r0, r4 + ldrcc r0, LC0+32 + addcc r0, r0, pc + cmpcc r4, r0 + orrcc r4, r4, #1 @ remember we skipped cache_on + blcs cache_on restart: adr r0, LC0 ldmia r0, {r1, r2, r3, r6, r10, r11, r12} @@ -229,7 +236,7 @@ restart: adr r0, LC0 * r0 = delta * r2 = BSS start * r3 = BSS end - * r4 = final kernel address + * r4 = final kernel address (possibly with LSB set) * r5 = appended dtb size (still unknown) * r6 = _edata * r7 = architecture ID @@ -277,6 +284,7 @@ restart: adr r0, LC0 */ cmp r0, #1 sub r0, r4, #TEXT_OFFSET + bic r0, r0, #1 add r0, r0, #0x100 mov r1, r6 sub r2, sp, r6 @@ -323,12 +331,13 @@ dtb_check_done: /* * Check to see if we will overwrite ourselves. - * r4 = final kernel address + * r4 = final kernel address (possibly with LSB set) * r9 = size of decompressed image * r10 = end of this image, including bss/stack/malloc space if non XIP * We basically want: * r4 - 16k page directory >= r10 -> OK * r4 + image length <= address of wont_overwrite -> OK + * Note: the possible LSB in r4 is harmless here. */ add r10, r10, #16384 cmp r4, r10 @@ -390,7 +399,8 @@ dtb_check_done: add sp, sp, r6 #endif - bl cache_clean_flush + tst r4, #1 + bleq cache_clean_flush adr r0, BSYM(restart) add r0, r0, r6 @@ -402,7 +412,7 @@ wont_overwrite: * r0 = delta * r2 = BSS start * r3 = BSS end - * r4 = kernel execution address + * r4 = kernel execution address (possibly with LSB set) * r5 = appended dtb size (0 if not present) * r7 = architecture ID * r8 = atags pointer @@ -465,6 +475,15 @@ not_relocated: mov r0, #0 cmp r2, r3 blo 1b + /* + * Did we skip the cache setup earlier? + * That is indicated by the LSB in r4. + * Do it now if so. + */ + tst r4, #1 + bic r4, r4, #1 + blne cache_on + /* * The C runtime environment should now be setup sufficiently. * Set up some pointers, and start decompressing. @@ -513,6 +532,7 @@ LC0: .word LC0 @ r1 .word _got_start @ r11 .word _got_end @ ip .word .L_user_stack_end @ sp + .word _end - restart + 16384 + 1024*1024 .size LC0, . - LC0 #ifdef CONFIG_ARCH_RPC diff --git a/arch/arm/boot/compressed/piggy.lz4.S b/arch/arm/boot/compressed/piggy.lz4.S new file mode 100644 index 000000000000..3d9a575618a3 --- /dev/null +++ b/arch/arm/boot/compressed/piggy.lz4.S @@ -0,0 +1,6 @@ + .section .piggydata,#alloc + .globl input_data +input_data: + .incbin "arch/arm/boot/compressed/piggy.lz4" + .globl input_data_end +input_data_end: diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts index 04feaf8f1420..444b4ede0d60 100644 --- a/arch/arm/boot/dts/am335x-bone.dts +++ b/arch/arm/boot/dts/am335x-bone.dts @@ -214,10 +214,12 @@ &cpsw_emac0 { phy_id = <&davinci_mdio>, <0>; + phy-mode = "mii"; }; &cpsw_emac1 { phy_id = <&davinci_mdio>, <1>; + phy-mode = "mii"; }; &mac { diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index a16bb9691cc6..3aee1a43782d 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -467,8 +467,24 @@ &cpsw_emac0 { phy_id = <&davinci_mdio>, <0>; + phy-mode = "rgmii-txid"; }; &cpsw_emac1 { phy_id = <&davinci_mdio>, <1>; + phy-mode = "rgmii-txid"; +}; + +&tscadc { + status = "okay"; + tsc { + ti,wires = <4>; + ti,x-plate-resistance = <200>; + ti,coordiante-readouts = <5>; + ti,wire-config = <0x00 0x11 0x22 0x33>; + }; + + adc { + ti,adc-channels = <4 5 6 7>; + }; }; diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 9e00eef9b74b..0c8ad173d2b0 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -392,3 +392,13 @@ pinctrl-0 = <&davinci_mdio_default>; pinctrl-1 = <&davinci_mdio_sleep>; }; + +&cpsw_emac0 { + phy_id = <&davinci_mdio>, <0>; + phy-mode = "rgmii-txid"; +}; + +&cpsw_emac1 { + phy_id = <&davinci_mdio>, <1>; + phy-mode = "rgmii-txid"; +}; diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 0d4df90477f7..38b446ba1ce1 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -502,6 +502,23 @@ status = "disabled"; }; + tscadc: tscadc@44e0d000 { + compatible = "ti,am3359-tscadc"; + reg = <0x44e0d000 0x1000>; + interrupt-parent = <&intc>; + interrupts = <16>; + ti,hwmods = "adc_tsc"; + status = "disabled"; + + tsc { + compatible = "ti,am3359-tsc"; + }; + am335x_adc: adc { + #io-channel-cells = <1>; + compatible = "ti,am3359-adc"; + }; + }; + gpmc: gpmc@50000000 { compatible = "ti,am3352-gpmc"; ti,hwmods = "gpmc"; diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi index 7d1a27949c13..9866cd736dee 100644 --- a/arch/arm/boot/dts/atlas6.dtsi +++ b/arch/arm/boot/dts/atlas6.dtsi @@ -613,7 +613,7 @@ }; rtc-iobg { - compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus"; + compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus", "simple-bus"; #address-cells = <1>; #size-cells = <1>; reg = <0x80030000 0x10000>; diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi index 17979d5f23b4..c0cdf66f8964 100644 --- a/arch/arm/boot/dts/bcm11351.dtsi +++ b/arch/arm/boot/dts/bcm11351.dtsi @@ -50,10 +50,10 @@ }; L2: l2-cache { - compatible = "arm,pl310-cache"; - reg = <0x3ff20000 0x1000>; - cache-unified; - cache-level = <2>; + compatible = "bcm,bcm11351-a2-pl310-cache"; + reg = <0x3ff20000 0x1000>; + cache-unified; + cache-level = <2>; }; timer@35006000 { diff --git a/arch/arm/boot/dts/cros5250-common.dtsi b/arch/arm/boot/dts/cros5250-common.dtsi index 3f0239ec1bc5..dc259e8b8a73 100644 --- a/arch/arm/boot/dts/cros5250-common.dtsi +++ b/arch/arm/boot/dts/cros5250-common.dtsi @@ -190,7 +190,7 @@ samsung,i2c-max-bus-freq = <66000>; hdmiddc@50 { - compatible = "samsung,exynos5-hdmiddc"; + compatible = "samsung,exynos4210-hdmiddc"; reg = <0x50>; }; }; @@ -224,7 +224,7 @@ samsung,i2c-max-bus-freq = <378000>; hdmiphy@38 { - compatible = "samsung,exynos5-hdmiphy"; + compatible = "samsung,exynos4212-hdmiphy"; reg = <0x38>; }; }; diff --git a/arch/arm/boot/dts/ecx-common.dtsi b/arch/arm/boot/dts/ecx-common.dtsi index d61b535f682a..e8559b753c9d 100644 --- a/arch/arm/boot/dts/ecx-common.dtsi +++ b/arch/arm/boot/dts/ecx-common.dtsi @@ -33,6 +33,8 @@ calxeda,port-phys = <&combophy5 0 &combophy0 0 &combophy0 1 &combophy0 2 &combophy0 3>; + calxeda,sgpio-gpio =<&gpioh 5 1 &gpioh 6 1 &gpioh 7 1>; + calxeda,led-order = <4 0 1 2 3>; }; sdhci@ffe0e000 { diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index 35a66dee4011..49f18c24a576 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -105,7 +105,7 @@ samsung,i2c-max-bus-freq = <66000>; hdmiddc@50 { - compatible = "samsung,exynos5-hdmiddc"; + compatible = "samsung,exynos4210-hdmiddc"; reg = <0x50>; }; }; @@ -135,7 +135,7 @@ samsung,i2c-max-bus-freq = <66000>; hdmiphy@38 { - compatible = "samsung,exynos5-hdmiphy"; + compatible = "samsung,exynos4212-hdmiphy"; reg = <0x38>; }; }; diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 41cd625b6020..ef57277fc38f 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -599,7 +599,7 @@ }; hdmi { - compatible = "samsung,exynos5-hdmi"; + compatible = "samsung,exynos4212-hdmi"; reg = <0x14530000 0x70000>; interrupts = <0 95 0>; clocks = <&clock 333>, <&clock 136>, <&clock 137>, @@ -609,7 +609,7 @@ }; mixer { - compatible = "samsung,exynos5-mixer"; + compatible = "samsung,exynos5250-mixer"; reg = <0x14450000 0x10000>; interrupts = <0 94 0>; }; diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts index 3637bf3b1d59..1f0d38d7b16f 100644 --- a/arch/arm/boot/dts/imx28-evk.dts +++ b/arch/arm/boot/dts/imx28-evk.dts @@ -155,12 +155,14 @@ can0: can@80032000 { pinctrl-names = "default"; pinctrl-0 = <&can0_pins_a>; + xceiver-supply = <®_can_3v3>; status = "okay"; }; can1: can@80034000 { pinctrl-names = "default"; pinctrl-0 = <&can1_pins_a>; + xceiver-supply = <®_can_3v3>; status = "okay"; }; }; @@ -319,6 +321,16 @@ gpio = <&gpio3 30 0>; enable-active-high; }; + + reg_can_3v3: can-3v3 { + compatible = "regulator-fixed"; + regulator-name = "can-3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + gpio = <&gpio2 13 0>; + enable-active-high; + }; + }; sound { diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 195451bf7706..6a8acb01b1d3 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -736,7 +736,7 @@ dcp@80028000 { reg = <0x80028000 0x2000>; interrupts = <52 53 54>; - status = "disabled"; + compatible = "fsl-dcp"; }; pxp@8002a000 { diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index af32eff9f4b7..7ef282795dd4 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -124,20 +124,21 @@ reg = <0 0 0>; /* CS0, offset 0 */ nand-bus-width = <16>; - gpmc,sync-clk = <0>; - gpmc,cs-on = <0>; - gpmc,cs-rd-off = <44>; - gpmc,cs-wr-off = <44>; - gpmc,adv-on = <6>; - gpmc,adv-rd-off = <34>; - gpmc,adv-wr-off = <44>; - gpmc,we-off = <40>; - gpmc,oe-off = <54>; - gpmc,access = <64>; - gpmc,rd-cycle = <82>; - gpmc,wr-cycle = <82>; - gpmc,wr-access = <40>; - gpmc,wr-data-mux-bus = <0>; + gpmc,device-nand; + gpmc,sync-clki-ps = <0>; + gpmc,cs-on-ns = <0>; + gpmc,cs-rd-off-ns = <44>; + gpmc,cs-wr-off-ns = <44>; + gpmc,adv-on-ns = <6>; + gpmc,adv-rd-off-ns = <34>; + gpmc,adv-wr-off-ns = <44>; + gpmc,we-off-ns = <40>; + gpmc,oe-off-ns = <54>; + gpmc,access-ns = <64>; + gpmc,rd-cycle-ns = <82>; + gpmc,wr-cycle-ns = <82>; + gpmc,wr-access-ns = <40>; + gpmc,wr-data-mux-bus-ns = <0>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi index 02edd8965f8a..05e9489cf95c 100644 --- a/arch/arm/boot/dts/prima2.dtsi +++ b/arch/arm/boot/dts/prima2.dtsi @@ -610,7 +610,7 @@ }; rtc-iobg { - compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus"; + compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus", "simple-bus"; #address-cells = <1>; #size-cells = <1>; reg = <0x80030000 0x10000>; diff --git a/arch/arm/boot/dts/sun4i-a10-cubieboard.dts b/arch/arm/boot/dts/sun4i-a10-cubieboard.dts index 0e22a285dfe0..757c4cd900ee 100644 --- a/arch/arm/boot/dts/sun4i-a10-cubieboard.dts +++ b/arch/arm/boot/dts/sun4i-a10-cubieboard.dts @@ -27,6 +27,21 @@ }; soc@01c20000 { + emac: ethernet@01c0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&emac_pins_a>; + phy = <&phy1>; + status = "okay"; + }; + + mdio@01c0b080 { + status = "okay"; + + phy1: ethernet-phy@1 { + reg = <1>; + }; + }; + pinctrl@01c20800 { led_pins_cubieboard: led_pins@0 { allwinner,pins = "PH20", "PH21"; diff --git a/arch/arm/boot/dts/sun4i-a10-hackberry.dts b/arch/arm/boot/dts/sun4i-a10-hackberry.dts index b9efac100c85..3514b37d66bc 100644 --- a/arch/arm/boot/dts/sun4i-a10-hackberry.dts +++ b/arch/arm/boot/dts/sun4i-a10-hackberry.dts @@ -23,10 +23,51 @@ }; soc@01c20000 { + emac: ethernet@01c0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&emac_pins_a>; + phy = <&phy0>; + status = "okay"; + }; + + mdio@01c0b080 { + phy-supply = <®_emac_3v3>; + status = "okay"; + + phy0: ethernet-phy@0 { + reg = <0>; + }; + }; + + pio: pinctrl@01c20800 { + pinctrl-names = "default"; + pinctrl-0 = <&hackberry_hogs>; + + hackberry_hogs: hogs@0 { + allwinner,pins = "PH19"; + allwinner,function = "gpio_out"; + allwinner,drive = <0>; + allwinner,pull = <0>; + }; + }; + uart0: serial@01c28000 { pinctrl-names = "default"; pinctrl-0 = <&uart0_pins_a>; status = "okay"; }; }; + + regulators { + compatible = "simple-bus"; + + reg_emac_3v3: emac-3v3 { + compatible = "regulator-fixed"; + regulator-name = "emac-3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + enable-active-high; + gpio = <&pio 7 19 0>; + }; + }; }; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 82e03d22f913..b2bd6e124250 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -167,6 +167,22 @@ reg = <0x01c20000 0x300000>; ranges; + emac: ethernet@01c0b000 { + compatible = "allwinner,sun4i-emac"; + reg = <0x01c0b000 0x1000>; + interrupts = <55>; + clocks = <&ahb_gates 17>; + status = "disabled"; + }; + + mdio@01c0b080 { + compatible = "allwinner,sun4i-mdio"; + reg = <0x01c0b080 0x14>; + status = "disabled"; + #address-cells = <1>; + #size-cells = <0>; + }; + intc: interrupt-controller@01c20400 { compatible = "allwinner,sun4i-ic"; reg = <0x01c20400 0x400>; @@ -226,6 +242,17 @@ allwinner,drive = <0>; allwinner,pull = <0>; }; + + emac_pins_a: emac0@0 { + allwinner,pins = "PA0", "PA1", "PA2", + "PA3", "PA4", "PA5", "PA6", + "PA7", "PA8", "PA9", "PA10", + "PA11", "PA12", "PA13", "PA14", + "PA15", "PA16"; + allwinner,function = "emac"; + allwinner,drive = <0>; + allwinner,pull = <0>; + }; }; timer@01c20c00 { diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 8178705c4b24..80f033614a1f 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -32,11 +32,11 @@ 1901: adr r0, 1902b bl printascii mov r0, r9 - bl printhex8 + bl printhex2 adr r0, 1903b bl printascii mov r0, r10 - bl printhex8 + bl printhex2 adr r0, 1904b bl printascii #endif diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 3caed0db6986..510e5b13aa2e 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -19,10 +19,6 @@ #include <asm/smp.h> #include <asm/smp_plat.h> -static void __init simple_smp_init_cpus(void) -{ -} - static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle) { unsigned int mpidr, pcpu, pcluster, ret; @@ -74,7 +70,6 @@ static void mcpm_cpu_die(unsigned int cpu) #endif static struct smp_operations __initdata mcpm_smp_ops = { - .smp_init_cpus = simple_smp_init_cpus, .smp_boot_secondary = mcpm_boot_secondary, .smp_secondary_init = mcpm_secondary_init, #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index ddc740769601..023ee63827a2 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -28,8 +28,8 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/hardware/arm_timer.h> #include <asm/hardware/timer-sp.h> diff --git a/arch/arm/configs/bcm_defconfig b/arch/arm/configs/bcm_defconfig index e3bf2d65618e..65edf6d47215 100644 --- a/arch/arm/configs/bcm_defconfig +++ b/arch/arm/configs/bcm_defconfig @@ -78,6 +78,13 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +CONFIG_MMC_UNSAFE_RESUME=y +CONFIG_MMC_BLOCK_MINORS=32 +CONFIG_MMC_TEST=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_BCM_KONA=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 2ac0ffb12f03..a24c02443920 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -286,3 +286,4 @@ CONFIG_SOC_OMAP5=y CONFIG_TI_DAVINCI_MDIO=y CONFIG_TI_DAVINCI_CPDMA=y CONFIG_TI_CPSW=y +CONFIG_AT803X_PHY=y diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 7c1bfc0aea0c..accefe099182 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval)); - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index cedd3721318b..6493802f880a 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -23,6 +23,11 @@ #define CR_RR (1 << 14) /* Round Robin cache replacement */ #define CR_L4 (1 << 15) /* LDR pc can set T bit */ #define CR_DT (1 << 16) +#ifdef CONFIG_MMU +#define CR_HA (1 << 17) /* Hardware management of Access Flag */ +#else +#define CR_BR (1 << 17) /* MPU Background region enable (PMSA) */ +#endif #define CR_IT (1 << 18) #define CR_ST (1 << 19) #define CR_FI (1 << 21) /* Fast interrupt (lower latency mode) */ diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index d7deb62554c9..8c25dc4e9851 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -8,6 +8,7 @@ #define CPUID_CACHETYPE 1 #define CPUID_TCM 2 #define CPUID_TLBTYPE 3 +#define CPUID_MPUIR 4 #define CPUID_MPIDR 5 #ifdef CONFIG_CPU_V7M diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index fe92ccf1d0b0..191ada6e4d2d 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -46,7 +46,7 @@ __rem; \ }) -#if __GNUC__ < 4 +#if __GNUC__ < 4 || !defined(CONFIG_AEABI) /* * gcc versions earlier than 4.0 are simply too problematic for the diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h index e6168c0c18e9..74a8b84f3cb1 100644 --- a/arch/arm/include/asm/glue-proc.h +++ b/arch/arm/include/asm/glue-proc.h @@ -230,21 +230,21 @@ # endif #endif -#ifdef CONFIG_CPU_PJ4B +#ifdef CONFIG_CPU_V7M # ifdef CPU_NAME # undef MULTI_CPU # define MULTI_CPU # else -# define CPU_NAME cpu_pj4b +# define CPU_NAME cpu_v7m # endif #endif -#ifdef CONFIG_CPU_V7M +#ifdef CONFIG_CPU_PJ4B # ifdef CPU_NAME # undef MULTI_CPU # define MULTI_CPU # else -# define CPU_NAME cpu_v7m +# define CPU_NAME cpu_pj4b # endif #endif diff --git a/arch/arm/include/asm/hardware/iop3xx.h b/arch/arm/include/asm/hardware/iop3xx.h index ed94b1a366ae..423744bf18eb 100644 --- a/arch/arm/include/asm/hardware/iop3xx.h +++ b/arch/arm/include/asm/hardware/iop3xx.h @@ -223,11 +223,12 @@ extern int iop3xx_get_init_atu(void); #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <linux/reboot.h> void iop3xx_map_io(void); void iop_init_cp6_handler(void); void iop_init_time(unsigned long tickrate); -void iop3xx_restart(char, const char *); +void iop3xx_restart(enum reboot_mode, const char *); static inline u32 read_tmr0(void) { diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h new file mode 100644 index 000000000000..d4014fbe5ea3 --- /dev/null +++ b/arch/arm/include/asm/hugetlb-3level.h @@ -0,0 +1,71 @@ +/* + * arch/arm/include/asm/hugetlb-3level.h + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_ARM_HUGETLB_3LEVEL_H +#define _ASM_ARM_HUGETLB_3LEVEL_H + + +/* + * If our huge pte is non-zero then mark the valid bit. + * This allows pte_present(huge_ptep_get(ptep)) to return true for non-zero + * ptes. + * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes). + */ +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + pte_t retval = *ptep; + if (pte_val(retval)) + pte_val(retval) |= L_PTE_VALID; + return retval; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +#endif /* _ASM_ARM_HUGETLB_3LEVEL_H */ diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h new file mode 100644 index 000000000000..1f1b1cd112f3 --- /dev/null +++ b/arch/arm/include/asm/hugetlb.h @@ -0,0 +1,84 @@ +/* + * arch/arm/include/asm/hugetlb.h + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_ARM_HUGETLB_H +#define _ASM_ARM_HUGETLB_H + +#include <asm/page.h> +#include <asm-generic/hugetlb.h> + +#include <asm/hugetlb-3level.h> + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* _ASM_ARM_HUGETLB_H */ diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 652b56086de7..d070741b2b37 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -130,16 +130,16 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) */ extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long, size_t, unsigned int, void *); -extern void __iomem *__arm_ioremap_caller(unsigned long, size_t, unsigned int, +extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int, void *); extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int); -extern void __iomem *__arm_ioremap(unsigned long, size_t, unsigned int); -extern void __iomem *__arm_ioremap_exec(unsigned long, size_t, bool cached); +extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int); +extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached); extern void __iounmap(volatile void __iomem *addr); extern void __arm_iounmap(volatile void __iomem *addr); -extern void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, +extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); extern void (*arch_iounmap)(volatile void __iomem *); diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h deleted file mode 100644 index 68cb9e1dfb81..000000000000 --- a/arch/arm/include/asm/kvm_arch_timer.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __ASM_ARM_KVM_ARCH_TIMER_H -#define __ASM_ARM_KVM_ARCH_TIMER_H - -#include <linux/clocksource.h> -#include <linux/hrtimer.h> -#include <linux/workqueue.h> - -struct arch_timer_kvm { -#ifdef CONFIG_KVM_ARM_TIMER - /* Is the timer enabled */ - bool enabled; - - /* Virtual offset */ - cycle_t cntvoff; -#endif -}; - -struct arch_timer_cpu { -#ifdef CONFIG_KVM_ARM_TIMER - /* Registers: control register, timer value */ - u32 cntv_ctl; /* Saved/restored */ - cycle_t cntv_cval; /* Saved/restored */ - - /* - * Anything that is not used directly from assembly code goes - * here. - */ - - /* Background timer used when the guest is not running */ - struct hrtimer timer; - - /* Work queued with the above timer expires */ - struct work_struct expired; - - /* Background timer active */ - bool armed; - - /* Timer IRQ */ - const struct kvm_irq_level *irq; -#endif -}; - -#ifdef CONFIG_KVM_ARM_TIMER -int kvm_timer_hyp_init(void); -int kvm_timer_init(struct kvm *kvm); -void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); -void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); -#else -static inline int kvm_timer_hyp_init(void) -{ - return 0; -}; - -static inline int kvm_timer_init(struct kvm *kvm) -{ - return 0; -} - -static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} -#endif - -#endif diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 124623e5ef14..64e96960de29 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -135,7 +135,6 @@ #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) -#define S2_PGD_SIZE (1 << S2_PGD_ORDER) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 18d50322a9e2..a2f43ddcc300 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -37,16 +37,18 @@ #define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */ #define c6_DFAR 16 /* Data Fault Address Register */ #define c6_IFAR 17 /* Instruction Fault Address Register */ -#define c9_L2CTLR 18 /* Cortex A15 L2 Control Register */ -#define c10_PRRR 19 /* Primary Region Remap Register */ -#define c10_NMRR 20 /* Normal Memory Remap Register */ -#define c12_VBAR 21 /* Vector Base Address Register */ -#define c13_CID 22 /* Context ID Register */ -#define c13_TID_URW 23 /* Thread ID, User R/W */ -#define c13_TID_URO 24 /* Thread ID, User R/O */ -#define c13_TID_PRIV 25 /* Thread ID, Privileged */ -#define c14_CNTKCTL 26 /* Timer Control Register (PL1) */ -#define NR_CP15_REGS 27 /* Number of regs (incl. invalid) */ +#define c7_PAR 18 /* Physical Address Register */ +#define c7_PAR_high 19 /* PAR top 32 bits */ +#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ +#define c10_PRRR 21 /* Primary Region Remap Register */ +#define c10_NMRR 22 /* Normal Memory Remap Register */ +#define c12_VBAR 23 /* Vector Base Address Register */ +#define c13_CID 24 /* Context ID Register */ +#define c13_TID_URW 25 /* Thread ID, User R/W */ +#define c13_TID_URO 26 /* Thread ID, User R/O */ +#define c13_TID_PRIV 27 /* Thread ID, Privileged */ +#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */ +#define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */ #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 @@ -72,8 +74,6 @@ extern char __kvm_hyp_vector[]; extern char __kvm_hyp_code_start[]; extern char __kvm_hyp_code_end[]; -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 82b4babead2c..a464e8d7b6c5 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) return cpsr_mode > USR_MODE;; } -static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg) -{ - return reg == 15; -} - static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) { return vcpu->arch.fault.hsr; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 57cb786a6203..7d22517d8071 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -23,9 +23,14 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #include <asm/fpstate.h> -#include <asm/kvm_arch_timer.h> +#include <kvm/arm_arch_timer.h> +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -38,7 +43,7 @@ #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) -#include <asm/kvm_vgic.h> +#include <kvm/arm_vgic.h> struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); @@ -190,8 +195,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, - unsigned long long pgd_ptr, +static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h deleted file mode 100644 index 343744e4809c..000000000000 --- a/arch/arm/include/asm/kvm_vgic.h +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __ASM_ARM_KVM_VGIC_H -#define __ASM_ARM_KVM_VGIC_H - -#include <linux/kernel.h> -#include <linux/kvm.h> -#include <linux/irqreturn.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/irqchip/arm-gic.h> - -#define VGIC_NR_IRQS 128 -#define VGIC_NR_SGIS 16 -#define VGIC_NR_PPIS 16 -#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) -#define VGIC_MAX_CPUS KVM_MAX_VCPUS -#define VGIC_MAX_LRS (1 << 6) - -/* Sanity checks... */ -#if (VGIC_MAX_CPUS > 8) -#error Invalid number of CPU interfaces -#endif - -#if (VGIC_NR_IRQS & 31) -#error "VGIC_NR_IRQS must be a multiple of 32" -#endif - -#if (VGIC_NR_IRQS > 1024) -#error "VGIC_NR_IRQS must be <= 1024" -#endif - -/* - * The GIC distributor registers describing interrupts have two parts: - * - 32 per-CPU interrupts (SGI + PPI) - * - a bunch of shared interrupts (SPI) - */ -struct vgic_bitmap { - union { - u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); - } percpu[VGIC_MAX_CPUS]; - union { - u32 reg[VGIC_NR_SHARED_IRQS / 32]; - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); - } shared; -}; - -struct vgic_bytemap { - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; - u32 shared[VGIC_NR_SHARED_IRQS / 4]; -}; - -struct vgic_dist { -#ifdef CONFIG_KVM_ARM_VGIC - spinlock_t lock; - bool ready; - - /* Virtual control interface mapping */ - void __iomem *vctrl_base; - - /* Distributor and vcpu interface mapping in the guest */ - phys_addr_t vgic_dist_base; - phys_addr_t vgic_cpu_base; - - /* Distributor enabled */ - u32 enabled; - - /* Interrupt enabled (one bit per IRQ) */ - struct vgic_bitmap irq_enabled; - - /* Interrupt 'pin' level */ - struct vgic_bitmap irq_state; - - /* Level-triggered interrupt in progress */ - struct vgic_bitmap irq_active; - - /* Interrupt priority. Not used yet. */ - struct vgic_bytemap irq_priority; - - /* Level/edge triggered */ - struct vgic_bitmap irq_cfg; - - /* Source CPU per SGI and target CPU */ - u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; - - /* Target CPU for each IRQ */ - u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; - struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; - - /* Bitmap indicating which CPU has something pending */ - unsigned long irq_pending_on_cpu; -#endif -}; - -struct vgic_cpu { -#ifdef CONFIG_KVM_ARM_VGIC - /* per IRQ to LR mapping */ - u8 vgic_irq_lr_map[VGIC_NR_IRQS]; - - /* Pending interrupts on this VCPU */ - DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); - - /* Bitmap of used/free list registers */ - DECLARE_BITMAP( lr_used, VGIC_MAX_LRS); - - /* Number of list registers on this CPU */ - int nr_lr; - - /* CPU vif control registers for world switch */ - u32 vgic_hcr; - u32 vgic_vmcr; - u32 vgic_misr; /* Saved only */ - u32 vgic_eisr[2]; /* Saved only */ - u32 vgic_elrsr[2]; /* Saved only */ - u32 vgic_apr; - u32 vgic_lr[VGIC_MAX_LRS]; -#endif -}; - -#define LR_EMPTY 0xff - -struct kvm; -struct kvm_vcpu; -struct kvm_run; -struct kvm_exit_mmio; - -#ifdef CONFIG_KVM_ARM_VGIC -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); -int kvm_vgic_hyp_init(void); -int kvm_vgic_init(struct kvm *kvm); -int kvm_vgic_create(struct kvm *kvm); -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, - bool level); -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio); - -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) -#define vgic_initialized(k) ((k)->arch.vgic.ready) - -#else -static inline int kvm_vgic_hyp_init(void) -{ - return 0; -} - -static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - return 0; -} - -static inline int kvm_vgic_init(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_create(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - return false; -} - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 0; -} - -static inline bool vgic_initialized(struct kvm *kvm) -{ - return true; -} -#endif - -#endif diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 75bf07910b81..441efc491b50 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -11,6 +11,7 @@ #include <linux/types.h> #ifndef __ASSEMBLY__ +#include <linux/reboot.h> struct tag; struct meminfo; @@ -43,7 +44,7 @@ struct machine_desc { unsigned char reserve_lp0 :1; /* never has lp0 */ unsigned char reserve_lp1 :1; /* never has lp1 */ unsigned char reserve_lp2 :1; /* never has lp2 */ - char restart_mode; /* default restart mode */ + enum reboot_mode reboot_mode; /* default restart mode */ struct smp_operations *smp; /* SMP operations */ bool (*smp_init)(void); void (*fixup)(struct tag *, char **, @@ -58,7 +59,7 @@ struct machine_desc { #ifdef CONFIG_MULTI_IRQ_HANDLER void (*handle_irq)(struct pt_regs *); #endif - void (*restart)(char, const char *); + void (*restart)(enum reboot_mode, const char *); }; /* diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 57870ab313c5..e750a938fd3c 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,6 +18,8 @@ #include <linux/types.h> #include <linux/sizes.h> +#include <asm/cache.h> + #ifdef CONFIG_NEED_MACH_MEMORY_H #include <mach/memory.h> #endif @@ -141,6 +143,20 @@ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) #define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) +/* + * Minimum guaranted alignment in pgd_alloc(). The page table pointers passed + * around in head.S and proc-*.S are shifted by this amount, in order to + * leave spare high bits for systems with physical address extension. This + * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but + * gives us about 38-bits or so. + */ +#ifdef CONFIG_ARM_LPAE +#define ARCH_PGD_SHIFT L1_CACHE_SHIFT +#else +#define ARCH_PGD_SHIFT 0 +#endif +#define ARCH_PGD_MASK ((1 << ARCH_PGD_SHIFT) - 1) + #ifndef __ASSEMBLY__ /* @@ -207,7 +223,7 @@ static inline unsigned long __phys_to_virt(unsigned long x) * direct-mapped view. We assume this is the first page * of RAM in the mem_map as well. */ -#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT) +#define PHYS_PFN_OFFSET ((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT)) /* * These are *only* valid on the kernel direct mapped RAM memory. @@ -260,12 +276,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x) /* * Conversion between a struct page and a physical address. * - * Note: when converting an unknown physical address to a - * struct page, the resulting pointer must be validated - * using VALID_PAGE(). It must return an invalid struct page - * for any physical address not corresponding to a system - * RAM address. - * * page_to_pfn(page) convert a struct page * to a PFN number * pfn_to_page(pfn) convert a _valid_ PFN number to struct page * * diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index a7b85e0d0cc1..b5792b7fd8d3 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -18,6 +18,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/proc-fns.h> +#include <asm/smp_plat.h> #include <asm-generic/mm_hooks.h> void __check_vmalloc_seq(struct mm_struct *mm); @@ -27,7 +28,15 @@ void __check_vmalloc_seq(struct mm_struct *mm); void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); #define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) -DECLARE_PER_CPU(atomic64_t, active_asids); +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask); +#else /* !CONFIG_ARM_ERRATA_798181 */ +static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ +} +#endif /* CONFIG_ARM_ERRATA_798181 */ #else /* !CONFIG_CPU_HAS_ASID */ @@ -98,12 +107,16 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #ifdef CONFIG_MMU unsigned int cpu = smp_processor_id(); -#ifdef CONFIG_SMP - /* check for possible thread migration */ - if (!cpumask_empty(mm_cpumask(next)) && + /* + * __sync_icache_dcache doesn't broadcast the I-cache invalidation, + * so check for possible thread migration and invalidate the I-cache + * if we're new to this CPU. + */ + if (cache_ops_need_broadcast() && + !cpumask_empty(mm_cpumask(next)) && !cpumask_test_cpu(cpu, mm_cpumask(next))) __flush_icache_all(); -#endif + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) { check_and_switch_context(next, tsk); if (cache_is_vivt()) diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h new file mode 100644 index 000000000000..c3247cc2fe08 --- /dev/null +++ b/arch/arm/include/asm/mpu.h @@ -0,0 +1,76 @@ +#ifndef __ARM_MPU_H +#define __ARM_MPU_H + +#ifdef CONFIG_ARM_MPU + +/* MPUIR layout */ +#define MPUIR_nU 1 +#define MPUIR_DREGION 8 +#define MPUIR_IREGION 16 +#define MPUIR_DREGION_SZMASK (0xFF << MPUIR_DREGION) +#define MPUIR_IREGION_SZMASK (0xFF << MPUIR_IREGION) + +/* ID_MMFR0 data relevant to MPU */ +#define MMFR0_PMSA (0xF << 4) +#define MMFR0_PMSAv7 (3 << 4) + +/* MPU D/I Size Register fields */ +#define MPU_RSR_SZ 1 +#define MPU_RSR_EN 0 + +/* The D/I RSR value for an enabled region spanning the whole of memory */ +#define MPU_RSR_ALL_MEM 63 + +/* Individual bits in the DR/IR ACR */ +#define MPU_ACR_XN (1 << 12) +#define MPU_ACR_SHARED (1 << 2) + +/* C, B and TEX[2:0] bits only have semantic meanings when grouped */ +#define MPU_RGN_CACHEABLE 0xB +#define MPU_RGN_SHARED_CACHEABLE (MPU_RGN_CACHEABLE | MPU_ACR_SHARED) +#define MPU_RGN_STRONGLY_ORDERED 0 + +/* Main region should only be shared for SMP */ +#ifdef CONFIG_SMP +#define MPU_RGN_NORMAL (MPU_RGN_CACHEABLE | MPU_ACR_SHARED) +#else +#define MPU_RGN_NORMAL MPU_RGN_CACHEABLE +#endif + +/* Access permission bits of ACR (only define those that we use)*/ +#define MPU_AP_PL1RW_PL0RW (0x3 << 8) +#define MPU_AP_PL1RW_PL0R0 (0x2 << 8) +#define MPU_AP_PL1RW_PL0NA (0x1 << 8) + +/* For minimal static MPU region configurations */ +#define MPU_PROBE_REGION 0 +#define MPU_BG_REGION 1 +#define MPU_RAM_REGION 2 +#define MPU_VECTORS_REGION 3 + +/* Maximum number of regions Linux is interested in */ +#define MPU_MAX_REGIONS 16 + +#define MPU_DATA_SIDE 0 +#define MPU_INSTR_SIDE 1 + +#ifndef __ASSEMBLY__ + +struct mpu_rgn { + /* Assume same attributes for d/i-side */ + u32 drbar; + u32 drsr; + u32 dracr; +}; + +struct mpu_rgn_info { + u32 mpuir; + struct mpu_rgn rgns[MPU_MAX_REGIONS]; +}; +extern struct mpu_rgn_info mpu_rgn_info; + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_ARM_MPU */ + +#endif diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 812a4944e783..6363f3d1d505 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -13,7 +13,7 @@ /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 18f5cef82ad5..626989fec4d3 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -30,6 +30,7 @@ #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) #define PMD_BIT4 (_AT(pmdval_t, 0)) #define PMD_DOMAIN(x) (_AT(pmdval_t, 0)) #define PMD_APTABLE_SHIFT (61) @@ -41,6 +42,8 @@ */ #define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2) #define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_nG (_AT(pmdval_t, 1) << 11) @@ -66,6 +69,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */ #define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */ #define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ @@ -79,4 +83,24 @@ #define PHYS_MASK_SHIFT (40) #define PHYS_MASK ((1ULL << PHYS_MASK_SHIFT) - 1) +/* + * TTBR0/TTBR1 split (PAGE_OFFSET): + * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) + * 0x80000000: T0SZ = 0, T1SZ = 1 + * 0xc0000000: T0SZ = 0, T1SZ = 2 + * + * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise + * booting secondary CPUs would end up using TTBR1 for the identity + * mapping set up in TTBR0. + */ +#if defined CONFIG_VMSPLIT_2G +#define TTBR1_OFFSET 16 /* skip two L1 entries */ +#elif defined CONFIG_VMSPLIT_3G +#define TTBR1_OFFSET (4096 * (1 + 3)) /* only L2, skip pgd + 3*pmd */ +#else +#define TTBR1_OFFSET 0 +#endif + +#define TTBR1_SIZE (((PAGE_OFFSET >> 30) - 1) << 16) + #endif diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 86b8fe398b95..5689c18c85f5 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -33,7 +33,7 @@ #define PTRS_PER_PMD 512 #define PTRS_PER_PGD 4 -#define PTE_HWTABLE_PTRS (PTRS_PER_PTE) +#define PTE_HWTABLE_PTRS (0) #define PTE_HWTABLE_OFF (0) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u64)) @@ -48,20 +48,28 @@ #define PMD_SHIFT 21 #define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) +#define PMD_MASK (~((1 << PMD_SHIFT) - 1)) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PGDIR_MASK (~((1 << PGDIR_SHIFT) - 1)) /* * section address mask and size definitions. */ #define SECTION_SHIFT 21 #define SECTION_SIZE (1UL << SECTION_SHIFT) -#define SECTION_MASK (~(SECTION_SIZE-1)) +#define SECTION_MASK (~((1 << SECTION_SHIFT) - 1)) #define USER_PTRS_PER_PGD (PAGE_OFFSET / PGDIR_SIZE) /* + * Hugetlb definitions. + */ +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +/* * "Linux" PTE definitions for LPAE. * * These bits overlap with the hardware bits but the naming is preserved for @@ -79,6 +87,11 @@ #define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ #define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56) +#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57) + /* * To be used in assembly code with the upper page attributes. */ @@ -166,8 +179,83 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) clean_pmd_entry(pmdp); \ } while (0) +/* + * For 3 levels of paging the PTE_EXT_NG bit will be set for user address ptes + * that are written to a page table but not for ptes created with mk_pte. + * + * In hugetlb_no_page, a new huge pte (new_pte) is generated and passed to + * hugetlb_cow, where it is compared with an entry in a page table. + * This comparison test fails erroneously leading ultimately to a memory leak. + * + * To correct this behaviour, we mask off PTE_EXT_NG for any pte that is + * present before running the comparison. + */ +#define __HAVE_ARCH_PTE_SAME +#define pte_same(pte_a,pte_b) ((pte_present(pte_a) ? pte_val(pte_a) & ~PTE_EXT_NG \ + : pte_val(pte_a)) \ + == (pte_present(pte_b) ? pte_val(pte_b) & ~PTE_EXT_NG \ + : pte_val(pte_b))) + #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext))) +#define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + +#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#endif + +#define PMD_BIT_FUNC(fn,op) \ +static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } + +PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); +PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */ +#define pmd_mknotpresent(pmd) (__pmd(0)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY | + PMD_SECT_VALID | PMD_SECT_NONE; + pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); + return pmd; +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + BUG_ON(addr >= TASK_SIZE); + + /* create a faulting entry if PROT_NONE protected */ + if (pmd_val(pmd) & PMD_SECT_NONE) + pmd_val(pmd) &= ~PMD_SECT_VALID; + + *pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG); + flush_pmd_entry(pmdp); +} + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_PGTABLE_3LEVEL_H */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 229e0dde9c71..04aeb02d2e11 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -24,6 +24,9 @@ #include <asm/memory.h> #include <asm/pgtable-hwdef.h> + +#include <asm/tlbflush.h> + #ifdef CONFIG_ARM_LPAE #include <asm/pgtable-3level.h> #else diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index f3628fb3d2b3..5324c1112f3a 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -60,7 +60,7 @@ extern struct processor { /* * Set the page table */ - void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm); + void (*switch_mm)(phys_addr_t pgd_phys, struct mm_struct *mm); /* * Set a possibly extended PTE. Non-extended PTEs should * ignore 'ext'. @@ -82,7 +82,7 @@ extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); extern void cpu_dcache_clean_area(void *, int); -extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); +extern void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); #ifdef CONFIG_ARM_LPAE extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte); #else @@ -116,13 +116,25 @@ extern void cpu_resume(void); #define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) #ifdef CONFIG_ARM_LPAE + +#define cpu_get_ttbr(nr) \ + ({ \ + u64 ttbr; \ + __asm__("mrrc p15, " #nr ", %Q0, %R0, c2" \ + : "=r" (ttbr)); \ + ttbr; \ + }) + +#define cpu_set_ttbr(nr, val) \ + do { \ + u64 ttbr = val; \ + __asm__("mcrr p15, " #nr ", %Q0, %R0, c2" \ + : : "r" (ttbr)); \ + } while (0) + #define cpu_get_pgd() \ ({ \ - unsigned long pg, pg2; \ - __asm__("mrrc p15, 0, %0, %1, c2" \ - : "=r" (pg), "=r" (pg2) \ - : \ - : "cc"); \ + u64 pg = cpu_get_ttbr(0); \ pg &= ~(PTRS_PER_PGD*sizeof(pgd_t)-1); \ (pgd_t *)phys_to_virt(pg); \ }) @@ -137,6 +149,10 @@ extern void cpu_resume(void); }) #endif +#else /*!CONFIG_MMU */ + +#define cpu_switch_mm(pgd,mm) { } + #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h index 3d520ddca61b..2389b71a8e7c 100644 --- a/arch/arm/include/asm/sched_clock.h +++ b/arch/arm/include/asm/sched_clock.h @@ -1,16 +1,4 @@ -/* - * sched_clock.h: support for extending counters to full 64-bit ns counter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. +/* You shouldn't include this file. Use linux/sched_clock.h instead. + * Temporary file until all asm/sched_clock.h users are gone */ -#ifndef ASM_SCHED_CLOCK -#define ASM_SCHED_CLOCK - -extern void sched_clock_postinit(void); -extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); - -extern unsigned long long (*sched_clock_func)(void); - -#endif +#include <linux/sched_clock.h> diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index d3a22bebe6ce..a8cae71caceb 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -65,7 +65,10 @@ asmlinkage void secondary_start_kernel(void); * Initial data for bringing up a secondary CPU. */ struct secondary_data { - unsigned long pgdir; + union { + unsigned long mpu_rgn_szr; + unsigned long pgdir; + }; unsigned long swapper_pg_dir; void *stack; }; diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index e78983202737..6462a721ebd4 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -26,6 +26,9 @@ static inline bool is_smp(void) } /* all SMP configurations have the extended CPUID registers */ +#ifndef CONFIG_MMU +#define tlb_ops_need_broadcast() 0 +#else static inline int tlb_ops_need_broadcast(void) { if (!is_smp()) @@ -33,6 +36,7 @@ static inline int tlb_ops_need_broadcast(void) return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2; } +#endif #if !defined(CONFIG_SMP) || __LINUX_ARM_ARCH__ >= 7 #define cache_ops_need_broadcast() 0 @@ -66,4 +70,22 @@ static inline int get_logical_index(u32 mpidr) return -EINVAL; } +/* + * NOTE ! Assembly code relies on the following + * structure memory layout in order to carry out load + * multiple from its base address. For more + * information check arch/arm/kernel/sleep.S + */ +struct mpidr_hash { + u32 mask; /* used by sleep.S */ + u32 shift_aff[3]; /* used by sleep.S */ + u32 bits; +}; + +extern struct mpidr_hash mpidr_hash; + +static inline u32 mpidr_hash_size(void) +{ + return 1 << mpidr_hash.bits; +} #endif diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 6220e9fdf4c7..f8b8965666e9 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -97,19 +97,22 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) static inline int arch_spin_trylock(arch_spinlock_t *lock) { - unsigned long tmp; + unsigned long contended, res; u32 slock; - __asm__ __volatile__( -" ldrex %0, [%2]\n" -" subs %1, %0, %0, ror #16\n" -" addeq %0, %0, %3\n" -" strexeq %1, %0, [%2]" - : "=&r" (slock), "=&r" (tmp) - : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) - : "cc"); - - if (tmp == 0) { + do { + __asm__ __volatile__( + " ldrex %0, [%3]\n" + " mov %2, #0\n" + " subs %1, %0, %0, ror #16\n" + " addeq %0, %0, %4\n" + " strexeq %2, %0, [%3]" + : "=&r" (slock), "=&r" (contended), "=r" (res) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) + : "cc"); + } while (res); + + if (!contended) { smp_mb(); return 1; } else { diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h index 1c0a551ae375..cd20029bcd94 100644 --- a/arch/arm/include/asm/suspend.h +++ b/arch/arm/include/asm/suspend.h @@ -1,6 +1,11 @@ #ifndef __ASM_ARM_SUSPEND_H #define __ASM_ARM_SUSPEND_H +struct sleep_save_sp { + u32 *save_ptr_stash; + u32 save_ptr_stash_phys; +}; + extern void cpu_resume(void); extern int cpu_suspend(unsigned long, int (*)(unsigned long)); diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h index 21a23e378bbe..a3d61ad984af 100644 --- a/arch/arm/include/asm/system_misc.h +++ b/arch/arm/include/asm/system_misc.h @@ -6,11 +6,12 @@ #include <linux/compiler.h> #include <linux/linkage.h> #include <linux/irqflags.h> +#include <linux/reboot.h> extern void cpu_init(void); void soft_restart(unsigned long); -extern void (*arm_pm_restart)(char str, const char *cmd); +extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); extern void (*arm_pm_idle)(void); #define UDBG_UNDEFINED (1 << 0) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 1995d1a84060..214d4158089a 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -58,7 +58,7 @@ struct thread_info { struct cpu_context_save cpu_context; /* cpu context */ __u32 syscall; /* syscall number */ __u8 used_cp[16]; /* thread used copro */ - unsigned long tp_value; + unsigned long tp_value[2]; /* TLS registers */ #ifdef CONFIG_CRUNCH struct crunch_state crunchstate; #endif diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index bdf2b8458ec1..46e7cfb3e721 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -204,6 +204,12 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #endif } +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) #define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) #define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index a3625d141c1d..fdbb9e369745 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -535,8 +535,33 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, } #endif +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif -#endif /* CONFIG_MMU */ +#elif defined(CONFIG_SMP) /* !CONFIG_MMU */ + +#ifndef __ASSEMBLY__ + +#include <linux/mm_types.h> + +static inline void local_flush_tlb_all(void) { } +static inline void local_flush_tlb_mm(struct mm_struct *mm) { } +static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { } +static inline void local_flush_tlb_kernel_page(unsigned long kaddr) { } +static inline void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { } +static inline void local_flush_bp_all(void) { } + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr); +extern void flush_tlb_kernel_page(unsigned long kaddr); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_bp_all(void); +#endif /* __ASSEMBLY__ */ + +#endif #endif diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h index 73409e6c0251..83259b873333 100644 --- a/arch/arm/include/asm/tls.h +++ b/arch/arm/include/asm/tls.h @@ -2,27 +2,30 @@ #define __ASMARM_TLS_H #ifdef __ASSEMBLY__ - .macro set_tls_none, tp, tmp1, tmp2 +#include <asm/asm-offsets.h> + .macro switch_tls_none, base, tp, tpuser, tmp1, tmp2 .endm - .macro set_tls_v6k, tp, tmp1, tmp2 + .macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2 + mrc p15, 0, \tmp2, c13, c0, 2 @ get the user r/w register mcr p15, 0, \tp, c13, c0, 3 @ set TLS register - mov \tmp1, #0 - mcr p15, 0, \tmp1, c13, c0, 2 @ clear user r/w TLS register + mcr p15, 0, \tpuser, c13, c0, 2 @ and the user r/w register + str \tmp2, [\base, #TI_TP_VALUE + 4] @ save it .endm - .macro set_tls_v6, tp, tmp1, tmp2 + .macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2 ldr \tmp1, =elf_hwcap ldr \tmp1, [\tmp1, #0] mov \tmp2, #0xffff0fff tst \tmp1, #HWCAP_TLS @ hardware TLS available? - mcrne p15, 0, \tp, c13, c0, 3 @ yes, set TLS register - movne \tmp1, #0 - mcrne p15, 0, \tmp1, c13, c0, 2 @ clear user r/w TLS register streq \tp, [\tmp2, #-15] @ set TLS value at 0xffff0ff0 + mrcne p15, 0, \tmp2, c13, c0, 2 @ get the user r/w register + mcrne p15, 0, \tp, c13, c0, 3 @ yes, set TLS register + mcrne p15, 0, \tpuser, c13, c0, 2 @ set user r/w register + strne \tmp2, [\base, #TI_TP_VALUE + 4] @ save it .endm - .macro set_tls_software, tp, tmp1, tmp2 + .macro switch_tls_software, base, tp, tpuser, tmp1, tmp2 mov \tmp1, #0xffff0fff str \tp, [\tmp1, #-15] @ set TLS value at 0xffff0ff0 .endm @@ -31,19 +34,30 @@ #ifdef CONFIG_TLS_REG_EMUL #define tls_emu 1 #define has_tls_reg 1 -#define set_tls set_tls_none +#define switch_tls switch_tls_none #elif defined(CONFIG_CPU_V6) #define tls_emu 0 #define has_tls_reg (elf_hwcap & HWCAP_TLS) -#define set_tls set_tls_v6 +#define switch_tls switch_tls_v6 #elif defined(CONFIG_CPU_32v6K) #define tls_emu 0 #define has_tls_reg 1 -#define set_tls set_tls_v6k +#define switch_tls switch_tls_v6k #else #define tls_emu 0 #define has_tls_reg 0 -#define set_tls set_tls_software +#define switch_tls switch_tls_software #endif +#ifndef __ASSEMBLY__ +static inline unsigned long get_tpuser(void) +{ + unsigned long reg = 0; + + if (has_tls_reg && !tls_emu) + __asm__("mrc p15, 0, %0, c13, c0, 2" : "=r" (reg)); + + return reg; +} +#endif #endif /* __ASMARM_TLS_H */ diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index 799f42ecca63..7704e28c3483 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -47,6 +47,7 @@ unsigned long HYPERVISOR_hvm_op(int op, void *arg); int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); +int HYPERVISOR_tmem_op(void *arg); static inline void MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 30cdacb675af..359a7b50b158 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -1,7 +1,6 @@ #ifndef _ASM_ARM_XEN_PAGE_H #define _ASM_ARM_XEN_PAGE_H -#include <asm/mach/map.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -88,6 +87,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) return __set_phys_to_machine(pfn, mfn); } -#define xen_remap(cookie, size) __arm_ioremap((cookie), (size), MT_MEMORY); +#define xen_remap(cookie, size) ioremap_cached((cookie), (size)); #endif /* _ASM_ARM_XEN_PAGE_H */ diff --git a/arch/arm/include/debug/vexpress.S b/arch/arm/include/debug/vexpress.S index dc8e882a6257..acafb229e2b6 100644 --- a/arch/arm/include/debug/vexpress.S +++ b/arch/arm/include/debug/vexpress.S @@ -16,6 +16,8 @@ #define DEBUG_LL_PHYS_BASE_RS1 0x1c000000 #define DEBUG_LL_UART_OFFSET_RS1 0x00090000 +#define DEBUG_LL_UART_PHYS_CRX 0xb0090000 + #define DEBUG_LL_VIRT_BASE 0xf8000000 #if defined(CONFIG_DEBUG_VEXPRESS_UART0_DETECT) @@ -67,6 +69,14 @@ #include <asm/hardware/debug-pl01x.S> +#elif defined(CONFIG_DEBUG_VEXPRESS_UART0_CRX) + + .macro addruart,rp,tmp,tmp2 + ldr \rp, =DEBUG_LL_UART_PHYS_CRX + .endm + +#include <asm/hardware/debug-pl01x.S> + #else /* CONFIG_DEBUG_LL_UART_NONE */ .macro addruart, rp, rv, tmp diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 3688fd15a32d..6d34d080372a 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -25,6 +25,6 @@ #define HWCAP_IDIVT (1 << 18) #define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */ #define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) - +#define HWCAP_LPAE (1 << 20) #endif /* _UAPI__ASMARM_HWCAP_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index f4285b5ffb05..86d10dd47dc4 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o sched_clock.o \ + process.o ptrace.o return_address.o \ setup.o signal.o stacktrace.o sys_arm.o time.o traps.o obj-$(CONFIG_ATAGS) += atags_parse.o @@ -38,7 +38,10 @@ obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o -obj-$(CONFIG_SMP) += smp.o smp_tlb.o +obj-$(CONFIG_SMP) += smp.o +ifdef CONFIG_MMU +obj-$(CONFIG_SMP) += smp_tlb.o +endif obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index 59dcdced6e30..221f07b11ccb 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -11,9 +11,9 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/errno.h> +#include <linux/sched_clock.h> #include <asm/delay.h> -#include <asm/sched_clock.h> #include <clocksource/arm_arch_timer.h> diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee68cce6b48e..ded041711beb 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/procinfo.h> +#include <asm/suspend.h> #include <asm/hardware/cache-l2x0.h> #include <linux/kbuild.h> @@ -145,6 +146,11 @@ int main(void) #ifdef MULTI_CACHE DEFINE(CACHE_FLUSH_KERN_ALL, offsetof(struct cpu_cache_fns, flush_kern_all)); #endif +#ifdef CONFIG_ARM_CPU_SUSPEND + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif BLANK(); DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 582b405befc5..a39cfc2a1f90 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -685,15 +685,16 @@ ENTRY(__switch_to) UNWIND(.fnstart ) UNWIND(.cantunwind ) add ip, r1, #TI_CPU_SAVE - ldr r3, [r2, #TI_TP_VALUE] ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack THUMB( str sp, [ip], #4 ) THUMB( str lr, [ip], #4 ) + ldr r4, [r2, #TI_TP_VALUE] + ldr r5, [r2, #TI_TP_VALUE + 4] #ifdef CONFIG_CPU_USE_DOMAINS ldr r6, [r2, #TI_CPU_DOMAIN] #endif - set_tls r3, r4, r5 + switch_tls r1, r4, r5, r3, r7 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) ldr r7, [r2, #TI_TASK] ldr r8, =__stack_chk_guard diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 85a72b0809ca..94104bf69719 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -366,6 +366,16 @@ ENTRY(vector_swi) #endif zero_fp +#ifdef CONFIG_ALIGNMENT_TRAP + ldr ip, __cr_alignment + ldr ip, [ip] + mcr p15, 0, ip, c1, c0 @ update control register +#endif + + enable_irq + ct_user_exit + get_thread_info tsk + /* * Get the system call number. */ @@ -379,9 +389,9 @@ ENTRY(vector_swi) #ifdef CONFIG_ARM_THUMB tst r8, #PSR_T_BIT movne r10, #0 @ no thumb OABI emulation - ldreq r10, [lr, #-4] @ get SWI instruction + USER( ldreq r10, [lr, #-4] ) @ get SWI instruction #else - ldr r10, [lr, #-4] @ get SWI instruction + USER( ldr r10, [lr, #-4] ) @ get SWI instruction #endif #ifdef CONFIG_CPU_ENDIAN_BE8 rev r10, r10 @ little endian instruction @@ -396,22 +406,13 @@ ENTRY(vector_swi) /* Legacy ABI only, possibly thumb mode. */ tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in - ldreq scno, [lr, #-4] + USER( ldreq scno, [lr, #-4] ) #else /* Legacy ABI only. */ - ldr scno, [lr, #-4] @ get SWI instruction + USER( ldr scno, [lr, #-4] ) @ get SWI instruction #endif -#ifdef CONFIG_ALIGNMENT_TRAP - ldr ip, __cr_alignment - ldr ip, [ip] - mcr p15, 0, ip, c1, c0 @ update control register -#endif - enable_irq - ct_user_exit - - get_thread_info tsk adr tbl, sys_call_table @ load syscall table pointer #if defined(CONFIG_OABI_COMPAT) @@ -446,6 +447,21 @@ local_restart: eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back bcs arm_syscall b sys_ni_syscall @ not private func + +#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI) + /* + * We failed to handle a fault trying to access the page + * containing the swi instruction, but we're not really in a + * position to return -EFAULT. Instead, return back to the + * instruction and re-enter the user fault handling path trying + * to page it in. This will likely result in sending SEGV to the + * current task. + */ +9001: + sub lr, lr, #4 + str lr, [sp, #S_PC] + b ret_fast_syscall +#endif ENDPROC(vector_swi) /* diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 8812ce88f7a1..75f14cc3e073 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -17,9 +17,12 @@ #include <asm/assembler.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> +#include <asm/memory.h> #include <asm/cp15.h> #include <asm/thread_info.h> #include <asm/v7m.h> +#include <asm/mpu.h> +#include <asm/page.h> /* * Kernel startup entry point. @@ -63,12 +66,74 @@ ENTRY(stext) movs r10, r5 @ invalid processor (r5=0)? beq __error_p @ yes, error 'p' - adr lr, BSYM(__after_proc_init) @ return (PIC) address +#ifdef CONFIG_ARM_MPU + /* Calculate the size of a region covering just the kernel */ + ldr r5, =PHYS_OFFSET @ Region start: PHYS_OFFSET + ldr r6, =(_end) @ Cover whole kernel + sub r6, r6, r5 @ Minimum size of region to map + clz r6, r6 @ Region size must be 2^N... + rsb r6, r6, #31 @ ...so round up region size + lsl r6, r6, #MPU_RSR_SZ @ Put size in right field + orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit + bl __setup_mpu +#endif + ldr r13, =__mmap_switched @ address to jump to after + @ initialising sctlr + adr lr, BSYM(1f) @ return (PIC) address ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) THUMB( mov pc, r12 ) + 1: b __after_proc_init ENDPROC(stext) +#ifdef CONFIG_SMP + __CPUINIT +ENTRY(secondary_startup) + /* + * Common entry point for secondary CPUs. + * + * Ensure that we're in SVC mode, and IRQs are disabled. Lookup + * the processor type - there is no need to check the machine type + * as it has already been validated by the primary processor. + */ + setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 +#ifndef CONFIG_CPU_CP15 + ldr r9, =CONFIG_PROCESSOR_ID +#else + mrc p15, 0, r9, c0, c0 @ get processor id +#endif + bl __lookup_processor_type @ r5=procinfo r9=cpuid + movs r10, r5 @ invalid processor? + beq __error_p @ yes, error 'p' + + adr r4, __secondary_data + ldmia r4, {r7, r12} + +#ifdef CONFIG_ARM_MPU + /* Use MPU region info supplied by __cpu_up */ + ldr r6, [r7] @ get secondary_data.mpu_szr + bl __setup_mpu @ Initialize the MPU +#endif + + adr lr, BSYM(__after_proc_init) @ return address + mov r13, r12 @ __secondary_switched address + ARM( add pc, r10, #PROCINFO_INITFUNC ) + THUMB( add r12, r10, #PROCINFO_INITFUNC ) + THUMB( mov pc, r12 ) +ENDPROC(secondary_startup) + +ENTRY(__secondary_switched) + ldr sp, [r7, #8] @ set up the stack pointer + mov fp, #0 + b secondary_start_kernel +ENDPROC(__secondary_switched) + + .type __secondary_data, %object +__secondary_data: + .long secondary_data + .long __secondary_switched +#endif /* CONFIG_SMP */ + /* * Set the Control Register and Read the process ID. */ @@ -99,10 +164,97 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - - b __mmap_switched @ clear the BSS and jump - @ to start_kernel + mov pc, r13 ENDPROC(__after_proc_init) .ltorg +#ifdef CONFIG_ARM_MPU + + +/* Set which MPU region should be programmed */ +.macro set_region_nr tmp, rgnr + mov \tmp, \rgnr @ Use static region numbers + mcr p15, 0, \tmp, c6, c2, 0 @ Write RGNR +.endm + +/* Setup a single MPU region, either D or I side (D-side for unified) */ +.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE + mcr p15, 0, \bar, c6, c1, (0 + \side) @ I/DRBAR + mcr p15, 0, \acr, c6, c1, (4 + \side) @ I/DRACR + mcr p15, 0, \sr, c6, c1, (2 + \side) @ I/DRSR +.endm + +/* + * Setup the MPU and initial MPU Regions. We create the following regions: + * Region 0: Use this for probing the MPU details, so leave disabled. + * Region 1: Background region - covers the whole of RAM as strongly ordered + * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6 + * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page + * + * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION +*/ + +ENTRY(__setup_mpu) + + /* Probe for v7 PMSA compliance */ + mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 + and r0, r0, #(MMFR0_PMSA) @ PMSA field + teq r0, #(MMFR0_PMSAv7) @ PMSA v7 + bne __error_p @ Fail: ARM_MPU on NOT v7 PMSA + + /* Determine whether the D/I-side memory map is unified. We set the + * flags here and continue to use them for the rest of this function */ + mrc p15, 0, r0, c0, c0, 4 @ MPUIR + ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU + beq __error_p @ Fail: ARM_MPU and no MPU + tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified + + /* Setup second region first to free up r6 */ + set_region_nr r0, #MPU_RAM_REGION + isb + /* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */ + ldr r0, =PHYS_OFFSET @ RAM starts at PHYS_OFFSET + ldr r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL) + + setup_region r0, r5, r6, MPU_DATA_SIDE @ PHYS_OFFSET, shared, enabled + beq 1f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled +1: isb + + /* First/background region */ + set_region_nr r0, #MPU_BG_REGION + isb + /* Execute Never, strongly ordered, inaccessible to PL0, rw PL1 */ + mov r0, #0 @ BG region starts at 0x0 + ldr r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA) + mov r6, #MPU_RSR_ALL_MEM @ 4GB region, enabled + + setup_region r0, r5, r6, MPU_DATA_SIDE @ 0x0, BG region, enabled + beq 2f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled +2: isb + + /* Vectors region */ + set_region_nr r0, #MPU_VECTORS_REGION + isb + /* Shared, inaccessible to PL0, rw PL1 */ + mov r0, #CONFIG_VECTORS_BASE @ Cover from VECTORS_BASE + ldr r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL) + /* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */ + mov r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN) + + setup_region r0, r5, r6, MPU_DATA_SIDE @ VECTORS_BASE, PL0 NA, enabled + beq 3f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ VECTORS_BASE, PL0 NA, enabled +3: isb + + /* Enable the MPU */ + mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR + bic r0, r0, #CR_BR @ Disable the 'default mem-map' + orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) + mcr p15, 0, r0, c1, c0, 0 @ Enable MPU + isb + mov pc,lr +ENDPROC(__setup_mpu) +#endif #include "head-common.S" diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 8bac553fe213..45e8935cae4e 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -156,7 +156,7 @@ ENDPROC(stext) * * Returns: * r0, r3, r5-r7 corrupted - * r4 = physical page table address + * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) */ __create_page_tables: pgtbl r4, r8 @ page table address @@ -331,6 +331,7 @@ __create_page_tables: #endif #ifdef CONFIG_ARM_LPAE sub r4, r4, #0x1000 @ point to the PGD table + mov r4, r4, lsr #ARCH_PGD_SHIFT #endif mov pc, lr ENDPROC(__create_page_tables) @@ -408,7 +409,7 @@ __secondary_data: * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer - * r4 = page table pointer + * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ @@ -427,10 +428,7 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif -#ifdef CONFIG_ARM_LPAE - mov r5, #0 - mcrr p15, 0, r4, r5, c2 @ load TTBR0 -#else +#ifndef CONFIG_ARM_LPAE mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 1315c4ccfa56..4910232c4833 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -153,6 +153,13 @@ THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL orr r7, r7, #3 @ PL1PCEN | PL1PCTEN mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL + mov r7, #0 + mcrr p15, 4, r7, r7, c14 @ CNTVOFF + + @ Disable virtual timer in case it was counting + mrc p15, 0, r7, c14, c3, 1 @ CNTV_CTL + bic r7, #1 @ Clear ENABLE + mcr p15, 0, r7, c14, c3, 1 @ CNTV_CTL 1: #endif diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 8c3094d0f7b7..d9f5cd4e533f 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -569,6 +569,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) return; } + perf_callchain_store(entry, regs->ARM_pc); tail = (struct frame_tail __user *)regs->ARM_fp - 1; while ((entry->nr < PERF_MAX_STACK_DEPTH) && diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 6e8931ccf13e..d3ca4f6915af 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/hw_breakpoint.h> #include <linux/cpuidle.h> #include <linux/leds.h> +#include <linux/reboot.h> #include <asm/cacheflush.h> #include <asm/idmap.h> @@ -39,6 +40,7 @@ #include <asm/thread_notify.h> #include <asm/stacktrace.h> #include <asm/mach/time.h> +#include <asm/tls.h> #ifdef CONFIG_CC_STACKPROTECTOR #include <linux/stackprotector.h> @@ -112,7 +114,7 @@ void soft_restart(unsigned long addr) BUG(); } -static void null_restart(char mode, const char *cmd) +static void null_restart(enum reboot_mode reboot_mode, const char *cmd) { } @@ -122,7 +124,7 @@ static void null_restart(char mode, const char *cmd) void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -void (*arm_pm_restart)(char str, const char *cmd) = null_restart; +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd) = null_restart; EXPORT_SYMBOL_GPL(arm_pm_restart); /* @@ -174,16 +176,6 @@ void arch_cpu_idle(void) default_idle(); } -static char reboot_mode = 'h'; - -int __init reboot_setup(char *str) -{ - reboot_mode = str[0]; - return 1; -} - -__setup("reboot=", reboot_setup); - /* * Called by kexec, immediately prior to machine_kexec(). * @@ -374,7 +366,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, clear_ptrace_hw_breakpoint(p); if (clone_flags & CLONE_SETTLS) - thread->tp_value = childregs->ARM_r3; + thread->tp_value[0] = childregs->ARM_r3; + thread->tp_value[1] = get_tpuser(); thread_notify(THREAD_NOTIFY_COPY, thread); diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 23a11424c568..219f1d73572a 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -68,8 +68,6 @@ void __ref psci_cpu_die(unsigned int cpu) /* We should never return */ panic("psci: cpu %d failed to shutdown\n", cpu); } -#else -#define psci_cpu_die NULL #endif bool __init psci_smp_available(void) @@ -80,5 +78,7 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU .cpu_die = psci_cpu_die, +#endif }; diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 03deeffd9f6d..0dd3b79b15c3 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request, #endif case PTRACE_GET_THREAD_AREA: - ret = put_user(task_thread_info(child)->tp_value, + ret = put_user(task_thread_info(child)->tp_value[0], datap); break; @@ -886,20 +886,12 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef CONFIG_HAVE_HW_BREAKPOINT case PTRACE_GETHBPREGS: - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - ret = ptrace_gethbpregs(child, addr, (unsigned long __user *)data); - ptrace_put_breakpoints(child); break; case PTRACE_SETHBPREGS: - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - ret = ptrace_sethbpregs(child, addr, (unsigned long __user *)data); - ptrace_put_breakpoints(child); break; #endif diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c deleted file mode 100644 index e8edcaa0e432..000000000000 --- a/arch/arm/kernel/sched_clock.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * sched_clock.c: support for extending counters to full 64-bit ns counter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/clocksource.h> -#include <linux/init.h> -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/moduleparam.h> -#include <linux/sched.h> -#include <linux/syscore_ops.h> -#include <linux/timer.h> - -#include <asm/sched_clock.h> - -struct clock_data { - u64 epoch_ns; - u32 epoch_cyc; - u32 epoch_cyc_copy; - unsigned long rate; - u32 mult; - u32 shift; - bool suspended; - bool needs_suspend; -}; - -static void sched_clock_poll(unsigned long wrap_ticks); -static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); -static int irqtime = -1; - -core_param(irqtime, irqtime, int, 0400); - -static struct clock_data cd = { - .mult = NSEC_PER_SEC / HZ, -}; - -static u32 __read_mostly sched_clock_mask = 0xffffffff; - -static u32 notrace jiffy_sched_clock_read(void) -{ - return (u32)(jiffies - INITIAL_JIFFIES); -} - -static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; - -static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) -{ - return (cyc * mult) >> shift; -} - -static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) -{ - u64 epoch_ns; - u32 epoch_cyc; - - if (cd.suspended) - return cd.epoch_ns; - - /* - * Load the epoch_cyc and epoch_ns atomically. We do this by - * ensuring that we always write epoch_cyc, epoch_ns and - * epoch_cyc_copy in strict order, and read them in strict order. - * If epoch_cyc and epoch_cyc_copy are not equal, then we're in - * the middle of an update, and we should repeat the load. - */ - do { - epoch_cyc = cd.epoch_cyc; - smp_rmb(); - epoch_ns = cd.epoch_ns; - smp_rmb(); - } while (epoch_cyc != cd.epoch_cyc_copy); - - return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift); -} - -/* - * Atomically update the sched_clock epoch. - */ -static void notrace update_sched_clock(void) -{ - unsigned long flags; - u32 cyc; - u64 ns; - - cyc = read_sched_clock(); - ns = cd.epoch_ns + - cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); - /* - * Write epoch_cyc and epoch_ns in a way that the update is - * detectable in cyc_to_fixed_sched_clock(). - */ - raw_local_irq_save(flags); - cd.epoch_cyc_copy = cyc; - smp_wmb(); - cd.epoch_ns = ns; - smp_wmb(); - cd.epoch_cyc = cyc; - raw_local_irq_restore(flags); -} - -static void sched_clock_poll(unsigned long wrap_ticks) -{ - mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); - update_sched_clock(); -} - -void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) -{ - unsigned long r, w; - u64 res, wrap; - char r_unit; - - if (cd.rate > rate) - return; - - BUG_ON(bits > 32); - WARN_ON(!irqs_disabled()); - read_sched_clock = read; - sched_clock_mask = (1 << bits) - 1; - cd.rate = rate; - - /* calculate the mult/shift to convert counter ticks to ns. */ - clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0); - - r = rate; - if (r >= 4000000) { - r /= 1000000; - r_unit = 'M'; - } else if (r >= 1000) { - r /= 1000; - r_unit = 'k'; - } else - r_unit = ' '; - - /* calculate how many ns until we wrap */ - wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift); - do_div(wrap, NSEC_PER_MSEC); - w = wrap; - - /* calculate the ns resolution of this counter */ - res = cyc_to_ns(1ULL, cd.mult, cd.shift); - pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", - bits, r, r_unit, res, w); - - /* - * Start the timer to keep sched_clock() properly updated and - * sets the initial epoch. - */ - sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); - update_sched_clock(); - - /* - * Ensure that sched_clock() starts off at 0ns - */ - cd.epoch_ns = 0; - - /* Enable IRQ time accounting if we have a fast enough sched_clock */ - if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) - enable_sched_clock_irqtime(); - - pr_debug("Registered %pF as sched_clock source\n", read); -} - -static unsigned long long notrace sched_clock_32(void) -{ - u32 cyc = read_sched_clock(); - return cyc_to_sched_clock(cyc, sched_clock_mask); -} - -unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; - -unsigned long long notrace sched_clock(void) -{ - return sched_clock_func(); -} - -void __init sched_clock_postinit(void) -{ - /* - * If no sched_clock function has been provided at that point, - * make it the final one one. - */ - if (read_sched_clock == jiffy_sched_clock_read) - setup_sched_clock(jiffy_sched_clock_read, 32, HZ); - - sched_clock_poll(sched_clock_timer.data); -} - -static int sched_clock_suspend(void) -{ - sched_clock_poll(sched_clock_timer.data); - cd.suspended = true; - return 0; -} - -static void sched_clock_resume(void) -{ - cd.epoch_cyc = read_sched_clock(); - cd.epoch_cyc_copy = cd.epoch_cyc; - cd.suspended = false; -} - -static struct syscore_ops sched_clock_ops = { - .suspend = sched_clock_suspend, - .resume = sched_clock_resume, -}; - -static int __init sched_clock_syscore_init(void) -{ - register_syscore_ops(&sched_clock_ops); - return 0; -} -device_initcall(sched_clock_syscore_init); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1c8278de6c46..63af9a7ae512 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -74,7 +74,7 @@ __setup("fpe=", fpe_setup); extern void paging_init(struct machine_desc *desc); extern void sanity_check_meminfo(void); -extern void reboot_setup(char *str); +extern enum reboot_mode reboot_mode; extern void setup_dma_zone(struct machine_desc *desc); unsigned int processor_id; @@ -367,7 +367,7 @@ void __init early_print(const char *str, ...) static void __init cpuid_init_hwcaps(void) { - unsigned int divide_instrs; + unsigned int divide_instrs, vmsa; if (cpu_architecture() < CPU_ARCH_ARMv7) return; @@ -380,6 +380,11 @@ static void __init cpuid_init_hwcaps(void) case 1: elf_hwcap |= HWCAP_IDIVT; } + + /* LPAE implies atomic ldrd/strd instructions */ + vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; + if (vmsa >= 5) + elf_hwcap |= HWCAP_LPAE; } static void __init feat_v6_fixup(void) @@ -470,9 +475,82 @@ void __init smp_setup_processor_id(void) for (i = 1; i < nr_cpu_ids; ++i) cpu_logical_map(i) = i == cpu ? 0 : i; + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); + printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr); } +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity; + u32 fs[3], bits[3], ls, mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits 0x%x\n", mask); + /* + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. + */ + for (i = 0; i < 3; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 24 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_BITS + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = 2*MPIDR_LEVEL_BITS + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] mask[0x%x] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + sync_cache_w(&mpidr_hash); +} +#endif + static void __init setup_processor(void) { struct proc_info_list *list; @@ -783,8 +861,8 @@ void __init setup_arch(char **cmdline_p) setup_dma_zone(mdesc); - if (mdesc->restart_mode) - reboot_setup(&mdesc->restart_mode); + if (mdesc->reboot_mode != REBOOT_HARD) + reboot_mode = mdesc->reboot_mode; init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; @@ -820,6 +898,7 @@ void __init setup_arch(char **cmdline_p) smp_set_ops(mdesc->smp); } smp_init_cpus(); + smp_build_mpidr_hash(); } #endif @@ -892,6 +971,7 @@ static const char *hwcap_str[] = { "vfpv4", "idiva", "idivt", + "lpae", NULL }; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 296786bdbb73..1c16c35c271a 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -392,14 +392,19 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, if (ksig->ka.sa.sa_flags & SA_SIGINFO) idx += 3; + /* + * Put the sigreturn code on the stack no matter which return + * mechanism we use in order to remain ABI compliant + */ if (__put_user(sigreturn_codes[idx], rc) || __put_user(sigreturn_codes[idx+1], rc+1)) return 1; - if (cpsr & MODE32_BIT) { + if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) { /* * 32-bit code can use the new high-page - * signal return code support. + * signal return code support except when the MPU has + * protected the vectors page from PL0 */ retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb; } else { diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 987dcf33415c..db1536b8b30b 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -7,6 +7,49 @@ .text /* + * Implementation of MPIDR hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @mpidr: register containing MPIDR value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) { + * u32 aff0, aff1, aff2; + * u32 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2); + *} + * Input registers: rs0, rs1, rs2, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = r1 and dst = r1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask + and \mpidr, \mpidr, \mask @ mask out MPIDR bits + and \dst, \mpidr, #0xff @ mask=aff0 + ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0 + THUMB( lsr \dst, \dst, \rs0 ) + and \mask, \mpidr, #0xff00 @ mask = aff1 + ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1) + THUMB( lsr \mask, \mask, \rs1 ) + THUMB( orr \dst, \dst, \mask ) + and \mask, \mpidr, #0xff0000 @ mask = aff2 + ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2) + THUMB( lsr \mask, \mask, \rs2 ) + THUMB( orr \dst, \dst, \mask ) + .endm + +/* * Save CPU state for a suspend. This saves the CPU general purpose * registers, and allocates space on the kernel stack to save the CPU * specific registers and some other data for resume. @@ -29,12 +72,18 @@ ENTRY(__cpu_suspend) mov r1, r4 @ size of save block mov r2, r5 @ virtual SP ldr r3, =sleep_save_sp -#ifdef CONFIG_SMP - ALT_SMP(mrc p15, 0, lr, c0, c0, 5) - ALT_UP(mov lr, #0) - and lr, lr, #15 + ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] + ALT_SMP(mrc p15, 0, r9, c0, c0, 5) + ALT_UP_B(1f) + ldr r8, =mpidr_hash + /* + * This ldmia relies on the memory layout of the mpidr_hash + * struct mpidr_hash. + */ + ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts + compute_mpidr_hash lr, r5, r6, r7, r9, r4 add r3, r3, lr, lsl #2 -#endif +1: bl __cpu_suspend_save adr lr, BSYM(cpu_suspend_abort) ldmfd sp!, {r0, pc} @ call suspend fn @@ -81,15 +130,23 @@ ENDPROC(cpu_resume_after_mmu) .data .align ENTRY(cpu_resume) -#ifdef CONFIG_SMP - adr r0, sleep_save_sp - ALT_SMP(mrc p15, 0, r1, c0, c0, 5) - ALT_UP(mov r1, #0) - and r1, r1, #15 - ldr r0, [r0, r1, lsl #2] @ stack phys addr -#else - ldr r0, sleep_save_sp @ stack phys addr -#endif + mov r1, #0 + ALT_SMP(mrc p15, 0, r0, c0, c0, 5) + ALT_UP_B(1f) + adr r2, mpidr_hash_ptr + ldr r3, [r2] + add r2, r2, r3 @ r2 = struct mpidr_hash phys address + /* + * This ldmia relies on the memory layout of the mpidr_hash + * struct mpidr_hash. + */ + ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts + compute_mpidr_hash r1, r4, r5, r6, r0, r3 +1: + adr r0, _sleep_save_sp + ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr r0, [r0, r1, lsl #2] + setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off @ load phys pgd, stack, resume fn ARM( ldmia r0!, {r1, sp, pc} ) @@ -98,7 +155,11 @@ THUMB( mov sp, r2 ) THUMB( bx r3 ) ENDPROC(cpu_resume) -sleep_save_sp: - .rept CONFIG_NR_CPUS - .long 0 @ preserve stack phys ptr here - .endr + .align 2 +mpidr_hash_ptr: + .long mpidr_hash - . @ mpidr_hash struct offset + + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) +_sleep_save_sp: + .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5919eb451bb9..c5fb5469054b 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -45,6 +45,7 @@ #include <asm/smp_plat.h> #include <asm/virt.h> #include <asm/mach/arch.h> +#include <asm/mpu.h> /* * as from 2.5, kernels no longer have an init_tasks structure @@ -78,6 +79,13 @@ void __init smp_set_ops(struct smp_operations *ops) smp_ops = *ops; }; +static unsigned long get_arch_pgd(pgd_t *pgd) +{ + phys_addr_t pgdir = virt_to_phys(pgd); + BUG_ON(pgdir & ARCH_PGD_MASK); + return pgdir >> ARCH_PGD_SHIFT; +} + int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -87,8 +95,14 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; - secondary_data.pgdir = virt_to_phys(idmap_pgd); - secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); +#ifdef CONFIG_ARM_MPU + secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr; +#endif + +#ifdef CONFIG_MMU + secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); +#endif __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); @@ -112,9 +126,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } - secondary_data.stack = NULL; - secondary_data.pgdir = 0; + memset(&secondary_data, 0, sizeof(secondary_data)); return ret; } diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 9a52a07aa40e..a98b62dca2fa 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void) static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) { - int cpu, this_cpu; + int this_cpu; cpumask_t mask = { CPU_BITS_NONE }; if (!erratum_a15_798181()) @@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) dummy_flush_tlb_a15_erratum(); this_cpu = get_cpu(); - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - /* - * We only need to send an IPI if the other CPUs are running - * the same ASID as the one being invalidated. There is no - * need for locking around the active_asids check since the - * switch_mm() function has at least one dmb() (as required by - * this workaround) in case a context switch happens on - * another CPU after the condition below. - */ - if (atomic64_read(&mm->context.id) == - atomic64_read(&per_cpu(active_asids, cpu))) - cpumask_set_cpu(cpu, &mask); - } + a15_erratum_get_cpumask(this_cpu, mm, &mask); smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); put_cpu(); } diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index c59c97ea8268..41cf3cbf756d 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -1,15 +1,54 @@ #include <linux/init.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> #include <asm/idmap.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/memory.h> +#include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/tlbflush.h> extern int __cpu_suspend(unsigned long, int (*)(unsigned long)); extern void cpu_resume_mmu(void); +#ifdef CONFIG_MMU +/* + * Hide the first two arguments to __cpu_suspend - these are an implementation + * detail which platform code shouldn't have to know about. + */ +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +{ + struct mm_struct *mm = current->active_mm; + int ret; + + if (!idmap_pgd) + return -EINVAL; + + /* + * Provide a temporary page table with an identity mapping for + * the MMU-enable code, required for resuming. On successful + * resume (indicated by a zero return code), we need to switch + * back to the correct page tables. + */ + ret = __cpu_suspend(arg, fn); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); + local_flush_tlb_all(); + } + + return ret; +} +#else +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +{ + return __cpu_suspend(arg, fn); +} +#define idmap_pgd NULL +#endif + /* * This is called by __cpu_suspend() to save the state, and do whatever * flushing is required to ensure that when the CPU goes to sleep we have @@ -47,30 +86,19 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) virt_to_phys(save_ptr) + sizeof(*save_ptr)); } -/* - * Hide the first two arguments to __cpu_suspend - these are an implementation - * detail which platform code shouldn't have to know about. - */ -int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) -{ - struct mm_struct *mm = current->active_mm; - int ret; - - if (!idmap_pgd) - return -EINVAL; +extern struct sleep_save_sp sleep_save_sp; - /* - * Provide a temporary page table with an identity mapping for - * the MMU-enable code, required for resuming. On successful - * resume (indicated by a zero return code), we need to switch - * back to the correct page tables. - */ - ret = __cpu_suspend(arg, fn); - if (ret == 0) { - cpu_switch_mm(mm->pgd, mm); - local_flush_bp_all(); - local_flush_tlb_all(); - } +static int cpu_suspend_alloc_sp(void) +{ + void *ctx_ptr; + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL); - return ret; + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sync_cache_w(&sleep_save_sp); + return 0; } +early_initcall(cpu_suspend_alloc_sp); diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index abff4e9aaee0..98aee3258398 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -24,9 +24,9 @@ #include <linux/timer.h> #include <linux/clocksource.h> #include <linux/irq.h> +#include <linux/sched_clock.h> #include <asm/thread_info.h> -#include <asm/sched_clock.h> #include <asm/stacktrace.h> #include <asm/mach/arch.h> #include <asm/mach/time.h> @@ -120,6 +120,4 @@ void __init time_init(void) machine_desc->init_time(); else clocksource_of_init(); - - sched_clock_postinit(); } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 486e12a0f26a..cab094c234ee 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -581,7 +581,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return regs->ARM_r0; case NR(set_tls): - thread->tp_value = regs->ARM_r0; + thread->tp_value[0] = regs->ARM_r0; if (tls_emu) return 0; if (has_tls_reg) { @@ -699,7 +699,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr) int reg = (instr >> 12) & 15; if (reg == 15) return 1; - regs->uregs[reg] = current_thread_info()->tp_value; + regs->uregs[reg] = current_thread_info()->tp_value[0]; regs->ARM_pc += 4; return 0; } diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 370e1a8af6ac..ebf5015508b5 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -41,9 +41,9 @@ config KVM_ARM_HOST Provides host support for ARM processors. config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST - default 4 if KVM_ARM_HOST - default 0 + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 help Static number of max supported virtual CPUs per VM. @@ -67,6 +67,4 @@ config KVM_ARM_TIMER ---help--- Adds support for the Architected Timers in virtual machines -source drivers/virtio/Kconfig - endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 53c5ed83d16f..d99bee4950e5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I. AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) -kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +KVM := ../../../virt/kvm +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o -obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c deleted file mode 100644 index c55b6089e923..000000000000 --- a/arch/arm/kvm/arch_timer.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/cpu.h> -#include <linux/of_irq.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> - -#include <clocksource/arm_arch_timer.h> -#include <asm/arch_timer.h> - -#include <asm/kvm_vgic.h> -#include <asm/kvm_arch_timer.h> - -static struct timecounter *timecounter; -static struct workqueue_struct *wqueue; -static struct kvm_irq_level timer_irq = { - .level = 1, -}; - -static cycle_t kvm_phys_timer_read(void) -{ - return timecounter->cc->read(timecounter->cc); -} - -static bool timer_is_armed(struct arch_timer_cpu *timer) -{ - return timer->armed; -} - -/* timer_arm: as in "arm the timer", not as in ARM the company */ -static void timer_arm(struct arch_timer_cpu *timer, u64 ns) -{ - timer->armed = true; - hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), - HRTIMER_MODE_ABS); -} - -static void timer_disarm(struct arch_timer_cpu *timer) -{ - if (timer_is_armed(timer)) { - hrtimer_cancel(&timer->timer); - cancel_work_sync(&timer->expired); - timer->armed = false; - } -} - -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - vcpu->arch.timer_cpu.irq->irq, - vcpu->arch.timer_cpu.irq->level); -} - -static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) -{ - struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; - - /* - * We disable the timer in the world switch and let it be - * handled by kvm_timer_sync_hwstate(). Getting a timer - * interrupt at this point is a sure sign of some major - * breakage. - */ - pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); - return IRQ_HANDLED; -} - -static void kvm_timer_inject_irq_work(struct work_struct *work) -{ - struct kvm_vcpu *vcpu; - - vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); - vcpu->arch.timer_cpu.armed = false; - kvm_timer_inject_irq(vcpu); -} - -static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) -{ - struct arch_timer_cpu *timer; - timer = container_of(hrt, struct arch_timer_cpu, timer); - queue_work(wqueue, &timer->expired); - return HRTIMER_NORESTART; -} - -/** - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu - * @vcpu: The vcpu pointer - * - * Disarm any pending soft timers, since the world-switch code will write the - * virtual timer state back to the physical CPU. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - /* - * We're about to run this vcpu again, so there is no need to - * keep the background timer running, as we're about to - * populate the CPU timer again. - */ - timer_disarm(timer); -} - -/** - * kvm_timer_sync_hwstate - sync timer state from cpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer was armed and either schedule a corresponding - * soft timer or inject directly if already expired. - */ -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; - u64 ns; - - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) - return; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - BUG_ON(timer_is_armed(timer)); - - if (cval <= now) { - /* - * Timer has already expired while we were not - * looking. Inject the interrupt and carry on. - */ - kvm_timer_inject_irq(vcpu); - return; - } - - ns = cyclecounter_cyc2ns(timecounter->cc, cval - now); - timer_arm(timer, ns); -} - -void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); - hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - timer->timer.function = kvm_timer_expire; - timer->irq = &timer_irq; -} - -static void kvm_timer_init_interrupt(void *info) -{ - enable_percpu_irq(timer_irq.irq, 0); -} - - -static int kvm_timer_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - kvm_timer_init_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(timer_irq.irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block kvm_timer_cpu_nb = { - .notifier_call = kvm_timer_cpu_notify, -}; - -static const struct of_device_id arch_timer_of_match[] = { - { .compatible = "arm,armv7-timer", }, - {}, -}; - -int kvm_timer_hyp_init(void) -{ - struct device_node *np; - unsigned int ppi; - int err; - - timecounter = arch_timer_get_timecounter(); - if (!timecounter) - return -ENODEV; - - np = of_find_matching_node(NULL, arch_timer_of_match); - if (!np) { - kvm_err("kvm_arch_timer: can't find DT node\n"); - return -ENODEV; - } - - ppi = irq_of_parse_and_map(np, 2); - if (!ppi) { - kvm_err("kvm_arch_timer: no virtual timer interrupt\n"); - err = -EINVAL; - goto out; - } - - err = request_percpu_irq(ppi, kvm_arch_timer_handler, - "kvm guest timer", kvm_get_running_vcpus()); - if (err) { - kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", - ppi, err); - goto out; - } - - timer_irq.irq = ppi; - - err = register_cpu_notifier(&kvm_timer_cpu_nb); - if (err) { - kvm_err("Cannot register timer CPU notifier\n"); - goto out_free; - } - - wqueue = create_singlethread_workqueue("kvm_arch_timer"); - if (!wqueue) { - err = -ENOMEM; - goto out_free; - } - - kvm_info("%s IRQ%d\n", np->name, ppi); - on_each_cpu(kvm_timer_init_interrupt, NULL, 1); - - goto out; -out_free: - free_percpu_irq(ppi, kvm_get_running_vcpus()); -out: - of_node_put(np); - return err; -} - -void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - timer_disarm(timer); -} - -int kvm_timer_init(struct kvm *kvm) -{ - if (timecounter && wqueue) { - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); - kvm->arch.timer.enabled = 1; - } - - return 0; -} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ef1703b9587b..741f66a2edbd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -800,8 +800,8 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *dummy) { - unsigned long long boot_pgd_ptr; - unsigned long long pgd_ptr; + phys_addr_t boot_pgd_ptr; + phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; @@ -809,8 +809,8 @@ static void cpu_init_hyp_mode(void *dummy) /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); - boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); - pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); + boot_pgd_ptr = kvm_mmu_get_boot_httbr(); + pgd_ptr = kvm_mmu_get_httbr(); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 8eea97be1ed5..4a5199070430 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -180,6 +180,10 @@ static const struct coproc_reg cp15_regs[] = { NULL, reset_unknown, c6_DFAR }, { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, NULL, reset_unknown, c6_IFAR }, + + /* PAR swapped by interrupt.S */ + { CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, + /* * DC{C,I,CI}SW operations: */ diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 3d74a0be47db..df4c82d47ad7 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -52,9 +52,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; - kvm_inject_undefined(vcpu); return 1; } diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index f7793df62f58..16cd4ba5d7fd 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -49,6 +49,7 @@ __kvm_hyp_code_start: ENTRY(__kvm_tlb_flush_vmid_ipa) push {r2, r3} + dsb ishst add r0, r0, #KVM_VTTBR ldrd r2, r3, [r0] mcrr p15, 6, r2, r3, c2 @ Write VTTBR @@ -291,6 +292,7 @@ THUMB( orr r2, r2, #PSR_T_BIT ) ldr r2, =BSYM(panic) msr ELR_hyp, r2 ldr r0, =\panic_str + clrex @ Clear exclusive monitor eret .endm @@ -414,6 +416,10 @@ guest_trap: mrcne p15, 4, r2, c6, c0, 4 @ HPFAR bne 3f + /* Preserve PAR */ + mrrc p15, 0, r0, r1, c7 @ PAR + push {r0, r1} + /* Resolve IPA using the xFAR */ mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR isb @@ -424,13 +430,20 @@ guest_trap: lsl r2, r2, #4 orr r2, r2, r1, lsl #24 + /* Restore PAR */ + pop {r0, r1} + mcrr p15, 0, r0, r1, c7 @ PAR + 3: load_vcpu @ Load VCPU pointer to r0 str r2, [r0, #VCPU_HPFAR] 1: mov r1, #ARM_EXCEPTION_HVC b __kvm_vcpu_return -4: pop {r0, r1, r2} @ Failed translation, return to guest +4: pop {r0, r1} @ Failed translation, return to guest + mcrr p15, 0, r0, r1, c7 @ PAR + clrex + pop {r0, r1, r2} eret /* @@ -456,6 +469,7 @@ switch_to_guest_vfp: pop {r3-r7} pop {r0-r2} + clrex eret #endif diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 3c8f2f0b4c5e..6f18695a09cb 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -302,11 +302,14 @@ vcpu .req r0 @ vcpu pointer always in r0 .endif mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL + mrrc p15, 0, r4, r5, c7 @ PAR .if \store_to_vcpu == 0 - push {r2} + push {r2,r4-r5} .else str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] + add r12, vcpu, #CP15_OFFSET(c7_PAR) + strd r4, r5, [r12] .endif .endm @@ -319,12 +322,15 @@ vcpu .req r0 @ vcpu pointer always in r0 */ .macro write_cp15_state read_from_vcpu .if \read_from_vcpu == 0 - pop {r2} + pop {r2,r4-r5} .else ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] + add r12, vcpu, #CP15_OFFSET(c7_PAR) + ldrd r4, r5, [r12] .endif mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL + mcrr p15, 0, r4, r5, c7 @ PAR .if \read_from_vcpu == 0 pop {r2-r12} @@ -497,6 +503,10 @@ vcpu .req r0 @ vcpu pointer always in r0 add r5, vcpu, r4 strd r2, r3, [r5] + @ Ensure host CNTVCT == CNTPCT + mov r2, #0 + mcrr p15, 4, r2, r2, c14 @ CNTVOFF + 1: #endif @ Allow physical timer/counter access for the host diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 72a12f2171b2..b8e06b7a2833 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -86,12 +86,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - if (kvm_vcpu_reg_is_pc(vcpu, rt)) { - /* IO memory trying to read/write pc */ - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - mmio->is_write = is_write; mmio->phys_addr = fault_ipa; mmio->len = len; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 84ba67b982c0..ca6bea4859b4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -382,9 +382,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!pgd) return -ENOMEM; - /* stage-2 pgd must be aligned to its size */ - VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); - memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 7ee5bb7a3667..86a693a02ba3 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * kvm_psci_call - handle PSCI call if r0 value is in range * @vcpu: Pointer to the VCPU struct * - * Handle PSCI calls from guests through traps from HVC or SMC instructions. + * Handle PSCI calls from guests through traps from HVC instructions. * The calling convention is similar to SMC calls to the secure world where * the function number is placed in r0 and this function returns true if the * function number specified in r0 is withing the PSCI range, and false diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b80256b554cd..b7840e7aa452 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -27,6 +27,8 @@ #include <asm/kvm_arm.h> #include <asm/kvm_coproc.h> +#include <kvm/arm_arch_timer.h> + /****************************************************************************** * Cortex-A15 Reset Values */ @@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; +static const struct kvm_irq_level a15_vtimer_irq = { + .irq = 27, + .level = 1, +}; + /******************************************************************************* * Exported reset function @@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = { int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *cpu_reset; + const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A15: @@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return -EINVAL; cpu_reset = &a15_regs_reset; vcpu->arch.midr = read_cpuid_id(); + cpu_vtimer_irq = &a15_vtimer_irq; break; default: return -ENODEV; @@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); + /* Reset arch_timer context */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return 0; } diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c deleted file mode 100644 index 17c5ac7d10ed..000000000000 --- a/arch/arm/kvm/vgic.c +++ /dev/null @@ -1,1499 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> - -#include <linux/irqchip/arm-gic.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -/* - * How the whole thing works (courtesy of Christoffer Dall): - * - * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if - * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_state vgic - * bitmap (this bitmap is updated by both user land ioctls and guest - * mmio ops, and other in-kernel peripherals such as the - * arch. timers) and indicate the 'wire' state. - * - Every time the bitmap changes, the irq_pending_on_cpu oracle is - * recalculated - * - To calculate the oracle, we need info for each cpu from - * compute_pending_for_cpu, which considers: - * - PPI: dist->irq_state & dist->irq_enable - * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target - * - irq_spi_target is a 'formatted' version of the GICD_ICFGR - * registers, stored on each vcpu. We only keep one bit of - * information per interrupt, making sure that only one vcpu can - * accept the interrupt. - * - The same is true when injecting an interrupt, except that we only - * consider a single interrupt at a time. The irq_spi_cpu array - * contains the target CPU for each SPI. - * - * The handling of level interrupts adds some extra complexity. We - * need to track when the interrupt has been EOIed, so we can sample - * the 'line' again. This is achieved as such: - * - * - When a level interrupt is moved onto a vcpu, the corresponding - * bit in irq_active is set. As long as this bit is set, the line - * will be ignored for further interrupts. The interrupt is injected - * into the vcpu with the GICH_LR_EOI bit set (generate a - * maintenance interrupt on EOI). - * - When the interrupt is EOIed, the maintenance interrupt fires, - * and clears the corresponding bit in irq_active. This allow the - * interrupt line to be sampled again. - */ - -#define VGIC_ADDR_UNDEF (-1) -#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) - -/* Physical address of vgic virtual cpu interface */ -static phys_addr_t vgic_vcpu_base; - -/* Virtual control interface base address */ -static void __iomem *vgic_vctrl_base; - -static struct device_node *vgic_node; - -#define ACCESS_READ_VALUE (1 << 0) -#define ACCESS_READ_RAZ (0 << 0) -#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) -#define ACCESS_WRITE_IGNORED (0 << 1) -#define ACCESS_WRITE_SETBIT (1 << 1) -#define ACCESS_WRITE_CLEARBIT (2 << 1) -#define ACCESS_WRITE_VALUE (3 << 1) -#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) - -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); -static void vgic_update_state(struct kvm *kvm); -static void vgic_kick_vcpus(struct kvm *kvm); -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); -static u32 vgic_nr_lr; - -static unsigned int vgic_maint_irq; - -static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, - int cpuid, u32 offset) -{ - offset >>= 2; - if (!offset) - return x->percpu[cpuid].reg; - else - return x->shared.reg + offset - 1; -} - -static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, - int cpuid, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - return test_bit(irq, x->percpu[cpuid].reg_ul); - - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); -} - -static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, - int irq, int val) -{ - unsigned long *reg; - - if (irq < VGIC_NR_PRIVATE_IRQS) { - reg = x->percpu[cpuid].reg_ul; - } else { - reg = x->shared.reg_ul; - irq -= VGIC_NR_PRIVATE_IRQS; - } - - if (val) - set_bit(irq, reg); - else - clear_bit(irq, reg); -} - -static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) -{ - if (unlikely(cpuid >= VGIC_MAX_CPUS)) - return NULL; - return x->percpu[cpuid].reg_ul; -} - -static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) -{ - return x->shared.reg_ul; -} - -static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) -{ - offset >>= 2; - BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 4) - return x->percpu[cpuid] + offset; - else - return x->shared + offset - 8; -} - -#define VGIC_CFG_LEVEL 0 -#define VGIC_CFG_EDGE 1 - -static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int irq_val; - - irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq); - return irq_val == VGIC_CFG_EDGE; -} - -static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); -} - -static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); -} - -static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); -} - -static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); -} - -static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); -} - -static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); -} - -static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); -} - -static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - set_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - clear_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) -{ - return *((u32 *)mmio->data) & mask; -} - -static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) -{ - *((u32 *)mmio->data) = value & mask; -} - -/** - * vgic_reg_access - access vgic register - * @mmio: pointer to the data describing the mmio access - * @reg: pointer to the virtual backing of vgic distributor data - * @offset: least significant 2 bits used for word offset - * @mode: ACCESS_ mode (see defines above) - * - * Helper to make vgic register access easier using one of the access - * modes defined for vgic register access - * (read,raz,write-ignored,setbit,clearbit,write) - */ -static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, - phys_addr_t offset, int mode) -{ - int word_offset = (offset & 3) * 8; - u32 mask = (1UL << (mmio->len * 8)) - 1; - u32 regval; - - /* - * Any alignment fault should have been delivered to the guest - * directly (ARM ARM B3.12.7 "Prioritization of aborts"). - */ - - if (reg) { - regval = *reg; - } else { - BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED)); - regval = 0; - } - - if (mmio->is_write) { - u32 data = mmio_data_read(mmio, mask) << word_offset; - switch (ACCESS_WRITE_MASK(mode)) { - case ACCESS_WRITE_IGNORED: - return; - - case ACCESS_WRITE_SETBIT: - regval |= data; - break; - - case ACCESS_WRITE_CLEARBIT: - regval &= ~data; - break; - - case ACCESS_WRITE_VALUE: - regval = (regval & ~(mask << word_offset)) | data; - break; - } - *reg = regval; - } else { - switch (ACCESS_READ_MASK(mode)) { - case ACCESS_READ_RAZ: - regval = 0; - /* fall through */ - - case ACCESS_READ_VALUE: - mmio_data_write(mmio, mask, regval >> word_offset); - } - } -} - -static bool handle_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - u32 word_offset = offset & 3; - - switch (offset & ~3) { - case 0: /* CTLR */ - reg = vcpu->kvm->arch.vgic.enabled; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vcpu->kvm->arch.vgic.enabled = reg & 1; - vgic_update_state(vcpu->kvm); - return true; - } - break; - - case 4: /* TYPER */ - reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (VGIC_NR_IRQS >> 5) - 1; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - - case 8: /* IIDR */ - reg = 0x4B00043B; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - } - - return false; -} - -static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - if (mmio->is_write) { - if (offset < 4) /* Force SGI enabled */ - *reg |= 0xffff; - vgic_retire_disabled_irqs(vcpu); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - if (mmio->is_write) { - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -#define GICD_ITARGETSR_SIZE 32 -#define GICD_CPUTARGETS_BITS 8 -#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) -static u32 vgic_get_target_reg(struct kvm *kvm, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; - u32 val = 0; - - irq -= VGIC_NR_PRIVATE_IRQS; - - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - if (test_bit(irq + i, bmap)) - val |= 1 << (c + i * 8); - } - - return val; -} - -static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; - u32 target; - - irq -= VGIC_NR_PRIVATE_IRQS; - - /* - * Pick the LSB in each byte. This ensures we target exactly - * one vcpu per IRQ. If the byte is null, assume we target - * CPU0. - */ - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { - int shift = i * GICD_CPUTARGETS_BITS; - target = ffs((val >> shift) & 0xffU); - target = target ? (target - 1) : 0; - dist->irq_spi_cpu[irq + i] = target; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - if (c == target) - set_bit(irq + i, bmap); - else - clear_bit(irq + i, bmap); - } - } -} - -static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - /* We treat the banked interrupts targets as read-only */ - if (offset < 32) { - u32 roreg = 1 << vcpu->vcpu_id; - roreg |= roreg << 8; - roreg |= roreg << 16; - - vgic_reg_access(mmio, &roreg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static u32 vgic_cfg_expand(u16 val) -{ - u32 res = 0; - int i; - - /* - * Turn a 16bit value like abcd...mnop into a 32bit word - * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is. - */ - for (i = 0; i < 16; i++) - res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1); - - return res; -} - -static u16 vgic_cfg_compress(u32 val) -{ - u16 res = 0; - int i; - - /* - * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like - * abcd...mnop which is what we really care about. - */ - for (i = 0; i < 16; i++) - res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i; - - return res; -} - -/* - * The distributor uses 2 bits per IRQ for the CFG register, but the - * LSB is always 0. As such, we only keep the upper bit, and use the - * two above functions to compress/expand the bits - */ -static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 val; - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); - if (offset & 2) - val = *reg >> 16; - else - val = *reg & 0xffff; - - val = vgic_cfg_expand(val); - vgic_reg_access(mmio, &val, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - if (offset < 4) { - *reg = ~0U; /* Force PPIs/SGIs to 1 */ - return false; - } - - val = vgic_cfg_compress(val); - if (offset & 2) { - *reg &= 0xffff; - *reg |= val << 16; - } else { - *reg &= 0xffff << 16; - *reg |= val; - } - } - - return false; -} - -static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_dispatch_sgi(vcpu, reg); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -/* - * I would have liked to use the kvm_bus_io_*() API instead, but it - * cannot cope with banked registers (only the VM pointer is passed - * around, and we need the vcpu). One of these days, someone please - * fix it! - */ -struct mmio_range { - phys_addr_t base; - unsigned long len; - bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset); -}; - -static const struct mmio_range vgic_ranges[] = { - { - .base = GIC_DIST_CTRL, - .len = 12, - .handle_mmio = handle_mmio_misc, - }, - { - .base = GIC_DIST_IGROUP, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ENABLE_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_set_enable_reg, - }, - { - .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_clear_enable_reg, - }, - { - .base = GIC_DIST_PENDING_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_set_pending_reg, - }, - { - .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_clear_pending_reg, - }, - { - .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_NR_IRQS / 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_PRI, - .len = VGIC_NR_IRQS, - .handle_mmio = handle_mmio_priority_reg, - }, - { - .base = GIC_DIST_TARGET, - .len = VGIC_NR_IRQS, - .handle_mmio = handle_mmio_target_reg, - }, - { - .base = GIC_DIST_CONFIG, - .len = VGIC_NR_IRQS / 4, - .handle_mmio = handle_mmio_cfg_reg, - }, - { - .base = GIC_DIST_SOFTINT, - .len = 4, - .handle_mmio = handle_mmio_sgi_reg, - }, - {} -}; - -static const -struct mmio_range *find_matching_range(const struct mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t base) -{ - const struct mmio_range *r = ranges; - phys_addr_t addr = mmio->phys_addr - base; - - while (r->len) { - if (addr >= r->base && - (addr + mmio->len) <= (r->base + r->len)) - return r; - r++; - } - - return NULL; -} - -/** - * vgic_handle_mmio - handle an in-kernel MMIO access - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access - * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. - */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - const struct mmio_range *range; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long base = dist->vgic_dist_base; - bool updated_state; - unsigned long offset; - - if (!irqchip_in_kernel(vcpu->kvm) || - mmio->phys_addr < base || - (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* We don't support ldrd / strd or ldm / stm to the emulated vgic */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - range = find_matching_range(vgic_ranges, mmio, base); - if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", - mmio->is_write, mmio->phys_addr, mmio->len); - return false; - } - - spin_lock(&vcpu->kvm->arch.vgic.lock); - offset = mmio->phys_addr - range->base - base; - updated_state = range->handle_mmio(vcpu, mmio, offset); - spin_unlock(&vcpu->kvm->arch.vgic.lock); - kvm_prepare_mmio(run, mmio); - kvm_handle_mmio_return(vcpu, run); - - if (updated_state) - vgic_kick_vcpus(vcpu->kvm); - - return true; -} - -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *dist = &kvm->arch.vgic; - int nrcpus = atomic_read(&kvm->online_vcpus); - u8 target_cpus; - int sgi, mode, c, vcpu_id; - - vcpu_id = vcpu->vcpu_id; - - sgi = reg & 0xf; - target_cpus = (reg >> 16) & 0xff; - mode = (reg >> 24) & 3; - - switch (mode) { - case 0: - if (!target_cpus) - return; - - case 1: - target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; - break; - - case 2: - target_cpus = 1 << vcpu_id; - break; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (target_cpus & 1) { - /* Flag the SGI as pending */ - vgic_dist_irq_set(vcpu, sgi); - dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; - kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); - } - - target_cpus >>= 1; - } -} - -static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *pending, *enabled, *pend_percpu, *pend_shared; - unsigned long pending_private, pending_shared; - int vcpu_id; - - vcpu_id = vcpu->vcpu_id; - pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; - pend_shared = vcpu->arch.vgic_cpu.pending_shared; - - pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); - enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); - bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); - - pending = vgic_bitmap_get_shared_map(&dist->irq_state); - enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); - bitmap_and(pend_shared, pend_shared, - vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - VGIC_NR_SHARED_IRQS); - - pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); - pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); - return (pending_private < VGIC_NR_PRIVATE_IRQS || - pending_shared < VGIC_NR_SHARED_IRQS); -} - -/* - * Update the interrupt state and determine which CPUs have pending - * interrupts. Must be called with distributor lock held. - */ -static void vgic_update_state(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int c; - - if (!dist->enabled) { - set_bit(0, &dist->irq_pending_on_cpu); - return; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) { - pr_debug("CPU%d has pending interrupts\n", c); - set_bit(c, &dist->irq_pending_on_cpu); - } - } -} - -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) -#define MK_LR_PEND(src, irq) \ - (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) - -/* - * An interrupt may have been disabled after being made pending on the - * CPU interface (the classic case is a timer running while we're - * rebooting the guest - the interrupt would kick as soon as the CPU - * interface gets enabled, with deadly consequences). - * - * The solution is to examine already active LRs, and check the - * interrupt is still enabled. If not, just retire it. - */ -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int lr; - - for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - clear_bit(lr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE; - if (vgic_irq_is_active(vcpu, irq)) - vgic_irq_clear_active(vcpu, irq); - } - } -} - -/* - * Queue an interrupt to a CPU virtual interface. Return true on success, - * or false if it wasn't possible to queue it. - */ -static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int lr; - - /* Sanitize the input... */ - BUG_ON(sgi_source_id & ~7); - BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); - BUG_ON(irq >= VGIC_NR_IRQS); - - kvm_debug("Queue IRQ%d\n", irq); - - lr = vgic_cpu->vgic_irq_lr_map[irq]; - - /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { - kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_lr[lr]); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; - return true; - } - - /* Try to use another LR for this interrupt */ - lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic_cpu->nr_lr); - if (lr >= vgic_cpu->nr_lr) - return false; - - kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); - vgic_cpu->vgic_irq_lr_map[irq] = lr; - set_bit(lr, vgic_cpu->lr_used); - - if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; - - return true; -} - -static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long sources; - int vcpu_id = vcpu->vcpu_id; - int c; - - sources = dist->irq_sgi_sources[vcpu_id][irq]; - - for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { - if (vgic_queue_irq(vcpu, c, irq)) - clear_bit(c, &sources); - } - - dist->irq_sgi_sources[vcpu_id][irq] = sources; - - /* - * If the sources bitmap has been cleared it means that we - * could queue all the SGIs onto link registers (see the - * clear_bit above), and therefore we are done with them in - * our emulated gic and can get rid of them. - */ - if (!sources) { - vgic_dist_irq_clear(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - return true; - } - - return false; -} - -static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) -{ - if (vgic_irq_is_active(vcpu, irq)) - return true; /* level interrupt, already queued */ - - if (vgic_queue_irq(vcpu, 0, irq)) { - if (vgic_irq_is_edge(vcpu, irq)) { - vgic_dist_irq_clear(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - } else { - vgic_irq_set_active(vcpu, irq); - } - - return true; - } - - return false; -} - -/* - * Fill the list registers with pending interrupts before running the - * guest. - */ -static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i, vcpu_id; - int overflow = 0; - - vcpu_id = vcpu->vcpu_id; - - /* - * We may not have any pending interrupt, or the interrupts - * may have been serviced from another vcpu. In all cases, - * move along. - */ - if (!kvm_vgic_vcpu_pending_irq(vcpu)) { - pr_debug("CPU%d has no pending interrupt\n", vcpu_id); - goto epilog; - } - - /* SGIs */ - for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { - if (!vgic_queue_sgi(vcpu, i)) - overflow = 1; - } - - /* PPIs */ - for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { - if (!vgic_queue_hwirq(vcpu, i)) - overflow = 1; - } - - /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { - if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) - overflow = 1; - } - -epilog: - if (overflow) { - vgic_cpu->vgic_hcr |= GICH_HCR_UIE; - } else { - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; - /* - * We're about to run this VCPU, and we've consumed - * everything the distributor had in store for - * us. Claim we don't have anything pending. We'll - * adjust that if needed while exiting. - */ - clear_bit(vcpu_id, &dist->irq_pending_on_cpu); - } -} - -static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - bool level_pending = false; - - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); - - if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { - /* - * Some level interrupts have been EOIed. Clear their - * active bit. - */ - int lr, irq; - - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, - vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; - - /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, irq)) { - vgic_cpu_irq_set(vcpu, irq); - level_pending = true; - } else { - vgic_cpu_irq_clear(vcpu, irq); - } - - /* - * Despite being EOIed, the LR may not have - * been marked as empty. - */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; - } - } - - if (vgic_cpu->vgic_misr & GICH_MISR_U) - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; - - return level_pending; -} - -/* - * Sync back the VGIC state after a guest run. The distributor lock is - * needed so we don't get preempted in the middle of the state processing. - */ -static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int lr, pending; - bool level_pending; - - level_pending = vgic_process_maintenance(vcpu); - - /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, - vgic_cpu->nr_lr) { - int irq; - - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) - continue; - - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; - - BUG_ON(irq >= VGIC_NR_IRQS); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - } - - /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, - vgic_cpu->nr_lr); - if (level_pending || pending < vgic_cpu->nr_lr) - set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); -} - -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - spin_lock(&dist->lock); - __kvm_vgic_flush_hwstate(vcpu); - spin_unlock(&dist->lock); -} - -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - spin_lock(&dist->lock); - __kvm_vgic_sync_hwstate(vcpu); - spin_unlock(&dist->lock); -} - -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); -} - -static void vgic_kick_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - int c; - - /* - * We've injected an interrupt, time to find out who deserves - * a good kick... - */ - kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) - kvm_vcpu_kick(vcpu); - } -} - -static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) -{ - int is_edge = vgic_irq_is_edge(vcpu, irq); - int state = vgic_dist_irq_is_pending(vcpu, irq); - - /* - * Only inject an interrupt if: - * - edge triggered and we have a rising edge - * - level triggered and we change level - */ - if (is_edge) - return level > state; - else - return level != state; -} - -static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int is_edge, is_level; - int enabled; - bool ret = true; - - spin_lock(&dist->lock); - - vcpu = kvm_get_vcpu(kvm, cpuid); - is_edge = vgic_irq_is_edge(vcpu, irq_num); - is_level = !is_edge; - - if (!vgic_validate_injection(vcpu, irq_num, level)) { - ret = false; - goto out; - } - - if (irq_num >= VGIC_NR_PRIVATE_IRQS) { - cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; - vcpu = kvm_get_vcpu(kvm, cpuid); - } - - kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); - - if (level) - vgic_dist_irq_set(vcpu, irq_num); - else - vgic_dist_irq_clear(vcpu, irq_num); - - enabled = vgic_irq_is_enabled(vcpu, irq_num); - - if (!enabled) { - ret = false; - goto out; - } - - if (is_level && vgic_irq_is_active(vcpu, irq_num)) { - /* - * Level interrupt in progress, will be picked up - * when EOId. - */ - ret = false; - goto out; - } - - if (level) { - vgic_cpu_irq_set(vcpu, irq_num); - set_bit(cpuid, &dist->irq_pending_on_cpu); - } - -out: - spin_unlock(&dist->lock); - - return ret; -} - -/** - * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @irq_num: The IRQ number that is assigned to the device - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: activates an interrupt - * false: deactivates an interrupt - * - * The GIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. - */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, - bool level) -{ - if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) - vgic_kick_vcpus(kvm); - - return 0; -} - -static irqreturn_t vgic_maintenance_handler(int irq, void *data) -{ - /* - * We cannot rely on the vgic maintenance interrupt to be - * delivered synchronously. This means we can only use it to - * exit the VM, and we perform the handling of EOIed - * interrupts on the exit path (see vgic_process_maintenance). - */ - return IRQ_HANDLED; -} - -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) - return -EBUSY; - - for (i = 0; i < VGIC_NR_IRQS; i++) { - if (i < VGIC_NR_PPIS) - vgic_bitmap_set_irq_val(&dist->irq_enabled, - vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, VGIC_CFG_EDGE); - - vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; - } - - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_cpu->vgic_vmcr = 0; - - vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ - - return 0; -} - -static void vgic_init_maintenance_interrupt(void *info) -{ - enable_percpu_irq(vgic_maint_irq, 0); -} - -static int vgic_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - vgic_init_maintenance_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(vgic_maint_irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block vgic_cpu_nb = { - .notifier_call = vgic_cpu_notify, -}; - -int kvm_vgic_hyp_init(void) -{ - int ret; - struct resource vctrl_res; - struct resource vcpu_res; - - vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); - if (!vgic_node) { - kvm_err("error: no compatible vgic node in DT\n"); - return -ENODEV; - } - - vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic_maint_irq) { - kvm_err("error getting vgic maintenance irq from DT\n"); - ret = -ENXIO; - goto out; - } - - ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); - goto out; - } - - ret = register_cpu_notifier(&vgic_cpu_nb); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); - if (ret) { - kvm_err("Cannot obtain VCTRL resource\n"); - goto out_free_irq; - } - - vgic_vctrl_base = of_iomap(vgic_node, 2); - if (!vgic_vctrl_base) { - kvm_err("Cannot ioremap VCTRL\n"); - ret = -ENOMEM; - goto out_free_irq; - } - - vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); - vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic_vctrl_base, - vgic_vctrl_base + resource_size(&vctrl_res), - vctrl_res.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic_maint_irq); - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { - kvm_err("Cannot obtain VCPU resource\n"); - ret = -ENXIO; - goto out_unmap; - } - vgic_vcpu_base = vcpu_res.start; - - goto out; - -out_unmap: - iounmap(vgic_vctrl_base); -out_free_irq: - free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); -out: - of_node_put(vgic_node); - return ret; -} - -int kvm_vgic_init(struct kvm *kvm) -{ - int ret = 0, i; - - mutex_lock(&kvm->lock); - - if (vgic_initialized(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { - kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; - } - - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE); - if (ret) { - kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; - } - - for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) - vgic_set_target_reg(kvm, 0, i); - - kvm_timer_init(kvm); - kvm->arch.vgic.ready = true; -out: - mutex_unlock(&kvm->lock); - return ret; -} - -int kvm_vgic_create(struct kvm *kvm) -{ - int ret = 0; - - mutex_lock(&kvm->lock); - - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { - ret = -EEXIST; - goto out; - } - - spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic_vctrl_base; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - -out: - mutex_unlock(&kvm->lock); - return ret; -} - -static bool vgic_ioaddr_overlap(struct kvm *kvm) -{ - phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; - phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; - - if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) - return 0; - if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) || - (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist)) - return -EBUSY; - return 0; -} - -static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t size) -{ - int ret; - - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; - if (addr + size < addr) - return -EINVAL; - - ret = vgic_ioaddr_overlap(kvm); - if (ret) - return ret; - *ioaddr = addr; - return ret; -} - -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - int r = 0; - struct vgic_dist *vgic = &kvm->arch.vgic; - - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (addr & (SZ_4K - 1)) - return -EINVAL; - - mutex_lock(&kvm->lock); - switch (type) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, - addr, KVM_VGIC_V2_DIST_SIZE); - break; - case KVM_VGIC_V2_ADDR_TYPE_CPU: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, - addr, KVM_VGIC_V2_CPU_SIZE); - break; - default: - r = -ENODEV; - } - - mutex_unlock(&kvm->lock); - return r; -} diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c index 9eb574397ee1..4aad93d54d6f 100644 --- a/arch/arm/mach-at91/at91rm9200.c +++ b/arch/arm/mach-at91/at91rm9200.c @@ -11,6 +11,7 @@ */ #include <linux/module.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/mach/arch.h> @@ -304,7 +305,7 @@ static void at91rm9200_idle(void) at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK); } -static void at91rm9200_restart(char mode, const char *cmd) +static void at91rm9200_restart(enum reboot_mode reboot_mode, const char *cmd) { /* * Perform a hardware reset with the use of the Watchdog timer. diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h index f6de36aefe85..dc6e2f5f804d 100644 --- a/arch/arm/mach-at91/generic.h +++ b/arch/arm/mach-at91/generic.h @@ -10,6 +10,7 @@ #include <linux/clkdev.h> #include <linux/of.h> +#include <linux/reboot.h> /* Map io */ extern void __init at91_map_io(void); @@ -60,8 +61,8 @@ extern void at91sam9_idle(void); /* reset */ extern void at91_ioremap_rstc(u32 base_addr); -extern void at91sam9_alt_restart(char, const char *); -extern void at91sam9g45_restart(char, const char *); +extern void at91sam9_alt_restart(enum reboot_mode, const char *); +extern void at91sam9g45_restart(enum reboot_mode, const char *); /* shutdown */ extern void at91_ioremap_shdwc(u32 base_addr); diff --git a/arch/arm/mach-bcm2835/bcm2835.c b/arch/arm/mach-bcm2835/bcm2835.c index 740fa9ebe249..40686d7ef500 100644 --- a/arch/arm/mach-bcm2835/bcm2835.c +++ b/arch/arm/mach-bcm2835/bcm2835.c @@ -53,7 +53,7 @@ static void bcm2835_setup_restart(void) WARN(!wdt_regs, "failed to remap watchdog regs"); } -static void bcm2835_restart(char mode, const char *cmd) +static void bcm2835_restart(enum reboot_mode mode, const char *cmd) { u32 val; @@ -91,7 +91,7 @@ static void bcm2835_power_off(void) writel_relaxed(val, wdt_regs + PM_RSTS); /* Continue with normal reset mechanism */ - bcm2835_restart(0, ""); + bcm2835_restart(REBOOT_HARD, ""); } static struct map_desc io_map __initdata = { diff --git a/arch/arm/mach-clps711x/common.c b/arch/arm/mach-clps711x/common.c index f6d1746366d4..4ca2f3ca2de4 100644 --- a/arch/arm/mach-clps711x/common.c +++ b/arch/arm/mach-clps711x/common.c @@ -384,7 +384,7 @@ void __init clps711x_timer_init(void) setup_irq(IRQ_TC2OI, &clps711x_timer_irq); } -void clps711x_restart(char mode, const char *cmd) +void clps711x_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0); } diff --git a/arch/arm/mach-clps711x/common.h b/arch/arm/mach-clps711x/common.h index 2a22f4c6cc75..9a6767bfdc47 100644 --- a/arch/arm/mach-clps711x/common.h +++ b/arch/arm/mach-clps711x/common.h @@ -4,6 +4,8 @@ * Common bits. */ +#include <linux/reboot.h> + #define CLPS711X_NR_IRQS (33) #define CLPS711X_NR_GPIO (4 * 8 + 3) #define CLPS711X_GPIO(prt, bit) ((prt) * 8 + (bit)) @@ -12,5 +14,5 @@ extern void clps711x_map_io(void); extern void clps711x_init_irq(void); extern void clps711x_timer_init(void); extern void clps711x_handle_irq(struct pt_regs *regs); -extern void clps711x_restart(char mode, const char *cmd); +extern void clps711x_restart(enum reboot_mode mode, const char *cmd); extern void clps711x_init_early(void); diff --git a/arch/arm/mach-cns3xxx/core.h b/arch/arm/mach-cns3xxx/core.h index b23b17b4da10..5218b6198dc2 100644 --- a/arch/arm/mach-cns3xxx/core.h +++ b/arch/arm/mach-cns3xxx/core.h @@ -11,6 +11,8 @@ #ifndef __CNS3XXX_CORE_H #define __CNS3XXX_CORE_H +#include <linux/reboot.h> + extern void cns3xxx_timer_init(void); #ifdef CONFIG_CACHE_L2X0 @@ -22,6 +24,6 @@ static inline void cns3xxx_l2x0_init(void) {} void __init cns3xxx_map_io(void); void __init cns3xxx_init_irq(void); void cns3xxx_power_off(void); -void cns3xxx_restart(char, const char *); +void cns3xxx_restart(enum reboot_mode, const char *); #endif /* __CNS3XXX_CORE_H */ diff --git a/arch/arm/mach-cns3xxx/pm.c b/arch/arm/mach-cns3xxx/pm.c index 79e3d47aad65..fb38c726e987 100644 --- a/arch/arm/mach-cns3xxx/pm.c +++ b/arch/arm/mach-cns3xxx/pm.c @@ -89,7 +89,7 @@ void cns3xxx_pwr_soft_rst(unsigned int block) } EXPORT_SYMBOL(cns3xxx_pwr_soft_rst); -void cns3xxx_restart(char mode, const char *cmd) +void cns3xxx_restart(enum reboot_mode mode, const char *cmd) { /* * To reset, we hit the on-board reset register diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index a075b3e0c5c7..e026b19b23ea 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -40,6 +40,7 @@ config ARCH_DAVINCI_DA850 bool "DA850/OMAP-L138/AM18x based system" select ARCH_DAVINCI_DA8XX select ARCH_HAS_CPUFREQ + select CPU_FREQ_TABLE select CP_INTC config ARCH_DAVINCI_DA8XX diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 8a24b6c6339f..bea6793a7ede 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -985,7 +985,6 @@ static struct regulator_init_data tps65070_regulator_data[] = { static struct touchscreen_init_data tps6507x_touchscreen_data = { .poll_period = 30, /* ms between touch samples */ .min_pressure = 0x30, /* minimum pressure to trigger touch */ - .vref = 0, /* turn off vref when not using A/D */ .vendor = 0, /* /sys/class/input/input?/id/vendor */ .product = 65070, /* /sys/class/input/input?/id/product */ .version = 0x100, /* /sys/class/input/input?/id/version */ diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index 4d6933848abf..a0d4f6038b60 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -1004,7 +1004,7 @@ static const struct da850_opp da850_opp_96 = { #define OPP(freq) \ { \ - .index = (unsigned int) &da850_opp_##freq, \ + .driver_data = (unsigned int) &da850_opp_##freq, \ .frequency = freq * 1000, \ } @@ -1016,7 +1016,7 @@ static struct cpufreq_frequency_table da850_freq_table[] = { OPP(200), OPP(96), { - .index = 0, + .driver_data = 0, .frequency = CPUFREQ_TABLE_END, }, }; @@ -1044,7 +1044,7 @@ static int da850_set_voltage(unsigned int index) if (!cvdd) return -ENODEV; - opp = (struct da850_opp *) cpufreq_info.freq_table[index].index; + opp = (struct da850_opp *) cpufreq_info.freq_table[index].driver_data; return regulator_set_voltage(cvdd, opp->cvdd_min, opp->cvdd_max); } @@ -1125,7 +1125,7 @@ static int da850_set_pll0rate(struct clk *clk, unsigned long index) struct pll_data *pll = clk->pll_data; int ret; - opp = (struct da850_opp *) cpufreq_info.freq_table[index].index; + opp = (struct da850_opp *) cpufreq_info.freq_table[index].driver_data; prediv = opp->prediv; mult = opp->mult; postdiv = opp->postdiv; diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index eb254fe861ac..71a46a348761 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -16,6 +16,7 @@ #include <linux/serial_8250.h> #include <linux/ahci_platform.h> #include <linux/clk.h> +#include <linux/reboot.h> #include <mach/cputype.h> #include <mach/common.h> @@ -366,7 +367,7 @@ static struct platform_device da8xx_wdt_device = { .resource = da8xx_watchdog_resources, }; -void da8xx_restart(char mode, const char *cmd) +void da8xx_restart(enum reboot_mode mode, const char *cmd) { struct device *dev; diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c index 90b83d00fe2b..111573c0aad1 100644 --- a/arch/arm/mach-davinci/devices.c +++ b/arch/arm/mach-davinci/devices.c @@ -13,6 +13,7 @@ #include <linux/platform_device.h> #include <linux/dma-mapping.h> #include <linux/io.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <linux/platform_data/i2c-davinci.h> @@ -307,7 +308,7 @@ struct platform_device davinci_wdt_device = { .resource = wdt_resources, }; -void davinci_restart(char mode, const char *cmd) +void davinci_restart(enum reboot_mode mode, const char *cmd) { davinci_watchdog_reset(&davinci_wdt_device); } diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h index b124b77c90c5..cce316b92c06 100644 --- a/arch/arm/mach-davinci/include/mach/common.h +++ b/arch/arm/mach-davinci/include/mach/common.h @@ -14,6 +14,7 @@ #include <linux/compiler.h> #include <linux/types.h> +#include <linux/reboot.h> extern void davinci_timer_init(void); @@ -81,7 +82,7 @@ extern struct davinci_soc_info davinci_soc_info; extern void davinci_common_init(struct davinci_soc_info *soc_info); extern void davinci_init_ide(void); -void davinci_restart(char mode, const char *cmd); +void davinci_restart(enum reboot_mode mode, const char *cmd); void davinci_init_late(void); #ifdef CONFIG_DAVINCI_RESET_CLOCKS diff --git a/arch/arm/mach-davinci/include/mach/da8xx.h b/arch/arm/mach-davinci/include/mach/da8xx.h index 3c797e2272f8..7b41a5e9bc31 100644 --- a/arch/arm/mach-davinci/include/mach/da8xx.h +++ b/arch/arm/mach-davinci/include/mach/da8xx.h @@ -17,6 +17,7 @@ #include <linux/davinci_emac.h> #include <linux/spi/spi.h> #include <linux/platform_data/davinci_asp.h> +#include <linux/reboot.h> #include <linux/videodev2.h> #include <mach/serial.h> @@ -106,7 +107,7 @@ int da850_register_vpif_display (struct vpif_display_config *display_config); int da850_register_vpif_capture (struct vpif_capture_config *capture_config); -void da8xx_restart(char mode, const char *cmd); +void da8xx_restart(enum reboot_mode mode, const char *cmd); void da8xx_rproc_reserve_cma(void); int da8xx_register_rproc(void); diff --git a/arch/arm/mach-davinci/include/mach/tnetv107x.h b/arch/arm/mach-davinci/include/mach/tnetv107x.h index 366e975effa8..16314c64f755 100644 --- a/arch/arm/mach-davinci/include/mach/tnetv107x.h +++ b/arch/arm/mach-davinci/include/mach/tnetv107x.h @@ -35,6 +35,7 @@ #include <linux/serial_8250.h> #include <linux/input/matrix_keypad.h> #include <linux/mfd/ti_ssp.h> +#include <linux/reboot.h> #include <linux/platform_data/mmc-davinci.h> #include <linux/platform_data/mtd-davinci.h> @@ -54,7 +55,7 @@ extern struct platform_device tnetv107x_serial_device; extern void tnetv107x_init(void); extern void tnetv107x_devices_init(struct tnetv107x_device_info *); extern void tnetv107x_irq_init(void); -void tnetv107x_restart(char mode, const char *cmd); +void tnetv107x_restart(enum reboot_mode mode, const char *cmd); #endif diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index bad361ec1666..7a55b5c95971 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -18,8 +18,8 @@ #include <linux/clk.h> #include <linux/err.h> #include <linux/platform_device.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> diff --git a/arch/arm/mach-davinci/tnetv107x.c b/arch/arm/mach-davinci/tnetv107x.c index 3b2a70d43efa..4545667ecd3c 100644 --- a/arch/arm/mach-davinci/tnetv107x.c +++ b/arch/arm/mach-davinci/tnetv107x.c @@ -19,6 +19,7 @@ #include <linux/io.h> #include <linux/err.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <asm/mach/map.h> @@ -730,7 +731,7 @@ static void tnetv107x_watchdog_reset(struct platform_device *pdev) __raw_writel(1, ®s->kick); } -void tnetv107x_restart(char mode, const char *cmd) +void tnetv107x_restart(enum reboot_mode mode, const char *cmd) { tnetv107x_watchdog_reset(&tnetv107x_wdt_device); } diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index 2a9443d04d92..00247c771313 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -381,7 +381,7 @@ void __init dove_init(void) dove_xor1_init(); } -void dove_restart(char mode, const char *cmd) +void dove_restart(enum reboot_mode mode, const char *cmd) { /* * Enable soft reset to assert RSTOUTn. diff --git a/arch/arm/mach-dove/common.h b/arch/arm/mach-dove/common.h index e86347928b67..1d725224d146 100644 --- a/arch/arm/mach-dove/common.h +++ b/arch/arm/mach-dove/common.h @@ -11,6 +11,8 @@ #ifndef __ARCH_DOVE_COMMON_H #define __ARCH_DOVE_COMMON_H +#include <linux/reboot.h> + struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -42,6 +44,6 @@ void dove_spi1_init(void); void dove_i2c_init(void); void dove_sdio0_init(void); void dove_sdio1_init(void); -void dove_restart(char, const char *); +void dove_restart(enum reboot_mode, const char *); #endif diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c index b13cc74114db..68ac934d4565 100644 --- a/arch/arm/mach-ebsa110/core.c +++ b/arch/arm/mach-ebsa110/core.c @@ -116,7 +116,7 @@ static void __init ebsa110_map_io(void) iotable_init(ebsa110_io_desc, ARRAY_SIZE(ebsa110_io_desc)); } -static void __iomem *ebsa110_ioremap_caller(unsigned long cookie, size_t size, +static void __iomem *ebsa110_ioremap_caller(phys_addr_t cookie, size_t size, unsigned int flags, void *caller) { return (void __iomem *)cookie; @@ -311,7 +311,7 @@ static int __init ebsa110_init(void) arch_initcall(ebsa110_init); -static void ebsa110_restart(char mode, const char *cmd) +static void ebsa110_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0x80000000); } @@ -321,7 +321,6 @@ MACHINE_START(EBSA110, "EBSA110") .atag_offset = 0x400, .reserve_lp0 = 1, .reserve_lp2 = 1, - .restart_mode = 's', .map_io = ebsa110_map_io, .init_early = ebsa110_init_early, .init_irq = ebsa110_init_irq, diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index c49ed3dc1aea..df8612fbbc9c 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -35,6 +35,7 @@ #include <linux/spi/spi.h> #include <linux/export.h> #include <linux/irqchip/arm-vic.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <linux/platform_data/video-ep93xx.h> @@ -921,7 +922,7 @@ void __init ep93xx_init_devices(void) gpio_led_register_device(-1, &ep93xx_led_data); } -void ep93xx_restart(char mode, const char *cmd) +void ep93xx_restart(enum reboot_mode mode, const char *cmd) { /* * Set then clear the SWRST bit to initiate a software reset diff --git a/arch/arm/mach-ep93xx/include/mach/platform.h b/arch/arm/mach-ep93xx/include/mach/platform.h index a14e1b37beff..e256e0baec2e 100644 --- a/arch/arm/mach-ep93xx/include/mach/platform.h +++ b/arch/arm/mach-ep93xx/include/mach/platform.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLY__ +#include <linux/reboot.h> + struct i2c_gpio_platform_data; struct i2c_board_info; struct spi_board_info; @@ -55,7 +57,7 @@ void ep93xx_ide_release_gpio(struct platform_device *pdev); void ep93xx_init_devices(void); extern void ep93xx_timer_init(void); -void ep93xx_restart(char, const char *); +void ep93xx_restart(enum reboot_mode, const char *); void ep93xx_init_late(void); #ifdef CONFIG_CRUNCH diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 2d503b3684c4..f5f65b58181e 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -93,7 +93,7 @@ config SOC_EXYNOS5440 default y depends on ARCH_EXYNOS5 select ARCH_HAS_OPP - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select AUTO_ZRELADDR select MIGHT_HAVE_PCI select PCI_DOMAINS if PCI diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c index 2c655db4b78e..164685bd25c8 100644 --- a/arch/arm/mach-exynos/common.c +++ b/arch/arm/mach-exynos/common.c @@ -285,12 +285,12 @@ static struct map_desc exynos5440_iodesc0[] __initdata = { }, }; -void exynos4_restart(char mode, const char *cmd) +void exynos4_restart(enum reboot_mode mode, const char *cmd) { __raw_writel(0x1, S5P_SWRESET); } -void exynos5_restart(char mode, const char *cmd) +void exynos5_restart(enum reboot_mode mode, const char *cmd) { struct device_node *np; u32 val; diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h index 38d45fd23be4..3e156bcddcb4 100644 --- a/arch/arm/mach-exynos/common.h +++ b/arch/arm/mach-exynos/common.h @@ -12,6 +12,7 @@ #ifndef __ARCH_ARM_MACH_EXYNOS_COMMON_H #define __ARCH_ARM_MACH_EXYNOS_COMMON_H +#include <linux/reboot.h> #include <linux/of.h> void mct_init(void __iomem *base, int irq_g0, int irq_l0, int irq_l1); @@ -20,8 +21,8 @@ extern unsigned long xxti_f, xusbxti_f; struct map_desc; void exynos_init_io(void); -void exynos4_restart(char mode, const char *cmd); -void exynos5_restart(char mode, const char *cmd); +void exynos4_restart(enum reboot_mode mode, const char *cmd); +void exynos5_restart(enum reboot_mode mode, const char *cmd); void exynos_init_late(void); /* ToDo: remove these after migrating legacy exynos4 platforms to dt */ diff --git a/arch/arm/mach-footbridge/cats-hw.c b/arch/arm/mach-footbridge/cats-hw.c index 6987a09ec219..9669cc0b6318 100644 --- a/arch/arm/mach-footbridge/cats-hw.c +++ b/arch/arm/mach-footbridge/cats-hw.c @@ -86,7 +86,7 @@ fixup_cats(struct tag *tags, char **cmdline, struct meminfo *mi) MACHINE_START(CATS, "Chalice-CATS") /* Maintainer: Philip Blundell */ .atag_offset = 0x100, - .restart_mode = 's', + .reboot_mode = REBOOT_SOFT, .fixup = fixup_cats, .map_io = footbridge_map_io, .init_irq = footbridge_init_irq, diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c index a42b369bc439..2739ca2c1334 100644 --- a/arch/arm/mach-footbridge/common.c +++ b/arch/arm/mach-footbridge/common.c @@ -198,9 +198,9 @@ void __init footbridge_map_io(void) } } -void footbridge_restart(char mode, const char *cmd) +void footbridge_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* Jump into the ROM */ soft_restart(0x41000000); } else { diff --git a/arch/arm/mach-footbridge/common.h b/arch/arm/mach-footbridge/common.h index a846e50a07b8..56607b3a773e 100644 --- a/arch/arm/mach-footbridge/common.h +++ b/arch/arm/mach-footbridge/common.h @@ -1,3 +1,4 @@ +#include <linux/reboot.h> extern void footbridge_timer_init(void); extern void isa_timer_init(void); @@ -8,4 +9,4 @@ extern void footbridge_map_io(void); extern void footbridge_init_irq(void); extern void isa_init_irq(unsigned int irq); -extern void footbridge_restart(char, const char *); +extern void footbridge_restart(enum reboot_mode, const char *); diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c index 90ea23fdce4c..1fd2cf097e30 100644 --- a/arch/arm/mach-footbridge/netwinder-hw.c +++ b/arch/arm/mach-footbridge/netwinder-hw.c @@ -634,9 +634,9 @@ fixup_netwinder(struct tag *tags, char **cmdline, struct meminfo *mi) #endif } -static void netwinder_restart(char mode, const char *cmd) +static void netwinder_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* Jump into the ROM */ soft_restart(0x41000000); } else { diff --git a/arch/arm/mach-highbank/core.h b/arch/arm/mach-highbank/core.h index 3f65206a9b92..aea1ec5ab6f8 100644 --- a/arch/arm/mach-highbank/core.h +++ b/arch/arm/mach-highbank/core.h @@ -1,8 +1,10 @@ #ifndef __HIGHBANK_CORE_H #define __HIGHBANK_CORE_H +#include <linux/reboot.h> + extern void highbank_set_cpu_jump(int cpu, void *jump_addr); -extern void highbank_restart(char, const char *); +extern void highbank_restart(enum reboot_mode, const char *); extern void __iomem *scu_base_addr; #ifdef CONFIG_PM_SLEEP diff --git a/arch/arm/mach-highbank/system.c b/arch/arm/mach-highbank/system.c index 37d8384dcf19..2df5870b7583 100644 --- a/arch/arm/mach-highbank/system.c +++ b/arch/arm/mach-highbank/system.c @@ -15,13 +15,14 @@ */ #include <linux/io.h> #include <asm/proc-fns.h> +#include <linux/reboot.h> #include "core.h" #include "sysregs.h" -void highbank_restart(char mode, const char *cmd) +void highbank_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 'h') + if (mode == REBOOT_HARD) highbank_set_pwr_hard_reset(); else highbank_set_pwr_soft_reset(); diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index 60661a4b0e24..f54656091a9d 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -108,7 +108,6 @@ config SOC_IMX25 select ARCH_MXC_IOMUX_V3 select COMMON_CLK select CPU_ARM926T - select HAVE_CAN_FLEXCAN if CAN select MXC_AVIC config SOC_IMX27 @@ -134,7 +133,6 @@ config SOC_IMX35 select ARCH_MXC_IOMUX_V3 select COMMON_CLK select CPU_V6K - select HAVE_CAN_FLEXCAN if CAN select HAVE_EPIT select MXC_AVIC select SMP_ON_UP if SMP @@ -774,7 +772,6 @@ comment "Device tree only" config SOC_IMX53 bool "i.MX53 support" - select HAVE_CAN_FLEXCAN if CAN select HAVE_IMX_SRC select IMX_HAVE_PLATFORM_IMX2_WDT select PINCTRL @@ -797,7 +794,6 @@ config SOC_IMX6Q select CPU_V7 select HAVE_ARM_SCU if SMP select HAVE_ARM_TWD if LOCAL_TIMERS - select HAVE_CAN_FLEXCAN if CAN select HAVE_IMX_ANATOP select HAVE_IMX_GPC select HAVE_IMX_MMDC diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index ee78847abf47..cb6c838b63ed 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -11,6 +11,8 @@ #ifndef __ASM_ARCH_MXC_COMMON_H__ #define __ASM_ARCH_MXC_COMMON_H__ +#include <linux/reboot.h> + struct platform_device; struct pt_regs; struct clk; @@ -71,7 +73,7 @@ extern int mx53_clocks_init_dt(void); extern struct platform_device *mxc_register_gpio(char *name, int id, resource_size_t iobase, resource_size_t iosize, int irq, int irq_high); extern void mxc_set_cpu_type(unsigned int type); -extern void mxc_restart(char, const char *); +extern void mxc_restart(enum reboot_mode, const char *); extern void mxc_arch_reset_init(void __iomem *); extern void mxc_arch_reset_init_dt(void); extern int mx53_revision(void); diff --git a/arch/arm/mach-imx/devices-imx25.h b/arch/arm/mach-imx/devices-imx25.h index 0d2922bc575c..769563fdeaa0 100644 --- a/arch/arm/mach-imx/devices-imx25.h +++ b/arch/arm/mach-imx/devices-imx25.h @@ -13,10 +13,10 @@ extern const struct imx_fec_data imx25_fec_data; imx_add_fec(&imx25_fec_data, pdata) extern const struct imx_flexcan_data imx25_flexcan_data[]; -#define imx25_add_flexcan(id, pdata) \ - imx_add_flexcan(&imx25_flexcan_data[id], pdata) -#define imx25_add_flexcan0(pdata) imx25_add_flexcan(0, pdata) -#define imx25_add_flexcan1(pdata) imx25_add_flexcan(1, pdata) +#define imx25_add_flexcan(id) \ + imx_add_flexcan(&imx25_flexcan_data[id]) +#define imx25_add_flexcan0() imx25_add_flexcan(0) +#define imx25_add_flexcan1() imx25_add_flexcan(1) extern const struct imx_fsl_usb2_udc_data imx25_fsl_usb2_udc_data; #define imx25_add_fsl_usb2_udc(pdata) \ diff --git a/arch/arm/mach-imx/devices-imx35.h b/arch/arm/mach-imx/devices-imx35.h index e2675f1b141c..780d8240281b 100644 --- a/arch/arm/mach-imx/devices-imx35.h +++ b/arch/arm/mach-imx/devices-imx35.h @@ -17,10 +17,10 @@ extern const struct imx_fsl_usb2_udc_data imx35_fsl_usb2_udc_data; imx_add_fsl_usb2_udc(&imx35_fsl_usb2_udc_data, pdata) extern const struct imx_flexcan_data imx35_flexcan_data[]; -#define imx35_add_flexcan(id, pdata) \ - imx_add_flexcan(&imx35_flexcan_data[id], pdata) -#define imx35_add_flexcan0(pdata) imx35_add_flexcan(0, pdata) -#define imx35_add_flexcan1(pdata) imx35_add_flexcan(1, pdata) +#define imx35_add_flexcan(id) \ + imx_add_flexcan(&imx35_flexcan_data[id]) +#define imx35_add_flexcan0() imx35_add_flexcan(0) +#define imx35_add_flexcan1() imx35_add_flexcan(1) extern const struct imx_imx2_wdt_data imx35_imx2_wdt_data; #define imx35_add_imx2_wdt() \ diff --git a/arch/arm/mach-imx/devices/Kconfig b/arch/arm/mach-imx/devices/Kconfig index 3dd2b1b041d1..68c74fb0373c 100644 --- a/arch/arm/mach-imx/devices/Kconfig +++ b/arch/arm/mach-imx/devices/Kconfig @@ -4,7 +4,6 @@ config IMX_HAVE_PLATFORM_FEC config IMX_HAVE_PLATFORM_FLEXCAN bool - select HAVE_CAN_FLEXCAN if CAN config IMX_HAVE_PLATFORM_FSL_USB2_UDC bool diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 453e20bc2657..c13b76b9f6b3 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -50,7 +50,6 @@ struct platform_device *__init imx_add_fec( const struct imx_fec_data *data, const struct fec_platform_data *pdata); -#include <linux/can/platform/flexcan.h> struct imx_flexcan_data { int id; resource_size_t iobase; @@ -58,8 +57,7 @@ struct imx_flexcan_data { resource_size_t irq; }; struct platform_device *__init imx_add_flexcan( - const struct imx_flexcan_data *data, - const struct flexcan_platform_data *pdata); + const struct imx_flexcan_data *data); #include <linux/fsl_devices.h> struct imx_fsl_usb2_udc_data { diff --git a/arch/arm/mach-imx/devices/platform-flexcan.c b/arch/arm/mach-imx/devices/platform-flexcan.c index 1078bf0a94ef..55d61eaf63c6 100644 --- a/arch/arm/mach-imx/devices/platform-flexcan.c +++ b/arch/arm/mach-imx/devices/platform-flexcan.c @@ -38,8 +38,7 @@ const struct imx_flexcan_data imx35_flexcan_data[] __initconst = { #endif /* ifdef CONFIG_SOC_IMX35 */ struct platform_device *__init imx_add_flexcan( - const struct imx_flexcan_data *data, - const struct flexcan_platform_data *pdata) + const struct imx_flexcan_data *data) { struct resource res[] = { { @@ -54,5 +53,5 @@ struct platform_device *__init imx_add_flexcan( }; return imx_add_platform_device("flexcan", data->id, - res, ARRAY_SIZE(res), pdata, sizeof(*pdata)); + res, ARRAY_SIZE(res), NULL, 0); } diff --git a/arch/arm/mach-imx/eukrea_mbimxsd25-baseboard.c b/arch/arm/mach-imx/eukrea_mbimxsd25-baseboard.c index e2b70f4c1a2c..e77cc3af6db2 100644 --- a/arch/arm/mach-imx/eukrea_mbimxsd25-baseboard.c +++ b/arch/arm/mach-imx/eukrea_mbimxsd25-baseboard.c @@ -279,7 +279,7 @@ void __init eukrea_mbimxsd25_baseboard_init(void) imx25_add_imx_fb(&eukrea_mximxsd_fb_pdata); imx25_add_imx_ssi(0, &eukrea_mbimxsd_ssi_pdata); - imx25_add_flexcan1(NULL); + imx25_add_flexcan1(); imx25_add_sdhci_esdhc_imx(0, &sd1_pdata); gpio_request(GPIO_LED1, "LED1"); diff --git a/arch/arm/mach-imx/eukrea_mbimxsd35-baseboard.c b/arch/arm/mach-imx/eukrea_mbimxsd35-baseboard.c index 5a2d5ef12dd5..14d6c8249b76 100644 --- a/arch/arm/mach-imx/eukrea_mbimxsd35-baseboard.c +++ b/arch/arm/mach-imx/eukrea_mbimxsd35-baseboard.c @@ -287,7 +287,7 @@ void __init eukrea_mbimxsd35_baseboard_init(void) imx35_add_imx_ssi(0, &eukrea_mbimxsd_ssi_pdata); - imx35_add_flexcan1(NULL); + imx35_add_flexcan1(); imx35_add_sdhci_esdhc_imx(0, &sd1_pdata); gpio_request(GPIO_LED1, "LED1"); diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index f5965220a4d8..7be13f8e69a0 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -27,6 +27,7 @@ #include <linux/of_platform.h> #include <linux/opp.h> #include <linux/phy.h> +#include <linux/reboot.h> #include <linux/regmap.h> #include <linux/micrel_phy.h> #include <linux/mfd/syscon.h> @@ -67,7 +68,7 @@ static void __init imx6q_init_revision(void) mxc_set_cpu_type(rev >> 16 & 0xff); } -static void imx6q_restart(char mode, const char *cmd) +static void imx6q_restart(enum reboot_mode mode, const char *cmd) { struct device_node *np; void __iomem *wdog_base; diff --git a/arch/arm/mach-imx/mach-mx25_3ds.c b/arch/arm/mach-imx/mach-mx25_3ds.c index 8bcda688a006..13490c203050 100644 --- a/arch/arm/mach-imx/mach-mx25_3ds.c +++ b/arch/arm/mach-imx/mach-mx25_3ds.c @@ -249,7 +249,7 @@ static void __init mx25pdk_init(void) imx25_add_imx_i2c0(&mx25_3ds_i2c0_data); gpio_request_one(MX25PDK_CAN_PWDN, GPIOF_OUT_INIT_LOW, "can-pwdn"); - imx25_add_flexcan0(NULL); + imx25_add_flexcan0(); } static void __init mx25pdk_timer_init(void) diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c index dae4cd7be040..6f424eced181 100644 --- a/arch/arm/mach-imx/mach-mx31moboard.c +++ b/arch/arm/mach-imx/mach-mx31moboard.c @@ -268,10 +268,11 @@ static struct mc13xxx_led_platform_data moboard_led[] = { static struct mc13xxx_leds_platform_data moboard_leds = { .num_leds = ARRAY_SIZE(moboard_led), .led = moboard_led, - .flags = MC13783_LED_SLEWLIMTC, - .abmode = MC13783_LED_AB_DISABLED, - .tc1_period = MC13783_LED_PERIOD_10MS, - .tc2_period = MC13783_LED_PERIOD_10MS, + .led_control[0] = MC13783_LED_C0_ENABLE | MC13783_LED_C0_ABMODE(0), + .led_control[1] = MC13783_LED_C1_SLEWLIM, + .led_control[2] = MC13783_LED_C2_SLEWLIM, + .led_control[3] = MC13783_LED_C3_PERIOD(0), + .led_control[4] = MC13783_LED_C3_PERIOD(0), }; static struct mc13xxx_buttons_platform_data moboard_buttons = { diff --git a/arch/arm/mach-imx/mach-pcm043.c b/arch/arm/mach-imx/mach-pcm043.c index 8ed533f0f8ca..b726cb1c5fdd 100644 --- a/arch/arm/mach-imx/mach-pcm043.c +++ b/arch/arm/mach-imx/mach-pcm043.c @@ -385,7 +385,7 @@ static void __init pcm043_init(void) if (!otg_mode_host) imx35_add_fsl_usb2_udc(&otg_device_pdata); - imx35_add_flexcan1(NULL); + imx35_add_flexcan1(); imx35_add_sdhci_esdhc_imx(0, &sd1_pdata); } diff --git a/arch/arm/mach-imx/mm-imx3.c b/arch/arm/mach-imx/mm-imx3.c index 8f0f60697f55..0884ca90d15a 100644 --- a/arch/arm/mach-imx/mm-imx3.c +++ b/arch/arm/mach-imx/mm-imx3.c @@ -65,7 +65,7 @@ static void imx3_idle(void) : "=r" (reg)); } -static void __iomem *imx3_ioremap_caller(unsigned long phys_addr, size_t size, +static void __iomem *imx3_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { if (mtype == MT_DEVICE) { diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c index 7cdc79a9657c..6fe81bb4d3c9 100644 --- a/arch/arm/mach-imx/system.c +++ b/arch/arm/mach-imx/system.c @@ -37,7 +37,7 @@ static struct clk *wdog_clk; /* * Reset the system. It is called by machine_restart(). */ -void mxc_restart(char mode, const char *cmd) +void mxc_restart(enum reboot_mode mode, const char *cmd) { unsigned int wcr_enable; diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index fea91313678b..cd46529e9eaa 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -26,8 +26,8 @@ #include <linux/clockchips.h> #include <linux/clk.h> #include <linux/err.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <asm/mach/time.h> #include "common.h" diff --git a/arch/arm/mach-integrator/common.h b/arch/arm/mach-integrator/common.h index 72516658be1e..ad0ac5547b2c 100644 --- a/arch/arm/mach-integrator/common.h +++ b/arch/arm/mach-integrator/common.h @@ -1,7 +1,8 @@ +#include <linux/reboot.h> #include <linux/amba/serial.h> extern struct amba_pl010_data ap_uart_data; void integrator_init_early(void); int integrator_init(bool is_cp); void integrator_reserve(void); -void integrator_restart(char, const char *); +void integrator_restart(enum reboot_mode, const char *); void integrator_init_sysfs(struct device *parent, u32 id); diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 81461d218717..4cdfd7365925 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -124,7 +124,7 @@ void __init integrator_reserve(void) /* * To reset, we hit the on-board reset register in the system FPGA */ -void integrator_restart(char mode, const char *cmd) +void integrator_restart(enum reboot_mode mode, const char *cmd) { cm_control(CM_CTRL_RESET, CM_CTRL_RESET); } diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index a5b15c4e8def..d9e95e612fcb 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -41,6 +41,7 @@ #include <linux/stat.h> #include <linux/sys_soc.h> #include <linux/termios.h> +#include <linux/sched_clock.h> #include <mach/hardware.h> #include <mach/platform.h> @@ -48,7 +49,6 @@ #include <asm/setup.h> #include <asm/param.h> /* HZ */ #include <asm/mach-types.h> -#include <asm/sched_clock.h> #include <mach/lm.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-iop13xx/include/mach/iop13xx.h b/arch/arm/mach-iop13xx/include/mach/iop13xx.h index 7480f58267aa..17b40279e0a4 100644 --- a/arch/arm/mach-iop13xx/include/mach/iop13xx.h +++ b/arch/arm/mach-iop13xx/include/mach/iop13xx.h @@ -2,6 +2,9 @@ #define _IOP13XX_HW_H_ #ifndef __ASSEMBLY__ + +#include <linux/reboot.h> + /* The ATU offsets can change based on the strapping */ extern u32 iop13xx_atux_pmmr_offset; extern u32 iop13xx_atue_pmmr_offset; @@ -11,7 +14,7 @@ void iop13xx_map_io(void); void iop13xx_platform_init(void); void iop13xx_add_tpmi_devices(void); void iop13xx_init_irq(void); -void iop13xx_restart(char, const char *); +void iop13xx_restart(enum reboot_mode, const char *); /* CPUID CP6 R0 Page 0 */ static inline int iop13xx_cpu_id(void) diff --git a/arch/arm/mach-iop13xx/io.c b/arch/arm/mach-iop13xx/io.c index 183dc8b5511b..faaf7d4482c5 100644 --- a/arch/arm/mach-iop13xx/io.c +++ b/arch/arm/mach-iop13xx/io.c @@ -23,7 +23,7 @@ #include "pci.h" -static void __iomem *__iop13xx_ioremap_caller(unsigned long cookie, +static void __iomem *__iop13xx_ioremap_caller(phys_addr_t cookie, size_t size, unsigned int mtype, void *caller) { void __iomem * retval; diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 3181f61ea63e..96e6c7a6793b 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -469,7 +469,6 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); - dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); break; case IOP13XX_INIT_ADMA_1: @@ -479,7 +478,6 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); - dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); break; case IOP13XX_INIT_ADMA_2: @@ -489,7 +487,6 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); - dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); dma_cap_set(DMA_PQ, plat_data->cap_mask); dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); @@ -597,7 +594,7 @@ __setup("iop13xx_init_adma", iop13xx_init_adma_setup); __setup("iop13xx_init_uart", iop13xx_init_uart_setup); __setup("iop13xx_init_i2c", iop13xx_init_i2c_setup); -void iop13xx_restart(char mode, const char *cmd) +void iop13xx_restart(enum reboot_mode mode, const char *cmd) { /* * Reset the internal bus (warning both cores are reset) diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c index ea0984a7449e..069144300b77 100644 --- a/arch/arm/mach-iop32x/n2100.c +++ b/arch/arm/mach-iop32x/n2100.c @@ -286,7 +286,7 @@ static void n2100_power_off(void) ; } -static void n2100_restart(char mode, const char *cmd) +static void n2100_restart(enum reboot_mode mode, const char *cmd) { gpio_line_set(N2100_HARDWARE_RESET, GPIO_LOW); gpio_line_config(N2100_HARDWARE_RESET, GPIO_OUT); diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 6600cff6bd92..5327decde5a0 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -30,6 +30,7 @@ #include <linux/export.h> #include <linux/gpio.h> #include <linux/cpu.h> +#include <linux/sched_clock.h> #include <mach/udc.h> #include <mach/hardware.h> @@ -38,7 +39,6 @@ #include <asm/pgtable.h> #include <asm/page.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <asm/system_misc.h> #include <asm/mach/map.h> @@ -531,9 +531,9 @@ static void __init ixp4xx_clockevent_init(void) 0xf, 0xfffffffe); } -void ixp4xx_restart(char mode, const char *cmd) +void ixp4xx_restart(enum reboot_mode mode, const char *cmd) { - if ( 1 && mode == 's') { + if ( 1 && mode == REBOOT_SOFT) { /* Jump into ROM at address 0 */ soft_restart(0); } else { @@ -559,7 +559,7 @@ void ixp4xx_restart(char mode, const char *cmd) * fallback to the default. */ -static void __iomem *ixp4xx_ioremap_caller(unsigned long addr, size_t size, +static void __iomem *ixp4xx_ioremap_caller(phys_addr_t addr, size_t size, unsigned int mtype, void *caller) { if (!is_pci_memory(addr)) diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c index 5d413f8c5700..686ef34c69f5 100644 --- a/arch/arm/mach-ixp4xx/dsmg600-setup.c +++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c @@ -27,6 +27,7 @@ #include <linux/i2c.h> #include <linux/i2c-gpio.h> +#include <mach/hardware.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <asm/mach/flash.h> diff --git a/arch/arm/mach-ixp4xx/include/mach/platform.h b/arch/arm/mach-ixp4xx/include/mach/platform.h index db5afb69c123..4c4c6a6f4526 100644 --- a/arch/arm/mach-ixp4xx/include/mach/platform.h +++ b/arch/arm/mach-ixp4xx/include/mach/platform.h @@ -13,6 +13,8 @@ #ifndef __ASSEMBLY__ +#include <linux/reboot.h> + #include <asm/types.h> #ifndef __ARMEB__ @@ -123,7 +125,7 @@ extern void ixp4xx_init_early(void); extern void ixp4xx_init_irq(void); extern void ixp4xx_sys_init(void); extern void ixp4xx_timer_init(void); -extern void ixp4xx_restart(char, const char *); +extern void ixp4xx_restart(enum reboot_mode, const char *); extern void ixp4xx_pci_preinit(void); struct pci_sys_data; extern int ixp4xx_setup(int nr, struct pci_sys_data *sys); diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 7c72c725b711..e9238b5567ee 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -20,6 +20,7 @@ #include <linux/mv643xx_i2c.h> #include <linux/timex.h> #include <linux/kexec.h> +#include <linux/reboot.h> #include <net/dsa.h> #include <asm/page.h> #include <asm/mach/map.h> @@ -722,7 +723,7 @@ void __init kirkwood_init(void) #endif } -void kirkwood_restart(char mode, const char *cmd) +void kirkwood_restart(enum reboot_mode mode, const char *cmd) { /* * Enable soft reset to assert RSTOUTn. diff --git a/arch/arm/mach-kirkwood/common.h b/arch/arm/mach-kirkwood/common.h index 1c09f3f93fbb..fcf3ba682e24 100644 --- a/arch/arm/mach-kirkwood/common.h +++ b/arch/arm/mach-kirkwood/common.h @@ -11,6 +11,8 @@ #ifndef __ARCH_KIRKWOOD_COMMON_H #define __ARCH_KIRKWOOD_COMMON_H +#include <linux/reboot.h> + struct dsa_platform_data; struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -53,7 +55,7 @@ void kirkwood_audio_init(void); void kirkwood_cpuidle_init(void); void kirkwood_cpufreq_init(void); -void kirkwood_restart(char, const char *); +void kirkwood_restart(enum reboot_mode, const char *); void kirkwood_clk_init(void); /* board init functions for boards not fully converted to fdt */ diff --git a/arch/arm/mach-ks8695/generic.h b/arch/arm/mach-ks8695/generic.h index 6e97ce462d73..43253f8e6de4 100644 --- a/arch/arm/mach-ks8695/generic.h +++ b/arch/arm/mach-ks8695/generic.h @@ -12,5 +12,5 @@ extern __init void ks8695_map_io(void); extern __init void ks8695_init_irq(void); -extern void ks8695_restart(char, const char *); +extern void ks8695_restart(enum reboot_mode, const char *); extern void ks8695_timer_init(void); diff --git a/arch/arm/mach-ks8695/time.c b/arch/arm/mach-ks8695/time.c index c272a3863d5f..426c97662f5b 100644 --- a/arch/arm/mach-ks8695/time.c +++ b/arch/arm/mach-ks8695/time.c @@ -154,11 +154,11 @@ void __init ks8695_timer_init(void) setup_irq(KS8695_IRQ_TIMER1, &ks8695_timer_irq); } -void ks8695_restart(char mode, const char *cmd) +void ks8695_restart(enum reboot_mode reboot_mode, const char *cmd) { unsigned int reg; - if (mode == 's') + if (reboot_mode == REBOOT_SOFT) soft_restart(0); /* disable timer0 */ diff --git a/arch/arm/mach-lpc32xx/common.c b/arch/arm/mach-lpc32xx/common.c index 0d4db8c544b5..d7aa54c25c59 100644 --- a/arch/arm/mach-lpc32xx/common.c +++ b/arch/arm/mach-lpc32xx/common.c @@ -207,11 +207,11 @@ void __init lpc32xx_map_io(void) iotable_init(lpc32xx_io_desc, ARRAY_SIZE(lpc32xx_io_desc)); } -void lpc23xx_restart(char mode, const char *cmd) +void lpc23xx_restart(enum reboot_mode mode, const char *cmd) { switch (mode) { - case 's': - case 'h': + case REBOOT_SOFT: + case REBOOT_HARD: lpc32xx_watchdog_reset(); break; diff --git a/arch/arm/mach-lpc32xx/common.h b/arch/arm/mach-lpc32xx/common.h index e0b26062a272..1cd8853b2f9b 100644 --- a/arch/arm/mach-lpc32xx/common.h +++ b/arch/arm/mach-lpc32xx/common.h @@ -21,6 +21,7 @@ #include <mach/board.h> #include <linux/platform_device.h> +#include <linux/reboot.h> /* * Other arch specific structures and functions @@ -29,7 +30,7 @@ extern void lpc32xx_timer_init(void); extern void __init lpc32xx_init_irq(void); extern void __init lpc32xx_map_io(void); extern void __init lpc32xx_serial_init(void); -extern void lpc23xx_restart(char, const char *); +extern void lpc23xx_restart(enum reboot_mode, const char *); /* diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c index c1cd5a943ab1..e54f87ec2e4a 100644 --- a/arch/arm/mach-lpc32xx/phy3250.c +++ b/arch/arm/mach-lpc32xx/phy3250.c @@ -182,8 +182,8 @@ static void pl08x_put_signal(const struct pl08x_channel_data *cd, int ch) static struct pl08x_platform_data pl08x_pd = { .slave_channels = &pl08x_slave_channels[0], .num_slave_channels = ARRAY_SIZE(pl08x_slave_channels), - .get_signal = pl08x_get_signal, - .put_signal = pl08x_put_signal, + .get_xfer_signal = pl08x_get_signal, + .put_xfer_signal = pl08x_put_signal, .lli_buses = PL08X_AHB1, .mem_buses = PL08X_AHB1, }; diff --git a/arch/arm/mach-mmp/aspenite.c b/arch/arm/mach-mmp/aspenite.c index 5b660ec09ef5..0c002099c3a3 100644 --- a/arch/arm/mach-mmp/aspenite.c +++ b/arch/arm/mach-mmp/aspenite.c @@ -210,7 +210,7 @@ struct pxa168fb_mach_info aspenite_lcd_info = { .invert_pixclock = 0, }; -static unsigned int aspenite_matrix_key_map[] = { +static const unsigned int aspenite_matrix_key_map[] = { KEY(0, 6, KEY_UP), /* SW 4 */ KEY(0, 7, KEY_DOWN), /* SW 5 */ KEY(1, 6, KEY_LEFT), /* SW 6 */ @@ -219,11 +219,15 @@ static unsigned int aspenite_matrix_key_map[] = { KEY(4, 7, KEY_ESC), /* SW 9 */ }; +static struct matrix_keymap_data aspenite_matrix_keymap_data = { + .keymap = aspenite_matrix_key_map, + .keymap_size = ARRAY_SIZE(aspenite_matrix_key_map), +}; + static struct pxa27x_keypad_platform_data aspenite_keypad_info __initdata = { .matrix_key_rows = 5, .matrix_key_cols = 8, - .matrix_key_map = aspenite_matrix_key_map, - .matrix_key_map_size = ARRAY_SIZE(aspenite_matrix_key_map), + .matrix_keymap_data = &aspenite_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-mmp/common.c b/arch/arm/mach-mmp/common.c index 9292b7966e3b..c03b4ab582db 100644 --- a/arch/arm/mach-mmp/common.c +++ b/arch/arm/mach-mmp/common.c @@ -47,7 +47,7 @@ void __init mmp_map_io(void) mmp_chip_id = __raw_readl(MMP_CHIPID); } -void mmp_restart(char mode, const char *cmd) +void mmp_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0); } diff --git a/arch/arm/mach-mmp/common.h b/arch/arm/mach-mmp/common.h index 0bdc50b134ce..991d7e9877de 100644 --- a/arch/arm/mach-mmp/common.h +++ b/arch/arm/mach-mmp/common.h @@ -1,10 +1,11 @@ +#include <linux/reboot.h> #define ARRAY_AND_SIZE(x) (x), ARRAY_SIZE(x) extern void timer_init(int irq); extern void __init icu_init_irq(void); extern void __init mmp_map_io(void); -extern void mmp_restart(char, const char *); +extern void mmp_restart(enum reboot_mode, const char *); extern void __init pxa168_clk_init(void); extern void __init pxa910_clk_init(void); extern void __init mmp2_clk_init(void); diff --git a/arch/arm/mach-mmp/include/mach/pxa168.h b/arch/arm/mach-mmp/include/mach/pxa168.h index 7ed1df21ea1c..459c2d03eb5c 100644 --- a/arch/arm/mach-mmp/include/mach/pxa168.h +++ b/arch/arm/mach-mmp/include/mach/pxa168.h @@ -1,9 +1,11 @@ #ifndef __ASM_MACH_PXA168_H #define __ASM_MACH_PXA168_H +#include <linux/reboot.h> + extern void pxa168_timer_init(void); extern void __init pxa168_init_irq(void); -extern void pxa168_restart(char, const char *); +extern void pxa168_restart(enum reboot_mode, const char *); extern void pxa168_clear_keypad_wakeup(void); #include <linux/i2c.h> diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c index a30dcf3b7d9e..144e997624c0 100644 --- a/arch/arm/mach-mmp/pxa168.c +++ b/arch/arm/mach-mmp/pxa168.c @@ -172,7 +172,7 @@ int __init pxa168_add_usb_host(struct mv_usb_platform_data *pdata) return platform_device_register(&pxa168_device_usb_host); } -void pxa168_restart(char mode, const char *cmd) +void pxa168_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0xffff0000); } diff --git a/arch/arm/mach-mmp/teton_bga.c b/arch/arm/mach-mmp/teton_bga.c index e4d95b4c6bb2..6aa53fb29d26 100644 --- a/arch/arm/mach-mmp/teton_bga.c +++ b/arch/arm/mach-mmp/teton_bga.c @@ -61,11 +61,15 @@ static unsigned int teton_bga_matrix_key_map[] = { KEY(1, 7, KEY_RIGHT), }; +static struct matrix_keymap_data teton_bga_matrix_keymap_data = { + .keymap = teton_bga_matrix_key_map, + .keymap_size = ARRAY_SIZE(teton_bga_matrix_key_map), +}; + static struct pxa27x_keypad_platform_data teton_bga_keypad_info __initdata = { .matrix_key_rows = 2, .matrix_key_cols = 8, - .matrix_key_map = teton_bga_matrix_key_map, - .matrix_key_map_size = ARRAY_SIZE(teton_bga_matrix_key_map), + .matrix_keymap_data = &teton_bga_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 86a18b3d252e..7ac41e83cfef 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -28,8 +28,8 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <mach/addr-map.h> #include <mach/regs-timers.h> #include <mach/regs-apbc.h> diff --git a/arch/arm/mach-msm/common.h b/arch/arm/mach-msm/common.h index ce8215a269e5..421cf7751a80 100644 --- a/arch/arm/mach-msm/common.h +++ b/arch/arm/mach-msm/common.h @@ -23,7 +23,7 @@ extern void msm_map_msm8x60_io(void); extern void msm_map_msm8960_io(void); extern void msm_map_qsd8x50_io(void); -extern void __iomem *__msm_ioremap_caller(unsigned long phys_addr, size_t size, +extern void __iomem *__msm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller); extern struct smp_operations msm_smp_ops; diff --git a/arch/arm/mach-msm/io.c b/arch/arm/mach-msm/io.c index efa113e4de86..3dc04ccaf59f 100644 --- a/arch/arm/mach-msm/io.c +++ b/arch/arm/mach-msm/io.c @@ -168,7 +168,7 @@ void __init msm_map_msm7x30_io(void) } #endif /* CONFIG_ARCH_MSM7X30 */ -void __iomem *__msm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__msm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { if (mtype == MT_DEVICE) { diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 284313f3e02c..b6418fd5fe0d 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -23,10 +23,10 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> #include <asm/localtimer.h> -#include <asm/sched_clock.h> #include "common.h" diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index 749a7f8c4992..75062eff2494 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -413,7 +413,7 @@ void __init mv78xx0_init(void) clk_init(); } -void mv78xx0_restart(char mode, const char *cmd) +void mv78xx0_restart(enum reboot_mode mode, const char *cmd) { /* * Enable soft reset to assert RSTOUTn. diff --git a/arch/arm/mach-mv78xx0/common.h b/arch/arm/mach-mv78xx0/common.h index 5e9485bad0ac..6889af26077d 100644 --- a/arch/arm/mach-mv78xx0/common.h +++ b/arch/arm/mach-mv78xx0/common.h @@ -11,6 +11,8 @@ #ifndef __ARCH_MV78XX0_COMMON_H #define __ARCH_MV78XX0_COMMON_H +#include <linux/reboot.h> + struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -45,7 +47,7 @@ void mv78xx0_uart1_init(void); void mv78xx0_uart2_init(void); void mv78xx0_uart3_init(void); void mv78xx0_i2c_init(void); -void mv78xx0_restart(char, const char *); +void mv78xx0_restart(enum reboot_mode, const char *); extern void mv78xx0_timer_init(void); diff --git a/arch/arm/mach-mvebu/common.h b/arch/arm/mach-mvebu/common.h index 98defd5e92cd..e366010e1d91 100644 --- a/arch/arm/mach-mvebu/common.h +++ b/arch/arm/mach-mvebu/common.h @@ -17,7 +17,9 @@ #define ARMADA_XP_MAX_CPUS 4 -void mvebu_restart(char mode, const char *cmd); +#include <linux/reboot.h> + +void mvebu_restart(enum reboot_mode mode, const char *cmd); void armada_370_xp_init_irq(void); void armada_370_xp_handle_irq(struct pt_regs *regs); diff --git a/arch/arm/mach-mvebu/system-controller.c b/arch/arm/mach-mvebu/system-controller.c index b8079df8c986..f875124ff4f9 100644 --- a/arch/arm/mach-mvebu/system-controller.c +++ b/arch/arm/mach-mvebu/system-controller.c @@ -26,6 +26,7 @@ #include <linux/init.h> #include <linux/of_address.h> #include <linux/io.h> +#include <linux/reboot.h> static void __iomem *system_controller_base; @@ -63,7 +64,7 @@ static struct of_device_id of_system_controller_table[] = { { /* end of list */ }, }; -void mvebu_restart(char mode, const char *cmd) +void mvebu_restart(enum reboot_mode mode, const char *cmd) { if (!system_controller_base) { pr_err("Cannot restart, system-controller not available: check the device tree\n"); diff --git a/arch/arm/mach-mxs/Kconfig b/arch/arm/mach-mxs/Kconfig index 616fe0210da1..8cde9e05b5d6 100644 --- a/arch/arm/mach-mxs/Kconfig +++ b/arch/arm/mach-mxs/Kconfig @@ -10,7 +10,6 @@ config SOC_IMX28 select ARM_AMBA select ARM_CPU_SUSPEND if PM select CPU_ARM926T - select HAVE_CAN_FLEXCAN if CAN select PINCTRL_IMX28 config ARCH_MXS diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 7fa611c1b287..4ce27b536dc9 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -14,12 +14,12 @@ #include <linux/clk/mxs.h> #include <linux/clkdev.h> #include <linux/clocksource.h> -#include <linux/can/platform/flexcan.h> #include <linux/delay.h> #include <linux/err.h> #include <linux/gpio.h> #include <linux/init.h> #include <linux/irqchip/mxs.h> +#include <linux/reboot.h> #include <linux/micrel_phy.h> #include <linux/of_address.h> #include <linux/of_platform.h> @@ -76,41 +76,6 @@ static inline void __mxs_togl(u32 mask, void __iomem *reg) __raw_writel(mask, reg + MXS_TOG_ADDR); } -/* - * MX28EVK_FLEXCAN_SWITCH is shared between both flexcan controllers - */ -#define MX28EVK_FLEXCAN_SWITCH MXS_GPIO_NR(2, 13) - -static int flexcan0_en, flexcan1_en; - -static void mx28evk_flexcan_switch(void) -{ - if (flexcan0_en || flexcan1_en) - gpio_set_value(MX28EVK_FLEXCAN_SWITCH, 1); - else - gpio_set_value(MX28EVK_FLEXCAN_SWITCH, 0); -} - -static void mx28evk_flexcan0_switch(int enable) -{ - flexcan0_en = enable; - mx28evk_flexcan_switch(); -} - -static void mx28evk_flexcan1_switch(int enable) -{ - flexcan1_en = enable; - mx28evk_flexcan_switch(); -} - -static struct flexcan_platform_data flexcan_pdata[2]; - -static struct of_dev_auxdata mxs_auxdata_lookup[] __initdata = { - OF_DEV_AUXDATA("fsl,imx28-flexcan", 0x80032000, NULL, &flexcan_pdata[0]), - OF_DEV_AUXDATA("fsl,imx28-flexcan", 0x80034000, NULL, &flexcan_pdata[1]), - { /* sentinel */ } -}; - #define OCOTP_WORD_OFFSET 0x20 #define OCOTP_WORD_COUNT 0x20 @@ -270,15 +235,6 @@ static void __init imx28_evk_init(void) mxs_saif_clkmux_select(MXS_DIGCTL_SAIF_CLKMUX_EXTMSTR0); } -static void __init imx28_evk_post_init(void) -{ - if (!gpio_request_one(MX28EVK_FLEXCAN_SWITCH, GPIOF_DIR_OUT, - "flexcan-switch")) { - flexcan_pdata[0].transceiver_switch = mx28evk_flexcan0_switch; - flexcan_pdata[1].transceiver_switch = mx28evk_flexcan1_switch; - } -} - static int apx4devkit_phy_fixup(struct phy_device *phy) { phy->dev_flags |= MICREL_PHY_50MHZ_CLK; @@ -484,13 +440,10 @@ static void __init mxs_machine_init(void) crystalfontz_init(); of_platform_populate(NULL, of_default_bus_match_table, - mxs_auxdata_lookup, parent); + NULL, parent); if (of_machine_is_compatible("karo,tx28")) tx28_post_init(); - - if (of_machine_is_compatible("fsl,imx28-evk")) - imx28_evk_post_init(); } #define MX23_CLKCTRL_RESET_OFFSET 0x120 @@ -500,7 +453,7 @@ static void __init mxs_machine_init(void) /* * Reset the system. It is called by machine_restart(). */ -static void mxs_restart(char mode, const char *cmd) +static void mxs_restart(enum reboot_mode mode, const char *cmd) { struct device_node *np; void __iomem *reset_addr; diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c index 1504b68f4c66..db25b0cef3a7 100644 --- a/arch/arm/mach-netx/generic.c +++ b/arch/arm/mach-netx/generic.c @@ -24,6 +24,7 @@ #include <linux/platform_device.h> #include <linux/io.h> #include <linux/irqchip/arm-vic.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <asm/mach/map.h> #include <mach/netx-regs.h> @@ -187,7 +188,7 @@ static int __init netx_init(void) subsys_initcall(netx_init); -void netx_restart(char mode, const char *cmd) +void netx_restart(enum reboot_mode mode, const char *cmd) { writel(NETX_SYSTEM_RES_CR_FIRMW_RES_EN | NETX_SYSTEM_RES_CR_FIRMW_RES, NETX_SYSTEM_RES_CR); diff --git a/arch/arm/mach-netx/generic.h b/arch/arm/mach-netx/generic.h index 768b26bbb42b..bb2ce471cc28 100644 --- a/arch/arm/mach-netx/generic.h +++ b/arch/arm/mach-netx/generic.h @@ -17,8 +17,10 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/reboot.h> + extern void __init netx_map_io(void); extern void __init netx_init_irq(void); -extern void netx_restart(char, const char *); +extern void netx_restart(enum reboot_mode, const char *); extern void netx_timer_init(void); diff --git a/arch/arm/mach-nomadik/cpu-8815.c b/arch/arm/mach-nomadik/cpu-8815.c index 2df209ed1a07..13e0df9c11ce 100644 --- a/arch/arm/mach-nomadik/cpu-8815.c +++ b/arch/arm/mach-nomadik/cpu-8815.c @@ -103,7 +103,7 @@ static void __init cpu8815_map_io(void) iotable_init(cpu8815_io_desc, ARRAY_SIZE(cpu8815_io_desc)); } -static void cpu8815_restart(char mode, const char *cmd) +static void cpu8815_restart(enum reboot_mode mode, const char *cmd) { void __iomem *srcbase = ioremap(NOMADIK_SRC_BASE, SZ_4K); diff --git a/arch/arm/mach-omap1/board-voiceblue.c b/arch/arm/mach-omap1/board-voiceblue.c index 6c116e1a4b01..4677a9ccb3cb 100644 --- a/arch/arm/mach-omap1/board-voiceblue.c +++ b/arch/arm/mach-omap1/board-voiceblue.c @@ -26,6 +26,7 @@ #include <linux/serial_reg.h> #include <linux/smc91x.h> #include <linux/export.h> +#include <linux/reboot.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -215,7 +216,7 @@ void voiceblue_wdt_ping(void) gpio_set_value(0, wdt_gpio_state); } -static void voiceblue_restart(char mode, const char *cmd) +static void voiceblue_restart(enum reboot_mode mode, const char *cmd) { /* * Workaround for 5912/1611b bug mentioned in sprz209d.pdf p. 28 diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h index 14f7e9920479..abec019a5281 100644 --- a/arch/arm/mach-omap1/common.h +++ b/arch/arm/mach-omap1/common.h @@ -28,6 +28,7 @@ #include <linux/mtd/mtd.h> #include <linux/i2c-omap.h> +#include <linux/reboot.h> #include <plat/i2c.h> @@ -70,7 +71,7 @@ static inline int omap_serial_wakeup_init(void) void omap1_init_early(void); void omap1_init_irq(void); void omap1_init_late(void); -void omap1_restart(char, const char *); +void omap1_restart(enum reboot_mode, const char *); extern void __init omap_check_revision(void); diff --git a/arch/arm/mach-omap1/reset.c b/arch/arm/mach-omap1/reset.c index 5eebd7e889d0..72bf4bf4a702 100644 --- a/arch/arm/mach-omap1/reset.c +++ b/arch/arm/mach-omap1/reset.c @@ -3,6 +3,7 @@ */ #include <linux/kernel.h> #include <linux/io.h> +#include <linux/reboot.h> #include <mach/hardware.h> @@ -22,7 +23,7 @@ #define OMAP_EXTWARM_RST_SRC_ID_SHIFT 5 -void omap1_restart(char mode, const char *cmd) +void omap1_restart(enum reboot_mode mode, const char *cmd) { /* * Workaround for 5912/1611b bug mentioned in sprz209d.pdf p. 28 diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 726ec23d29c7..80603d2fef77 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -43,9 +43,9 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/io.h> +#include <linux/sched_clock.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <mach/hardware.h> #include <asm/mach/irq.h> diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 1fdb46216590..c7b32a966f67 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -4,6 +4,7 @@ config ARCH_OMAP config ARCH_OMAP2PLUS bool "TI OMAP2/3/4/5 SoCs with device tree support" if (ARCH_MULTI_V6 || ARCH_MULTI_V7) select ARCH_HAS_CPUFREQ + select ARCH_HAS_BANDGAP select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_OMAP select ARCH_REQUIRE_GPIOLIB diff --git a/arch/arm/mach-omap2/am33xx-restart.c b/arch/arm/mach-omap2/am33xx-restart.c index 88e4fa8af031..1eae96212315 100644 --- a/arch/arm/mach-omap2/am33xx-restart.c +++ b/arch/arm/mach-omap2/am33xx-restart.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ #include <linux/kernel.h> +#include <linux/reboot.h> #include "common.h" #include "prm-regbits-33xx.h" @@ -19,7 +20,7 @@ * Resets the SoC. For @cmd, see the 'reboot' syscall in * kernel/sys.c. No return value. */ -void am33xx_restart(char mode, const char *cmd) +void am33xx_restart(enum reboot_mode mode, const char *cmd) { /* TODO: Handle mode and cmd if necessary */ diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 72cab3f4f16d..dfcc182ecff9 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -31,6 +31,7 @@ #include <linux/i2c.h> #include <linux/i2c/twl.h> #include <linux/i2c-omap.h> +#include <linux/reboot.h> #include <asm/proc-fns.h> @@ -119,33 +120,33 @@ static inline void omap_soc_device_init(void) #endif #if defined(CONFIG_SOC_OMAP2420) || defined(CONFIG_SOC_OMAP2430) -void omap2xxx_restart(char mode, const char *cmd); +void omap2xxx_restart(enum reboot_mode mode, const char *cmd); #else -static inline void omap2xxx_restart(char mode, const char *cmd) +static inline void omap2xxx_restart(enum reboot_mode mode, const char *cmd) { } #endif #ifdef CONFIG_SOC_AM33XX -void am33xx_restart(char mode, const char *cmd); +void am33xx_restart(enum reboot_mode mode, const char *cmd); #else -static inline void am33xx_restart(char mode, const char *cmd) +static inline void am33xx_restart(enum reboot_mode mode, const char *cmd) { } #endif #ifdef CONFIG_ARCH_OMAP3 -void omap3xxx_restart(char mode, const char *cmd); +void omap3xxx_restart(enum reboot_mode mode, const char *cmd); #else -static inline void omap3xxx_restart(char mode, const char *cmd) +static inline void omap3xxx_restart(enum reboot_mode mode, const char *cmd) { } #endif #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) -void omap44xx_restart(char mode, const char *cmd); +void omap44xx_restart(enum reboot_mode mode, const char *cmd); #else -static inline void omap44xx_restart(char mode, const char *cmd) +static inline void omap44xx_restart(enum reboot_mode mode, const char *cmd) { } #endif diff --git a/arch/arm/mach-omap2/omap2-restart.c b/arch/arm/mach-omap2/omap2-restart.c index 719b716a4494..68423e26399d 100644 --- a/arch/arm/mach-omap2/omap2-restart.c +++ b/arch/arm/mach-omap2/omap2-restart.c @@ -31,7 +31,7 @@ static struct clk *reset_virt_prcm_set_ck, *reset_sys_ck; * Set the DPLL to bypass so that reboot completes successfully. No * return value. */ -void omap2xxx_restart(char mode, const char *cmd) +void omap2xxx_restart(enum reboot_mode mode, const char *cmd) { u32 rate; diff --git a/arch/arm/mach-omap2/omap3-restart.c b/arch/arm/mach-omap2/omap3-restart.c index 923c582189e5..5de2a0c2979d 100644 --- a/arch/arm/mach-omap2/omap3-restart.c +++ b/arch/arm/mach-omap2/omap3-restart.c @@ -12,6 +12,7 @@ */ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/reboot.h> #include "iomap.h" #include "common.h" @@ -28,7 +29,7 @@ * Resets the SoC. For @cmd, see the 'reboot' syscall in * kernel/sys.c. No return value. */ -void omap3xxx_restart(char mode, const char *cmd) +void omap3xxx_restart(enum reboot_mode mode, const char *cmd) { omap3_ctrl_write_boot_mode((cmd ? (u8)*cmd : 0)); omap3xxx_prm_dpll3_reset(); /* never returns */ diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 38cd3a69cff3..57911430324e 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -23,6 +23,7 @@ #include <linux/export.h> #include <linux/irqchip/arm-gic.h> #include <linux/of_address.h> +#include <linux/reboot.h> #include <asm/hardware/cache-l2x0.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-omap2/omap4-restart.c b/arch/arm/mach-omap2/omap4-restart.c index f90e02e11898..41dfd7da8170 100644 --- a/arch/arm/mach-omap2/omap4-restart.c +++ b/arch/arm/mach-omap2/omap4-restart.c @@ -8,6 +8,7 @@ */ #include <linux/types.h> +#include <linux/reboot.h> #include "prminst44xx.h" /** @@ -18,7 +19,7 @@ * Resets the SoC. For @cmd, see the 'reboot' syscall in * kernel/sys.c. No return value. */ -void omap44xx_restart(char mode, const char *cmd) +void omap44xx_restart(enum reboot_mode mode, const char *cmd) { /* XXX Should save 'cmd' into scratchpad for use after reboot */ omap4_prminst_global_warm_sw_reset(); /* never returns */ diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index 68be532f8688..5cc92874be7e 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -588,11 +588,6 @@ static int _od_runtime_suspend(struct device *dev) return ret; } -static int _od_runtime_idle(struct device *dev) -{ - return pm_generic_runtime_idle(dev); -} - static int _od_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -648,7 +643,7 @@ static int _od_resume_noirq(struct device *dev) struct dev_pm_domain omap_device_pm_domain = { .ops = { SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume, - _od_runtime_idle) + NULL) USE_PLATFORM_PM_SLEEP_OPS .suspend_noirq = _od_suspend_noirq, .resume_noirq = _od_resume_noirq, diff --git a/arch/arm/mach-omap2/smartreflex-class3.c b/arch/arm/mach-omap2/smartreflex-class3.c index aee3c8940a30..7a42e1960c3b 100644 --- a/arch/arm/mach-omap2/smartreflex-class3.c +++ b/arch/arm/mach-omap2/smartreflex-class3.c @@ -26,14 +26,14 @@ static int sr_class3_enable(struct omap_sr *sr) } omap_vp_enable(sr->voltdm); - return sr_enable(sr->voltdm, volt); + return sr_enable(sr, volt); } static int sr_class3_disable(struct omap_sr *sr, int is_volt_reset) { - sr_disable_errgen(sr->voltdm); + sr_disable_errgen(sr); omap_vp_disable(sr->voltdm); - sr_disable(sr->voltdm); + sr_disable(sr); if (is_volt_reset) voltdm_reset(sr->voltdm); @@ -42,7 +42,7 @@ static int sr_class3_disable(struct omap_sr *sr, int is_volt_reset) static int sr_class3_configure(struct omap_sr *sr) { - return sr_configure_errgen(sr->voltdm); + return sr_configure_errgen(sr); } /* SR class3 structure */ diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 3bdb0fb02028..29ac667b7a8b 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -41,10 +41,10 @@ #include <linux/of_irq.h> #include <linux/platform_device.h> #include <linux/platform_data/dmtimer-omap.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> #include <asm/smp_twd.h> -#include <asm/sched_clock.h> #include "omap_hwmod.h" #include "omap_device.h" diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index f8a6db9239bf..b41599f98a8e 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -347,7 +347,7 @@ void __init orion5x_init(void) orion5x_wdt_init(); } -void orion5x_restart(char mode, const char *cmd) +void orion5x_restart(enum reboot_mode mode, const char *cmd) { /* * Enable and issue soft reset diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h index cdaa01f3d186..a909afb384fb 100644 --- a/arch/arm/mach-orion5x/common.h +++ b/arch/arm/mach-orion5x/common.h @@ -1,6 +1,8 @@ #ifndef __ARCH_ORION5X_COMMON_H #define __ARCH_ORION5X_COMMON_H +#include <linux/reboot.h> + struct dsa_platform_data; struct mv643xx_eth_platform_data; struct mv_sata_platform_data; @@ -29,7 +31,7 @@ void orion5x_spi_init(void); void orion5x_uart0_init(void); void orion5x_uart1_init(void); void orion5x_xor_init(void); -void orion5x_restart(char, const char *); +void orion5x_restart(enum reboot_mode, const char *); /* * PCIe/PCI functions. diff --git a/arch/arm/mach-orion5x/ls-chl-setup.c b/arch/arm/mach-orion5x/ls-chl-setup.c index 24f4e14e5893..6234977b5aea 100644 --- a/arch/arm/mach-orion5x/ls-chl-setup.c +++ b/arch/arm/mach-orion5x/ls-chl-setup.c @@ -139,7 +139,7 @@ static struct mv_sata_platform_data lschl_sata_data = { static void lschl_power_off(void) { - orion5x_restart('h', NULL); + orion5x_restart(REBOOT_HARD, NULL); } /***************************************************************************** diff --git a/arch/arm/mach-orion5x/ls_hgl-setup.c b/arch/arm/mach-orion5x/ls_hgl-setup.c index fc653bb41e78..fe04c4b64569 100644 --- a/arch/arm/mach-orion5x/ls_hgl-setup.c +++ b/arch/arm/mach-orion5x/ls_hgl-setup.c @@ -185,7 +185,7 @@ static struct mv_sata_platform_data ls_hgl_sata_data = { static void ls_hgl_power_off(void) { - orion5x_restart('h', NULL); + orion5x_restart(REBOOT_HARD, NULL); } diff --git a/arch/arm/mach-orion5x/lsmini-setup.c b/arch/arm/mach-orion5x/lsmini-setup.c index 18e66e617dc2..ca4dbe973daf 100644 --- a/arch/arm/mach-orion5x/lsmini-setup.c +++ b/arch/arm/mach-orion5x/lsmini-setup.c @@ -185,7 +185,7 @@ static struct mv_sata_platform_data lsmini_sata_data = { static void lsmini_power_off(void) { - orion5x_restart('h', NULL); + orion5x_restart(REBOOT_HARD, NULL); } diff --git a/arch/arm/mach-picoxcell/common.c b/arch/arm/mach-picoxcell/common.c index b13f51bc35cf..ec79fea82704 100644 --- a/arch/arm/mach-picoxcell/common.c +++ b/arch/arm/mach-picoxcell/common.c @@ -11,6 +11,7 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_platform.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -63,7 +64,7 @@ static const char *picoxcell_dt_match[] = { NULL }; -static void picoxcell_wdt_restart(char mode, const char *cmd) +static void picoxcell_wdt_restart(enum reboot_mode mode, const char *cmd) { /* * Configure the watchdog to reset with the shortest possible timeout diff --git a/arch/arm/mach-prima2/common.h b/arch/arm/mach-prima2/common.h index 81135cd88e54..a6304858474a 100644 --- a/arch/arm/mach-prima2/common.h +++ b/arch/arm/mach-prima2/common.h @@ -10,6 +10,8 @@ #define __MACH_PRIMA2_COMMON_H__ #include <linux/init.h> +#include <linux/reboot.h> + #include <asm/mach/time.h> #include <asm/exception.h> @@ -22,7 +24,7 @@ extern void sirfsoc_cpu_die(unsigned int cpu); extern void __init sirfsoc_of_irq_init(void); extern void __init sirfsoc_of_clk_init(void); -extern void sirfsoc_restart(char, const char *); +extern void sirfsoc_restart(enum reboot_mode, const char *); extern asmlinkage void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs); #ifndef CONFIG_DEBUG_LL diff --git a/arch/arm/mach-prima2/rstc.c b/arch/arm/mach-prima2/rstc.c index d5e0cbc934c0..ccb53391147a 100644 --- a/arch/arm/mach-prima2/rstc.c +++ b/arch/arm/mach-prima2/rstc.c @@ -13,6 +13,7 @@ #include <linux/device.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/reboot.h> void __iomem *sirfsoc_rstc_base; static DEFINE_MUTEX(rstc_lock); @@ -84,7 +85,7 @@ int sirfsoc_reset_device(struct device *dev) #define SIRFSOC_SYS_RST_BIT BIT(31) -void sirfsoc_restart(char mode, const char *cmd) +void sirfsoc_restart(enum reboot_mode mode, const char *cmd) { writel(SIRFSOC_SYS_RST_BIT, sirfsoc_rstc_base); } diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 96100dbf5a2e..a8427115ee07 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -615,12 +615,14 @@ endmenu config PXA25x bool select CPU_XSCALE + select CPU_FREQ_TABLE if CPU_FREQ help Select code specific to PXA21x/25x/26x variants config PXA27x bool select CPU_XSCALE + select CPU_FREQ_TABLE if CPU_FREQ help Select code specific to PXA27x variants @@ -633,6 +635,7 @@ config CPU_PXA26x config PXA3xx bool select CPU_XSC3 + select CPU_FREQ_TABLE if CPU_FREQ help Select code specific to PXA3xx variants diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index a5b8fead7d61..f162f1b77cd2 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -663,16 +663,16 @@ static void corgi_poweroff(void) /* Green LED off tells the bootloader to halt */ gpio_set_value(CORGI_GPIO_LED_GREEN, 0); - pxa_restart('h', NULL); + pxa_restart(REBOOT_HARD, NULL); } -static void corgi_restart(char mode, const char *cmd) +static void corgi_restart(enum reboot_mode mode, const char *cmd) { if (!machine_is_corgi()) /* Green LED on tells the bootloader to reboot */ gpio_set_value(CORGI_GPIO_LED_GREEN, 1); - pxa_restart('h', cmd); + pxa_restart(REBOOT_HARD, cmd); } static void __init corgi_init(void) diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 446563a7d1ad..f6726bb4eb95 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -833,21 +833,25 @@ static inline void em_x270_init_ac97(void) {} #endif #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int em_x270_module_matrix_keys[] = { +static const unsigned int em_x270_module_matrix_keys[] = { KEY(0, 0, KEY_A), KEY(1, 0, KEY_UP), KEY(2, 1, KEY_B), KEY(0, 2, KEY_LEFT), KEY(1, 1, KEY_ENTER), KEY(2, 0, KEY_RIGHT), KEY(0, 1, KEY_C), KEY(1, 2, KEY_DOWN), KEY(2, 2, KEY_D), }; +static struct matrix_keymap_data em_x270_matrix_keymap_data = { + .keymap = em_x270_module_matrix_keys, + .keymap_size = ARRAY_SIZE(em_x270_module_matrix_keys), +}; + struct pxa27x_keypad_platform_data em_x270_module_keypad_info = { /* code map for the matrix keys */ .matrix_key_rows = 3, .matrix_key_cols = 3, - .matrix_key_map = em_x270_module_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(em_x270_module_matrix_keys), + .matrix_keymap_data = &em_x270_matrix_keymap_data, }; -static unsigned int em_x270_exeda_matrix_keys[] = { +static const unsigned int em_x270_exeda_matrix_keys[] = { KEY(0, 0, KEY_RIGHTSHIFT), KEY(0, 1, KEY_RIGHTCTRL), KEY(0, 2, KEY_RIGHTALT), KEY(0, 3, KEY_SPACE), KEY(0, 4, KEY_LEFTALT), KEY(0, 5, KEY_LEFTCTRL), @@ -889,12 +893,16 @@ static unsigned int em_x270_exeda_matrix_keys[] = { KEY(7, 6, 0), KEY(7, 7, 0), }; +static struct matrix_keymap_data em_x270_exeda_matrix_keymap_data = { + .keymap = em_x270_exeda_matrix_keys, + .keymap_size = ARRAY_SIZE(em_x270_exeda_matrix_keys), +}; + struct pxa27x_keypad_platform_data em_x270_exeda_keypad_info = { /* code map for the matrix keys */ .matrix_key_rows = 8, .matrix_key_cols = 8, - .matrix_key_map = em_x270_exeda_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(em_x270_exeda_matrix_keys), + .matrix_keymap_data = &em_x270_exeda_matrix_keymap_data, }; static void __init em_x270_init_keypad(void) diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c index dca10709be8f..fe2eb8394dff 100644 --- a/arch/arm/mach-pxa/ezx.c +++ b/arch/arm/mach-pxa/ezx.c @@ -392,7 +392,7 @@ static unsigned long e6_pin_config[] __initdata = { /* KEYPAD */ #ifdef CONFIG_MACH_EZX_A780 -static unsigned int a780_key_map[] = { +static const unsigned int a780_key_map[] = { KEY(0, 0, KEY_SEND), KEY(0, 1, KEY_BACK), KEY(0, 2, KEY_END), @@ -424,11 +424,15 @@ static unsigned int a780_key_map[] = { KEY(4, 4, KEY_DOWN), }; +static struct matrix_keymap_data a780_matrix_keymap_data = { + .keymap = a780_key_map, + .keymap_size = ARRAY_SIZE(a780_key_map), +}; + static struct pxa27x_keypad_platform_data a780_keypad_platform_data = { .matrix_key_rows = 5, .matrix_key_cols = 5, - .matrix_key_map = a780_key_map, - .matrix_key_map_size = ARRAY_SIZE(a780_key_map), + .matrix_keymap_data = &a780_matrix_keymap_data, .direct_key_map = { KEY_CAMERA }, .direct_key_num = 1, @@ -438,7 +442,7 @@ static struct pxa27x_keypad_platform_data a780_keypad_platform_data = { #endif /* CONFIG_MACH_EZX_A780 */ #ifdef CONFIG_MACH_EZX_E680 -static unsigned int e680_key_map[] = { +static const unsigned int e680_key_map[] = { KEY(0, 0, KEY_UP), KEY(0, 1, KEY_RIGHT), KEY(0, 2, KEY_RESERVED), @@ -455,11 +459,15 @@ static unsigned int e680_key_map[] = { KEY(2, 3, KEY_KPENTER), }; +static struct matrix_keymap_data e680_matrix_keymap_data = { + .keymap = e680_key_map, + .keymap_size = ARRAY_SIZE(e680_key_map), +}; + static struct pxa27x_keypad_platform_data e680_keypad_platform_data = { .matrix_key_rows = 3, .matrix_key_cols = 4, - .matrix_key_map = e680_key_map, - .matrix_key_map_size = ARRAY_SIZE(e680_key_map), + .matrix_keymap_data = &e680_matrix_keymap_data, .direct_key_map = { KEY_CAMERA, @@ -476,7 +484,7 @@ static struct pxa27x_keypad_platform_data e680_keypad_platform_data = { #endif /* CONFIG_MACH_EZX_E680 */ #ifdef CONFIG_MACH_EZX_A1200 -static unsigned int a1200_key_map[] = { +static const unsigned int a1200_key_map[] = { KEY(0, 0, KEY_RESERVED), KEY(0, 1, KEY_RIGHT), KEY(0, 2, KEY_PAGEDOWN), @@ -513,18 +521,22 @@ static unsigned int a1200_key_map[] = { KEY(4, 5, KEY_RESERVED), }; +static struct matrix_keymap_data a1200_matrix_keymap_data = { + .keymap = a1200_key_map, + .keymap_size = ARRAY_SIZE(a1200_key_map), +}; + static struct pxa27x_keypad_platform_data a1200_keypad_platform_data = { .matrix_key_rows = 5, .matrix_key_cols = 6, - .matrix_key_map = a1200_key_map, - .matrix_key_map_size = ARRAY_SIZE(a1200_key_map), + .matrix_keymap_data = &a1200_matrix_keymap_data, .debounce_interval = 30, }; #endif /* CONFIG_MACH_EZX_A1200 */ #ifdef CONFIG_MACH_EZX_E6 -static unsigned int e6_key_map[] = { +static const unsigned int e6_key_map[] = { KEY(0, 0, KEY_RESERVED), KEY(0, 1, KEY_RIGHT), KEY(0, 2, KEY_PAGEDOWN), @@ -561,18 +573,22 @@ static unsigned int e6_key_map[] = { KEY(4, 5, KEY_PREVIOUSSONG), }; +static struct matrix_keymap_data e6_keymap_data = { + .keymap = e6_key_map, + .keymap_size = ARRAY_SIZE(e6_key_map), +}; + static struct pxa27x_keypad_platform_data e6_keypad_platform_data = { .matrix_key_rows = 5, .matrix_key_cols = 6, - .matrix_key_map = e6_key_map, - .matrix_key_map_size = ARRAY_SIZE(e6_key_map), + .matrix_keymap_data = &e6_keymap_data, .debounce_interval = 30, }; #endif /* CONFIG_MACH_EZX_E6 */ #ifdef CONFIG_MACH_EZX_A910 -static unsigned int a910_key_map[] = { +static const unsigned int a910_key_map[] = { KEY(0, 0, KEY_NUMERIC_6), KEY(0, 1, KEY_RIGHT), KEY(0, 2, KEY_PAGEDOWN), @@ -609,18 +625,22 @@ static unsigned int a910_key_map[] = { KEY(4, 5, KEY_RESERVED), }; +static struct matrix_keymap_data a910_matrix_keymap_data = { + .keymap = a910_key_map, + .keymap_size = ARRAY_SIZE(a910_key_map), +}; + static struct pxa27x_keypad_platform_data a910_keypad_platform_data = { .matrix_key_rows = 5, .matrix_key_cols = 6, - .matrix_key_map = a910_key_map, - .matrix_key_map_size = ARRAY_SIZE(a910_key_map), + .matrix_keymap_data = &a910_matrix_keymap_data, .debounce_interval = 30, }; #endif /* CONFIG_MACH_EZX_A910 */ #ifdef CONFIG_MACH_EZX_E2 -static unsigned int e2_key_map[] = { +static const unsigned int e2_key_map[] = { KEY(0, 0, KEY_NUMERIC_6), KEY(0, 1, KEY_RIGHT), KEY(0, 2, KEY_NUMERIC_9), @@ -657,11 +677,15 @@ static unsigned int e2_key_map[] = { KEY(4, 5, KEY_RESERVED), }; +static struct matrix_keymap_data e2_matrix_keymap_data = { + .keymap = e2_key_map, + .keymap_size = ARRAY_SIZE(e2_key_map), +}; + static struct pxa27x_keypad_platform_data e2_keypad_platform_data = { .matrix_key_rows = 5, .matrix_key_cols = 6, - .matrix_key_map = e2_key_map, - .matrix_key_map_size = ARRAY_SIZE(e2_key_map), + .matrix_keymap_data = &e2_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h index fd7ea39b78c0..8963984d1f43 100644 --- a/arch/arm/mach-pxa/generic.h +++ b/arch/arm/mach-pxa/generic.h @@ -9,6 +9,8 @@ * published by the Free Software Foundation. */ +#include <linux/reboot.h> + struct irq_data; extern void pxa_timer_init(void); @@ -56,4 +58,4 @@ void __init pxa_set_btuart_info(void *info); void __init pxa_set_stuart_info(void *info); void __init pxa_set_hwuart_info(void *info); -void pxa_restart(char, const char *); +void pxa_restart(enum reboot_mode, const char *); diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index e848c4607baf..5d665588c7eb 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -222,7 +222,7 @@ static inline void littleton_init_spi(void) {} #endif #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int littleton_matrix_key_map[] = { +static const unsigned int littleton_matrix_key_map[] = { /* KEY(row, col, key_code) */ KEY(1, 3, KEY_0), KEY(0, 0, KEY_1), KEY(1, 0, KEY_2), KEY(2, 0, KEY_3), KEY(0, 1, KEY_4), KEY(1, 1, KEY_5), KEY(2, 1, KEY_6), KEY(0, 2, KEY_7), @@ -249,11 +249,15 @@ static unsigned int littleton_matrix_key_map[] = { KEY(3, 1, KEY_F23), /* soft2 */ }; +static struct matrix_keymap_data littleton_matrix_keymap_data = { + .keymap = littleton_matrix_key_map, + .keymap_size = ARRAY_SIZE(littleton_matrix_key_map), +}; + static struct pxa27x_keypad_platform_data littleton_keypad_info = { .matrix_key_rows = 6, .matrix_key_cols = 5, - .matrix_key_map = littleton_matrix_key_map, - .matrix_key_map_size = ARRAY_SIZE(littleton_matrix_key_map), + .matrix_keymap_data = &littleton_matrix_keymap_data, .enable_rotary0 = 1, .rotary0_up_key = KEY_UP, diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 7a12c1ba90ff..d2c652318376 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -498,7 +498,7 @@ static struct pxaohci_platform_data mainstone_ohci_platform_data = { }; #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int mainstone_matrix_keys[] = { +static const unsigned int mainstone_matrix_keys[] = { KEY(0, 0, KEY_A), KEY(1, 0, KEY_B), KEY(2, 0, KEY_C), KEY(3, 0, KEY_D), KEY(4, 0, KEY_E), KEY(5, 0, KEY_F), KEY(0, 1, KEY_G), KEY(1, 1, KEY_H), KEY(2, 1, KEY_I), @@ -527,11 +527,15 @@ static unsigned int mainstone_matrix_keys[] = { KEY(4, 6, KEY_SELECT), }; +static struct matrix_keymap_data mainstone_matrix_keymap_data = { + .keymap = mainstone_matrix_keys, + .keymap_size = ARRAY_SIZE(mainstone_matrix_keys), +}; + struct pxa27x_keypad_platform_data mainstone_keypad_info = { .matrix_key_rows = 6, .matrix_key_cols = 7, - .matrix_key_map = mainstone_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(mainstone_matrix_keys), + .matrix_keymap_data = &mainstone_matrix_keymap_data, .enable_rotary0 = 1, .rotary0_up_key = KEY_UP, diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c index f8979b943cbf..acc9d3cc0762 100644 --- a/arch/arm/mach-pxa/mioa701.c +++ b/arch/arm/mach-pxa/mioa701.c @@ -37,6 +37,7 @@ #include <linux/wm97xx.h> #include <linux/mtd/physmap.h> #include <linux/usb/gpio_vbus.h> +#include <linux/reboot.h> #include <linux/regulator/max1586.h> #include <linux/slab.h> #include <linux/i2c/pxa-i2c.h> @@ -222,7 +223,7 @@ static struct pxafb_mach_info mioa701_pxafb_info = { /* * Keyboard configuration */ -static unsigned int mioa701_matrix_keys[] = { +static const unsigned int mioa701_matrix_keys[] = { KEY(0, 0, KEY_UP), KEY(0, 1, KEY_RIGHT), KEY(0, 2, KEY_MEDIA), @@ -233,11 +234,16 @@ static unsigned int mioa701_matrix_keys[] = { KEY(2, 1, KEY_PHONE), /* Phone Green key */ KEY(2, 2, KEY_CAMERA) /* Camera key */ }; + +static struct matrix_keymap_data mioa701_matrix_keymap_data = { + .keymap = mioa701_matrix_keys, + .keymap_size = ARRAY_SIZE(mioa701_matrix_keys), +}; + static struct pxa27x_keypad_platform_data mioa701_keypad_info = { .matrix_key_rows = 3, .matrix_key_cols = 3, - .matrix_key_map = mioa701_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(mioa701_matrix_keys), + .matrix_keymap_data = &mioa701_matrix_keymap_data, }; /* @@ -691,13 +697,13 @@ static void mioa701_machine_exit(void); static void mioa701_poweroff(void) { mioa701_machine_exit(); - pxa_restart('s', NULL); + pxa_restart(REBOOT_SOFT, NULL); } -static void mioa701_restart(char c, const char *cmd) +static void mioa701_restart(enum reboot_mode c, const char *cmd) { mioa701_machine_exit(); - pxa_restart('s', cmd); + pxa_restart(REBOOT_SOFT, cmd); } static struct gpio global_gpios[] = { @@ -756,7 +762,6 @@ static void mioa701_machine_exit(void) MACHINE_START(MIOA701, "MIO A701") .atag_offset = 0x100, - .restart_mode = 's', .map_io = &pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, .init_irq = &pxa27x_init_irq, diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c index 909b713e5789..cf210b11ffcc 100644 --- a/arch/arm/mach-pxa/palmld.c +++ b/arch/arm/mach-pxa/palmld.c @@ -173,7 +173,7 @@ static inline void palmld_nor_init(void) {} * GPIO keyboard ******************************************************************************/ #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int palmld_matrix_keys[] = { +static const unsigned int palmld_matrix_keys[] = { KEY(0, 1, KEY_F2), KEY(0, 2, KEY_UP), @@ -190,11 +190,15 @@ static unsigned int palmld_matrix_keys[] = { KEY(3, 2, KEY_LEFT), }; +static struct matrix_keymap_data palmld_matrix_keymap_data = { + .keymap = palmld_matrix_keys, + .keymap_size = ARRAY_SIZE(palmld_matrix_keys), +}; + static struct pxa27x_keypad_platform_data palmld_keypad_platform_data = { .matrix_key_rows = 4, .matrix_key_cols = 3, - .matrix_key_map = palmld_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(palmld_matrix_keys), + .matrix_keymap_data = &palmld_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c index 5033fd07968f..3ed9b029428b 100644 --- a/arch/arm/mach-pxa/palmt5.c +++ b/arch/arm/mach-pxa/palmt5.c @@ -108,7 +108,7 @@ static unsigned long palmt5_pin_config[] __initdata = { * GPIO keyboard ******************************************************************************/ #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int palmt5_matrix_keys[] = { +static const unsigned int palmt5_matrix_keys[] = { KEY(0, 0, KEY_POWER), KEY(0, 1, KEY_F1), KEY(0, 2, KEY_ENTER), @@ -124,11 +124,15 @@ static unsigned int palmt5_matrix_keys[] = { KEY(3, 2, KEY_LEFT), }; +static struct matrix_keymap_data palmt5_matrix_keymap_data = { + .keymap = palmt5_matrix_keys, + .keymap_size = ARRAY_SIZE(palmt5_matrix_keys), +}; + static struct pxa27x_keypad_platform_data palmt5_keypad_platform_data = { .matrix_key_rows = 4, .matrix_key_cols = 3, - .matrix_key_map = palmt5_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(palmt5_matrix_keys), + .matrix_keymap_data = &palmt5_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c index d82a50b4a803..d8b937c870de 100644 --- a/arch/arm/mach-pxa/palmtreo.c +++ b/arch/arm/mach-pxa/palmtreo.c @@ -168,7 +168,7 @@ static unsigned long centro685_pin_config[] __initdata = { * GPIO keyboard ******************************************************************************/ #if IS_ENABLED(CONFIG_KEYBOARD_PXA27x) -static unsigned int treo680_matrix_keys[] = { +static const unsigned int treo680_matrix_keys[] = { KEY(0, 0, KEY_F8), /* Red/Off/Power */ KEY(0, 1, KEY_LEFT), KEY(0, 2, KEY_LEFTCTRL), /* Alternate */ @@ -227,7 +227,7 @@ static unsigned int treo680_matrix_keys[] = { KEY(7, 5, KEY_I), }; -static unsigned int centro_matrix_keys[] = { +static const unsigned int centro_matrix_keys[] = { KEY(0, 0, KEY_F9), /* Home */ KEY(0, 1, KEY_LEFT), KEY(0, 2, KEY_LEFTCTRL), /* Alternate */ @@ -286,11 +286,20 @@ static unsigned int centro_matrix_keys[] = { KEY(7, 5, KEY_I), }; +static struct matrix_keymap_data treo680_matrix_keymap_data = { + .keymap = treo680_matrix_keys, + .keymap_size = ARRAY_SIZE(treo680_matrix_keys), +}; + +static struct matrix_keymap_data centro_matrix_keymap_data = { + .keymap = centro_matrix_keys, + .keymap_size = ARRAY_SIZE(centro_matrix_keys), +}; + static struct pxa27x_keypad_platform_data treo680_keypad_pdata = { .matrix_key_rows = 8, .matrix_key_cols = 7, - .matrix_key_map = treo680_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(treo680_matrix_keys), + .matrix_keymap_data = &treo680_matrix_keymap_data, .direct_key_map = { KEY_CONNECT }, .direct_key_num = 1, @@ -301,10 +310,8 @@ static void __init palmtreo_kpc_init(void) { static struct pxa27x_keypad_platform_data *data = &treo680_keypad_pdata; - if (machine_is_centro()) { - data->matrix_key_map = centro_matrix_keys; - data->matrix_key_map_size = ARRAY_SIZE(centro_matrix_keys); - } + if (machine_is_centro()) + data->matrix_keymap_data = ¢ro_matrix_keymap_data; pxa_set_keypad_info(&treo680_keypad_pdata); } diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c index 627c93a7364c..83f830dd8ad8 100644 --- a/arch/arm/mach-pxa/palmtx.c +++ b/arch/arm/mach-pxa/palmtx.c @@ -176,7 +176,7 @@ static inline void palmtx_nor_init(void) {} * GPIO keyboard ******************************************************************************/ #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int palmtx_matrix_keys[] = { +static const unsigned int palmtx_matrix_keys[] = { KEY(0, 0, KEY_POWER), KEY(0, 1, KEY_F1), KEY(0, 2, KEY_ENTER), @@ -192,11 +192,15 @@ static unsigned int palmtx_matrix_keys[] = { KEY(3, 2, KEY_LEFT), }; +static struct matrix_keymap_data palmtx_matrix_keymap_data = { + .keymap = palmtx_matrix_keys, + .keymap_size = ARRAY_SIZE(palmtx_matrix_keys), +}; + static struct pxa27x_keypad_platform_data palmtx_keypad_platform_data = { .matrix_key_rows = 4, .matrix_key_cols = 3, - .matrix_key_map = palmtx_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(palmtx_matrix_keys), + .matrix_keymap_data = &palmtx_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index 18b7fcd98592..1a35ddf218da 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -140,7 +140,7 @@ static unsigned long palmz72_pin_config[] __initdata = { * GPIO keyboard ******************************************************************************/ #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int palmz72_matrix_keys[] = { +static const unsigned int palmz72_matrix_keys[] = { KEY(0, 0, KEY_POWER), KEY(0, 1, KEY_F1), KEY(0, 2, KEY_ENTER), @@ -156,11 +156,15 @@ static unsigned int palmz72_matrix_keys[] = { KEY(3, 2, KEY_LEFT), }; +static struct matrix_keymap_data almz72_matrix_keymap_data = { + .keymap = palmz72_matrix_keys, + .keymap_size = ARRAY_SIZE(palmz72_matrix_keys), +}; + static struct pxa27x_keypad_platform_data palmz72_keypad_platform_data = { .matrix_key_rows = 4, .matrix_key_cols = 3, - .matrix_key_map = palmz72_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(palmz72_matrix_keys), + .matrix_keymap_data = &almz72_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 50ccd5f1d560..711d37e26bd8 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -422,7 +422,7 @@ static struct i2c_board_info __initdata poodle_i2c_devices[] = { static void poodle_poweroff(void) { - pxa_restart('h', NULL); + pxa_restart(REBOOT_HARD, NULL); } static void __init poodle_init(void) diff --git a/arch/arm/mach-pxa/reset.c b/arch/arm/mach-pxa/reset.c index 3fab583755d4..0d5dd646f61f 100644 --- a/arch/arm/mach-pxa/reset.c +++ b/arch/arm/mach-pxa/reset.c @@ -83,7 +83,7 @@ static void do_hw_reset(void) writel_relaxed(readl_relaxed(OSCR) + 368640, OSMR3); } -void pxa_restart(char mode, const char *cmd) +void pxa_restart(enum reboot_mode mode, const char *cmd) { local_irq_disable(); local_fiq_disable(); @@ -91,14 +91,14 @@ void pxa_restart(char mode, const char *cmd) clear_reset_status(RESET_STATUS_ALL); switch (mode) { - case 's': + case REBOOT_SOFT: /* Jump into ROM at address 0 */ soft_restart(0); break; - case 'g': + case REBOOT_GPIO: do_gpio_reset(); break; - case 'h': + case REBOOT_HARD: default: do_hw_reset(); break; diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 362726c49c70..2125df0444e7 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -31,6 +31,7 @@ #include <linux/regulator/machine.h> #include <linux/io.h> #include <linux/module.h> +#include <linux/reboot.h> #include <asm/setup.h> #include <asm/mach-types.h> @@ -924,10 +925,10 @@ static inline void spitz_i2c_init(void) {} ******************************************************************************/ static void spitz_poweroff(void) { - pxa_restart('g', NULL); + pxa_restart(REBOOT_GPIO, NULL); } -static void spitz_restart(char mode, const char *cmd) +static void spitz_restart(enum reboot_mode mode, const char *cmd) { uint32_t msc0 = __raw_readl(MSC0); /* Bootloader magic for a reboot */ @@ -979,7 +980,6 @@ static void __init spitz_fixup(struct tag *tags, char **cmdline, #ifdef CONFIG_MACH_SPITZ MACHINE_START(SPITZ, "SHARP Spitz") - .restart_mode = 'g', .fixup = spitz_fixup, .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, @@ -993,7 +993,6 @@ MACHINE_END #ifdef CONFIG_MACH_BORZOI MACHINE_START(BORZOI, "SHARP Borzoi") - .restart_mode = 'g', .fixup = spitz_fixup, .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, @@ -1007,7 +1006,6 @@ MACHINE_END #ifdef CONFIG_MACH_AKITA MACHINE_START(AKITA, "SHARP Akita") - .restart_mode = 'g', .fixup = spitz_fixup, .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, diff --git a/arch/arm/mach-pxa/tavorevb.c b/arch/arm/mach-pxa/tavorevb.c index f55979c09a5f..4680efe55345 100644 --- a/arch/arm/mach-pxa/tavorevb.c +++ b/arch/arm/mach-pxa/tavorevb.c @@ -106,7 +106,7 @@ static struct platform_device smc91x_device = { }; #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int tavorevb_matrix_key_map[] = { +static const unsigned int tavorevb_matrix_key_map[] = { /* KEY(row, col, key_code) */ KEY(0, 4, KEY_A), KEY(0, 5, KEY_B), KEY(0, 6, KEY_C), KEY(1, 4, KEY_E), KEY(1, 5, KEY_F), KEY(1, 6, KEY_G), @@ -147,11 +147,15 @@ static unsigned int tavorevb_matrix_key_map[] = { KEY(3, 3, KEY_F23), /* soft2 */ }; +static struct matrix_keymap_data tavorevb_matrix_keymap_data = { + .keymap = tavorevb_matrix_key_map, + .keymap_size = ARRAY_SIZE(tavorevb_matrix_key_map), +}; + static struct pxa27x_keypad_platform_data tavorevb_keypad_info = { .matrix_key_rows = 7, .matrix_key_cols = 7, - .matrix_key_map = tavorevb_matrix_key_map, - .matrix_key_map_size = ARRAY_SIZE(tavorevb_matrix_key_map), + .matrix_keymap_data = &tavorevb_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 8f1ee92aea30..9aa852a8fab9 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -16,11 +16,11 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/clockchips.h> +#include <linux/sched_clock.h> #include <asm/div64.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <mach/regs-ost.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index 3d91d2e5bf3a..0206b915a6f6 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -36,6 +36,7 @@ #include <linux/input/matrix_keypad.h> #include <linux/i2c/pxa-i2c.h> #include <linux/usb/gpio_vbus.h> +#include <linux/reboot.h> #include <asm/setup.h> #include <asm/mach-types.h> @@ -911,10 +912,10 @@ static struct platform_device *devices[] __initdata = { static void tosa_poweroff(void) { - pxa_restart('g', NULL); + pxa_restart(REBOOT_GPIO, NULL); } -static void tosa_restart(char mode, const char *cmd) +static void tosa_restart(enum reboot_mode mode, const char *cmd) { uint32_t msc0 = __raw_readl(MSC0); @@ -969,7 +970,6 @@ static void __init fixup_tosa(struct tag *tags, char **cmdline, } MACHINE_START(TOSA, "SHARP Tosa") - .restart_mode = 'g', .fixup = fixup_tosa, .map_io = pxa25x_map_io, .nr_irqs = TOSA_NR_IRQS, diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c index 989903a7e467..2513d8f4931f 100644 --- a/arch/arm/mach-pxa/z2.c +++ b/arch/arm/mach-pxa/z2.c @@ -345,7 +345,7 @@ static inline void z2_leds_init(void) {} * GPIO keyboard ******************************************************************************/ #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int z2_matrix_keys[] = { +static const unsigned int z2_matrix_keys[] = { KEY(0, 0, KEY_OPTION), KEY(1, 0, KEY_UP), KEY(2, 0, KEY_DOWN), @@ -405,11 +405,15 @@ static unsigned int z2_matrix_keys[] = { KEY(5, 7, KEY_DOT), }; +static struct matrix_keymap_data z2_matrix_keymap_data = { + .keymap = z2_matrix_keys, + .keymap_size = ARRAY_SIZE(z2_matrix_keys), +}; + static struct pxa27x_keypad_platform_data z2_keypad_platform_data = { .matrix_key_rows = 7, .matrix_key_cols = 8, - .matrix_key_map = z2_matrix_keys, - .matrix_key_map_size = ARRAY_SIZE(z2_matrix_keys), + .matrix_keymap_data = &z2_matrix_keymap_data, .debounce_interval = 30, }; diff --git a/arch/arm/mach-pxa/zylonite.c b/arch/arm/mach-pxa/zylonite.c index 1f00d650ac27..36cf7cf95ec1 100644 --- a/arch/arm/mach-pxa/zylonite.c +++ b/arch/arm/mach-pxa/zylonite.c @@ -263,7 +263,7 @@ static inline void zylonite_init_mmc(void) {} #endif #if defined(CONFIG_KEYBOARD_PXA27x) || defined(CONFIG_KEYBOARD_PXA27x_MODULE) -static unsigned int zylonite_matrix_key_map[] = { +static const unsigned int zylonite_matrix_key_map[] = { /* KEY(row, col, key_code) */ KEY(0, 0, KEY_A), KEY(0, 1, KEY_B), KEY(0, 2, KEY_C), KEY(0, 5, KEY_D), KEY(1, 0, KEY_E), KEY(1, 1, KEY_F), KEY(1, 2, KEY_G), KEY(1, 5, KEY_H), @@ -306,11 +306,15 @@ static unsigned int zylonite_matrix_key_map[] = { KEY(0, 3, KEY_AUX), /* contact */ }; +static struct matrix_keymap_data zylonite_matrix_keymap_data = { + .keymap = zylonite_matrix_key_map, + .keymap_size = ARRAY_SIZE(zylonite_matrix_key_map), +}; + static struct pxa27x_keypad_platform_data zylonite_keypad_info = { .matrix_key_rows = 8, .matrix_key_cols = 8, - .matrix_key_map = zylonite_matrix_key_map, - .matrix_key_map_size = ARRAY_SIZE(zylonite_matrix_key_map), + .matrix_keymap_data = &zylonite_matrix_keymap_data, .enable_rotary0 = 1, .rotary0_up_key = KEY_UP, diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index 5b1c8bfe6fa9..c85ddb2a0ad0 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -29,6 +29,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -418,7 +419,7 @@ static void __init realview_eb_timer_init(void) realview_eb_twd_init(); } -static void realview_eb_restart(char mode, const char *cmd) +static void realview_eb_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c index d5e83a1f6982..c5eade76461b 100644 --- a/arch/arm/mach-realview/realview_pb1176.c +++ b/arch/arm/mach-realview/realview_pb1176.c @@ -31,6 +31,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -329,7 +330,7 @@ static void __init realview_pb1176_timer_init(void) realview_timer_init(IRQ_DC1176_TIMER0); } -static void realview_pb1176_restart(char mode, const char *cmd) +static void realview_pb1176_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c index c3cfe213b5e6..f4b0962578fe 100644 --- a/arch/arm/mach-realview/realview_pb11mp.c +++ b/arch/arm/mach-realview/realview_pb11mp.c @@ -29,6 +29,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -316,7 +317,7 @@ static void __init realview_pb11mp_timer_init(void) realview_pb11mp_twd_init(); } -static void realview_pb11mp_restart(char mode, const char *cmd) +static void realview_pb11mp_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c index dde652a59620..10a3e1d76891 100644 --- a/arch/arm/mach-realview/realview_pba8.c +++ b/arch/arm/mach-realview/realview_pba8.c @@ -29,6 +29,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/mach-types.h> @@ -264,7 +265,7 @@ static void __init realview_pba8_timer_init(void) realview_timer_init(IRQ_PBA8_TIMER0_1); } -static void realview_pba8_restart(char mode, const char *cmd) +static void realview_pba8_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c index 54f0185b01e3..9d75493e3f0c 100644 --- a/arch/arm/mach-realview/realview_pbx.c +++ b/arch/arm/mach-realview/realview_pbx.c @@ -28,6 +28,7 @@ #include <linux/io.h> #include <linux/irqchip/arm-gic.h> #include <linux/platform_data/clk-realview.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/mach-types.h> @@ -344,7 +345,7 @@ static void realview_pbx_fixup(struct tag *tags, char **from, #endif } -static void realview_pbx_restart(char mode, const char *cmd) +static void realview_pbx_restart(enum reboot_mode mode, const char *cmd) { void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL); void __iomem *lock_ctrl = __io_address(REALVIEW_SYS_LOCK); diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index a302cf5e0fc7..09d602b10d57 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -20,6 +20,7 @@ #include <linux/ata_platform.h> #include <linux/io.h> #include <linux/i2c.h> +#include <linux/reboot.h> #include <asm/elf.h> #include <asm/mach-types.h> @@ -201,7 +202,7 @@ static int __init rpc_init(void) arch_initcall(rpc_init); -static void rpc_restart(char mode, const char *cmd) +static void rpc_restart(enum reboot_mode mode, const char *cmd) { iomd_writeb(0, IOMD_ROMCR0); diff --git a/arch/arm/mach-s3c24xx/common.h b/arch/arm/mach-s3c24xx/common.h index 307c3714be55..84b280654f4c 100644 --- a/arch/arm/mach-s3c24xx/common.h +++ b/arch/arm/mach-s3c24xx/common.h @@ -12,6 +12,8 @@ #ifndef __ARCH_ARM_MACH_S3C24XX_COMMON_H #define __ARCH_ARM_MACH_S3C24XX_COMMON_H __FILE__ +#include <linux/reboot.h> + struct s3c2410_uartcfg; #ifdef CONFIG_CPU_S3C2410 @@ -20,7 +22,7 @@ extern int s3c2410a_init(void); extern void s3c2410_map_io(void); extern void s3c2410_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c2410_init_clocks(int xtal); -extern void s3c2410_restart(char mode, const char *cmd); +extern void s3c2410_restart(enum reboot_mode mode, const char *cmd); extern void s3c2410_init_irq(void); #else #define s3c2410_init_clocks NULL @@ -36,7 +38,7 @@ extern void s3c2412_map_io(void); extern void s3c2412_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c2412_init_clocks(int xtal); extern int s3c2412_baseclk_add(void); -extern void s3c2412_restart(char mode, const char *cmd); +extern void s3c2412_restart(enum reboot_mode mode, const char *cmd); extern void s3c2412_init_irq(void); #else #define s3c2412_init_clocks NULL @@ -51,7 +53,7 @@ extern void s3c2416_map_io(void); extern void s3c2416_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c2416_init_clocks(int xtal); extern int s3c2416_baseclk_add(void); -extern void s3c2416_restart(char mode, const char *cmd); +extern void s3c2416_restart(enum reboot_mode mode, const char *cmd); extern void s3c2416_init_irq(void); extern struct syscore_ops s3c2416_irq_syscore_ops; @@ -66,7 +68,7 @@ extern struct syscore_ops s3c2416_irq_syscore_ops; extern void s3c244x_map_io(void); extern void s3c244x_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c244x_init_clocks(int xtal); -extern void s3c244x_restart(char mode, const char *cmd); +extern void s3c244x_restart(enum reboot_mode mode, const char *cmd); #else #define s3c244x_init_clocks NULL #define s3c244x_init_uarts NULL @@ -96,7 +98,7 @@ extern void s3c2443_map_io(void); extern void s3c2443_init_uarts(struct s3c2410_uartcfg *cfg, int no); extern void s3c2443_init_clocks(int xtal); extern int s3c2443_baseclk_add(void); -extern void s3c2443_restart(char mode, const char *cmd); +extern void s3c2443_restart(enum reboot_mode mode, const char *cmd); extern void s3c2443_init_irq(void); #else #define s3c2443_init_clocks NULL diff --git a/arch/arm/mach-s3c24xx/cpufreq-utils.c b/arch/arm/mach-s3c24xx/cpufreq-utils.c index ddd8280e3875..2a0aa5684e72 100644 --- a/arch/arm/mach-s3c24xx/cpufreq-utils.c +++ b/arch/arm/mach-s3c24xx/cpufreq-utils.c @@ -60,5 +60,5 @@ void s3c2410_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg) */ void s3c2410_set_fvco(struct s3c_cpufreq_config *cfg) { - __raw_writel(cfg->pll.index, S3C2410_MPLLCON); + __raw_writel(cfg->pll.driver_data, S3C2410_MPLLCON); } diff --git a/arch/arm/mach-s3c24xx/pll-s3c2410.c b/arch/arm/mach-s3c24xx/pll-s3c2410.c index dcf3420a3271..5e37d368594b 100644 --- a/arch/arm/mach-s3c24xx/pll-s3c2410.c +++ b/arch/arm/mach-s3c24xx/pll-s3c2410.c @@ -33,36 +33,36 @@ #include <plat/cpu-freq-core.h> static struct cpufreq_frequency_table pll_vals_12MHz[] = { - { .frequency = 34000000, .index = PLLVAL(82, 2, 3), }, - { .frequency = 45000000, .index = PLLVAL(82, 1, 3), }, - { .frequency = 51000000, .index = PLLVAL(161, 3, 3), }, - { .frequency = 48000000, .index = PLLVAL(120, 2, 3), }, - { .frequency = 56000000, .index = PLLVAL(142, 2, 3), }, - { .frequency = 68000000, .index = PLLVAL(82, 2, 2), }, - { .frequency = 79000000, .index = PLLVAL(71, 1, 2), }, - { .frequency = 85000000, .index = PLLVAL(105, 2, 2), }, - { .frequency = 90000000, .index = PLLVAL(112, 2, 2), }, - { .frequency = 101000000, .index = PLLVAL(127, 2, 2), }, - { .frequency = 113000000, .index = PLLVAL(105, 1, 2), }, - { .frequency = 118000000, .index = PLLVAL(150, 2, 2), }, - { .frequency = 124000000, .index = PLLVAL(116, 1, 2), }, - { .frequency = 135000000, .index = PLLVAL(82, 2, 1), }, - { .frequency = 147000000, .index = PLLVAL(90, 2, 1), }, - { .frequency = 152000000, .index = PLLVAL(68, 1, 1), }, - { .frequency = 158000000, .index = PLLVAL(71, 1, 1), }, - { .frequency = 170000000, .index = PLLVAL(77, 1, 1), }, - { .frequency = 180000000, .index = PLLVAL(82, 1, 1), }, - { .frequency = 186000000, .index = PLLVAL(85, 1, 1), }, - { .frequency = 192000000, .index = PLLVAL(88, 1, 1), }, - { .frequency = 203000000, .index = PLLVAL(161, 3, 1), }, + { .frequency = 34000000, .driver_data = PLLVAL(82, 2, 3), }, + { .frequency = 45000000, .driver_data = PLLVAL(82, 1, 3), }, + { .frequency = 51000000, .driver_data = PLLVAL(161, 3, 3), }, + { .frequency = 48000000, .driver_data = PLLVAL(120, 2, 3), }, + { .frequency = 56000000, .driver_data = PLLVAL(142, 2, 3), }, + { .frequency = 68000000, .driver_data = PLLVAL(82, 2, 2), }, + { .frequency = 79000000, .driver_data = PLLVAL(71, 1, 2), }, + { .frequency = 85000000, .driver_data = PLLVAL(105, 2, 2), }, + { .frequency = 90000000, .driver_data = PLLVAL(112, 2, 2), }, + { .frequency = 101000000, .driver_data = PLLVAL(127, 2, 2), }, + { .frequency = 113000000, .driver_data = PLLVAL(105, 1, 2), }, + { .frequency = 118000000, .driver_data = PLLVAL(150, 2, 2), }, + { .frequency = 124000000, .driver_data = PLLVAL(116, 1, 2), }, + { .frequency = 135000000, .driver_data = PLLVAL(82, 2, 1), }, + { .frequency = 147000000, .driver_data = PLLVAL(90, 2, 1), }, + { .frequency = 152000000, .driver_data = PLLVAL(68, 1, 1), }, + { .frequency = 158000000, .driver_data = PLLVAL(71, 1, 1), }, + { .frequency = 170000000, .driver_data = PLLVAL(77, 1, 1), }, + { .frequency = 180000000, .driver_data = PLLVAL(82, 1, 1), }, + { .frequency = 186000000, .driver_data = PLLVAL(85, 1, 1), }, + { .frequency = 192000000, .driver_data = PLLVAL(88, 1, 1), }, + { .frequency = 203000000, .driver_data = PLLVAL(161, 3, 1), }, /* 2410A extras */ - { .frequency = 210000000, .index = PLLVAL(132, 2, 1), }, - { .frequency = 226000000, .index = PLLVAL(105, 1, 1), }, - { .frequency = 266000000, .index = PLLVAL(125, 1, 1), }, - { .frequency = 268000000, .index = PLLVAL(126, 1, 1), }, - { .frequency = 270000000, .index = PLLVAL(127, 1, 1), }, + { .frequency = 210000000, .driver_data = PLLVAL(132, 2, 1), }, + { .frequency = 226000000, .driver_data = PLLVAL(105, 1, 1), }, + { .frequency = 266000000, .driver_data = PLLVAL(125, 1, 1), }, + { .frequency = 268000000, .driver_data = PLLVAL(126, 1, 1), }, + { .frequency = 270000000, .driver_data = PLLVAL(127, 1, 1), }, }; static int s3c2410_plls_add(struct device *dev, struct subsys_interface *sif) diff --git a/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c b/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c index 673781758319..a19460e6e7b0 100644 --- a/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c +++ b/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c @@ -21,33 +21,33 @@ #include <plat/cpu-freq-core.h> static struct cpufreq_frequency_table s3c2440_plls_12[] __initdata = { - { .frequency = 75000000, .index = PLLVAL(0x75, 3, 3), }, /* FVco 600.000000 */ - { .frequency = 80000000, .index = PLLVAL(0x98, 4, 3), }, /* FVco 640.000000 */ - { .frequency = 90000000, .index = PLLVAL(0x70, 2, 3), }, /* FVco 720.000000 */ - { .frequency = 100000000, .index = PLLVAL(0x5c, 1, 3), }, /* FVco 800.000000 */ - { .frequency = 110000000, .index = PLLVAL(0x66, 1, 3), }, /* FVco 880.000000 */ - { .frequency = 120000000, .index = PLLVAL(0x70, 1, 3), }, /* FVco 960.000000 */ - { .frequency = 150000000, .index = PLLVAL(0x75, 3, 2), }, /* FVco 600.000000 */ - { .frequency = 160000000, .index = PLLVAL(0x98, 4, 2), }, /* FVco 640.000000 */ - { .frequency = 170000000, .index = PLLVAL(0x4d, 1, 2), }, /* FVco 680.000000 */ - { .frequency = 180000000, .index = PLLVAL(0x70, 2, 2), }, /* FVco 720.000000 */ - { .frequency = 190000000, .index = PLLVAL(0x57, 1, 2), }, /* FVco 760.000000 */ - { .frequency = 200000000, .index = PLLVAL(0x5c, 1, 2), }, /* FVco 800.000000 */ - { .frequency = 210000000, .index = PLLVAL(0x84, 2, 2), }, /* FVco 840.000000 */ - { .frequency = 220000000, .index = PLLVAL(0x66, 1, 2), }, /* FVco 880.000000 */ - { .frequency = 230000000, .index = PLLVAL(0x6b, 1, 2), }, /* FVco 920.000000 */ - { .frequency = 240000000, .index = PLLVAL(0x70, 1, 2), }, /* FVco 960.000000 */ - { .frequency = 300000000, .index = PLLVAL(0x75, 3, 1), }, /* FVco 600.000000 */ - { .frequency = 310000000, .index = PLLVAL(0x93, 4, 1), }, /* FVco 620.000000 */ - { .frequency = 320000000, .index = PLLVAL(0x98, 4, 1), }, /* FVco 640.000000 */ - { .frequency = 330000000, .index = PLLVAL(0x66, 2, 1), }, /* FVco 660.000000 */ - { .frequency = 340000000, .index = PLLVAL(0x4d, 1, 1), }, /* FVco 680.000000 */ - { .frequency = 350000000, .index = PLLVAL(0xa7, 4, 1), }, /* FVco 700.000000 */ - { .frequency = 360000000, .index = PLLVAL(0x70, 2, 1), }, /* FVco 720.000000 */ - { .frequency = 370000000, .index = PLLVAL(0xb1, 4, 1), }, /* FVco 740.000000 */ - { .frequency = 380000000, .index = PLLVAL(0x57, 1, 1), }, /* FVco 760.000000 */ - { .frequency = 390000000, .index = PLLVAL(0x7a, 2, 1), }, /* FVco 780.000000 */ - { .frequency = 400000000, .index = PLLVAL(0x5c, 1, 1), }, /* FVco 800.000000 */ + { .frequency = 75000000, .driver_data = PLLVAL(0x75, 3, 3), }, /* FVco 600.000000 */ + { .frequency = 80000000, .driver_data = PLLVAL(0x98, 4, 3), }, /* FVco 640.000000 */ + { .frequency = 90000000, .driver_data = PLLVAL(0x70, 2, 3), }, /* FVco 720.000000 */ + { .frequency = 100000000, .driver_data = PLLVAL(0x5c, 1, 3), }, /* FVco 800.000000 */ + { .frequency = 110000000, .driver_data = PLLVAL(0x66, 1, 3), }, /* FVco 880.000000 */ + { .frequency = 120000000, .driver_data = PLLVAL(0x70, 1, 3), }, /* FVco 960.000000 */ + { .frequency = 150000000, .driver_data = PLLVAL(0x75, 3, 2), }, /* FVco 600.000000 */ + { .frequency = 160000000, .driver_data = PLLVAL(0x98, 4, 2), }, /* FVco 640.000000 */ + { .frequency = 170000000, .driver_data = PLLVAL(0x4d, 1, 2), }, /* FVco 680.000000 */ + { .frequency = 180000000, .driver_data = PLLVAL(0x70, 2, 2), }, /* FVco 720.000000 */ + { .frequency = 190000000, .driver_data = PLLVAL(0x57, 1, 2), }, /* FVco 760.000000 */ + { .frequency = 200000000, .driver_data = PLLVAL(0x5c, 1, 2), }, /* FVco 800.000000 */ + { .frequency = 210000000, .driver_data = PLLVAL(0x84, 2, 2), }, /* FVco 840.000000 */ + { .frequency = 220000000, .driver_data = PLLVAL(0x66, 1, 2), }, /* FVco 880.000000 */ + { .frequency = 230000000, .driver_data = PLLVAL(0x6b, 1, 2), }, /* FVco 920.000000 */ + { .frequency = 240000000, .driver_data = PLLVAL(0x70, 1, 2), }, /* FVco 960.000000 */ + { .frequency = 300000000, .driver_data = PLLVAL(0x75, 3, 1), }, /* FVco 600.000000 */ + { .frequency = 310000000, .driver_data = PLLVAL(0x93, 4, 1), }, /* FVco 620.000000 */ + { .frequency = 320000000, .driver_data = PLLVAL(0x98, 4, 1), }, /* FVco 640.000000 */ + { .frequency = 330000000, .driver_data = PLLVAL(0x66, 2, 1), }, /* FVco 660.000000 */ + { .frequency = 340000000, .driver_data = PLLVAL(0x4d, 1, 1), }, /* FVco 680.000000 */ + { .frequency = 350000000, .driver_data = PLLVAL(0xa7, 4, 1), }, /* FVco 700.000000 */ + { .frequency = 360000000, .driver_data = PLLVAL(0x70, 2, 1), }, /* FVco 720.000000 */ + { .frequency = 370000000, .driver_data = PLLVAL(0xb1, 4, 1), }, /* FVco 740.000000 */ + { .frequency = 380000000, .driver_data = PLLVAL(0x57, 1, 1), }, /* FVco 760.000000 */ + { .frequency = 390000000, .driver_data = PLLVAL(0x7a, 2, 1), }, /* FVco 780.000000 */ + { .frequency = 400000000, .driver_data = PLLVAL(0x5c, 1, 1), }, /* FVco 800.000000 */ }; static int s3c2440_plls12_add(struct device *dev, struct subsys_interface *sif) diff --git a/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c b/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c index debfa106289b..1191b2905625 100644 --- a/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c +++ b/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c @@ -21,61 +21,61 @@ #include <plat/cpu-freq-core.h> static struct cpufreq_frequency_table s3c2440_plls_169344[] __initdata = { - { .frequency = 78019200, .index = PLLVAL(121, 5, 3), }, /* FVco 624.153600 */ - { .frequency = 84067200, .index = PLLVAL(131, 5, 3), }, /* FVco 672.537600 */ - { .frequency = 90115200, .index = PLLVAL(141, 5, 3), }, /* FVco 720.921600 */ - { .frequency = 96163200, .index = PLLVAL(151, 5, 3), }, /* FVco 769.305600 */ - { .frequency = 102135600, .index = PLLVAL(185, 6, 3), }, /* FVco 817.084800 */ - { .frequency = 108259200, .index = PLLVAL(171, 5, 3), }, /* FVco 866.073600 */ - { .frequency = 114307200, .index = PLLVAL(127, 3, 3), }, /* FVco 914.457600 */ - { .frequency = 120234240, .index = PLLVAL(134, 3, 3), }, /* FVco 961.873920 */ - { .frequency = 126161280, .index = PLLVAL(141, 3, 3), }, /* FVco 1009.290240 */ - { .frequency = 132088320, .index = PLLVAL(148, 3, 3), }, /* FVco 1056.706560 */ - { .frequency = 138015360, .index = PLLVAL(155, 3, 3), }, /* FVco 1104.122880 */ - { .frequency = 144789120, .index = PLLVAL(163, 3, 3), }, /* FVco 1158.312960 */ - { .frequency = 150100363, .index = PLLVAL(187, 9, 2), }, /* FVco 600.401454 */ - { .frequency = 156038400, .index = PLLVAL(121, 5, 2), }, /* FVco 624.153600 */ - { .frequency = 162086400, .index = PLLVAL(126, 5, 2), }, /* FVco 648.345600 */ - { .frequency = 168134400, .index = PLLVAL(131, 5, 2), }, /* FVco 672.537600 */ - { .frequency = 174048000, .index = PLLVAL(177, 7, 2), }, /* FVco 696.192000 */ - { .frequency = 180230400, .index = PLLVAL(141, 5, 2), }, /* FVco 720.921600 */ - { .frequency = 186278400, .index = PLLVAL(124, 4, 2), }, /* FVco 745.113600 */ - { .frequency = 192326400, .index = PLLVAL(151, 5, 2), }, /* FVco 769.305600 */ - { .frequency = 198132480, .index = PLLVAL(109, 3, 2), }, /* FVco 792.529920 */ - { .frequency = 204271200, .index = PLLVAL(185, 6, 2), }, /* FVco 817.084800 */ - { .frequency = 210268800, .index = PLLVAL(141, 4, 2), }, /* FVco 841.075200 */ - { .frequency = 216518400, .index = PLLVAL(171, 5, 2), }, /* FVco 866.073600 */ - { .frequency = 222264000, .index = PLLVAL(97, 2, 2), }, /* FVco 889.056000 */ - { .frequency = 228614400, .index = PLLVAL(127, 3, 2), }, /* FVco 914.457600 */ - { .frequency = 234259200, .index = PLLVAL(158, 4, 2), }, /* FVco 937.036800 */ - { .frequency = 240468480, .index = PLLVAL(134, 3, 2), }, /* FVco 961.873920 */ - { .frequency = 246960000, .index = PLLVAL(167, 4, 2), }, /* FVco 987.840000 */ - { .frequency = 252322560, .index = PLLVAL(141, 3, 2), }, /* FVco 1009.290240 */ - { .frequency = 258249600, .index = PLLVAL(114, 2, 2), }, /* FVco 1032.998400 */ - { .frequency = 264176640, .index = PLLVAL(148, 3, 2), }, /* FVco 1056.706560 */ - { .frequency = 270950400, .index = PLLVAL(120, 2, 2), }, /* FVco 1083.801600 */ - { .frequency = 276030720, .index = PLLVAL(155, 3, 2), }, /* FVco 1104.122880 */ - { .frequency = 282240000, .index = PLLVAL(92, 1, 2), }, /* FVco 1128.960000 */ - { .frequency = 289578240, .index = PLLVAL(163, 3, 2), }, /* FVco 1158.312960 */ - { .frequency = 294235200, .index = PLLVAL(131, 2, 2), }, /* FVco 1176.940800 */ - { .frequency = 300200727, .index = PLLVAL(187, 9, 1), }, /* FVco 600.401454 */ - { .frequency = 306358690, .index = PLLVAL(191, 9, 1), }, /* FVco 612.717380 */ - { .frequency = 312076800, .index = PLLVAL(121, 5, 1), }, /* FVco 624.153600 */ - { .frequency = 318366720, .index = PLLVAL(86, 3, 1), }, /* FVco 636.733440 */ - { .frequency = 324172800, .index = PLLVAL(126, 5, 1), }, /* FVco 648.345600 */ - { .frequency = 330220800, .index = PLLVAL(109, 4, 1), }, /* FVco 660.441600 */ - { .frequency = 336268800, .index = PLLVAL(131, 5, 1), }, /* FVco 672.537600 */ - { .frequency = 342074880, .index = PLLVAL(93, 3, 1), }, /* FVco 684.149760 */ - { .frequency = 348096000, .index = PLLVAL(177, 7, 1), }, /* FVco 696.192000 */ - { .frequency = 355622400, .index = PLLVAL(118, 4, 1), }, /* FVco 711.244800 */ - { .frequency = 360460800, .index = PLLVAL(141, 5, 1), }, /* FVco 720.921600 */ - { .frequency = 366206400, .index = PLLVAL(165, 6, 1), }, /* FVco 732.412800 */ - { .frequency = 372556800, .index = PLLVAL(124, 4, 1), }, /* FVco 745.113600 */ - { .frequency = 378201600, .index = PLLVAL(126, 4, 1), }, /* FVco 756.403200 */ - { .frequency = 384652800, .index = PLLVAL(151, 5, 1), }, /* FVco 769.305600 */ - { .frequency = 391608000, .index = PLLVAL(177, 6, 1), }, /* FVco 783.216000 */ - { .frequency = 396264960, .index = PLLVAL(109, 3, 1), }, /* FVco 792.529920 */ - { .frequency = 402192000, .index = PLLVAL(87, 2, 1), }, /* FVco 804.384000 */ + { .frequency = 78019200, .driver_data = PLLVAL(121, 5, 3), }, /* FVco 624.153600 */ + { .frequency = 84067200, .driver_data = PLLVAL(131, 5, 3), }, /* FVco 672.537600 */ + { .frequency = 90115200, .driver_data = PLLVAL(141, 5, 3), }, /* FVco 720.921600 */ + { .frequency = 96163200, .driver_data = PLLVAL(151, 5, 3), }, /* FVco 769.305600 */ + { .frequency = 102135600, .driver_data = PLLVAL(185, 6, 3), }, /* FVco 817.084800 */ + { .frequency = 108259200, .driver_data = PLLVAL(171, 5, 3), }, /* FVco 866.073600 */ + { .frequency = 114307200, .driver_data = PLLVAL(127, 3, 3), }, /* FVco 914.457600 */ + { .frequency = 120234240, .driver_data = PLLVAL(134, 3, 3), }, /* FVco 961.873920 */ + { .frequency = 126161280, .driver_data = PLLVAL(141, 3, 3), }, /* FVco 1009.290240 */ + { .frequency = 132088320, .driver_data = PLLVAL(148, 3, 3), }, /* FVco 1056.706560 */ + { .frequency = 138015360, .driver_data = PLLVAL(155, 3, 3), }, /* FVco 1104.122880 */ + { .frequency = 144789120, .driver_data = PLLVAL(163, 3, 3), }, /* FVco 1158.312960 */ + { .frequency = 150100363, .driver_data = PLLVAL(187, 9, 2), }, /* FVco 600.401454 */ + { .frequency = 156038400, .driver_data = PLLVAL(121, 5, 2), }, /* FVco 624.153600 */ + { .frequency = 162086400, .driver_data = PLLVAL(126, 5, 2), }, /* FVco 648.345600 */ + { .frequency = 168134400, .driver_data = PLLVAL(131, 5, 2), }, /* FVco 672.537600 */ + { .frequency = 174048000, .driver_data = PLLVAL(177, 7, 2), }, /* FVco 696.192000 */ + { .frequency = 180230400, .driver_data = PLLVAL(141, 5, 2), }, /* FVco 720.921600 */ + { .frequency = 186278400, .driver_data = PLLVAL(124, 4, 2), }, /* FVco 745.113600 */ + { .frequency = 192326400, .driver_data = PLLVAL(151, 5, 2), }, /* FVco 769.305600 */ + { .frequency = 198132480, .driver_data = PLLVAL(109, 3, 2), }, /* FVco 792.529920 */ + { .frequency = 204271200, .driver_data = PLLVAL(185, 6, 2), }, /* FVco 817.084800 */ + { .frequency = 210268800, .driver_data = PLLVAL(141, 4, 2), }, /* FVco 841.075200 */ + { .frequency = 216518400, .driver_data = PLLVAL(171, 5, 2), }, /* FVco 866.073600 */ + { .frequency = 222264000, .driver_data = PLLVAL(97, 2, 2), }, /* FVco 889.056000 */ + { .frequency = 228614400, .driver_data = PLLVAL(127, 3, 2), }, /* FVco 914.457600 */ + { .frequency = 234259200, .driver_data = PLLVAL(158, 4, 2), }, /* FVco 937.036800 */ + { .frequency = 240468480, .driver_data = PLLVAL(134, 3, 2), }, /* FVco 961.873920 */ + { .frequency = 246960000, .driver_data = PLLVAL(167, 4, 2), }, /* FVco 987.840000 */ + { .frequency = 252322560, .driver_data = PLLVAL(141, 3, 2), }, /* FVco 1009.290240 */ + { .frequency = 258249600, .driver_data = PLLVAL(114, 2, 2), }, /* FVco 1032.998400 */ + { .frequency = 264176640, .driver_data = PLLVAL(148, 3, 2), }, /* FVco 1056.706560 */ + { .frequency = 270950400, .driver_data = PLLVAL(120, 2, 2), }, /* FVco 1083.801600 */ + { .frequency = 276030720, .driver_data = PLLVAL(155, 3, 2), }, /* FVco 1104.122880 */ + { .frequency = 282240000, .driver_data = PLLVAL(92, 1, 2), }, /* FVco 1128.960000 */ + { .frequency = 289578240, .driver_data = PLLVAL(163, 3, 2), }, /* FVco 1158.312960 */ + { .frequency = 294235200, .driver_data = PLLVAL(131, 2, 2), }, /* FVco 1176.940800 */ + { .frequency = 300200727, .driver_data = PLLVAL(187, 9, 1), }, /* FVco 600.401454 */ + { .frequency = 306358690, .driver_data = PLLVAL(191, 9, 1), }, /* FVco 612.717380 */ + { .frequency = 312076800, .driver_data = PLLVAL(121, 5, 1), }, /* FVco 624.153600 */ + { .frequency = 318366720, .driver_data = PLLVAL(86, 3, 1), }, /* FVco 636.733440 */ + { .frequency = 324172800, .driver_data = PLLVAL(126, 5, 1), }, /* FVco 648.345600 */ + { .frequency = 330220800, .driver_data = PLLVAL(109, 4, 1), }, /* FVco 660.441600 */ + { .frequency = 336268800, .driver_data = PLLVAL(131, 5, 1), }, /* FVco 672.537600 */ + { .frequency = 342074880, .driver_data = PLLVAL(93, 3, 1), }, /* FVco 684.149760 */ + { .frequency = 348096000, .driver_data = PLLVAL(177, 7, 1), }, /* FVco 696.192000 */ + { .frequency = 355622400, .driver_data = PLLVAL(118, 4, 1), }, /* FVco 711.244800 */ + { .frequency = 360460800, .driver_data = PLLVAL(141, 5, 1), }, /* FVco 720.921600 */ + { .frequency = 366206400, .driver_data = PLLVAL(165, 6, 1), }, /* FVco 732.412800 */ + { .frequency = 372556800, .driver_data = PLLVAL(124, 4, 1), }, /* FVco 745.113600 */ + { .frequency = 378201600, .driver_data = PLLVAL(126, 4, 1), }, /* FVco 756.403200 */ + { .frequency = 384652800, .driver_data = PLLVAL(151, 5, 1), }, /* FVco 769.305600 */ + { .frequency = 391608000, .driver_data = PLLVAL(177, 6, 1), }, /* FVco 783.216000 */ + { .frequency = 396264960, .driver_data = PLLVAL(109, 3, 1), }, /* FVco 792.529920 */ + { .frequency = 402192000, .driver_data = PLLVAL(87, 2, 1), }, /* FVco 804.384000 */ }; static int s3c2440_plls169344_add(struct device *dev, diff --git a/arch/arm/mach-s3c24xx/s3c2410.c b/arch/arm/mach-s3c24xx/s3c2410.c index ff384acc65b2..34676d1d5fec 100644 --- a/arch/arm/mach-s3c24xx/s3c2410.c +++ b/arch/arm/mach-s3c24xx/s3c2410.c @@ -22,6 +22,7 @@ #include <linux/syscore_ops.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/io.h> #include <asm/mach/arch.h> @@ -196,9 +197,9 @@ int __init s3c2410a_init(void) return s3c2410_init(); } -void s3c2410_restart(char mode, const char *cmd) +void s3c2410_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { soft_restart(0); } diff --git a/arch/arm/mach-s3c24xx/s3c2412.c b/arch/arm/mach-s3c24xx/s3c2412.c index 0f864d4c97de..0251650cbf80 100644 --- a/arch/arm/mach-s3c24xx/s3c2412.c +++ b/arch/arm/mach-s3c24xx/s3c2412.c @@ -22,6 +22,7 @@ #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/io.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -129,9 +130,9 @@ static void s3c2412_idle(void) cpu_do_idle(); } -void s3c2412_restart(char mode, const char *cmd) +void s3c2412_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') + if (mode == REBOOT_SOFT) soft_restart(0); /* errata "Watch-dog/Software Reset Problem" specifies that diff --git a/arch/arm/mach-s3c24xx/s3c2416.c b/arch/arm/mach-s3c24xx/s3c2416.c index b9c5d382dafb..9ef3ccfbe196 100644 --- a/arch/arm/mach-s3c24xx/s3c2416.c +++ b/arch/arm/mach-s3c24xx/s3c2416.c @@ -35,6 +35,7 @@ #include <linux/syscore_ops.h> #include <linux/clk.h> #include <linux/io.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -79,9 +80,9 @@ static struct device s3c2416_dev = { .bus = &s3c2416_subsys, }; -void s3c2416_restart(char mode, const char *cmd) +void s3c2416_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') + if (mode == REBOOT_SOFT) soft_restart(0); __raw_writel(S3C2443_SWRST_RESET, S3C2443_SWRST); diff --git a/arch/arm/mach-s3c24xx/s3c2443.c b/arch/arm/mach-s3c24xx/s3c2443.c index 8328cd65bf3d..b6c71918b25c 100644 --- a/arch/arm/mach-s3c24xx/s3c2443.c +++ b/arch/arm/mach-s3c24xx/s3c2443.c @@ -22,6 +22,7 @@ #include <linux/device.h> #include <linux/clk.h> #include <linux/io.h> +#include <linux/reboot.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -59,9 +60,9 @@ static struct device s3c2443_dev = { .bus = &s3c2443_subsys, }; -void s3c2443_restart(char mode, const char *cmd) +void s3c2443_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') + if (mode == REBOOT_SOFT) soft_restart(0); __raw_writel(S3C2443_SWRST_RESET, S3C2443_SWRST); diff --git a/arch/arm/mach-s3c24xx/s3c244x.c b/arch/arm/mach-s3c24xx/s3c244x.c index d0423e2544c1..911b555029fc 100644 --- a/arch/arm/mach-s3c24xx/s3c244x.c +++ b/arch/arm/mach-s3c24xx/s3c244x.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/device.h> #include <linux/syscore_ops.h> #include <linux/clk.h> @@ -198,9 +199,9 @@ struct syscore_ops s3c244x_pm_syscore_ops = { .resume = s3c244x_resume, }; -void s3c244x_restart(char mode, const char *cmd) +void s3c244x_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') + if (mode == REBOOT_SOFT) soft_restart(0); samsung_wdt_reset(); diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c index 1aed6f4be1ce..3f62e467b129 100644 --- a/arch/arm/mach-s3c64xx/common.c +++ b/arch/arm/mach-s3c64xx/common.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/io.h> #include <linux/dma-mapping.h> #include <linux/irq.h> @@ -381,9 +382,9 @@ static int __init s3c64xx_init_irq_eint(void) } arch_initcall(s3c64xx_init_irq_eint); -void s3c64xx_restart(char mode, const char *cmd) +void s3c64xx_restart(enum reboot_mode mode, const char *cmd) { - if (mode != 's') + if (mode != REBOOT_SOFT) samsung_wdt_reset(); /* if all else fails, or mode was for soft, jump to 0 */ diff --git a/arch/arm/mach-s3c64xx/common.h b/arch/arm/mach-s3c64xx/common.h index 6cfc99bdfb37..e8f990b37665 100644 --- a/arch/arm/mach-s3c64xx/common.h +++ b/arch/arm/mach-s3c64xx/common.h @@ -17,13 +17,15 @@ #ifndef __ARCH_ARM_MACH_S3C64XX_COMMON_H #define __ARCH_ARM_MACH_S3C64XX_COMMON_H +#include <linux/reboot.h> + void s3c64xx_init_irq(u32 vic0, u32 vic1); void s3c64xx_init_io(struct map_desc *mach_desc, int size); void s3c64xx_register_clocks(unsigned long xtal, unsigned armclk_limit); void s3c64xx_setup_clocks(void); -void s3c64xx_restart(char mode, const char *cmd); +void s3c64xx_restart(enum reboot_mode mode, const char *cmd); void s3c64xx_init_late(void); #ifdef CONFIG_CPU_S3C6400 diff --git a/arch/arm/mach-s5p64x0/common.c b/arch/arm/mach-s5p64x0/common.c index 76d0053bf564..dfdfdc320ce7 100644 --- a/arch/arm/mach-s5p64x0/common.c +++ b/arch/arm/mach-s5p64x0/common.c @@ -24,6 +24,7 @@ #include <linux/dma-mapping.h> #include <linux/gpio.h> #include <linux/irq.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/proc-fns.h> @@ -439,9 +440,9 @@ static int __init s5p64x0_init_irq_eint(void) } arch_initcall(s5p64x0_init_irq_eint); -void s5p64x0_restart(char mode, const char *cmd) +void s5p64x0_restart(enum reboot_mode mode, const char *cmd) { - if (mode != 's') + if (mode != REBOOT_SOFT) samsung_wdt_reset(); soft_restart(0); diff --git a/arch/arm/mach-s5p64x0/common.h b/arch/arm/mach-s5p64x0/common.h index f8a60fdc5884..f3a9b43cba4a 100644 --- a/arch/arm/mach-s5p64x0/common.h +++ b/arch/arm/mach-s5p64x0/common.h @@ -12,6 +12,8 @@ #ifndef __ARCH_ARM_MACH_S5P64X0_COMMON_H #define __ARCH_ARM_MACH_S5P64X0_COMMON_H +#include <linux/reboot.h> + void s5p6440_init_irq(void); void s5p6450_init_irq(void); void s5p64x0_init_io(struct map_desc *mach_desc, int size); @@ -22,7 +24,7 @@ void s5p6440_setup_clocks(void); void s5p6450_register_clocks(void); void s5p6450_setup_clocks(void); -void s5p64x0_restart(char mode, const char *cmd); +void s5p64x0_restart(enum reboot_mode mode, const char *cmd); #ifdef CONFIG_CPU_S5P6440 diff --git a/arch/arm/mach-s5pc100/common.c b/arch/arm/mach-s5pc100/common.c index 511031564d35..4bdfecf6d024 100644 --- a/arch/arm/mach-s5pc100/common.c +++ b/arch/arm/mach-s5pc100/common.c @@ -24,6 +24,7 @@ #include <linux/serial_core.h> #include <linux/platform_device.h> #include <linux/sched.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/proc-fns.h> @@ -217,9 +218,9 @@ void __init s5pc100_init_uarts(struct s3c2410_uartcfg *cfg, int no) s3c24xx_init_uartdevs("s3c6400-uart", s5p_uart_resources, cfg, no); } -void s5pc100_restart(char mode, const char *cmd) +void s5pc100_restart(enum reboot_mode mode, const char *cmd) { - if (mode != 's') + if (mode != REBOOT_SOFT) samsung_wdt_reset(); soft_restart(0); diff --git a/arch/arm/mach-s5pc100/common.h b/arch/arm/mach-s5pc100/common.h index c41f912e9e1f..08d782d65d7b 100644 --- a/arch/arm/mach-s5pc100/common.h +++ b/arch/arm/mach-s5pc100/common.h @@ -12,13 +12,15 @@ #ifndef __ARCH_ARM_MACH_S5PC100_COMMON_H #define __ARCH_ARM_MACH_S5PC100_COMMON_H +#include <linux/reboot.h> + void s5pc100_init_io(struct map_desc *mach_desc, int size); void s5pc100_init_irq(void); void s5pc100_register_clocks(void); void s5pc100_setup_clocks(void); -void s5pc100_restart(char mode, const char *cmd); +void s5pc100_restart(enum reboot_mode mode, const char *cmd); extern int s5pc100_init(void); extern void s5pc100_map_io(void); diff --git a/arch/arm/mach-s5pv210/common.c b/arch/arm/mach-s5pv210/common.c index 9dfe93e2624d..023f1a796a9c 100644 --- a/arch/arm/mach-s5pv210/common.c +++ b/arch/arm/mach-s5pv210/common.c @@ -143,7 +143,7 @@ static struct map_desc s5pv210_iodesc[] __initdata = { } }; -void s5pv210_restart(char mode, const char *cmd) +void s5pv210_restart(enum reboot_mode mode, const char *cmd) { __raw_writel(0x1, S5P_SWRESET); } diff --git a/arch/arm/mach-s5pv210/common.h b/arch/arm/mach-s5pv210/common.h index 0a1cc0aef720..fe1beb54e548 100644 --- a/arch/arm/mach-s5pv210/common.h +++ b/arch/arm/mach-s5pv210/common.h @@ -12,13 +12,15 @@ #ifndef __ARCH_ARM_MACH_S5PV210_COMMON_H #define __ARCH_ARM_MACH_S5PV210_COMMON_H +#include <linux/reboot.h> + void s5pv210_init_io(struct map_desc *mach_desc, int size); void s5pv210_init_irq(void); void s5pv210_register_clocks(void); void s5pv210_setup_clocks(void); -void s5pv210_restart(char mode, const char *cmd); +void s5pv210_restart(enum reboot_mode mode, const char *cmd); extern int s5pv210_init(void); extern void s5pv210_map_io(void); diff --git a/arch/arm/mach-s5pv210/mach-aquila.c b/arch/arm/mach-s5pv210/mach-aquila.c index ed2b85485b9d..ad40ab0f5dbd 100644 --- a/arch/arm/mach-s5pv210/mach-aquila.c +++ b/arch/arm/mach-s5pv210/mach-aquila.c @@ -377,12 +377,8 @@ static struct max8998_platform_data aquila_max8998_pdata = { .buck1_set1 = S5PV210_GPH0(3), .buck1_set2 = S5PV210_GPH0(4), .buck2_set3 = S5PV210_GPH0(5), - .buck1_voltage1 = 1200000, - .buck1_voltage2 = 1200000, - .buck1_voltage3 = 1200000, - .buck1_voltage4 = 1200000, - .buck2_voltage1 = 1200000, - .buck2_voltage2 = 1200000, + .buck1_voltage = { 1200000, 1200000, 1200000, 1200000 }, + .buck2_voltage = { 1200000, 1200000 }, }; #endif diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c index 30b24ad84f49..e5cd9fbf19e9 100644 --- a/arch/arm/mach-s5pv210/mach-goni.c +++ b/arch/arm/mach-s5pv210/mach-goni.c @@ -580,12 +580,8 @@ static struct max8998_platform_data goni_max8998_pdata = { .buck1_set1 = S5PV210_GPH0(3), .buck1_set2 = S5PV210_GPH0(4), .buck2_set3 = S5PV210_GPH0(5), - .buck1_voltage1 = 1200000, - .buck1_voltage2 = 1200000, - .buck1_voltage3 = 1200000, - .buck1_voltage4 = 1200000, - .buck2_voltage1 = 1200000, - .buck2_voltage2 = 1200000, + .buck1_voltage = { 1200000, 1200000, 1200000, 1200000 }, + .buck2_voltage = { 1200000, 1200000 }, }; #endif diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 9db3e98e8b85..f25b6119e028 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -19,6 +19,7 @@ #include <linux/cpufreq.h> #include <linux/ioport.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <video/sa1100fb.h> @@ -131,9 +132,9 @@ static void sa1100_power_off(void) PMCR = PMCR_SF; } -void sa11x0_restart(char mode, const char *cmd) +void sa11x0_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* Jump into ROM at address 0 */ soft_restart(0); } else { diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 2abc6a1f6e86..9a33695c9492 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -3,12 +3,13 @@ * * Author: Nicolas Pitre */ +#include <linux/reboot.h> extern void sa1100_timer_init(void); extern void __init sa1100_map_io(void); extern void __init sa1100_init_irq(void); extern void __init sa1100_init_gpio(void); -extern void sa11x0_restart(char, const char *); +extern void sa11x0_restart(enum reboot_mode, const char *); extern void sa11x0_init_late(void); #define SET_BANK(__nr,__start,__size) \ diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index a59a13a665a6..713c86cd3d64 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -14,9 +14,9 @@ #include <linux/irq.h> #include <linux/timex.h> #include <linux/clockchips.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <mach/hardware.h> #include <mach/irqs.h> diff --git a/arch/arm/mach-shark/core.c b/arch/arm/mach-shark/core.c index 153555724988..1d32c5e8eab6 100644 --- a/arch/arm/mach-shark/core.c +++ b/arch/arm/mach-shark/core.c @@ -11,6 +11,7 @@ #include <linux/serial_8250.h> #include <linux/io.h> #include <linux/cpu.h> +#include <linux/reboot.h> #include <asm/setup.h> #include <asm/mach-types.h> @@ -24,7 +25,7 @@ #define ROMCARD_SIZE 0x08000000 #define ROMCARD_START 0x10000000 -static void shark_restart(char mode, const char *cmd) +static void shark_restart(enum reboot_mode mode, const char *cmd) { short temp; /* Reset the Machine via pc[3] of the sequoia chipset */ diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index db27e8eef192..3912ce91fee4 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -23,7 +23,7 @@ config ARCH_R8A73A4 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC select CPU_V7 - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select SH_CLK_CPG select RENESAS_IRQC @@ -59,7 +59,7 @@ config ARCH_R8A7790 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC select CPU_V7 - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select SH_CLK_CPG select RENESAS_IRQC diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 44a621505eeb..e115f6742107 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -42,6 +42,7 @@ #include <linux/mmc/sh_mmcif.h> #include <linux/mmc/sh_mobile_sdhi.h> #include <linux/i2c-gpio.h> +#include <linux/reboot.h> #include <mach/common.h> #include <mach/irqs.h> #include <mach/r8a7740.h> @@ -377,7 +378,7 @@ static struct resource sh_eth_resources[] = { }; static struct platform_device sh_eth_device = { - .name = "sh-eth", + .name = "r8a7740-gether", .id = -1, .dev = { .platform_data = &sh_eth_platdata, @@ -1259,7 +1260,7 @@ static void __init eva_add_early_devices(void) } #define RESCNT2 IOMEM(0xe6188020) -static void eva_restart(char mode, const char *cmd) +static void eva_restart(enum reboot_mode mode, const char *cmd) { /* Do soft power on reset */ writel((1 << 31), RESCNT2); diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c index 165483c9bee2..1068120d339f 100644 --- a/arch/arm/mach-shmobile/board-kzm9g.c +++ b/arch/arm/mach-shmobile/board-kzm9g.c @@ -34,6 +34,7 @@ #include <linux/pinctrl/machine.h> #include <linux/pinctrl/pinconf-generic.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> #include <linux/smsc911x.h> @@ -890,7 +891,7 @@ static void __init kzm_init(void) sh73a0_pm_init(); } -static void kzm9g_restart(char mode, const char *cmd) +static void kzm9g_restart(enum reboot_mode mode, const char *cmd) { #define RESCNT2 IOMEM(0xe6188020) /* Do soft power on reset */ diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index 7fd32d604e34..de10fd78bf2b 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -594,7 +594,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("e6860000.sdhi", &mstp_clks[MSTP313]), CLKDEV_DEV_ID("sh_mmcif", &mstp_clks[MSTP312]), CLKDEV_DEV_ID("e6bd0000.mmcif", &mstp_clks[MSTP312]), - CLKDEV_DEV_ID("sh-eth", &mstp_clks[MSTP309]), + CLKDEV_DEV_ID("r8a7740-gether", &mstp_clks[MSTP309]), CLKDEV_DEV_ID("e9a00000.sh-eth", &mstp_clks[MSTP309]), CLKDEV_DEV_ID("renesas_tpu_pwm", &mstp_clks[MSTP304]), diff --git a/arch/arm/mach-shmobile/clock-r8a7778.c b/arch/arm/mach-shmobile/clock-r8a7778.c index 53798e5037d7..a0e9eb72e46d 100644 --- a/arch/arm/mach-shmobile/clock-r8a7778.c +++ b/arch/arm/mach-shmobile/clock-r8a7778.c @@ -145,7 +145,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP323]), /* SDHI0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP322]), /* SDHI1 */ CLKDEV_DEV_ID("sh_mobile_sdhi.2", &mstp_clks[MSTP321]), /* SDHI2 */ - CLKDEV_DEV_ID("sh-eth", &mstp_clks[MSTP114]), /* Ether */ + CLKDEV_DEV_ID("r8a777x-ether", &mstp_clks[MSTP114]), /* Ether */ CLKDEV_DEV_ID("ehci-platform", &mstp_clks[MSTP100]), /* USB EHCI port0/1 */ CLKDEV_DEV_ID("ohci-platform", &mstp_clks[MSTP100]), /* USB OHCI port0/1 */ CLKDEV_DEV_ID("i2c-rcar.0", &mstp_clks[MSTP030]), /* I2C0 */ diff --git a/arch/arm/mach-shmobile/clock-r8a7779.c b/arch/arm/mach-shmobile/clock-r8a7779.c index 9daeb8c37483..10340f5becbb 100644 --- a/arch/arm/mach-shmobile/clock-r8a7779.c +++ b/arch/arm/mach-shmobile/clock-r8a7779.c @@ -165,7 +165,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("rcar-pcie", &mstp_clks[MSTP116]), /* PCIe */ CLKDEV_DEV_ID("sata_rcar", &mstp_clks[MSTP115]), /* SATA */ CLKDEV_DEV_ID("fc600000.sata", &mstp_clks[MSTP115]), /* SATA w/DT */ - CLKDEV_DEV_ID("sh-eth", &mstp_clks[MSTP114]), /* Ether */ + CLKDEV_DEV_ID("r8a777x-ether", &mstp_clks[MSTP114]), /* Ether */ CLKDEV_DEV_ID("ehci-platform.1", &mstp_clks[MSTP101]), /* USB EHCI port2 */ CLKDEV_DEV_ID("ohci-platform.1", &mstp_clks[MSTP101]), /* USB OHCI port2 */ CLKDEV_DEV_ID("ehci-platform.0", &mstp_clks[MSTP100]), /* USB EHCI port0/1 */ diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7e105932c09d..5390c6bbbc02 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -142,15 +142,15 @@ static void pllc2_table_rebuild(struct clk *clk) /* Initialise PLLC2 frequency table */ for (i = 0; i < ARRAY_SIZE(pllc2_freq_table) - 2; i++) { pllc2_freq_table[i].frequency = clk->parent->rate * (i + 20) * 2; - pllc2_freq_table[i].index = i; + pllc2_freq_table[i].driver_data = i; } /* This is a special entry - switching PLL off makes it a repeater */ pllc2_freq_table[i].frequency = clk->parent->rate; - pllc2_freq_table[i].index = i; + pllc2_freq_table[i].driver_data = i; pllc2_freq_table[++i].frequency = CPUFREQ_TABLE_END; - pllc2_freq_table[i].index = i; + pllc2_freq_table[i].driver_data = i; } static unsigned long pllc2_recalc(struct clk *clk) diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index 8ea11b472b91..bfce9641e32f 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -19,6 +19,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <linux/reboot.h> #include <asm/hardware/cache-l2x0.h> #include <asm/mach/arch.h> @@ -89,13 +90,13 @@ static void __init socfpga_init_irq(void) socfpga_sysmgr_init(); } -static void socfpga_cyclone5_restart(char mode, const char *cmd) +static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd) { u32 temp; temp = readl(rst_manager_base_addr + SOCFPGA_RSTMGR_CTRL); - if (mode == 'h') + if (mode == REBOOT_HARD) temp |= RSTMGR_CTRL_SWCOLDRSTREQ; else temp |= RSTMGR_CTRL_SWWARMRSTREQ; diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h index a9fd45362fee..904f2c907b46 100644 --- a/arch/arm/mach-spear/generic.h +++ b/arch/arm/mach-spear/generic.h @@ -16,6 +16,8 @@ #include <linux/dmaengine.h> #include <linux/amba/pl08x.h> #include <linux/init.h> +#include <linux/reboot.h> + #include <asm/mach/time.h> extern void spear13xx_timer_init(void); @@ -32,7 +34,7 @@ void __init spear6xx_clk_init(void __iomem *misc_base); void __init spear13xx_map_io(void); void __init spear13xx_l2x0_init(void); -void spear_restart(char, const char *); +void spear_restart(enum reboot_mode, const char *); void spear13xx_secondary_startup(void); void __cpuinit spear13xx_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-spear/restart.c b/arch/arm/mach-spear/restart.c index 2b44500bb718..ce5e098c4888 100644 --- a/arch/arm/mach-spear/restart.c +++ b/arch/arm/mach-spear/restart.c @@ -12,14 +12,15 @@ */ #include <linux/io.h> #include <linux/amba/sp810.h> +#include <linux/reboot.h> #include <asm/system_misc.h> #include <mach/spear.h> #include "generic.h" #define SPEAR13XX_SYS_SW_RES (VA_MISC_BASE + 0x204) -void spear_restart(char mode, const char *cmd) +void spear_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* software reset, Jump into ROM at address 0 */ soft_restart(0); } else { diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c index 0227c97797cd..bf3b1fd8cb23 100644 --- a/arch/arm/mach-spear/spear3xx.c +++ b/arch/arm/mach-spear/spear3xx.c @@ -56,8 +56,8 @@ struct pl08x_platform_data pl080_plat_data = { }, .lli_buses = PL08X_AHB1, .mem_buses = PL08X_AHB1, - .get_signal = pl080_get_signal, - .put_signal = pl080_put_signal, + .get_xfer_signal = pl080_get_signal, + .put_xfer_signal = pl080_put_signal, }; /* diff --git a/arch/arm/mach-spear/spear6xx.c b/arch/arm/mach-spear/spear6xx.c index 8b0295a41226..da26fa5b68d7 100644 --- a/arch/arm/mach-spear/spear6xx.c +++ b/arch/arm/mach-spear/spear6xx.c @@ -334,8 +334,8 @@ static struct pl08x_platform_data spear6xx_pl080_plat_data = { }, .lli_buses = PL08X_AHB1, .mem_buses = PL08X_AHB1, - .get_signal = pl080_get_signal, - .put_signal = pl080_put_signal, + .get_xfer_signal = pl080_get_signal, + .put_xfer_signal = pl080_put_signal, .slave_channels = spear600_dma_info, .num_slave_channels = ARRAY_SIZE(spear600_dma_info), }; diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 84485a10fc3a..38a3c55527c8 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -18,6 +18,7 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/io.h> +#include <linux/reboot.h> #include <linux/clk/sunxi.h> @@ -33,7 +34,7 @@ static void __iomem *wdt_base; -static void sun4i_restart(char mode, const char *cmd) +static void sun4i_restart(enum reboot_mode mode, const char *cmd) { if (!wdt_base) return; diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 84d72fc36dfe..ef3a8da49b2d 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -28,7 +28,6 @@ config ARCH_TEGRA_2x_SOC select ARM_ERRATA_754327 if SMP select ARM_ERRATA_764369 if SMP select ARM_GIC - select CPU_FREQ_TABLE if CPU_FREQ select CPU_V7 select PINCTRL select PINCTRL_TEGRA20 @@ -46,7 +45,6 @@ config ARCH_TEGRA_3x_SOC select ARM_ERRATA_754322 select ARM_ERRATA_764369 if SMP select ARM_GIC - select CPU_FREQ_TABLE if CPU_FREQ select CPU_V7 select PINCTRL select PINCTRL_TEGRA30 @@ -60,10 +58,9 @@ config ARCH_TEGRA_3x_SOC config ARCH_TEGRA_114_SOC bool "Enable support for Tegra114 family" - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select ARM_GIC select ARM_L1_CACHE_SHIFT_6 - select CPU_FREQ_TABLE if CPU_FREQ select CPU_V7 select PINCTRL select PINCTRL_TEGRA114 diff --git a/arch/arm/mach-tegra/board.h b/arch/arm/mach-tegra/board.h index 1787327fae3a..9a6659fe2dc2 100644 --- a/arch/arm/mach-tegra/board.h +++ b/arch/arm/mach-tegra/board.h @@ -23,8 +23,9 @@ #define __MACH_TEGRA_BOARD_H #include <linux/types.h> +#include <linux/reboot.h> -void tegra_assert_system_reset(char mode, const char *cmd); +void tegra_assert_system_reset(enum reboot_mode mode, const char *cmd); void __init tegra_init_early(void); void __init tegra_map_common_io(void); diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index ec5836b1e713..94a119a35af8 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -22,8 +22,9 @@ #include <linux/io.h> #include <linux/clk.h> #include <linux/delay.h> +#include <linux/reboot.h> #include <linux/irqchip.h> -#include <linux/clk/tegra.h> +#include <linux/clk-provider.h> #include <asm/hardware/cache-l2x0.h> @@ -60,7 +61,7 @@ u32 tegra_uart_config[4] = { #ifdef CONFIG_OF void __init tegra_dt_init_irq(void) { - tegra_clocks_init(); + of_clk_init(NULL); tegra_pmc_init(); tegra_init_irq(); irqchip_init(); @@ -68,7 +69,7 @@ void __init tegra_dt_init_irq(void) } #endif -void tegra_assert_system_reset(char mode, const char *cmd) +void tegra_assert_system_reset(enum reboot_mode mode, const char *cmd) { void __iomem *reset = IO_ADDRESS(TEGRA_PMC_BASE + 0); u32 reg; diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index 4f7ac2a11452..35670b15f281 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -300,11 +300,11 @@ static void __init u300_init_check_chip(void) /* Forward declare this function from the watchdog */ void coh901327_watchdog_reset(void); -static void u300_restart(char mode, const char *cmd) +static void u300_restart(enum reboot_mode mode, const char *cmd) { switch (mode) { - case 's': - case 'h': + case REBOOT_SOFT: + case REBOOT_HARD: #ifdef CONFIG_COH901327_WATCHDOG coh901327_watchdog_reset(); #endif diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index 390ae5feb1d0..b5db207dfd1e 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -21,9 +21,9 @@ #include <linux/delay.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/sched_clock.h> /* Generic stuff */ -#include <asm/sched_clock.h> #include <asm/mach/map.h> #include <asm/mach/time.h> diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index b6145ea51641..e6fb0239151b 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -76,13 +76,15 @@ void __init ux500_init_irq(void) } else if (cpu_is_u9540()) { prcmu_early_init(U8500_PRCMU_BASE, SZ_8K - 1); ux500_pm_init(U8500_PRCMU_BASE, SZ_8K - 1); - u8500_clk_init(U8500_CLKRST1_BASE, U8500_CLKRST2_BASE, + u9540_clk_init(U8500_CLKRST1_BASE, U8500_CLKRST2_BASE, U8500_CLKRST3_BASE, U8500_CLKRST5_BASE, U8500_CLKRST6_BASE); } else if (cpu_is_u8540()) { prcmu_early_init(U8500_PRCMU_BASE, SZ_8K + SZ_4K - 1); ux500_pm_init(U8500_PRCMU_BASE, SZ_8K + SZ_4K - 1); - u8540_clk_init(); + u8540_clk_init(U8500_CLKRST1_BASE, U8500_CLKRST2_BASE, + U8500_CLKRST3_BASE, U8500_CLKRST5_BASE, + U8500_CLKRST6_BASE); } } diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 54bb80b012ac..3b0572f30d56 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -38,6 +38,7 @@ #include <linux/clkdev.h> #include <linux/mtd/physmap.h> #include <linux/bitops.h> +#include <linux/reboot.h> #include <asm/irq.h> #include <asm/hardware/arm_timer.h> @@ -733,7 +734,7 @@ static void versatile_leds_event(led_event_t ledevt) } #endif /* CONFIG_LEDS */ -void versatile_restart(char mode, const char *cmd) +void versatile_restart(enum reboot_mode mode, const char *cmd) { void __iomem *sys = __io_address(VERSATILE_SYS_BASE); u32 val; diff --git a/arch/arm/mach-versatile/core.h b/arch/arm/mach-versatile/core.h index 5c1b87d1da6b..f06d5768e428 100644 --- a/arch/arm/mach-versatile/core.h +++ b/arch/arm/mach-versatile/core.h @@ -24,13 +24,14 @@ #include <linux/amba/bus.h> #include <linux/of_platform.h> +#include <linux/reboot.h> extern void __init versatile_init(void); extern void __init versatile_init_early(void); extern void __init versatile_init_irq(void); extern void __init versatile_map_io(void); extern void versatile_timer_init(void); -extern void versatile_restart(char, const char *); +extern void versatile_restart(enum reboot_mode, const char *); extern unsigned int mmc_status(struct device *dev); #ifdef CONFIG_OF extern struct of_dev_auxdata versatile_auxdata_lookup[]; diff --git a/arch/arm/mach-virt/Kconfig b/arch/arm/mach-virt/Kconfig index 8958f0d896bc..081d46929436 100644 --- a/arch/arm/mach-virt/Kconfig +++ b/arch/arm/mach-virt/Kconfig @@ -2,7 +2,7 @@ config ARCH_VIRT bool "Dummy Virtual Machine" if ARCH_MULTI_V7 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select ARM_PSCI select HAVE_SMP select CPU_V7 diff --git a/arch/arm/mach-vt8500/vt8500.c b/arch/arm/mach-vt8500/vt8500.c index f8f2f00856e0..eefaa60d6614 100644 --- a/arch/arm/mach-vt8500/vt8500.c +++ b/arch/arm/mach-vt8500/vt8500.c @@ -21,6 +21,7 @@ #include <linux/clocksource.h> #include <linux/io.h> #include <linux/pm.h> +#include <linux/reboot.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -46,7 +47,7 @@ static void __iomem *pmc_base; -void vt8500_restart(char mode, const char *cmd) +void vt8500_restart(enum reboot_mode mode, const char *cmd) { if (pmc_base) writel(1, pmc_base + VT8500_PMSR_REG); diff --git a/arch/arm/mach-w90x900/cpu.c b/arch/arm/mach-w90x900/cpu.c index 9e4dd8b63c4a..b1eabaad50a5 100644 --- a/arch/arm/mach-w90x900/cpu.c +++ b/arch/arm/mach-w90x900/cpu.c @@ -230,9 +230,9 @@ void __init nuc900_init_clocks(void) #define WTE (1 << 7) #define WTRE (1 << 1) -void nuc9xx_restart(char mode, const char *cmd) +void nuc9xx_restart(enum reboot_mode mode, const char *cmd) { - if (mode == 's') { + if (mode == REBOOT_SOFT) { /* Jump into ROM at address 0 */ soft_restart(0); } else { diff --git a/arch/arm/mach-w90x900/nuc9xx.h b/arch/arm/mach-w90x900/nuc9xx.h index 88ef4b267089..e3ab1e1381f1 100644 --- a/arch/arm/mach-w90x900/nuc9xx.h +++ b/arch/arm/mach-w90x900/nuc9xx.h @@ -14,10 +14,13 @@ * published by the Free Software Foundation. * */ + +#include <linux/reboot.h> + struct map_desc; /* core initialisation functions */ extern void nuc900_init_irq(void); extern void nuc900_timer_init(void); -extern void nuc9xx_restart(char, const char *); +extern void nuc9xx_restart(enum reboot_mode, const char *); diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 9e8101ecd63e..6cacdc8dd654 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -392,7 +392,8 @@ config CPU_V7 select CPU_CACHE_V7 select CPU_CACHE_VIPT select CPU_COPY_V6 if MMU - select CPU_CP15_MMU + select CPU_CP15_MMU if MMU + select CPU_CP15_MPU if !MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V7 select CPU_TLB_V7 if MMU diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index ee558a01f390..ecfe6e53f6e0 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c465faca51b0..d70e0aba0c9d 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -523,6 +523,147 @@ static void aurora_flush_range(unsigned long start, unsigned long end) } } +/* + * For certain Broadcom SoCs, depending on the address range, different offsets + * need to be added to the address before passing it to L2 for + * invalidation/clean/flush + * + * Section Address Range Offset EMI + * 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC + * 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS + * 3 0xC0000000 - 0xFFFFFFFF 0x80000000 VC + * + * When the start and end addresses have crossed two different sections, we + * need to break the L2 operation into two, each within its own section. + * For example, if we need to invalidate addresses starts at 0xBFFF0000 and + * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2) + * 0xC0000000 - 0xC0001000 + * + * Note 1: + * By breaking a single L2 operation into two, we may potentially suffer some + * performance hit, but keep in mind the cross section case is very rare + * + * Note 2: + * We do not need to handle the case when the start address is in + * Section 1 and the end address is in Section 3, since it is not a valid use + * case + * + * Note 3: + * Section 1 in practical terms can no longer be used on rev A2. Because of + * that the code does not need to handle section 1 at all. + * + */ +#define BCM_SYS_EMI_START_ADDR 0x40000000UL +#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL + +#define BCM_SYS_EMI_OFFSET 0x40000000UL +#define BCM_VC_EMI_OFFSET 0x80000000UL + +static inline int bcm_addr_is_sys_emi(unsigned long addr) +{ + return (addr >= BCM_SYS_EMI_START_ADDR) && + (addr < BCM_VC_EMI_SEC3_START_ADDR); +} + +static inline unsigned long bcm_l2_phys_addr(unsigned long addr) +{ + if (bcm_addr_is_sys_emi(addr)) + return addr + BCM_SYS_EMI_OFFSET; + else + return addr + BCM_VC_EMI_OFFSET; +} + +static void bcm_inv_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_inv_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_inv_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_clean_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + l2x0_clean_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_clean_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_clean_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_flush_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + l2x0_flush_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_flush_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_flush_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + static void __init l2x0_of_setup(const struct device_node *np, u32 *aux_val, u32 *aux_mask) { @@ -765,6 +906,21 @@ static const struct l2x0_of_data aurora_no_outer_data = { }, }; +static const struct l2x0_of_data bcm_l2x0_data = { + .setup = pl310_of_setup, + .save = pl310_save, + .outer_cache = { + .resume = pl310_resume, + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + }, +}; + static const struct of_device_id l2x0_ids[] __initconst = { { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, @@ -773,6 +929,8 @@ static const struct of_device_id l2x0_ids[] __initconst = { .data = (void *)&aurora_no_outer_data}, { .compatible = "marvell,aurora-outer-cache", .data = (void *)&aurora_with_outer_data}, + { .compatible = "bcm,bcm11351-a2-pl310-cache", + .data = (void *)&bcm_l2x0_data}, {} }; diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 2ac37372ef52..b55b1015724b 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -20,6 +20,7 @@ #include <asm/smp_plat.h> #include <asm/thread_notify.h> #include <asm/tlbflush.h> +#include <asm/proc-fns.h> /* * On ARMv6, we have the following structure in the Context ID: @@ -39,33 +40,51 @@ * non 64-bit operations. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) -#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) - -#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) -#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) +#define NUM_USER_ASIDS ASID_FIRST_VERSION static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ + int cpu; + unsigned long flags; + u64 context_id, asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. + */ + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); + } + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + #ifdef CONFIG_ARM_LPAE static void cpu_set_reserved_ttbr0(void) { - unsigned long ttbl = __pa(swapper_pg_dir); - unsigned long ttbh = 0; - /* * Set TTBR0 to swapper_pg_dir which contains only global entries. The * ASID is set to 0. */ - asm volatile( - " mcrr p15, 0, %0, %1, c2 @ set TTBR0\n" - : - : "r" (ttbl), "r" (ttbh)); + cpu_set_ttbr(0, __pa(swapper_pg_dir)); isb(); } #else @@ -128,7 +147,16 @@ static void flush_context(unsigned int cpu) asid = 0; } else { asid = atomic64_xchg(&per_cpu(active_asids, i), 0); - __set_bit(ASID_TO_IDX(asid), asid_map); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); } per_cpu(reserved_asids, i) = asid; } @@ -167,17 +195,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs - * as requiring flushes. + * as requiring flushes. We always count from ASID #1, + * as we reserve ASID #0 to switch via TTBR0 and indicate + * rollover events. */ - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); if (asid == NUM_USER_ASIDS) { generation = atomic64_add_return(ASID_FIRST_VERSION, &asid_generation); flush_context(cpu); - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); } __set_bit(asid, asid_map); - asid = generation | IDX_TO_ASID(asid); + asid |= generation; cpumask_clear(mm_cpumask(mm)); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3aac96..7f9b1798c6cf 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -250,7 +250,7 @@ static void __dma_free_buffer(struct page *page, size_t size) #ifdef CONFIG_MMU #ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB +#warning ARM Coherent DMA allocator does not (yet) support huge TLB #endif static void *__alloc_from_contiguous(struct device *dev, size_t size, @@ -880,10 +880,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); /* - * Mark the D-cache clean for this page to avoid extra flushing. + * Mark the D-cache clean for these pages to avoid extra flushing. */ - if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) - set_bit(PG_dcache_clean, &page->flags); + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } } /** @@ -1314,6 +1328,15 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, if (gfp & GFP_ATOMIC) return __iommu_alloc_atomic(dev, size, handle); + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + pages = __iommu_alloc_buffer(dev, size, gfp, attrs); if (!pages) return NULL; @@ -1372,16 +1395,17 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { - struct page **pages = __iommu_get_pages(cpu_addr, attrs); + struct page **pages; size = PAGE_ALIGN(size); - if (!pages) { - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size); return; } - if (__in_atomic_pool(cpu_addr, size)) { - __iommu_free_atomic(dev, cpu_addr, handle, size); + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } @@ -1636,13 +1660,27 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t dma_addr; - int ret, len = PAGE_ALIGN(size + offset); + int ret, prot, len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); if (dma_addr == DMA_ERROR_CODE) return dma_addr; - ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); if (ret < 0) goto fail; @@ -1907,7 +1945,7 @@ void arm_iommu_detach_device(struct device *dev) iommu_detach_device(mapping->domain, dev); kref_put(&mapping->kref, release_iommu_mapping); - mapping = NULL; + dev->archdata.mapping = NULL; set_dma_ops(dev, NULL); pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 5dbf13f954f6..c97f7940cb95 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -491,12 +491,14 @@ do_translation_fault(unsigned long addr, unsigned int fsr, * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ +#ifndef CONFIG_ARM_LPAE static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { do_bad_area(addr, fsr, regs); return 0; } +#endif /* CONFIG_ARM_LPAE */ /* * This abort handler always returns "fault". diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 32aa5861119f..6d5ba9afb16a 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -17,6 +17,7 @@ #include <asm/highmem.h> #include <asm/smp_plat.h> #include <asm/tlbflush.h> +#include <linux/hugetlb.h> #include "mm.h" @@ -168,19 +169,23 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * coherent with the kernels mapping. */ if (!PageHighMem(page)) { - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + size_t page_size = PAGE_SIZE << compound_order(page); + __cpuc_flush_dcache_area(page_address(page), page_size); } else { - void *addr; - + unsigned long i; if (cache_is_vipt_nonaliasing()) { - addr = kmap_atomic(page); - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_atomic(addr); - } else { - addr = kmap_high_get(page); - if (addr) { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_atomic(page); __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_high(page); + kunmap_atomic(addr); + } + } else { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } } } } @@ -287,7 +292,7 @@ void flush_dcache_page(struct page *page) mapping = page_mapping(page); if (!cache_ops_need_broadcast() && - mapping && !mapping_mapped(mapping)) + mapping && !page_mapped(page)) clear_bit(PG_dcache_clean, &page->flags); else { __flush_dcache_page(mapping, page); diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c index 05a4e9431836..ab4409a2307e 100644 --- a/arch/arm/mm/fsr-3level.c +++ b/arch/arm/mm/fsr-3level.c @@ -9,11 +9,11 @@ static struct fsr_info fsr_info[] = { { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c new file mode 100644 index 000000000000..3d1e4a205b0b --- /dev/null +++ b/arch/arm/mm/hugetlbpage.c @@ -0,0 +1,101 @@ +/* + * arch/arm/mm/hugetlbpage.c + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +/* + * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot + * of type casting from pmd_t * to pte_t *. + */ + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + + return (pte_t *)pmd; +} + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + + return pte; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9a5cdc01fcdf..6833cbead6cc 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -36,12 +36,13 @@ #include "mm.h" -static unsigned long phys_initrd_start __initdata = 0; +static phys_addr_t phys_initrd_start __initdata = 0; static unsigned long phys_initrd_size __initdata = 0; static int __init early_initrd(char *p) { - unsigned long start, size; + phys_addr_t start; + unsigned long size; char *endp; start = memparse(p, &endp); @@ -350,14 +351,14 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) #ifdef CONFIG_BLK_DEV_INITRD if (phys_initrd_size && !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { - pr_err("INITRD: 0x%08lx+0x%08lx is not a memory region - disabling initrd\n", - phys_initrd_start, phys_initrd_size); + pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } if (phys_initrd_size && memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { - pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n", - phys_initrd_start, phys_initrd_size); + pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } if (phys_initrd_size) { @@ -442,7 +443,7 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; - unsigned long pg, pgend; + phys_addr_t pg, pgend; /* * Convert start_pfn/end_pfn to a struct page pointer. @@ -454,8 +455,8 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * Convert to physical addresses, and * round start upwards and end downwards. */ - pg = (unsigned long)PAGE_ALIGN(__pa(start_pg)); - pgend = (unsigned long)__pa(end_pg) & PAGE_MASK; + pg = PAGE_ALIGN(__pa(start_pg)); + pgend = __pa(end_pg) & PAGE_MASK; /* * If there are free pages between these, @@ -582,9 +583,6 @@ static void __init free_highpages(void) */ void __init mem_init(void) { - unsigned long reserved_pages, free_pages; - struct memblock_region *reg; - int i; #ifdef CONFIG_HAVE_TCM /* These pointers are filled in on TCM detection */ extern u32 dtcm_end; @@ -595,57 +593,16 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ free_unused_memmap(&meminfo); - - totalram_pages += free_all_bootmem(); + free_all_bootmem(); #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL); + free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, -1, NULL); #endif free_highpages(); - reserved_pages = free_pages = 0; - - for_each_bank(i, &meminfo) { - struct membank *bank = &meminfo.bank[i]; - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = bank_pfn_start(bank); - pfn2 = bank_pfn_end(bank); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - if (PageReserved(page)) - reserved_pages++; - else if (!page_count(page)) - free_pages++; - page++; - } while (page < end); - } - - /* - * Since our memory may not be contiguous, calculate the - * real number of pages we have in this system - */ - printk(KERN_INFO "Memory:"); - num_physpages = 0; - for_each_memblock(memory, reg) { - unsigned long pages = memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - num_physpages += pages; - printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); - } - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - - printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n", - nr_free_pages() << (PAGE_SHIFT-10), - free_pages << (PAGE_SHIFT-10), - reserved_pages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); #define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 @@ -711,7 +668,7 @@ void __init mem_init(void) BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); #endif - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* * On a machine this small we won't get @@ -728,12 +685,12 @@ void free_initmem(void) extern char __tcm_start, __tcm_end; poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); - free_reserved_area(&__tcm_start, &__tcm_end, 0, "TCM link"); + free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link"); #endif poison_init_mem(__init_begin, __init_end - __init_begin); if (!machine_is_integrator() && !machine_is_cintegrator()) - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -744,7 +701,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } } diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 04d9006eab1f..f123d6eb074b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -331,10 +331,10 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, return (void __iomem *) (offset + addr); } -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { - unsigned long last_addr; + phys_addr_t last_addr; unsigned long offset = phys_addr & ~PAGE_MASK; unsigned long pfn = __phys_to_pfn(phys_addr); @@ -367,12 +367,12 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; void __iomem * -__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) +__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return arch_ioremap_caller(phys_addr, size, mtype, __builtin_return_address(0)); @@ -387,7 +387,7 @@ EXPORT_SYMBOL(__arm_ioremap); * CONFIG_GENERIC_ALLOCATOR for allocating external memory. */ void __iomem * -__arm_ioremap_exec(unsigned long phys_addr, size_t size, bool cached) +__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached) { unsigned int mtype; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index d1d1cefa1f93..d7229d28c7f8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -675,7 +675,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys, const struct mem_type *type) + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { pud_t *pud = pud_offset(pgd, addr); unsigned long next; @@ -989,27 +990,28 @@ phys_addr_t arm_lowmem_limit __initdata = 0; void __init sanity_check_meminfo(void) { int i, j, highmem = 0; + phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; for (i = 0, j = 0; i < meminfo.nr_banks; i++) { struct membank *bank = &meminfo.bank[j]; - *bank = meminfo.bank[i]; + phys_addr_t size_limit; - if (bank->start > ULONG_MAX) - highmem = 1; + *bank = meminfo.bank[i]; + size_limit = bank->size; -#ifdef CONFIG_HIGHMEM - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) + if (bank->start >= vmalloc_limit) highmem = 1; + else + size_limit = vmalloc_limit - bank->start; bank->highmem = highmem; +#ifdef CONFIG_HIGHMEM /* * Split those memory banks which are partially overlapping * the vmalloc area greatly simplifying things later. */ - if (!highmem && __va(bank->start) < vmalloc_min && - bank->size > vmalloc_min - __va(bank->start)) { + if (!highmem && bank->size > size_limit) { if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " "ignoring high memory\n"); @@ -1018,16 +1020,14 @@ void __init sanity_check_meminfo(void) (meminfo.nr_banks - i) * sizeof(*bank)); meminfo.nr_banks++; i++; - bank[1].size -= vmalloc_min - __va(bank->start); - bank[1].start = __pa(vmalloc_min - 1) + 1; + bank[1].size -= size_limit; + bank[1].start = vmalloc_limit; bank[1].highmem = highmem = 1; j++; } - bank->size = vmalloc_min - __va(bank->start); + bank->size = size_limit; } #else - bank->highmem = highmem; - /* * Highmem banks not allowed with !CONFIG_HIGHMEM. */ @@ -1040,31 +1040,16 @@ void __init sanity_check_meminfo(void) } /* - * Check whether this memory bank would entirely overlap - * the vmalloc area. - */ - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) { - printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx " - "(vmalloc region overlap).\n", - (unsigned long long)bank->start, - (unsigned long long)bank->start + bank->size - 1); - continue; - } - - /* * Check whether this memory bank would partially overlap * the vmalloc area. */ - if (__va(bank->start + bank->size - 1) >= vmalloc_min || - __va(bank->start + bank->size - 1) <= __va(bank->start)) { - unsigned long newsize = vmalloc_min - __va(bank->start); + if (bank->size > size_limit) { printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " "to -%.8llx (vmalloc region overlap).\n", (unsigned long long)bank->start, (unsigned long long)bank->start + bank->size - 1, - (unsigned long long)bank->start + newsize - 1); - bank->size = newsize; + (unsigned long long)bank->start + size_limit - 1); + bank->size = size_limit; } #endif if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 5a3aba614a40..1fa50100ab6a 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -8,6 +8,7 @@ #include <linux/pagemap.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/kernel.h> #include <asm/cacheflush.h> #include <asm/sections.h> @@ -15,9 +16,260 @@ #include <asm/setup.h> #include <asm/traps.h> #include <asm/mach/arch.h> +#include <asm/cputype.h> +#include <asm/mpu.h> #include "mm.h" +#ifdef CONFIG_ARM_MPU +struct mpu_rgn_info mpu_rgn_info; + +/* Region number */ +static void rgnr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static void dracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); +} + +/* Region size register */ +static void drsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); +} + +/* Region base address register */ +static void drbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); +} + +static u32 drbar_read(void) +{ + u32 v; + asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); + return v; +} +/* Optional instruction-side region attributes */ + +/* I-side Region access control register */ +static void iracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); +} + +/* I-side Region size register */ +static void irsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); +} + +/* I-side Region base address register */ +static void irbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); +} + +static unsigned long irbar_read(void) +{ + unsigned long v; + asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); + return v; +} + +/* MPU initialisation functions */ +void __init sanity_check_meminfo_mpu(void) +{ + int i; + struct membank *bank = meminfo.bank; + phys_addr_t phys_offset = PHYS_OFFSET; + phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; + + /* Initially only use memory continuous from PHYS_OFFSET */ + if (bank_phys_start(&bank[0]) != phys_offset) + panic("First memory bank must be contiguous from PHYS_OFFSET"); + + /* Banks have already been sorted by start address */ + for (i = 1; i < meminfo.nr_banks; i++) { + if (bank[i].start <= bank_phys_end(&bank[0]) && + bank_phys_end(&bank[i]) > bank_phys_end(&bank[0])) { + bank[0].size = bank_phys_end(&bank[i]) - bank[0].start; + } else { + pr_notice("Ignoring RAM after 0x%.8lx. " + "First non-contiguous (ignored) bank start: 0x%.8lx\n", + (unsigned long)bank_phys_end(&bank[0]), + (unsigned long)bank_phys_start(&bank[i])); + break; + } + } + /* All contiguous banks are now merged in to the first bank */ + meminfo.nr_banks = 1; + specified_mem_size = bank[0].size; + + /* + * MPU has curious alignment requirements: Size must be power of 2, and + * region start must be aligned to the region size + */ + if (phys_offset != 0) + pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n"); + + /* + * Maximum aligned region might overflow phys_addr_t if phys_offset is + * 0. Hence we keep everything below 4G until we take the smaller of + * the aligned_region_size and rounded_mem_size, one of which is + * guaranteed to be smaller than the maximum physical address. + */ + aligned_region_size = (phys_offset - 1) ^ (phys_offset); + /* Find the max power-of-two sized region that fits inside our bank */ + rounded_mem_size = (1 << __fls(bank[0].size)) - 1; + + /* The actual region size is the smaller of the two */ + aligned_region_size = aligned_region_size < rounded_mem_size + ? aligned_region_size + 1 + : rounded_mem_size + 1; + + if (aligned_region_size != specified_mem_size) + pr_warn("Truncating memory from 0x%.8lx to 0x%.8lx (MPU region constraints)", + (unsigned long)specified_mem_size, + (unsigned long)aligned_region_size); + + meminfo.bank[0].size = aligned_region_size; + pr_debug("MPU Region from 0x%.8lx size 0x%.8lx (end 0x%.8lx))\n", + (unsigned long)phys_offset, + (unsigned long)aligned_region_size, + (unsigned long)bank_phys_end(&bank[0])); + +} + +static int mpu_present(void) +{ + return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); +} + +static int mpu_max_regions(void) +{ + /* + * We don't support a different number of I/D side regions so if we + * have separate instruction and data memory maps then return + * whichever side has a smaller number of supported regions. + */ + u32 dregions, iregions, mpuir; + mpuir = read_cpuid(CPUID_MPUIR); + + dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; + + /* Check for separate d-side and i-side memory maps */ + if (mpuir & MPUIR_nU) + iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION; + + /* Use the smallest of the two maxima */ + return min(dregions, iregions); +} + +static int mpu_iside_independent(void) +{ + /* MPUIR.nU specifies whether there is *not* a unified memory map */ + return read_cpuid(CPUID_MPUIR) & MPUIR_nU; +} + +static int mpu_min_region_order(void) +{ + u32 drbar_result, irbar_result; + /* We've kept a region free for this probing */ + rgnr_write(MPU_PROBE_REGION); + isb(); + /* + * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum + * region order + */ + drbar_write(0xFFFFFFFC); + drbar_result = irbar_result = drbar_read(); + drbar_write(0x0); + /* If the MPU is non-unified, we use the larger of the two minima*/ + if (mpu_iside_independent()) { + irbar_write(0xFFFFFFFC); + irbar_result = irbar_read(); + irbar_write(0x0); + } + isb(); /* Ensure that MPU region operations have completed */ + /* Return whichever result is larger */ + return __ffs(max(drbar_result, irbar_result)); +} + +static int mpu_setup_region(unsigned int number, phys_addr_t start, + unsigned int size_order, unsigned int properties) +{ + u32 size_data; + + /* We kept a region free for probing resolution of MPU regions*/ + if (number > mpu_max_regions() || number == MPU_PROBE_REGION) + return -ENOENT; + + if (size_order > 32) + return -ENOMEM; + + if (size_order < mpu_min_region_order()) + return -ENOMEM; + + /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ + size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; + + dsb(); /* Ensure all previous data accesses occur with old mappings */ + rgnr_write(number); + isb(); + drbar_write(start); + dracr_write(properties); + isb(); /* Propagate properties before enabling region */ + drsr_write(size_data); + + /* Check for independent I-side registers */ + if (mpu_iside_independent()) { + irbar_write(start); + iracr_write(properties); + isb(); + irsr_write(size_data); + } + isb(); + + /* Store region info (we treat i/d side the same, so only store d) */ + mpu_rgn_info.rgns[number].dracr = properties; + mpu_rgn_info.rgns[number].drbar = start; + mpu_rgn_info.rgns[number].drsr = size_data; + return 0; +} + +/* +* Set up default MPU regions, doing nothing if there is no MPU +*/ +void __init mpu_setup(void) +{ + int region_err; + if (!mpu_present()) + return; + + region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, + ilog2(meminfo.bank[0].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + if (region_err) { + panic("MPU region initialization failure! %d", region_err); + } else { + pr_info("Using ARMv7 PMSA Compliant MPU. " + "Region independence: %s, Max regions: %d\n", + mpu_iside_independent() ? "Yes" : "No", + mpu_max_regions()); + } +} +#else +static void sanity_check_meminfo_mpu(void) {} +static void __init mpu_setup(void) {} +#endif /* CONFIG_ARM_MPU */ + void __init arm_mm_memblock_reserve(void) { #ifndef CONFIG_CPU_V7M @@ -37,7 +289,9 @@ void __init arm_mm_memblock_reserve(void) void __init sanity_check_meminfo(void) { - phys_addr_t end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); + phys_addr_t end; + sanity_check_meminfo_mpu(); + end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); high_memory = __va(end - 1) + 1; } @@ -48,6 +302,7 @@ void __init sanity_check_meminfo(void) void __init paging_init(struct machine_desc *mdesc) { early_trap_init((void *)CONFIG_VECTORS_BASE); + mpu_setup(); bootmem_init(); } @@ -94,16 +349,16 @@ void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, return __arm_ioremap_pfn(pfn, offset, size, mtype); } -void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return (void __iomem *)phys_addr; } EXPORT_SYMBOL(__arm_ioremap); -void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, unsigned int, void *); +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { return __arm_ioremap(phys_addr, size, mtype); diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 919405e20b80..2d1ef87328a1 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -140,8 +140,10 @@ ENTRY(cpu_v6_set_pte_ext) ENTRY(cpu_v6_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU mrc p15, 0, r5, c3, c0, 0 @ Domain ID mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 +#endif mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register mrc p15, 0, r8, c1, c0, 2 @ co-processor access control mrc p15, 0, r9, c1, c0, 0 @ control register @@ -158,14 +160,16 @@ ENTRY(cpu_v6_do_resume) mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID ldmia r0, {r4 - r9} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU mcr p15, 0, r5, c3, c0, 0 @ Domain ID ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) ALT_UP(orr r1, r1, #TTB_FLAGS_UP) mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mcr p15, 0, ip, c2, c0, 2 @ TTB control register +#endif mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register mcr p15, 0, r8, c1, c0, 2 @ co-processor access control - mcr p15, 0, ip, c2, c0, 2 @ TTB control register mcr p15, 0, ip, c7, c5, 4 @ ISB mov r0, r9 @ control register b cpu_resume_mmu diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 363027e811d6..5ffe1956c6d9 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -39,6 +39,14 @@ #define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) #define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) +#ifndef __ARMEB__ +# define rpgdl r0 +# define rpgdh r1 +#else +# define rpgdl r1 +# define rpgdh r0 +#endif + /* * cpu_v7_switch_mm(pgd_phys, tsk) * @@ -47,10 +55,10 @@ */ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU - mmid r1, r1 @ get mm->context.id - asid r3, r1 - mov r3, r3, lsl #(48 - 32) @ ASID - mcrr p15, 0, r0, r3, c2 @ set TTB 0 + mmid r2, r2 + asid r2, r2 + orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd + mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 isb #endif mov pc, lr @@ -106,7 +114,8 @@ ENDPROC(cpu_v7_set_pte_ext) */ .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? (branch below) + mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT + cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register orr \tmp, \tmp, #TTB_EAE ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) @@ -114,27 +123,21 @@ ENDPROC(cpu_v7_set_pte_ext) ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) /* - * TTBR0/TTBR1 split (PAGE_OFFSET): - * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) - * 0x80000000: T0SZ = 0, T1SZ = 1 - * 0xc0000000: T0SZ = 0, T1SZ = 2 - * - * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise - * booting secondary CPUs would end up using TTBR1 for the identity - * mapping set up in TTBR0. + * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), + * otherwise booting secondary CPUs would end up using TTBR1 for the + * identity mapping set up in TTBR0. */ - bhi 9001f @ PHYS_OFFSET > PAGE_OFFSET? - orr \tmp, \tmp, #(((PAGE_OFFSET >> 30) - 1) << 16) @ TTBCR.T1SZ -#if defined CONFIG_VMSPLIT_2G - /* PAGE_OFFSET == 0x80000000, T1SZ == 1 */ - add \ttbr1, \ttbr1, #1 << 4 @ skip two L1 entries -#elif defined CONFIG_VMSPLIT_3G - /* PAGE_OFFSET == 0xc0000000, T1SZ == 2 */ - add \ttbr1, \ttbr1, #4096 * (1 + 3) @ only L2 used, skip pgd+3*pmd -#endif - /* CONFIG_VMSPLIT_1G does not need TTBR1 adjustment */ -9001: mcr p15, 0, \tmp, c2, c0, 2 @ TTB control register - mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ + mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR + mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits + addls \ttbr1, \ttbr1, #TTBR1_OFFSET + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 .endm __CPUINIT diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index e35fec34453e..7ef3ad05df39 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -98,9 +98,11 @@ ENTRY(cpu_v7_do_suspend) mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID stmia r0!, {r4 - r5} +#ifdef CONFIG_MMU mrc p15, 0, r6, c3, c0, 0 @ Domain ID mrc p15, 0, r7, c2, c0, 1 @ TTB 1 mrc p15, 0, r11, c2, c0, 2 @ TTB control register +#endif mrc p15, 0, r8, c1, c0, 0 @ Control register mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control @@ -110,13 +112,14 @@ ENDPROC(cpu_v7_do_suspend) ENTRY(cpu_v7_do_resume) mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID ldmia r0!, {r4 - r5} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID ldmia r0, {r6 - r11} +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r6, c3, c0, 0 @ Domain ID #ifndef CONFIG_ARM_LPAE ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) @@ -125,14 +128,15 @@ ENTRY(cpu_v7_do_resume) mcr p15, 0, r1, c2, c0, 0 @ TTB 0 mcr p15, 0, r7, c2, c0, 1 @ TTB 1 mcr p15, 0, r11, c2, c0, 2 @ TTB control register - mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register - teq r4, r9 @ Is it already set? - mcrne p15, 0, r9, c1, c0, 1 @ No, so write it - mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control ldr r4, =PRRR @ PRRR ldr r5, =NMRR @ NMRR mcr p15, 0, r4, c10, c2, 0 @ write PRRR mcr p15, 0, r5, c10, c2, 1 @ write NMRR +#endif /* CONFIG_MMU */ + mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register + teq r4, r9 @ Is it already set? + mcrne p15, 0, r9, c1, c0, 1 @ No, so write it + mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control isb dsb mov r0, r8 @ control register @@ -178,7 +182,8 @@ ENDPROC(cpu_pj4b_do_idle) */ __v7_ca5mp_setup: __v7_ca9mp_setup: - mov r10, #(1 << 0) @ TLB ops broadcasting +__v7_cr7mp_setup: + mov r10, #(1 << 0) @ Cache/TLB ops broadcasting b 1f __v7_ca7mp_setup: __v7_ca15mp_setup: @@ -443,6 +448,16 @@ __v7_pj4b_proc_info: #endif /* + * ARM Ltd. Cortex R7 processor. + */ + .type __v7_cr7mp_proc_info, #object +__v7_cr7mp_proc_info: + .long 0x410fc170 + .long 0xff0ffff0 + __v7_proc __v7_cr7mp_setup + .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info + + /* * ARM Ltd. Cortex A7 processor. */ .type __v7_ca7mp_proc_info, #object diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 1a643ee8e082..f50d223a0bd3 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -900,8 +900,7 @@ void bpf_jit_compile(struct sk_filter *fp) #endif alloc_size = 4 * ctx.idx; - ctx.target = module_alloc(max(sizeof(struct work_struct), - alloc_size)); + ctx.target = module_alloc(alloc_size); if (unlikely(ctx.target == NULL)) goto out; @@ -927,19 +926,8 @@ out: return; } -static void bpf_jit_free_worker(struct work_struct *work) -{ - module_free(NULL, work); -} - void bpf_jit_free(struct sk_filter *fp) { - struct work_struct *work; - - if (fp->bpf_func != sk_run_filter) { - work = (struct work_struct *)fp->bpf_func; - - INIT_WORK(work, bpf_jit_free_worker); - schedule_work(work); - } + if (fp->bpf_func != sk_run_filter) + module_free(NULL, fp->bpf_func); } diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index 1ff6a37e893c..a4d1f8de3b5b 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c @@ -192,12 +192,10 @@ static int __init iop3xx_adma_cap_init(void) #ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */ dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); - dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); #else dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask); - dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); #endif diff --git a/arch/arm/plat-iop/gpio.c b/arch/arm/plat-iop/gpio.c index e4de9be78feb..697de6dc4936 100644 --- a/arch/arm/plat-iop/gpio.c +++ b/arch/arm/plat-iop/gpio.c @@ -17,6 +17,7 @@ #include <linux/gpio.h> #include <linux/export.h> #include <asm/hardware/iop3xx.h> +#include <mach/gpio.h> void gpio_line_config(int line, int direction) { diff --git a/arch/arm/plat-iop/restart.c b/arch/arm/plat-iop/restart.c index 33fa699a4d28..3a4d5e5fde52 100644 --- a/arch/arm/plat-iop/restart.c +++ b/arch/arm/plat-iop/restart.c @@ -11,7 +11,7 @@ #include <asm/system_misc.h> #include <mach/hardware.h> -void iop3xx_restart(char mode, const char *cmd) +void iop3xx_restart(enum reboot_mode mode, const char *cmd) { *IOP3XX_PCSR = 0x30; diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index 837a2d52e9db..29606bd75f3f 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -22,9 +22,9 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/export.h> +#include <linux/sched_clock.h> #include <mach/hardware.h> #include <asm/irq.h> -#include <asm/sched_clock.h> #include <asm/uaccess.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 5b0b86bb34bb..d9bc98eb2a6b 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -18,9 +18,9 @@ #include <linux/err.h> #include <linux/io.h> #include <linux/clocksource.h> +#include <linux/sched_clock.h> #include <asm/mach/time.h> -#include <asm/sched_clock.h> #include <plat/counter-32k.h> diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index c019b7aaf776..c66d163d7a2a 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -666,14 +666,9 @@ void __init orion_xor0_init(unsigned long mapbase_low, orion_xor0_shared_resources[3].start = irq_1; orion_xor0_shared_resources[3].end = irq_1; - /* - * two engines can't do memset simultaneously, this limitation - * satisfied by removing memset support from one of the engines. - */ dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[0].cap_mask); dma_cap_set(DMA_XOR, orion_xor0_channels_data[0].cap_mask); - dma_cap_set(DMA_MEMSET, orion_xor0_channels_data[1].cap_mask); dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[1].cap_mask); dma_cap_set(DMA_XOR, orion_xor0_channels_data[1].cap_mask); @@ -732,14 +727,9 @@ void __init orion_xor1_init(unsigned long mapbase_low, orion_xor1_shared_resources[3].start = irq_1; orion_xor1_shared_resources[3].end = irq_1; - /* - * two engines can't do memset simultaneously, this limitation - * satisfied by removing memset support from one of the engines. - */ dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[0].cap_mask); dma_cap_set(DMA_XOR, orion_xor1_channels_data[0].cap_mask); - dma_cap_set(DMA_MEMSET, orion_xor1_channels_data[1].cap_mask); dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[1].cap_mask); dma_cap_set(DMA_XOR, orion_xor1_channels_data[1].cap_mask); diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 5d5ac0f05422..9d2b2ac74938 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -16,7 +16,7 @@ #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/irq.h> -#include <asm/sched_clock.h> +#include <linux/sched_clock.h> /* * MBus bridge block registers. diff --git a/arch/arm/plat-samsung/include/plat/cpu-freq-core.h b/arch/arm/plat-samsung/include/plat/cpu-freq-core.h index d7e17150028a..7231c8e4975e 100644 --- a/arch/arm/plat-samsung/include/plat/cpu-freq-core.h +++ b/arch/arm/plat-samsung/include/plat/cpu-freq-core.h @@ -285,7 +285,7 @@ static inline int s3c_cpufreq_addfreq(struct cpufreq_frequency_table *table, s3c_freq_dbg("%s: { %d = %u kHz }\n", __func__, index, freq); - table[index].index = index; + table[index].driver_data = index; table[index].frequency = freq; } diff --git a/arch/arm/plat-samsung/samsung-time.c b/arch/arm/plat-samsung/samsung-time.c index f899cbc9b288..2957075ca836 100644 --- a/arch/arm/plat-samsung/samsung-time.c +++ b/arch/arm/plat-samsung/samsung-time.c @@ -15,12 +15,12 @@ #include <linux/clk.h> #include <linux/clockchips.h> #include <linux/platform_device.h> +#include <linux/sched_clock.h> #include <asm/smp_twd.h> #include <asm/mach/time.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> -#include <asm/sched_clock.h> #include <mach/map.h> #include <plat/devs.h> diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S index b178d44e9eaa..2677bc3762d7 100644 --- a/arch/arm/plat-versatile/headsmp.S +++ b/arch/arm/plat-versatile/headsmp.S @@ -11,8 +11,6 @@ #include <linux/linkage.h> #include <linux/init.h> - __INIT - /* * Realview/Versatile Express specific entry point for secondary CPUs. * This provides a "holding pen" into which all secondary cores are held diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c index b33b74c87232..51b109e3b6c3 100644 --- a/arch/arm/plat-versatile/sched-clock.c +++ b/arch/arm/plat-versatile/sched-clock.c @@ -20,8 +20,8 @@ */ #include <linux/kernel.h> #include <linux/io.h> +#include <linux/sched_clock.h> -#include <asm/sched_clock.h> #include <plat/sched_clock.h> static void __iomem *ctr; diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 13609e01f4b7..f71c37edca26 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -314,4 +314,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_hvm_op); EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); +EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 199cb2da7663..d1cf7b7c2200 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -88,6 +88,7 @@ HYPERCALL2(hvm_op); HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); +HYPERCALL1(tmem_op); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..4143d9b0d87a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -7,6 +7,7 @@ config ARM64 select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS @@ -111,6 +112,11 @@ config ARCH_VEXPRESS This enables support for the ARMv8 software model (Versatile Express). +config ARCH_XGENE + bool "AppliedMicro X-Gene SOC Family" + help + This enables support for AppliedMicro X-Gene SOC Family + endmenu menu "Bus support" @@ -148,6 +154,8 @@ config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 depends on SMP + # These have to remain sorted largest to smallest + default "8" if ARCH_XGENE default "4" source kernel/Kconfig.preempt @@ -180,8 +188,35 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y if !ARM64_64K_PAGES + +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + source "mm/Kconfig" +config XEN_DOM0 + def_bool y + depends on XEN + +config XEN + bool "Xen guest support on ARM64 (EXPERIMENTAL)" + depends on ARM64 && OF + help + Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. + +config FORCE_MAX_ZONEORDER + int + default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "11" + endmenu menu "Boot options" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb212fd..d90cf79f233a 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -37,6 +37,8 @@ TEXT_OFFSET := 0x00080000 export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ +core-$(CONFIG_KVM) += arch/arm64/kvm/ +core-$(CONFIG_XEN) += arch/arm64/xen/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) @@ -60,6 +62,10 @@ zinstall install: vmlinux dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts dtbs +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index 68457e9e0975..c52bdb051f66 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -1,4 +1,5 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb +dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb targets += dtbs targets += $(dtb-y) diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts new file mode 100644 index 000000000000..1247ca1200b1 --- /dev/null +++ b/arch/arm64/boot/dts/apm-mustang.dts @@ -0,0 +1,26 @@ +/* + * dts file for AppliedMicro (APM) Mustang Board + * + * Copyright (C) 2013, Applied Micro Circuits Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +/dts-v1/; + +/include/ "apm-storm.dtsi" + +/ { + model = "APM X-Gene Mustang board"; + compatible = "apm,mustang", "apm,xgene-storm"; + + chosen { }; + + memory { + device_type = "memory"; + reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */ + }; +}; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi new file mode 100644 index 000000000000..bfdc57834929 --- /dev/null +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -0,0 +1,116 @@ +/* + * dts file for AppliedMicro (APM) X-Gene Storm SOC + * + * Copyright (C) 2013, Applied Micro Circuits Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +/ { + compatible = "apm,xgene-storm"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@000 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x000>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@001 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x001>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@100 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x100>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@101 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x101>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@200 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x200>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@201 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x201>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@300 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x300>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@301 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x301>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + }; + + gic: interrupt-controller@78010000 { + compatible = "arm,cortex-a15-gic"; + #interrupt-cells = <3>; + interrupt-controller; + reg = <0x0 0x78010000 0x0 0x1000>, /* GIC Dist */ + <0x0 0x78020000 0x0 0x1000>, /* GIC CPU */ + <0x0 0x78040000 0x0 0x2000>, /* GIC VCPU Control */ + <0x0 0x78060000 0x0 0x2000>; /* GIC VCPU */ + interrupts = <1 9 0xf04>; /* GIC Maintenence IRQ */ + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 0 0xff01>, /* Secure Phys IRQ */ + <1 13 0xff01>, /* Non-secure Phys IRQ */ + <1 14 0xff01>, /* Virt IRQ */ + <1 15 0xff01>; /* Hyp IRQ */ + clock-frequency = <50000000>; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + serial0: serial@1c020000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0 0x1c020000 0x0 0x1000>; + reg-shift = <2>; + clock-frequency = <10000000>; /* Updated by bootloader */ + interrupt-parent = <&gic>; + interrupts = <0x0 0x4c 0x4>; + }; + }; +}; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 8d9696adb440..5b3e83217b03 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -24,6 +24,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_XGENE=y CONFIG_SMP=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_CMDLINE="console=ttyAMA0" @@ -54,6 +55,9 @@ CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index bf6ab242f047..d56ed11ba9a3 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -110,16 +110,6 @@ static inline void __cpuinit arch_counter_set_user_access(void) asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrs %0, cntpct_el0" : "=r" (cval)); - - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 3300cbd18a89..fea9ee327206 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -123,9 +123,6 @@ static inline void __flush_icache_all(void) #define flush_dcache_mmap_unlock(mapping) \ spin_unlock_irq(&(mapping)->tree_lock) -#define flush_icache_user_range(vma,page,addr,len) \ - flush_dcache_page(page) - /* * We don't appear to need to do anything here. In fact, if we did, we'd * duplicate cache flushing elsewhere performed by flush_dcache_page(). diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf2749488cd4..5fe138e0b828 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -37,11 +37,14 @@ }) #define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_PART_AEM_V8 0xD0F0 #define ARM_CPU_PART_FOUNDATION 0xD000 #define ARM_CPU_PART_CORTEX_A57 0xD070 +#define APM_CPU_PART_POTENZA 0x0000 + #ifndef __ASSEMBLY__ /* diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7eaa0b302493..ef8235c68c09 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -83,6 +83,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif +#ifdef CONFIG_COMPAT +int aarch32_break_handler(struct pt_regs *regs); +#else +static int aarch32_break_handler(struct pt_regs *regs) +{ + return -EFAULT; +} +#endif + #endif /* __ASSEMBLY */ #endif /* __KERNEL__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 0d8453c755a8..cf98b362094b 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -18,6 +18,9 @@ struct dev_archdata { struct dma_map_ops *dma_ops; +#ifdef CONFIG_IOMMU_API + void *iommu; /* private IOMMU data */ +#endif }; struct pdev_archdata { diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 994776894198..8d1810001aef 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -81,8 +81,12 @@ static inline void dma_mark_clean(void *addr, size_t size) { } -static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_free_coherent(d, s, h, f) dma_free_attrs(d, s, h, f, NULL) + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); void *vaddr; @@ -90,13 +94,14 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size, if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr)) return vaddr; - vaddr = ops->alloc(dev, size, dma_handle, flags, NULL); + vaddr = ops->alloc(dev, size, dma_handle, flags, attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr); return vaddr; } -static inline void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dev_addr) +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t dev_addr, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -104,7 +109,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, return; debug_dma_free_coherent(dev, size, vaddr, dev_addr); - ops->free(dev, size, vaddr, dev_addr, NULL); + ops->free(dev, size, vaddr, dev_addr, attrs); } /* diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h new file mode 100644 index 000000000000..5b7ca8ace95f --- /dev/null +++ b/arch/arm64/include/asm/hugetlb.h @@ -0,0 +1,117 @@ +/* + * arch/arm64/include/asm/hugetlb.h + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_HUGETLB_H +#define __ASM_HUGETLB_H + +#include <asm-generic/hugetlb.h> +#include <asm/page.h> + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h new file mode 100644 index 000000000000..d2c79049ff11 --- /dev/null +++ b/arch/arm64/include/asm/hypervisor.h @@ -0,0 +1,6 @@ +#ifndef _ASM_ARM64_HYPERVISOR_H +#define _ASM_ARM64_HYPERVISOR_H + +#include <asm/xen/hypervisor.h> + +#endif diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 2e12258aa7e4..1d12f89140ba 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -228,10 +228,12 @@ extern void __iounmap(volatile void __iomem *addr); #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) +#define ioremap_cached(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL)) #define iounmap __iounmap #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h new file mode 100644 index 000000000000..a5f28e2720c7 --- /dev/null +++ b/arch/arm64/include/asm/kvm_arm.h @@ -0,0 +1,245 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_ARM_H__ +#define __ARM64_KVM_ARM_H__ + +#include <asm/types.h> + +/* Hyp Configuration Register (HCR) bits */ +#define HCR_ID (UL(1) << 33) +#define HCR_CD (UL(1) << 32) +#define HCR_RW_SHIFT 31 +#define HCR_RW (UL(1) << HCR_RW_SHIFT) +#define HCR_TRVM (UL(1) << 30) +#define HCR_HCD (UL(1) << 29) +#define HCR_TDZ (UL(1) << 28) +#define HCR_TGE (UL(1) << 27) +#define HCR_TVM (UL(1) << 26) +#define HCR_TTLB (UL(1) << 25) +#define HCR_TPU (UL(1) << 24) +#define HCR_TPC (UL(1) << 23) +#define HCR_TSW (UL(1) << 22) +#define HCR_TAC (UL(1) << 21) +#define HCR_TIDCP (UL(1) << 20) +#define HCR_TSC (UL(1) << 19) +#define HCR_TID3 (UL(1) << 18) +#define HCR_TID2 (UL(1) << 17) +#define HCR_TID1 (UL(1) << 16) +#define HCR_TID0 (UL(1) << 15) +#define HCR_TWE (UL(1) << 14) +#define HCR_TWI (UL(1) << 13) +#define HCR_DC (UL(1) << 12) +#define HCR_BSU (3 << 10) +#define HCR_BSU_IS (UL(1) << 10) +#define HCR_FB (UL(1) << 9) +#define HCR_VA (UL(1) << 8) +#define HCR_VI (UL(1) << 7) +#define HCR_VF (UL(1) << 6) +#define HCR_AMO (UL(1) << 5) +#define HCR_IMO (UL(1) << 4) +#define HCR_FMO (UL(1) << 3) +#define HCR_PTW (UL(1) << 2) +#define HCR_SWIO (UL(1) << 1) +#define HCR_VM (UL(1) << 0) + +/* + * The bits we set in HCR: + * RW: 64bit by default, can be overriden for 32bit VMs + * TAC: Trap ACTLR + * TSC: Trap SMC + * TSW: Trap cache operations by set/way + * TWI: Trap WFI + * TIDCP: Trap L2CTLR/L2ECTLR + * BSU_IS: Upgrade barriers to the inner shareable domain + * FB: Force broadcast of all maintainance operations + * AMO: Override CPSR.A and enable signaling with VA + * IMO: Override CPSR.I and enable signaling with VI + * FMO: Override CPSR.F and enable signaling with VF + * SWIO: Turn set/way invalidates into set/way clean+invalidate + */ +#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ + HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ + HCR_SWIO | HCR_TIDCP | HCR_RW) +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) + +/* Hyp System Control Register (SCTLR_EL2) bits */ +#define SCTLR_EL2_EE (1 << 25) +#define SCTLR_EL2_WXN (1 << 19) +#define SCTLR_EL2_I (1 << 12) +#define SCTLR_EL2_SA (1 << 3) +#define SCTLR_EL2_C (1 << 2) +#define SCTLR_EL2_A (1 << 1) +#define SCTLR_EL2_M 1 +#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \ + SCTLR_EL2_SA | SCTLR_EL2_I) + +/* TCR_EL2 Registers bits */ +#define TCR_EL2_TBI (1 << 20) +#define TCR_EL2_PS (7 << 16) +#define TCR_EL2_PS_40B (2 << 16) +#define TCR_EL2_TG0 (1 << 14) +#define TCR_EL2_SH0 (3 << 12) +#define TCR_EL2_ORGN0 (3 << 10) +#define TCR_EL2_IRGN0 (3 << 8) +#define TCR_EL2_T0SZ 0x3f +#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ + TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) + +#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) + +/* VTCR_EL2 Registers bits */ +#define VTCR_EL2_PS_MASK (7 << 16) +#define VTCR_EL2_PS_40B (2 << 16) +#define VTCR_EL2_TG0_MASK (1 << 14) +#define VTCR_EL2_TG0_4K (0 << 14) +#define VTCR_EL2_TG0_64K (1 << 14) +#define VTCR_EL2_SH0_MASK (3 << 12) +#define VTCR_EL2_SH0_INNER (3 << 12) +#define VTCR_EL2_ORGN0_MASK (3 << 10) +#define VTCR_EL2_ORGN0_WBWA (1 << 10) +#define VTCR_EL2_IRGN0_MASK (3 << 8) +#define VTCR_EL2_IRGN0_WBWA (1 << 8) +#define VTCR_EL2_SL0_MASK (3 << 6) +#define VTCR_EL2_SL0_LVL1 (1 << 6) +#define VTCR_EL2_T0SZ_MASK 0x3f +#define VTCR_EL2_T0SZ_40B 24 + +#ifdef CONFIG_ARM64_64K_PAGES +/* + * Stage2 translation configuration: + * 40bits output (PS = 2) + * 40bits input (T0SZ = 24) + * 64kB pages (TG0 = 1) + * 2 level page tables (SL = 1) + */ +#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \ + VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ + VTCR_EL2_T0SZ_40B) +#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) +#else +/* + * Stage2 translation configuration: + * 40bits output (PS = 2) + * 40bits input (T0SZ = 24) + * 4kB pages (TG0 = 0) + * 3 level page tables (SL = 1) + */ +#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \ + VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ + VTCR_EL2_T0SZ_40B) +#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) +#endif + +#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) +#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT (48LLU) +#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) + +/* Hyp System Trap Register */ +#define HSTR_EL2_TTEE (1 << 16) +#define HSTR_EL2_T(x) (1 << x) + +/* Hyp Coprocessor Trap Register */ +#define CPTR_EL2_TCPAC (1 << 31) +#define CPTR_EL2_TTA (1 << 20) +#define CPTR_EL2_TFP (1 << 10) + +/* Hyp Debug Configuration Register bits */ +#define MDCR_EL2_TDRA (1 << 11) +#define MDCR_EL2_TDOSA (1 << 10) +#define MDCR_EL2_TDA (1 << 9) +#define MDCR_EL2_TDE (1 << 8) +#define MDCR_EL2_HPME (1 << 7) +#define MDCR_EL2_TPM (1 << 6) +#define MDCR_EL2_TPMCR (1 << 5) +#define MDCR_EL2_HPMN_MASK (0x1F) + +/* Exception Syndrome Register (ESR) bits */ +#define ESR_EL2_EC_SHIFT (26) +#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT) +#define ESR_EL2_IL (1U << 25) +#define ESR_EL2_ISS (ESR_EL2_IL - 1) +#define ESR_EL2_ISV_SHIFT (24) +#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT) +#define ESR_EL2_SAS_SHIFT (22) +#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT) +#define ESR_EL2_SSE (1 << 21) +#define ESR_EL2_SRT_SHIFT (16) +#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT) +#define ESR_EL2_SF (1 << 15) +#define ESR_EL2_AR (1 << 14) +#define ESR_EL2_EA (1 << 9) +#define ESR_EL2_CM (1 << 8) +#define ESR_EL2_S1PTW (1 << 7) +#define ESR_EL2_WNR (1 << 6) +#define ESR_EL2_FSC (0x3f) +#define ESR_EL2_FSC_TYPE (0x3c) + +#define ESR_EL2_CV_SHIFT (24) +#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT) +#define ESR_EL2_COND_SHIFT (20) +#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT) + + +#define FSC_FAULT (0x04) +#define FSC_PERM (0x0c) + +/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ +#define HPFAR_MASK (~0xFUL) + +#define ESR_EL2_EC_UNKNOWN (0x00) +#define ESR_EL2_EC_WFI (0x01) +#define ESR_EL2_EC_CP15_32 (0x03) +#define ESR_EL2_EC_CP15_64 (0x04) +#define ESR_EL2_EC_CP14_MR (0x05) +#define ESR_EL2_EC_CP14_LS (0x06) +#define ESR_EL2_EC_FP_ASIMD (0x07) +#define ESR_EL2_EC_CP10_ID (0x08) +#define ESR_EL2_EC_CP14_64 (0x0C) +#define ESR_EL2_EC_ILL_ISS (0x0E) +#define ESR_EL2_EC_SVC32 (0x11) +#define ESR_EL2_EC_HVC32 (0x12) +#define ESR_EL2_EC_SMC32 (0x13) +#define ESR_EL2_EC_SVC64 (0x15) +#define ESR_EL2_EC_HVC64 (0x16) +#define ESR_EL2_EC_SMC64 (0x17) +#define ESR_EL2_EC_SYS64 (0x18) +#define ESR_EL2_EC_IABT (0x20) +#define ESR_EL2_EC_IABT_HYP (0x21) +#define ESR_EL2_EC_PC_ALIGN (0x22) +#define ESR_EL2_EC_DABT (0x24) +#define ESR_EL2_EC_DABT_HYP (0x25) +#define ESR_EL2_EC_SP_ALIGN (0x26) +#define ESR_EL2_EC_FP_EXC32 (0x28) +#define ESR_EL2_EC_FP_EXC64 (0x2C) +#define ESR_EL2_EC_SERRROR (0x2F) +#define ESR_EL2_EC_BREAKPT (0x30) +#define ESR_EL2_EC_BREAKPT_HYP (0x31) +#define ESR_EL2_EC_SOFTSTP (0x32) +#define ESR_EL2_EC_SOFTSTP_HYP (0x33) +#define ESR_EL2_EC_WATCHPT (0x34) +#define ESR_EL2_EC_WATCHPT_HYP (0x35) +#define ESR_EL2_EC_BKPT32 (0x38) +#define ESR_EL2_EC_VECTOR32 (0x3A) +#define ESR_EL2_EC_BRK64 (0x3C) + +#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 + +#endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h new file mode 100644 index 000000000000..c92de4163eba --- /dev/null +++ b/arch/arm64/include/asm/kvm_asm.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM_KVM_ASM_H__ +#define __ARM_KVM_ASM_H__ + +/* + * 0 is reserved as an invalid value. + * Order *must* be kept in sync with the hyp switch code. + */ +#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ +#define CSSELR_EL1 2 /* Cache Size Selection Register */ +#define SCTLR_EL1 3 /* System Control Register */ +#define ACTLR_EL1 4 /* Auxilliary Control Register */ +#define CPACR_EL1 5 /* Coprocessor Access Control */ +#define TTBR0_EL1 6 /* Translation Table Base Register 0 */ +#define TTBR1_EL1 7 /* Translation Table Base Register 1 */ +#define TCR_EL1 8 /* Translation Control Register */ +#define ESR_EL1 9 /* Exception Syndrome Register */ +#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ +#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ +#define FAR_EL1 12 /* Fault Address Register */ +#define MAIR_EL1 13 /* Memory Attribute Indirection Register */ +#define VBAR_EL1 14 /* Vector Base Address Register */ +#define CONTEXTIDR_EL1 15 /* Context ID Register */ +#define TPIDR_EL0 16 /* Thread ID, User R/W */ +#define TPIDRRO_EL0 17 /* Thread ID, User R/O */ +#define TPIDR_EL1 18 /* Thread ID, Privileged */ +#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ +#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ +/* 32bit specific registers. Keep them at the end of the range */ +#define DACR32_EL2 21 /* Domain Access Control Register */ +#define IFSR32_EL2 22 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 23 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 24 /* Debug Vector Catch Register */ +#define TEECR32_EL1 25 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 26 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 27 + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ +#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ +#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ +#define c10_AMAIR (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ +#define NR_CP15_REGS (NR_SYS_REGS * 2) + +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_TRAP 1 + +#ifndef __ASSEMBLY__ +struct kvm; +struct kvm_vcpu; + +extern char __kvm_hyp_init[]; +extern char __kvm_hyp_init_end[]; + +extern char __kvm_hyp_vector[]; + +extern char __kvm_hyp_code_start[]; +extern char __kvm_hyp_code_end[]; + +extern void __kvm_flush_vm_context(void); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); + +extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +#endif + +#endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h new file mode 100644 index 000000000000..9a59301cd014 --- /dev/null +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/include/asm/kvm_coproc.h + * Copyright (C) 2012 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_COPROC_H__ +#define __ARM64_KVM_COPROC_H__ + +#include <linux/kvm_host.h> + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +struct kvm_sys_reg_table { + const struct sys_reg_desc *table; + size_t num; +}; + +struct kvm_sys_reg_target_table { + struct kvm_sys_reg_table table64; + struct kvm_sys_reg_table table32; +}; + +void kvm_register_target_sys_reg_table(unsigned int target, + struct kvm_sys_reg_target_table *table); + +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); + +#define kvm_coproc_table_init kvm_sys_reg_table_init +void kvm_sys_reg_table_init(void); + +struct kvm_one_reg; +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h new file mode 100644 index 000000000000..eec073875218 --- /dev/null +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/include/kvm_emulate.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_EMULATE_H__ +#define __ARM64_KVM_EMULATE_H__ + +#include <linux/kvm_host.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmio.h> +#include <asm/ptrace.h> + +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); + +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); + +void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); + +static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; +} + +static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; +} + +static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) +{ + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; +} + +static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) +{ + return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); +} + +static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return kvm_condition_valid32(vcpu); + + return true; +} + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + if (vcpu_mode_is_32bit(vcpu)) + kvm_skip_instr32(vcpu, is_wide_instr); + else + *vcpu_pc(vcpu) += 4; +} + +static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) +{ + *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; +} + +static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_reg32(vcpu, reg_num); + + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +} + +/* Get vcpu SPSR for current mode */ +static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_spsr32(vcpu); + + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; +} + +static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) +{ + u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + + if (vcpu_mode_is_32bit(vcpu)) + return mode > COMPAT_PSR_MODE_USR; + + return mode != PSR_MODE_EL0t; +} + +static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.esr_el2; +} + +static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.far_el2; +} + +static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) +{ + return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; +} + +static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV); +} + +static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR); +} + +static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE); +} + +static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) +{ + return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT; +} + +static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA); +} + +static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW); +} + +static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +{ + return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT); +} + +/* This one is not specific to Data Abort */ +static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL); +} + +static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT; +} + +static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT; +} + +static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; +} + +#endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h new file mode 100644 index 000000000000..644d73956864 --- /dev/null +++ b/arch/arm64/include/asm/kvm_host.h @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/include/asm/kvm_host.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_HOST_H__ +#define __ARM64_KVM_HOST_H__ + +#include <asm/kvm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmio.h> + +#define KVM_MAX_VCPUS 4 +#define KVM_USER_MEM_SLOTS 32 +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#include <kvm/arm_vgic.h> +#include <kvm/arm_arch_timer.h> + +#define KVM_VCPU_MAX_FEATURES 2 + +/* We don't currently support large pages. */ +#define KVM_HPAGE_GFN_SHIFT(x) 0 +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) + +struct kvm_vcpu; +int kvm_target_cpu(void); +int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +int kvm_arch_dev_ioctl_check_extension(long ext); + +struct kvm_arch { + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; + + /* 1-level 2nd stage table and lock */ + spinlock_t pgd_lock; + pgd_t *pgd; + + /* VTTBR value associated with above pgd and vmid */ + u64 vttbr; + + /* Interrupt controller */ + struct vgic_dist vgic; + + /* Timer */ + struct arch_timer_kvm timer; +}; + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +struct kvm_vcpu_fault_info { + u32 esr_el2; /* Hyp Syndrom Register */ + u64 far_el2; /* Hyp Fault Address Register */ + u64 hpfar_el2; /* Hyp IPA Fault Address Register */ +}; + +struct kvm_cpu_context { + struct kvm_regs gp_regs; + union { + u64 sys_regs[NR_SYS_REGS]; + u32 cp15[NR_CP15_REGS]; + }; +}; + +typedef struct kvm_cpu_context kvm_cpu_context_t; + +struct kvm_vcpu_arch { + struct kvm_cpu_context ctxt; + + /* HYP configuration */ + u64 hcr_el2; + + /* Exception Information */ + struct kvm_vcpu_fault_info fault; + + /* Pointer to host CPU context */ + kvm_cpu_context_t *host_cpu_context; + + /* VGIC state */ + struct vgic_cpu vgic_cpu; + struct arch_timer_cpu timer_cpu; + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + /* dcache set/way operation pending */ + int last_pcpu; + cpumask_t require_dcache_flush; + + /* Don't run the guest */ + bool pause; + + /* IO related fields */ + struct kvm_decode mmio_decode; + + /* Interrupt related fields */ + u64 irq_lines; /* IRQ and FIQ levels */ + + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; + + /* Target CPU and feature flags */ + u32 target; + DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + + /* Detect first run of a vcpu */ + bool has_run_once; +}; + +#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) +#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)]) + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 halt_wakeup; +}; + +struct kvm_vcpu_init; +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init); +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +struct kvm_one_reg; +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +#define KVM_ARCH_WANT_MMU_NOTIFIER +struct kvm; +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + +/* We do not have shadow page tables, hence the empty hooks */ +static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + return 0; +} + +static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return 0; +} + +struct kvm_vcpu *kvm_arm_get_running_vcpu(void); +struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); + +u64 kvm_call_hyp(void *hypfn, ...); + +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index); + +int kvm_perf_init(void); +int kvm_perf_teardown(void); + +static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t pgd_ptr, + unsigned long hyp_stack_ptr, + unsigned long vector_ptr) +{ + /* + * Call initialization code, and switch to the full blown + * HYP code. + */ + kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, + hyp_stack_ptr, vector_ptr); +} + +#endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h new file mode 100644 index 000000000000..fc2f689c0694 --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_MMIO_H__ +#define __ARM64_KVM_MMIO_H__ + +#include <linux/kvm_host.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> + +/* + * This is annoying. The mmio code requires this, even if we don't + * need any decoding. To be fixed. + */ +struct kvm_decode { + unsigned long rt; + bool sign_extend; +}; + +/* + * The in-kernel MMIO emulation code wants to use a copy of run->mmio, + * which is an anonymous type. Use our own type instead. + */ +struct kvm_exit_mmio { + phys_addr_t phys_addr; + u8 data[8]; + u32 len; + bool is_write; +}; + +static inline void kvm_prepare_mmio(struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + run->mmio.phys_addr = mmio->phys_addr; + run->mmio.len = mmio->len; + run->mmio.is_write = mmio->is_write; + memcpy(run->mmio.data, mmio->data, mmio->len); + run->exit_reason = KVM_EXIT_MMIO; +} + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + +#endif /* __ARM64_KVM_MMIO_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h new file mode 100644 index 000000000000..efe609c6a3c9 --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_MMU_H__ +#define __ARM64_KVM_MMU_H__ + +#include <asm/page.h> +#include <asm/memory.h> + +/* + * As we only have the TTBR0_EL2 register, we cannot express + * "negative" addresses. This makes it impossible to directly share + * mappings with the kernel. + * + * Instead, give the HYP mode its own VA region at a fixed offset from + * the kernel by just masking the top bits (which are all ones for a + * kernel address). + */ +#define HYP_PAGE_OFFSET_SHIFT VA_BITS +#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) +#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK) + +/* + * Our virtual mapping for the idmap-ed MMU-enable code. Must be + * shared across all the page-tables. Conveniently, we use the last + * possible page, where no kernel mapping will ever exist. + */ +#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) + +#ifdef __ASSEMBLY__ + +/* + * Convert a kernel VA into a HYP VA. + * reg: VA to be converted. + */ +.macro kern_hyp_va reg + and \reg, \reg, #HYP_PAGE_OFFSET_MASK +.endm + +#else + +#include <asm/cachetype.h> +#include <asm/cacheflush.h> + +#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) + +/* + * Align KVM with the kernel's view of physical memory. Should be + * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. + */ +#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT +#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) + +/* Make sure we get the right size, and thus the right alignment */ +#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT)) +#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) + +int create_hyp_mappings(void *from, void *to); +int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +void free_boot_hyp_pgd(void); +void free_hyp_pgds(void); + +int kvm_alloc_stage2_pgd(struct kvm *kvm); +void kvm_free_stage2_pgd(struct kvm *kvm); +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, + phys_addr_t pa, unsigned long size); + +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); + +phys_addr_t kvm_mmu_get_httbr(void); +phys_addr_t kvm_mmu_get_boot_httbr(void); +phys_addr_t kvm_get_idmap_vector(void); +int kvm_mmu_init(void); +void kvm_clear_hyp_idmap(void); + +#define kvm_set_pte(ptep, pte) set_pte(ptep, pte) + +static inline bool kvm_is_write_fault(unsigned long esr) +{ + unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; + + if (esr_ec == ESR_EL2_EC_IABT) + return false; + + if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) + return false; + + return true; +} + +static inline void kvm_clean_dcache_area(void *addr, size_t size) {} +static inline void kvm_clean_pgd(pgd_t *pgd) {} +static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} +static inline void kvm_clean_pte(pte_t *pte) {} +static inline void kvm_clean_pte_entry(pte_t *pte) {} + +static inline void kvm_set_s2pte_writable(pte_t *pte) +{ + pte_val(*pte) |= PTE_S2_RDWR; +} + +struct kvm; + +static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +{ + if (!icache_is_aliasing()) { /* PIPT */ + unsigned long hva = gfn_to_hva(kvm, gfn); + flush_icache_range(hva, hva + PAGE_SIZE); + } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ + /* any kind of VIPT cache */ + __flush_icache_all(); + } +} + +#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) + +#endif /* __ASSEMBLY__ */ +#endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h new file mode 100644 index 000000000000..e301a4816355 --- /dev/null +++ b/arch/arm64/include/asm/kvm_psci.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_PSCI_H__ +#define __ARM64_KVM_PSCI_H__ + +bool kvm_psci_call(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 381f556b664e..20925bcf4e2a 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -90,6 +90,12 @@ #define MT_NORMAL_NC 3 #define MT_NORMAL 4 +/* + * Memory types for Stage-2 translation + */ +#define MT_S2_NORMAL 0xf +#define MT_S2_DEVICE_nGnRE 0x1 + #ifndef __ASSEMBLY__ extern phys_addr_t memstart_addr; diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index e2bc385adb6b..a9eee33dfa62 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -151,12 +151,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); -#ifdef CONFIG_SMP - /* check for possible thread migration */ - if (!cpumask_empty(mm_cpumask(next)) && - !cpumask_test_cpu(cpu, mm_cpumask(next))) - __flush_icache_all(); -#endif if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, tsk); } diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 75fd13d289b9..e182a356c979 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -25,16 +25,27 @@ /* * Hardware page table definitions. * + * Level 1 descriptor (PUD). + */ + +#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) + +/* * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) /* * Section */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) @@ -53,6 +64,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ @@ -68,6 +80,24 @@ #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) /* + * 2nd stage PTE definitions + */ +#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ +#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ + +/* + * Memory Attribute override for Stage-2 (MemAttr[3:0]) + */ +#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) +#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2) + +/* + * EL2/HYP PTE/PMD definitions + */ +#define PMD_HYP PMD_SECT_USER +#define PTE_HYP PTE_USER + +/* * 40-bit physical address supported. */ #define PHYS_MASK_SHIFT (40) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3a768e96cf0e..f0bebc5e22cd 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,8 +25,8 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ +#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) @@ -66,7 +66,7 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) #define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) @@ -76,7 +76,13 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE) +#define PAGE_HYP _MOD_PROT(pgprot_default, PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) + +#define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) +#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN) + +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) @@ -119,7 +125,7 @@ extern struct page *empty_zero_page; #define pte_none(pte) (!pte_val(pte)) #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -173,12 +179,76 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Huge pte definitions. */ -#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE) -#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE)) +#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + +/* + * Hugetlb definitions. + */ +#define HUGE_MAX_HSTATE 2 +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define __HAVE_ARCH_PTE_SPECIAL /* + * Software PMD bits for THP + */ + +#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57) + +/* + * THP definitions. + */ +#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#endif + +#define PMD_BIT_FUNC(fn,op) \ +static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } + +PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); +PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); +PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN | + PMD_SECT_RDONLY | PMD_SECT_PROT_NONE | + PMD_SECT_VALID; + pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); + return pmd; +} + +#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + +/* * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) \ @@ -197,6 +267,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) +#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ + PMD_TYPE_TABLE) +#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ + PMD_TYPE_SECT) + + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; @@ -263,7 +339,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #endif /* Find an entry in the third-level page table.. */ -#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { @@ -281,12 +357,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-8: swap type + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-8: swap type * bits 9-63: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 4 #define __SWP_TYPE_BITS 6 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) @@ -306,15 +382,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-63: file offset / PAGE_SIZE + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-63: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 3) -#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 4) +#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) -#define PTE_FILE_MAX_BITS 61 +#define PTE_FILE_MAX_BITS 60 extern int kern_addr_valid(unsigned long addr); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41a71ee4c3df..0dacbbf9458b 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -171,7 +171,5 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif -extern int aarch32_break_trap(struct pt_regs *regs); - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 7065e920149d..0defa0728a9b 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -59,9 +59,10 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) unsigned int tmp; asm volatile( - " ldaxr %w0, %1\n" + "2: ldaxr %w0, %1\n" " cbnz %w0, 1f\n" " stxr %w0, %w2, %1\n" + " cbnz %w0, 2b\n" "1:\n" : "=&r" (tmp), "+Q" (lock->lock) : "r" (1) diff --git a/arch/arm64/include/asm/sync_bitops.h b/arch/arm64/include/asm/sync_bitops.h new file mode 100644 index 000000000000..8da0bf4f7659 --- /dev/null +++ b/arch/arm64/include/asm/sync_bitops.h @@ -0,0 +1,26 @@ +#ifndef __ASM_SYNC_BITOPS_H__ +#define __ASM_SYNC_BITOPS_H__ + +#include <asm/bitops.h> +#include <asm/cmpxchg.h> + +/* sync_bitops functions are equivalent to the SMP implementation of the + * original functions, independently from CONFIG_SMP being defined. + * + * We need them because _set_bit etc are not SMP safe if !CONFIG_SMP. But + * under Xen you might be communicating with a completely external entity + * who might be on another CPU (e.g. two uniprocessor guests communicating + * via event channels and grant tables). So we need a variant of the bit + * ops which are SMP safe even on a UP kernel. + */ + +#define sync_set_bit(nr, p) set_bit(nr, p) +#define sync_clear_bit(nr, p) clear_bit(nr, p) +#define sync_change_bit(nr, p) change_bit(nr, p) +#define sync_test_and_set_bit(nr, p) test_and_set_bit(nr, p) +#define sync_test_and_clear_bit(nr, p) test_and_clear_bit(nr, p) +#define sync_test_and_change_bit(nr, p) test_and_change_bit(nr, p) +#define sync_test_bit(nr, addr) test_bit(nr, addr) +#define sync_cmpxchg cmpxchg + +#endif diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h index b24a31a7e2c9..81a076eb37fa 100644 --- a/arch/arm64/include/asm/timex.h +++ b/arch/arm64/include/asm/timex.h @@ -16,14 +16,14 @@ #ifndef __ASM_TIMEX_H #define __ASM_TIMEX_H +#include <asm/arch_timer.h> + /* * Use the current timer as a cycle counter since this is what we use for * the delay loop. */ -#define get_cycles() ({ cycles_t c; read_current_timer(&c); c; }) +#define get_cycles() arch_counter_get_cntvct() #include <asm-generic/timex.h> -#define ARCH_HAS_READ_CURRENT_TIMER - #endif diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 654f0968030b..46b3beb4b773 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #define tlb_migrate_finish(mm) do { } while (0) +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 122d6320f745..8b482035cfc2 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, dsb(); } +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif #endif diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h new file mode 100644 index 000000000000..86553213c132 --- /dev/null +++ b/arch/arm64/include/asm/xen/events.h @@ -0,0 +1,21 @@ +#ifndef _ASM_ARM64_XEN_EVENTS_H +#define _ASM_ARM64_XEN_EVENTS_H + +#include <asm/ptrace.h> +#include <asm/atomic.h> + +enum ipi_vector { + XEN_PLACEHOLDER_VECTOR, + + /* Xen IPIs go here */ + XEN_NR_IPIS, +}; + +static inline int xen_irqs_disabled(struct pt_regs *regs) +{ + return raw_irqs_disabled_flags((unsigned long) regs->pstate); +} + +#define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) + +#endif /* _ASM_ARM64_XEN_EVENTS_H */ diff --git a/arch/arm64/include/asm/xen/hypercall.h b/arch/arm64/include/asm/xen/hypercall.h new file mode 100644 index 000000000000..74b0c423ff5b --- /dev/null +++ b/arch/arm64/include/asm/xen/hypercall.h @@ -0,0 +1 @@ +#include <../../arm/include/asm/xen/hypercall.h> diff --git a/arch/arm64/include/asm/xen/hypervisor.h b/arch/arm64/include/asm/xen/hypervisor.h new file mode 100644 index 000000000000..f263da8e8769 --- /dev/null +++ b/arch/arm64/include/asm/xen/hypervisor.h @@ -0,0 +1 @@ +#include <../../arm/include/asm/xen/hypervisor.h> diff --git a/arch/arm64/include/asm/xen/interface.h b/arch/arm64/include/asm/xen/interface.h new file mode 100644 index 000000000000..44457aebeed4 --- /dev/null +++ b/arch/arm64/include/asm/xen/interface.h @@ -0,0 +1 @@ +#include <../../arm/include/asm/xen/interface.h> diff --git a/arch/arm64/include/asm/xen/page.h b/arch/arm64/include/asm/xen/page.h new file mode 100644 index 000000000000..bed87ec36780 --- /dev/null +++ b/arch/arm64/include/asm/xen/page.h @@ -0,0 +1 @@ +#include <../../arm/include/asm/xen/page.h> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..5031f4263937 --- /dev/null +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/include/uapi/asm/kvm.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM_KVM_H__ +#define __ARM_KVM_H__ + +#define KVM_SPSR_EL1 0 +#define KVM_SPSR_SVC KVM_SPSR_EL1 +#define KVM_SPSR_ABT 1 +#define KVM_SPSR_UND 2 +#define KVM_SPSR_IRQ 3 +#define KVM_SPSR_FIQ 4 +#define KVM_NR_SPSR 5 + +#ifndef __ASSEMBLY__ +#include <asm/types.h> +#include <asm/ptrace.h> + +#define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_IRQ_LINE + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +struct kvm_regs { + struct user_pt_regs regs; /* sp = sp_el0 */ + + __u64 sp_el1; + __u64 elr_el1; + + __u64 spsr[KVM_NR_SPSR]; + + struct user_fpsimd_state fp_regs; +}; + +/* Supported Processor Types */ +#define KVM_ARM_TARGET_AEM_V8 0 +#define KVM_ARM_TARGET_FOUNDATION_V8 1 +#define KVM_ARM_TARGET_CORTEX_A57 2 + +#define KVM_ARM_NUM_TARGETS 3 + +/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ +#define KVM_ARM_DEVICE_TYPE_SHIFT 0 +#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_ID_SHIFT 16 +#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) + +/* Supported device IDs */ +#define KVM_ARM_DEVICE_VGIC_V2 0 + +/* Supported VGIC address types */ +#define KVM_VGIC_V2_ADDR_TYPE_DIST 0 +#define KVM_VGIC_V2_ADDR_TYPE_CPU 1 + +#define KVM_VGIC_V2_DIST_SIZE 0x1000 +#define KVM_VGIC_V2_CPU_SIZE 0x2000 + +#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ + +struct kvm_vcpu_init { + __u32 target; + __u32 features[7]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; + +struct kvm_guest_debug_arch { +}; + +struct kvm_debug_exit_arch { +}; + +struct kvm_sync_regs { +}; + +struct kvm_arch_memory_slot { +}; + +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 +#define KVM_REG_ARM_COPROC_SHIFT 16 + +/* Normal registers are mapped as coprocessor 16. */ +#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / sizeof(__u32)) + +/* Some registers need more space to represent values. */ +#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00 +#define KVM_REG_ARM_DEMUX_ID_SHIFT 8 +#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT) +#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF +#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0 + +/* AArch64 system registers */ +#define KVM_REG_ARM64_SYSREG (0x0013 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM64_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM64_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM64_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM64_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM64_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM64_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 + +/* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_TYPE_SHIFT 24 +#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_VCPU_SHIFT 16 +#define KVM_ARM_IRQ_VCPU_MASK 0xff +#define KVM_ARM_IRQ_NUM_SHIFT 0 +#define KVM_ARM_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_ARM_IRQ_TYPE_CPU 0 +#define KVM_ARM_IRQ_TYPE_SPI 1 +#define KVM_ARM_IRQ_TYPE_PPI 2 + +/* out-of-kernel GIC cpu interrupt injection irq_number field */ +#define KVM_ARM_IRQ_CPU_IRQ 0 +#define KVM_ARM_IRQ_CPU_FIQ 1 + +/* Highest supported SPI, from VGIC_NR_IRQS */ +#define KVM_ARM_IRQ_GIC_MAX 127 + +/* PSCI interface */ +#define KVM_PSCI_FN_BASE 0x95c1ba5e +#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) + +#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0) +#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1) +#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) +#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) + +#define KVM_PSCI_RET_SUCCESS 0 +#define KVM_PSCI_RET_NI ((unsigned long)-1) +#define KVM_PSCI_RET_INVAL ((unsigned long)-2) +#define KVM_PSCI_RET_DENIED ((unsigned long)-3) + +#endif + +#endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a2a4d810bea3..49c162c03b69 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -104,5 +104,38 @@ int main(void) BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + BLANK(); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); + DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); + DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); + DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); + DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); + DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); + DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); + DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); + DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); + DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); + DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); + DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); + DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); + DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); + DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif return 0; } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4726dc054b3..08018e3df580 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -24,6 +24,7 @@ #include <linux/init.h> #include <linux/ptrace.h> #include <linux/stat.h> +#include <linux/uaccess.h> #include <asm/debug-monitors.h> #include <asm/local.h> @@ -226,13 +227,74 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } -static int __init single_step_init(void) +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (!user_mode(regs)) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); return 0; } -arch_initcall(single_step_init); +arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6e1e77f1831c..fecdbf7de82e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -53,28 +53,6 @@ void ptrace_disable(struct task_struct *child) { } -/* - * Handle hitting a breakpoint. - */ -static int ptrace_break(struct pt_regs *regs) -{ - siginfo_t info = { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); - return 0; -} - -static int arm64_break_trap(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - return ptrace_break(regs); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Handle hitting a HW-breakpoint. @@ -817,33 +795,6 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; -int aarch32_break_trap(struct pt_regs *regs) -{ - unsigned int instr; - bool bp = false; - void __user *pc = (void __user *)instruction_pointer(regs); - - if (compat_thumb_mode(regs)) { - /* get 16-bit Thumb instruction */ - get_user(instr, (u16 __user *)pc); - if (instr == AARCH32_BREAK_THUMB2_LO) { - /* get second half of 32-bit Thumb-2 instruction */ - get_user(instr, (u16 __user *)(pc + 2)); - bp = instr == AARCH32_BREAK_THUMB2_HI; - } else { - bp = instr == AARCH32_BREAK_THUMB; - } - } else { - /* 32-bit ARM instruction */ - get_user(instr, (u32 __user *)pc); - bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; - } - - if (bp) - return ptrace_break(regs); - return 1; -} - static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t __user *ret) { @@ -1111,16 +1062,6 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } - -static int __init ptrace_break_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); - return 0; -} -core_initcall(ptrace_break_init); - - asmlinkage int syscall_trace(int dir, struct pt_regs *regs) { unsigned long saved_reg; diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a551f88ae2c1..03dc3718eb13 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -68,12 +68,6 @@ unsigned long long notrace sched_clock(void) return arch_timer_read_counter() * sched_clock_mult; } -int read_current_timer(unsigned long *timer_value) -{ - *timer_value = arch_timer_read_counter(); - return 0; -} - void __init time_init(void) { u32 arch_timer_rate; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f30852d28590..7ffadddb645d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include <linux/syscalls.h> #include <asm/atomic.h> +#include <asm/debug-monitors.h> #include <asm/traps.h> #include <asm/stacktrace.h> #include <asm/exception.h> @@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); -#ifdef CONFIG_COMPAT /* check for AArch32 breakpoint instructions */ - if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + if (!aarch32_break_handler(regs)) return; -#endif if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be8b016..f5e55747242f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -17,6 +17,19 @@ ENTRY(stext) jiffies = jiffies_64; +#define HYPERVISOR_TEXT \ + /* \ + * Force the alignment to be compatible with \ + * the vectors requirements \ + */ \ + . = ALIGN(2048); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + SECTIONS { /* @@ -49,6 +62,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -56,7 +70,7 @@ SECTIONS } RO_DATA(PAGE_SIZE) - + EXCEPTION_TABLE(8) _etext = .; /* End of text and rodata section */ . = ALIGN(PAGE_SIZE); @@ -99,14 +113,6 @@ SECTIONS READ_MOSTLY_DATA(64) /* - * The exception fixup table (might need resorting at runtime) - */ - . = ALIGN(32); - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - - /* * and the usual data section */ DATA_DATA @@ -124,3 +130,9 @@ SECTIONS STABS_DEBUG .comment 0 : { *(.comment) } } + +/* + * The HYP init code can't be more than a page long. + */ +ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), + "HYP init code too big") diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile new file mode 100644 index 000000000000..72a9fd583ad3 --- /dev/null +++ b/arch/arm64/kvm/Makefile @@ -0,0 +1,23 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +CFLAGS_arm.o := -I. +CFLAGS_mmu.o := -I. + +KVM=../../../virt/kvm +ARM=../../../arch/arm/kvm + +obj-$(CONFIG_KVM_ARM_HOST) += kvm.o + +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o + +kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o +kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o +kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o + +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c new file mode 100644 index 000000000000..124418d17049 --- /dev/null +++ b/arch/arm64/kvm/emulate.c @@ -0,0 +1,158 @@ +/* + * (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + + if (esr & ESR_EL2_CV) + return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT; + + return -1; +} + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & COMPAT_PSR_T_BIT); + + BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK)); + + if (!(cpsr & COMPAT_PSR_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~COMPAT_PSR_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT); + if (is_thumb && !is_wide_instr) + *vcpu_pc(vcpu) += 2; + else + *vcpu_pc(vcpu) += 4; + kvm_adjust_itstate(vcpu); +} diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c new file mode 100644 index 000000000000..2c3ff67a8ecb --- /dev/null +++ b/arch/arm64/kvm/guest.c @@ -0,0 +1,265 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/guest.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <asm/cputype.h> +#include <asm/uaccess.h> +#include <asm/kvm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; + return 0; +} + +static u64 core_reg_offset_from_id(u64 id) +{ + return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); +} + +static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* + * Because the kvm_regs structure is a mix of 32, 64 and + * 128bit fields, we index it as if it was a 32bit + * array. Hence below, nr_regs is the number of entries, and + * off the index in the "array". + */ + __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + struct kvm_regs *regs = vcpu_gp_regs(vcpu); + int nr_regs = sizeof(*regs) / sizeof(__u32); + u32 off; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= nr_regs || + (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) + return -ENOENT; + + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; + struct kvm_regs *regs = vcpu_gp_regs(vcpu); + int nr_regs = sizeof(*regs) / sizeof(__u32); + __uint128_t tmp; + void *valp = &tmp; + u64 off; + int err = 0; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= nr_regs || + (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) + return -ENOENT; + + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) { + err = -EFAULT; + goto out; + } + + if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { + u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case COMPAT_PSR_MODE_USR: + case COMPAT_PSR_MODE_FIQ: + case COMPAT_PSR_MODE_IRQ: + case COMPAT_PSR_MODE_SVC: + case COMPAT_PSR_MODE_ABT: + case COMPAT_PSR_MODE_UND: + case PSR_MODE_EL0t: + case PSR_MODE_EL1t: + case PSR_MODE_EL1h: + break; + default: + err = -EINVAL; + goto out; + } + } + + memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); +out: + return err; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_regs) / sizeof(__u32); +} + +/** + * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) +{ + return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu); +} + +/** + * kvm_arm_copy_reg_indices - get indices of all registers. + * + * We do core registers right here, then we apppend system regs. + */ +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; + + for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { + if (put_user(core_reg | i, uindices)) + return -EFAULT; + uindices++; + } + + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); +} + +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + + /* Register group 16 means we want a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return get_core_reg(vcpu, reg); + + return kvm_arm_sys_reg_get_reg(vcpu, reg); +} + +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + + /* Register group 16 means we set a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return set_core_reg(vcpu, reg); + + return kvm_arm_sys_reg_set_reg(vcpu, reg); +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_AEM_V8: + return KVM_ARM_TARGET_AEM_V8; + case ARM_CPU_PART_FOUNDATION: + return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A57: + /* Currently handled by the generic backend */ + return KVM_ARM_TARGET_CORTEX_A57; + default: + return -EINVAL; + } +} + +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + vcpu->arch.target = phys_target; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + if (init->features[i / 32] & (1 << (i % 32))) { + if (i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + set_bit(i, vcpu->arch.features); + } + } + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; +} diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c new file mode 100644 index 000000000000..9beaca033437 --- /dev/null +++ b/arch/arm64/kvm/handle_exit.c @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/handle_exit.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_psci.h> + +typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); + +static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + if (kvm_psci_call(vcpu)) + return 1; + + kvm_inject_undefined(vcpu); + return 1; +} + +static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + if (kvm_psci_call(vcpu)) + return 1; + + kvm_inject_undefined(vcpu); + return 1; +} + +/** + * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest + * @vcpu: the vcpu pointer + * + * Simply call kvm_vcpu_block(), which will halt execution of + * world-switches and schedule other host processes until there is an + * incoming IRQ or FIQ to the VM. + */ +static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_vcpu_block(vcpu); + return 1; +} + +static exit_handle_fn arm_exit_handlers[] = { + [ESR_EL2_EC_WFI] = kvm_handle_wfi, + [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, + [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, + [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, + [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_access, + [ESR_EL2_EC_HVC32] = handle_hvc, + [ESR_EL2_EC_SMC32] = handle_smc, + [ESR_EL2_EC_HVC64] = handle_hvc, + [ESR_EL2_EC_SMC64] = handle_smc, + [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg, + [ESR_EL2_EC_IABT] = kvm_handle_guest_abort, + [ESR_EL2_EC_DABT] = kvm_handle_guest_abort, +}; + +static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + + if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || + !arm_exit_handlers[hsr_ec]) { + kvm_err("Unkown exception class: hsr: %#08x\n", + (unsigned int)kvm_vcpu_get_hsr(vcpu)); + BUG(); + } + + return arm_exit_handlers[hsr_ec]; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) +{ + exit_handle_fn exit_handler; + + switch (exception_index) { + case ARM_EXCEPTION_IRQ: + return 1; + case ARM_EXCEPTION_TRAP: + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + + exit_handler = kvm_get_exit_handler(vcpu); + + return exit_handler(vcpu, run); + default: + kvm_pr_unimpl("Unsupported exception type: %d", + exception_index); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; + } +} diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S new file mode 100644 index 000000000000..ba84e6705e20 --- /dev/null +++ b/arch/arm64/kvm/hyp-init.S @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + + .text + .pushsection .hyp.idmap.text, "ax" + + .align 11 + +ENTRY(__kvm_hyp_init) + ventry __invalid // Synchronous EL2t + ventry __invalid // IRQ EL2t + ventry __invalid // FIQ EL2t + ventry __invalid // Error EL2t + + ventry __invalid // Synchronous EL2h + ventry __invalid // IRQ EL2h + ventry __invalid // FIQ EL2h + ventry __invalid // Error EL2h + + ventry __do_hyp_init // Synchronous 64-bit EL1 + ventry __invalid // IRQ 64-bit EL1 + ventry __invalid // FIQ 64-bit EL1 + ventry __invalid // Error 64-bit EL1 + + ventry __invalid // Synchronous 32-bit EL1 + ventry __invalid // IRQ 32-bit EL1 + ventry __invalid // FIQ 32-bit EL1 + ventry __invalid // Error 32-bit EL1 + +__invalid: + b . + + /* + * x0: HYP boot pgd + * x1: HYP pgd + * x2: HYP stack + * x3: HYP vectors + */ +__do_hyp_init: + + msr ttbr0_el2, x0 + + mrs x4, tcr_el1 + ldr x5, =TCR_EL2_MASK + and x4, x4, x5 + ldr x5, =TCR_EL2_FLAGS + orr x4, x4, x5 + msr tcr_el2, x4 + + ldr x4, =VTCR_EL2_FLAGS + msr vtcr_el2, x4 + + mrs x4, mair_el1 + msr mair_el2, x4 + isb + + mov x4, #SCTLR_EL2_FLAGS + msr sctlr_el2, x4 + isb + + /* MMU is now enabled. Get ready for the trampoline dance */ + ldr x4, =TRAMPOLINE_VA + adr x5, target + bfi x4, x5, #0, #PAGE_SHIFT + br x4 + +target: /* We're now in the trampoline code, switch page tables */ + msr ttbr0_el2, x1 + isb + + /* Invalidate the old TLBs */ + tlbi alle2 + dsb sy + + /* Set the stack and new vectors */ + kern_hyp_va x2 + mov sp, x2 + kern_hyp_va x3 + msr vbar_el2, x3 + + /* Hello, World! */ + eret +ENDPROC(__kvm_hyp_init) + + .ltorg + + .popsection diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S new file mode 100644 index 000000000000..ff985e3d8b72 --- /dev/null +++ b/arch/arm64/kvm/hyp.S @@ -0,0 +1,831 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/irqchip/arm-gic.h> + +#include <asm/assembler.h> +#include <asm/memory.h> +#include <asm/asm-offsets.h> +#include <asm/fpsimdmacros.h> +#include <asm/kvm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) +#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + .align PAGE_SHIFT + +__kvm_hyp_code_start: + .globl __kvm_hyp_code_start + +.macro save_common_regs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_XREG_OFFSET(19) + stp x19, x20, [x3] + stp x21, x22, [x3, #16] + stp x23, x24, [x3, #32] + stp x25, x26, [x3, #48] + stp x27, x28, [x3, #64] + stp x29, lr, [x3, #80] + + mrs x19, sp_el0 + mrs x20, elr_el2 // EL1 PC + mrs x21, spsr_el2 // EL1 pstate + + stp x19, x20, [x3, #96] + str x21, [x3, #112] + + mrs x22, sp_el1 + mrs x23, elr_el1 + mrs x24, spsr_el1 + + str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] + str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] +.endm + +.macro restore_common_regs + // x2: base address for cpu context + // x3: tmp register + + ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] + ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] + + msr sp_el1, x22 + msr elr_el1, x23 + msr spsr_el1, x24 + + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 + ldp x19, x20, [x3] + ldr x21, [x3, #16] + + msr sp_el0, x19 + msr elr_el2, x20 // EL1 PC + msr spsr_el2, x21 // EL1 pstate + + add x3, x2, #CPU_XREG_OFFSET(19) + ldp x19, x20, [x3] + ldp x21, x22, [x3, #16] + ldp x23, x24, [x3, #32] + ldp x25, x26, [x3, #48] + ldp x27, x28, [x3, #64] + ldp x29, lr, [x3, #80] +.endm + +.macro save_host_regs + save_common_regs +.endm + +.macro restore_host_regs + restore_common_regs +.endm + +.macro save_fpsimd + // x2: cpu context address + // x3, x4: tmp regs + add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + fpsimd_save x3, 4 +.endm + +.macro restore_fpsimd + // x2: cpu context address + // x3, x4: tmp regs + add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + fpsimd_restore x3, 4 +.endm + +.macro save_guest_regs + // x0 is the vcpu address + // x1 is the return code, do not corrupt! + // x2 is the cpu context + // x3 is a tmp register + // Guest's x0-x3 are on the stack + + // Compute base to save registers + add x3, x2, #CPU_XREG_OFFSET(4) + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + stp x8, x9, [x3, #32] + stp x10, x11, [x3, #48] + stp x12, x13, [x3, #64] + stp x14, x15, [x3, #80] + stp x16, x17, [x3, #96] + str x18, [x3, #112] + + pop x6, x7 // x2, x3 + pop x4, x5 // x0, x1 + + add x3, x2, #CPU_XREG_OFFSET(0) + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + save_common_regs +.endm + +.macro restore_guest_regs + // x0 is the vcpu address. + // x2 is the cpu context + // x3 is a tmp register + + // Prepare x0-x3 for later restore + add x3, x2, #CPU_XREG_OFFSET(0) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + push x4, x5 // Push x0-x3 on the stack + push x6, x7 + + // x4-x18 + ldp x4, x5, [x3, #32] + ldp x6, x7, [x3, #48] + ldp x8, x9, [x3, #64] + ldp x10, x11, [x3, #80] + ldp x12, x13, [x3, #96] + ldp x14, x15, [x3, #112] + ldp x16, x17, [x3, #128] + ldr x18, [x3, #144] + + // x19-x29, lr, sp*, elr*, spsr* + restore_common_regs + + // Last bits of the 64bit state + pop x2, x3 + pop x0, x1 + + // Do not touch any register after this! +.endm + +/* + * Macros to perform system register save/restore. + * + * Ordering here is absolutely critical, and must be kept consistent + * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, + * and in kvm_asm.h. + * + * In other words, don't touch any of these unless you know what + * you are doing. + */ +.macro save_sysregs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) + + mrs x4, vmpidr_el2 + mrs x5, csselr_el1 + mrs x6, sctlr_el1 + mrs x7, actlr_el1 + mrs x8, cpacr_el1 + mrs x9, ttbr0_el1 + mrs x10, ttbr1_el1 + mrs x11, tcr_el1 + mrs x12, esr_el1 + mrs x13, afsr0_el1 + mrs x14, afsr1_el1 + mrs x15, far_el1 + mrs x16, mair_el1 + mrs x17, vbar_el1 + mrs x18, contextidr_el1 + mrs x19, tpidr_el0 + mrs x20, tpidrro_el0 + mrs x21, tpidr_el1 + mrs x22, amair_el1 + mrs x23, cntkctl_el1 + + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + stp x8, x9, [x3, #32] + stp x10, x11, [x3, #48] + stp x12, x13, [x3, #64] + stp x14, x15, [x3, #80] + stp x16, x17, [x3, #96] + stp x18, x19, [x3, #112] + stp x20, x21, [x3, #128] + stp x22, x23, [x3, #144] +.endm + +.macro restore_sysregs + // x2: base address for cpu context + // x3: tmp register + + add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) + + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + ldp x8, x9, [x3, #32] + ldp x10, x11, [x3, #48] + ldp x12, x13, [x3, #64] + ldp x14, x15, [x3, #80] + ldp x16, x17, [x3, #96] + ldp x18, x19, [x3, #112] + ldp x20, x21, [x3, #128] + ldp x22, x23, [x3, #144] + + msr vmpidr_el2, x4 + msr csselr_el1, x5 + msr sctlr_el1, x6 + msr actlr_el1, x7 + msr cpacr_el1, x8 + msr ttbr0_el1, x9 + msr ttbr1_el1, x10 + msr tcr_el1, x11 + msr esr_el1, x12 + msr afsr0_el1, x13 + msr afsr1_el1, x14 + msr far_el1, x15 + msr mair_el1, x16 + msr vbar_el1, x17 + msr contextidr_el1, x18 + msr tpidr_el0, x19 + msr tpidrro_el0, x20 + msr tpidr_el1, x21 + msr amair_el1, x22 + msr cntkctl_el1, x23 +.endm + +.macro skip_32bit_state tmp, target + // Skip 32bit state if not needed + mrs \tmp, hcr_el2 + tbnz \tmp, #HCR_RW_SHIFT, \target +.endm + +.macro skip_tee_state tmp, target + // Skip ThumbEE state if not needed + mrs \tmp, id_pfr0_el1 + tbz \tmp, #12, \target +.endm + +.macro save_guest_32bit_state + skip_32bit_state x3, 1f + + add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) + mrs x4, spsr_abt + mrs x5, spsr_und + mrs x6, spsr_irq + mrs x7, spsr_fiq + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) + mrs x4, dacr32_el2 + mrs x5, ifsr32_el2 + mrs x6, fpexc32_el2 + mrs x7, dbgvcr32_el2 + stp x4, x5, [x3] + stp x6, x7, [x3, #16] + + skip_tee_state x8, 1f + + add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) + mrs x4, teecr32_el1 + mrs x5, teehbr32_el1 + stp x4, x5, [x3] +1: +.endm + +.macro restore_guest_32bit_state + skip_32bit_state x3, 1f + + add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + msr spsr_abt, x4 + msr spsr_und, x5 + msr spsr_irq, x6 + msr spsr_fiq, x7 + + add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) + ldp x4, x5, [x3] + ldp x6, x7, [x3, #16] + msr dacr32_el2, x4 + msr ifsr32_el2, x5 + msr fpexc32_el2, x6 + msr dbgvcr32_el2, x7 + + skip_tee_state x8, 1f + + add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) + ldp x4, x5, [x3] + msr teecr32_el1, x4 + msr teehbr32_el1, x5 +1: +.endm + +.macro activate_traps + ldr x2, [x0, #VCPU_IRQ_LINES] + ldr x1, [x0, #VCPU_HCR_EL2] + orr x2, x2, x1 + msr hcr_el2, x2 + + ldr x2, =(CPTR_EL2_TTA) + msr cptr_el2, x2 + + ldr x2, =(1 << 15) // Trap CP15 Cr=15 + msr hstr_el2, x2 + + mrs x2, mdcr_el2 + and x2, x2, #MDCR_EL2_HPMN_MASK + orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) + msr mdcr_el2, x2 +.endm + +.macro deactivate_traps + mov x2, #HCR_RW + msr hcr_el2, x2 + msr cptr_el2, xzr + msr hstr_el2, xzr + + mrs x2, mdcr_el2 + and x2, x2, #MDCR_EL2_HPMN_MASK + msr mdcr_el2, x2 +.endm + +.macro activate_vm + ldr x1, [x0, #VCPU_KVM] + kern_hyp_va x1 + ldr x2, [x1, #KVM_VTTBR] + msr vttbr_el2, x2 +.endm + +.macro deactivate_vm + msr vttbr_el2, xzr +.endm + +/* + * Save the VGIC CPU state into memory + * x0: Register pointing to VCPU struct + * Do not corrupt x1!!! + */ +.macro save_vgic_state + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr w4, [x2, #GICH_HCR] + ldr w5, [x2, #GICH_VMCR] + ldr w6, [x2, #GICH_MISR] + ldr w7, [x2, #GICH_EISR0] + ldr w8, [x2, #GICH_EISR1] + ldr w9, [x2, #GICH_ELRSR0] + ldr w10, [x2, #GICH_ELRSR1] + ldr w11, [x2, #GICH_APR] + + str w4, [x3, #VGIC_CPU_HCR] + str w5, [x3, #VGIC_CPU_VMCR] + str w6, [x3, #VGIC_CPU_MISR] + str w7, [x3, #VGIC_CPU_EISR] + str w8, [x3, #(VGIC_CPU_EISR + 4)] + str w9, [x3, #VGIC_CPU_ELRSR] + str w10, [x3, #(VGIC_CPU_ELRSR + 4)] + str w11, [x3, #VGIC_CPU_APR] + + /* Clear GICH_HCR */ + str wzr, [x2, #GICH_HCR] + + /* Save list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_CPU_LR +1: ldr w5, [x2], #4 + str w5, [x3], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: +.endm + +/* + * Restore the VGIC CPU state from memory + * x0: Register pointing to VCPU struct + */ +.macro restore_vgic_state + /* Get VGIC VCTRL base into x2 */ + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr x2, [x2, #KVM_VGIC_VCTRL] + kern_hyp_va x2 + cbz x2, 2f // disabled + + /* Compute the address of struct vgic_cpu */ + add x3, x0, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr w4, [x3, #VGIC_CPU_HCR] + ldr w5, [x3, #VGIC_CPU_VMCR] + ldr w6, [x3, #VGIC_CPU_APR] + + str w4, [x2, #GICH_HCR] + str w5, [x2, #GICH_VMCR] + str w6, [x2, #GICH_APR] + + /* Restore list registers */ + add x2, x2, #GICH_LR0 + ldr w4, [x3, #VGIC_CPU_NR_LR] + add x3, x3, #VGIC_CPU_LR +1: ldr w5, [x3], #4 + str w5, [x2], #4 + sub w4, w4, #1 + cbnz w4, 1b +2: +.endm + +.macro save_timer_state + // x0: vcpu pointer + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr w3, [x2, #KVM_TIMER_ENABLED] + cbz w3, 1f + + mrs x3, cntv_ctl_el0 + and x3, x3, #3 + str w3, [x0, #VCPU_TIMER_CNTV_CTL] + bic x3, x3, #1 // Clear Enable + msr cntv_ctl_el0, x3 + + isb + + mrs x3, cntv_cval_el0 + str x3, [x0, #VCPU_TIMER_CNTV_CVAL] + +1: + // Allow physical timer/counter access for the host + mrs x2, cnthctl_el2 + orr x2, x2, #3 + msr cnthctl_el2, x2 + + // Clear cntvoff for the host + msr cntvoff_el2, xzr +.endm + +.macro restore_timer_state + // x0: vcpu pointer + // Disallow physical timer access for the guest + // Physical counter access is allowed + mrs x2, cnthctl_el2 + orr x2, x2, #1 + bic x2, x2, #2 + msr cnthctl_el2, x2 + + ldr x2, [x0, #VCPU_KVM] + kern_hyp_va x2 + ldr w3, [x2, #KVM_TIMER_ENABLED] + cbz w3, 1f + + ldr x3, [x2, #KVM_TIMER_CNTVOFF] + msr cntvoff_el2, x3 + ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] + msr cntv_cval_el0, x2 + isb + + ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] + and x2, x2, #3 + msr cntv_ctl_el0, x2 +1: +.endm + +__save_sysregs: + save_sysregs + ret + +__restore_sysregs: + restore_sysregs + ret + +__save_fpsimd: + save_fpsimd + ret + +__restore_fpsimd: + restore_fpsimd + ret + +/* + * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); + * + * This is the world switch. The first half of the function + * deals with entering the guest, and anything from __kvm_vcpu_return + * to the end of the function deals with reentering the host. + * On the enter path, only x0 (vcpu pointer) must be preserved until + * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception + * code) must both be preserved until the epilogue. + * In both cases, x2 points to the CPU context we're saving/restoring from/to. + */ +ENTRY(__kvm_vcpu_run) + kern_hyp_va x0 + msr tpidr_el2, x0 // Save the vcpu register + + // Host context + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + save_host_regs + bl __save_fpsimd + bl __save_sysregs + + activate_traps + activate_vm + + restore_vgic_state + restore_timer_state + + // Guest context + add x2, x0, #VCPU_CONTEXT + + bl __restore_sysregs + bl __restore_fpsimd + restore_guest_32bit_state + restore_guest_regs + + // That's it, no more messing around. + eret + +__kvm_vcpu_return: + // Assume x0 is the vcpu pointer, x1 the return code + // Guest's x0-x3 are on the stack + + // Guest context + add x2, x0, #VCPU_CONTEXT + + save_guest_regs + bl __save_fpsimd + bl __save_sysregs + save_guest_32bit_state + + save_timer_state + save_vgic_state + + deactivate_traps + deactivate_vm + + // Host context + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + bl __restore_sysregs + bl __restore_fpsimd + restore_host_regs + + mov x0, x1 + ret +END(__kvm_vcpu_run) + +// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +ENTRY(__kvm_tlb_flush_vmid_ipa) + kern_hyp_va x0 + ldr x2, [x0, #KVM_VTTBR] + msr vttbr_el2, x2 + isb + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + tlbi ipas2e1is, x1 + dsb sy + tlbi vmalle1is + dsb sy + isb + + msr vttbr_el2, xzr + ret +ENDPROC(__kvm_tlb_flush_vmid_ipa) + +ENTRY(__kvm_flush_vm_context) + tlbi alle1is + ic ialluis + dsb sy + ret +ENDPROC(__kvm_flush_vm_context) + +__kvm_hyp_panic: + // Guess the context by looking at VTTBR: + // If zero, then we're already a host. + // Otherwise restore a minimal host context before panicing. + mrs x0, vttbr_el2 + cbz x0, 1f + + mrs x0, tpidr_el2 + + deactivate_traps + deactivate_vm + + ldr x2, [x0, #VCPU_HOST_CONTEXT] + kern_hyp_va x2 + + bl __restore_sysregs + +1: adr x0, __hyp_panic_str + adr x1, 2f + ldp x2, x3, [x1] + sub x0, x0, x2 + add x0, x0, x3 + mrs x1, spsr_el2 + mrs x2, elr_el2 + mrs x3, esr_el2 + mrs x4, far_el2 + mrs x5, hpfar_el2 + mrs x6, par_el1 + mrs x7, tpidr_el2 + + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret + + .align 3 +2: .quad HYP_PAGE_OFFSET + .quad PAGE_OFFSET +ENDPROC(__kvm_hyp_panic) + +__hyp_panic_str: + .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0" + + .align 2 + +ENTRY(kvm_call_hyp) + hvc #0 + ret +ENDPROC(kvm_call_hyp) + +.macro invalid_vector label, target + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid, __kvm_hyp_panic + invalid_vector el2t_irq_invalid, __kvm_hyp_panic + invalid_vector el2t_fiq_invalid, __kvm_hyp_panic + invalid_vector el2t_error_invalid, __kvm_hyp_panic + invalid_vector el2h_sync_invalid, __kvm_hyp_panic + invalid_vector el2h_irq_invalid, __kvm_hyp_panic + invalid_vector el2h_fiq_invalid, __kvm_hyp_panic + invalid_vector el2h_error_invalid, __kvm_hyp_panic + invalid_vector el1_sync_invalid, __kvm_hyp_panic + invalid_vector el1_irq_invalid, __kvm_hyp_panic + invalid_vector el1_fiq_invalid, __kvm_hyp_panic + invalid_vector el1_error_invalid, __kvm_hyp_panic + +el1_sync: // Guest trapped into EL2 + push x0, x1 + push x2, x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_EL2_EC_SHIFT + + cmp x2, #ESR_EL2_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + pop x2, x3 + pop x0, x1 + + push lr, xzr + + /* + * Compute the function address in EL2, and shuffle the parameters. + */ + kern_hyp_va x0 + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr + + pop lr, xzr + eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + cmp x2, #ESR_EL2_EC_DABT + mov x0, #ESR_EL2_EC_IABT + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ + and x2, x1, #ESR_EL2_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. + */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str x1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __kvm_vcpu_return + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: pop x2, x3 + pop x0, x1 + + eret + +el1_irq: + push x0, x1 + push x2, x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __kvm_vcpu_return + + .ltorg + + .align 11 + +ENTRY(__kvm_hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__kvm_hyp_vector) + +__kvm_hyp_code_end: + .globl __kvm_hyp_code_end + + .popsection diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c new file mode 100644 index 000000000000..81a02a8762b0 --- /dev/null +++ b/arch/arm64/kvm/inject_fault.c @@ -0,0 +1,203 @@ +/* + * Fault injection for both 32 and 64bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/esr.h> + +#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ + PSR_I_BIT | PSR_D_BIT) +#define EL1_EXCEPT_SYNC_OFFSET 0x200 + +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long cpsr; + unsigned long new_spsr_value = *vcpu_cpsr(vcpu); + bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); + u32 return_offset = (is_thumb) ? 4 : 0; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + + cpsr = mode | COMPAT_PSR_I_BIT; + + if (sctlr & (1 << 30)) + cpsr |= COMPAT_PSR_T_BIT; + if (sctlr & (1 << 25)) + cpsr |= COMPAT_PSR_E_BIT; + + *vcpu_cpsr(vcpu) = cpsr; + + /* Note: These now point to the banked copies */ + *vcpu_spsr(vcpu) = new_spsr_value; + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += vcpu_cp15(vcpu, c12_VBAR); + + *vcpu_pc(vcpu) = vect_offset; +} + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vect_offset = 16; + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) + *fsr = 1 << 9 | 0x34; + else + *fsr = 0x14; +} + +static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_aarch32; + u32 esr = 0; + + is_aarch32 = vcpu_mode_is_32bit(vcpu); + + *vcpu_spsr(vcpu) = cpsr; + *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; + + vcpu_sys_reg(vcpu, FAR_EL1) = addr; + + /* + * Build an {i,d}abort, depending on the level and the + * instruction set. Report an external synchronous abort. + */ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= ESR_EL1_IL; + + /* + * Here, the guest runs in AArch64 mode when in EL1. If we get + * an AArch32 fault, it means we managed to trap an EL0 fault. + */ + if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t) + esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT); + else + esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT); + + if (!is_iabt) + esr |= ESR_EL1_EC_DABT_EL0; + + vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT; +} + +static void inject_undef64(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT); + + *vcpu_spsr(vcpu) = cpsr; + *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; + + /* + * Build an unknown exception, depending on the instruction + * set. + */ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= ESR_EL1_IL; + + vcpu_sys_reg(vcpu, ESR_EL1) = esr; +} + +/** + * kvm_inject_dabt - inject a data abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_abt32(vcpu, false, addr); + + inject_abt64(vcpu, false, addr); +} + +/** + * kvm_inject_pabt - inject a prefetch abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_abt32(vcpu, true, addr); + + inject_abt64(vcpu, true, addr); +} + +/** + * kvm_inject_undefined - inject an undefined instruction into the guest + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_undefined(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hcr_el2 & HCR_RW)) + inject_undef32(vcpu); + + inject_undef64(vcpu); +} diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c new file mode 100644 index 000000000000..bbc6ae32e4af --- /dev/null +++ b/arch/arm64/kvm/regmap.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/emulate.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/mm.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/ptrace.h> + +#define VCPU_NR_MODES 6 +#define REG_OFFSET(_reg) \ + (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) + +#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) + +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { + /* USR Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), + REG_OFFSET(pc) + }, + + /* FIQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), + REG_OFFSET(compat_r8_fiq), /* r8 */ + REG_OFFSET(compat_r9_fiq), /* r9 */ + REG_OFFSET(compat_r10_fiq), /* r10 */ + REG_OFFSET(compat_r11_fiq), /* r11 */ + REG_OFFSET(compat_r12_fiq), /* r12 */ + REG_OFFSET(compat_sp_fiq), /* r13 */ + REG_OFFSET(compat_lr_fiq), /* r14 */ + REG_OFFSET(pc) + }, + + /* IRQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_irq), /* r13 */ + REG_OFFSET(compat_lr_irq), /* r14 */ + REG_OFFSET(pc) + }, + + /* SVC Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_svc), /* r13 */ + REG_OFFSET(compat_lr_svc), /* r14 */ + REG_OFFSET(pc) + }, + + /* ABT Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_abt), /* r13 */ + REG_OFFSET(compat_lr_abt), /* r14 */ + REG_OFFSET(pc) + }, + + /* UND Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_und), /* r13 */ + REG_OFFSET(compat_lr_und), /* r14 */ + REG_OFFSET(pc) + }, +}; + +/* + * Return a pointer to the register number valid in the current mode of + * the virtual CPU. + */ +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + + switch (mode) { + case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC: + mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ + break; + + case COMPAT_PSR_MODE_ABT: + mode = 4; + break; + + case COMPAT_PSR_MODE_UND: + mode = 5; + break; + + case COMPAT_PSR_MODE_SYS: + mode = 0; /* SYS maps to USR */ + break; + + default: + BUG(); + } + + return reg_array + vcpu_reg_offsets[mode][reg_num]; +} + +/* + * Return the SPSR for the current mode of the virtual CPU. + */ +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu) +{ + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case COMPAT_PSR_MODE_SVC: + mode = KVM_SPSR_SVC; + break; + case COMPAT_PSR_MODE_ABT: + mode = KVM_SPSR_ABT; + break; + case COMPAT_PSR_MODE_UND: + mode = KVM_SPSR_UND; + break; + case COMPAT_PSR_MODE_IRQ: + mode = KVM_SPSR_IRQ; + break; + case COMPAT_PSR_MODE_FIQ: + mode = KVM_SPSR_FIQ; + break; + default: + BUG(); + } + + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode]; +} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c new file mode 100644 index 000000000000..70a7816535cd --- /dev/null +++ b/arch/arm64/kvm/reset.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/reset.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/errno.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> + +#include <kvm/arm_arch_timer.h> + +#include <asm/cputype.h> +#include <asm/ptrace.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_coproc.h> + +/* + * ARMv8 Reset Values + */ +static const struct kvm_regs default_regs_reset = { + .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | + PSR_F_BIT | PSR_D_BIT), +}; + +static const struct kvm_regs default_regs_reset32 = { + .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT | + COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), +}; + +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + +static bool cpu_has_32bit_el1(void) +{ + u64 pfr0; + + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + return !!(pfr0 & 0x20); +} + +int kvm_arch_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_ARM_EL1_32BIT: + r = cpu_has_32bit_el1(); + break; + default: + r = 0; + } + + return r; +} + +/** + * kvm_reset_vcpu - sets core registers and sys_regs to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on + * the virtual CPU struct to their architectually defined reset + * values. + */ +int kvm_reset_vcpu(struct kvm_vcpu *vcpu) +{ + const struct kvm_irq_level *cpu_vtimer_irq; + const struct kvm_regs *cpu_reset; + + switch (vcpu->arch.target) { + default: + if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { + if (!cpu_has_32bit_el1()) + return -EINVAL; + cpu_reset = &default_regs_reset32; + vcpu->arch.hcr_el2 &= ~HCR_RW; + } else { + cpu_reset = &default_regs_reset; + } + + cpu_vtimer_irq = &default_vtimer_irq; + break; + } + + /* Reset core registers */ + memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset)); + + /* Reset system registers */ + kvm_reset_sys_regs(vcpu); + + /* Reset timer */ + kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + + return 0; +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c new file mode 100644 index 000000000000..94923609753b --- /dev/null +++ b/arch/arm64/kvm/sys_regs.c @@ -0,0 +1,1050 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/coproc.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell <rusty@rustcorp.com.au> + * Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/mm.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <trace/events/kvm.h> + +#include "sys_regs.h" + +/* + * All of this file is extremly similar to the ARM coproc.c, but the + * types are different. My gut feeling is that it should be pretty + * easy to merge, but that would be an ABI breakage -- again. VFP + * would also need to be abstracted. + * + * For AArch32, we only take care of what is being trapped. Anything + * that has to do with init and userspace access has to go via the + * 64bit interface. + */ + +/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ +static u32 cache_levels; + +/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ +#define CSSELR_MAX 12 + +/* Which cache CCSIDR represents depends on CSSELR value. */ +static u32 get_ccsidr(u32 csselr) +{ + u32 ccsidr; + + /* Make sure noone else changes CSSELR during this! */ + local_irq_disable(); + /* Put value into CSSELR */ + asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + isb(); + /* Read result out of CCSIDR */ + asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr)); + local_irq_enable(); + + return ccsidr; +} + +static void do_dc_cisw(u32 val) +{ + asm volatile("dc cisw, %x0" : : "r" (val)); + dsb(); +} + +static void do_dc_csw(u32 val) +{ + asm volatile("dc csw, %x0" : : "r" (val)); + dsb(); +} + +/* See note at ARM ARM B1.14.4 */ +static bool access_dcsw(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned long val; + int cpu; + + if (!p->is_write) + return read_from_write_only(vcpu, p); + + cpu = get_cpu(); + + cpumask_setall(&vcpu->arch.require_dcache_flush); + cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); + + /* If we were already preempted, take the long way around */ + if (cpu != vcpu->arch.last_pcpu) { + flush_cache_all(); + goto done; + } + + val = *vcpu_reg(vcpu, p->Rt); + + switch (p->CRm) { + case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ + case 14: /* DCCISW */ + do_dc_cisw(val); + break; + + case 10: /* DCCSW */ + do_dc_csw(val); + break; + } + +done: + put_cpu(); + + return true; +} + +/* + * We could trap ID_DFR0 and tell the guest we don't support performance + * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was + * NAKed, so it will read the PMCR anyway. + * + * Therefore we tell the guest we have 0 counters. Unfortunately, we + * must always support PMCCNTR (the cycle counter): we just RAZ/WI for + * all PM registers, which doesn't crash the guest kernel at least. + */ +static bool pm_fake(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + else + return read_zero(vcpu, p); +} + +static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 amair; + + asm volatile("mrs %0, amair_el1\n" : "=r" (amair)); + vcpu_sys_reg(vcpu, AMAIR_EL1) = amair; +} + +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + /* + * Simply map the vcpu_id into the Aff0 field of the MPIDR. + */ + vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); +} + +/* + * Architected system registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + */ +static const struct sys_reg_desc sys_reg_descs[] = { + /* DC ISW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010), + access_dcsw }, + /* DC CSW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010), + access_dcsw }, + /* DC CISW */ + { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), + access_dcsw }, + + /* TEECR32_EL1 */ + { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, reset_val, TEECR32_EL1, 0 }, + /* TEEHBR32_EL1 */ + { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), + NULL, reset_val, TEEHBR32_EL1, 0 }, + /* DBGVCR32_EL2 */ + { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), + NULL, reset_val, DBGVCR32_EL2, 0 }, + + /* MPIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101), + NULL, reset_mpidr, MPIDR_EL1 }, + /* SCTLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), + NULL, reset_val, SCTLR_EL1, 0x00C50078 }, + /* CPACR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), + NULL, reset_val, CPACR_EL1, 0 }, + /* TTBR0_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, TTBR0_EL1 }, + /* TTBR1_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), + NULL, reset_unknown, TTBR1_EL1 }, + /* TCR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), + NULL, reset_val, TCR_EL1, 0 }, + + /* AFSR0_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), + NULL, reset_unknown, AFSR0_EL1 }, + /* AFSR1_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), + NULL, reset_unknown, AFSR1_EL1 }, + /* ESR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), + NULL, reset_unknown, ESR_EL1 }, + /* FAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, FAR_EL1 }, + + /* PMINTENSET_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), + pm_fake }, + /* PMINTENCLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), + pm_fake }, + + /* MAIR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), + NULL, reset_unknown, MAIR_EL1 }, + /* AMAIR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), + NULL, reset_amair_el1, AMAIR_EL1 }, + + /* VBAR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), + NULL, reset_val, VBAR_EL1, 0 }, + /* CONTEXTIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), + NULL, reset_val, CONTEXTIDR_EL1, 0 }, + /* TPIDR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), + NULL, reset_unknown, TPIDR_EL1 }, + + /* CNTKCTL_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000), + NULL, reset_val, CNTKCTL_EL1, 0}, + + /* CSSELR_EL1 */ + { Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, CSSELR_EL1 }, + + /* PMCR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), + pm_fake }, + /* PMCNTENSET_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), + pm_fake }, + /* PMCNTENCLR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), + pm_fake }, + /* PMOVSCLR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), + pm_fake }, + /* PMSWINC_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), + pm_fake }, + /* PMSELR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), + pm_fake }, + /* PMCEID0_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), + pm_fake }, + /* PMCEID1_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), + pm_fake }, + /* PMCCNTR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), + pm_fake }, + /* PMXEVTYPER_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), + pm_fake }, + /* PMXEVCNTR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), + pm_fake }, + /* PMUSERENR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), + pm_fake }, + /* PMOVSSET_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), + pm_fake }, + + /* TPIDR_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), + NULL, reset_unknown, TPIDR_EL0 }, + /* TPIDRRO_EL0 */ + { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), + NULL, reset_unknown, TPIDRRO_EL0 }, + + /* DACR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), + NULL, reset_unknown, DACR32_EL2 }, + /* IFSR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001), + NULL, reset_unknown, IFSR32_EL2 }, + /* FPEXC32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000), + NULL, reset_val, FPEXC32_EL2, 0x70 }, +}; + +/* Trapped cp15 registers */ +static const struct sys_reg_desc cp15_regs[] = { + /* + * DC{C,I,CI}SW operations: + */ + { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, +}; + +/* Target specific emulation tables */ +static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; + +void kvm_register_target_sys_reg_table(unsigned int target, + struct kvm_sys_reg_target_table *table) +{ + target_tables[target] = table; +} + +/* Get specific register table for this target. */ +static const struct sys_reg_desc *get_target_table(unsigned target, + bool mode_is_64, + size_t *num) +{ + struct kvm_sys_reg_target_table *table; + + table = target_tables[target]; + if (mode_is_64) { + *num = table->table64.num; + return table->table64.table; + } else { + *num = table->table32.num; + return table->table32.table; + } +} + +static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, + const struct sys_reg_desc table[], + unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + const struct sys_reg_desc *r = &table[i]; + + if (params->Op0 != r->Op0) + continue; + if (params->Op1 != r->Op1) + continue; + if (params->CRn != r->CRn) + continue; + if (params->CRm != r->CRm) + continue; + if (params->Op2 != r->Op2) + continue; + + return r; + } + return NULL; +} + +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + +static void emulate_cp15(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + size_t num; + const struct sys_reg_desc *table, *r; + + table = get_target_table(vcpu->arch.target, false, &num); + + /* Search target-specific then generic table. */ + r = find_reg(params, table, num); + if (!r) + r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs)); + + if (likely(r)) { + /* + * Not having an accessor means that we have + * configured a trap that we don't know how to + * handle. This certainly qualifies as a gross bug + * that should be fixed right away. + */ + BUG_ON(!r->access); + + if (likely(r->access(vcpu, params, r))) { + /* Skip instruction, since it was emulated */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return; + } + /* If access function fails, it should complain. */ + } + + kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); +} + +/** + * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + u32 hsr = kvm_vcpu_get_hsr(vcpu); + int Rt2 = (hsr >> 10) & 0xf; + + params.CRm = (hsr >> 1) & 0xf; + params.Rt = (hsr >> 5) & 0xf; + params.is_write = ((hsr & 1) == 0); + + params.Op0 = 0; + params.Op1 = (hsr >> 16) & 0xf; + params.Op2 = 0; + params.CRn = 0; + + /* + * Massive hack here. Store Rt2 in the top 32bits so we only + * have one register to deal with. As we use the same trap + * backends between AArch32 and AArch64, we get away with it. + */ + if (params.is_write) { + u64 val = *vcpu_reg(vcpu, params.Rt); + val &= 0xffffffff; + val |= *vcpu_reg(vcpu, Rt2) << 32; + *vcpu_reg(vcpu, params.Rt) = val; + } + + emulate_cp15(vcpu, ¶ms); + + /* Do the opposite hack for the read side */ + if (!params.is_write) { + u64 val = *vcpu_reg(vcpu, params.Rt); + val >>= 32; + *vcpu_reg(vcpu, Rt2) = val; + } + + return 1; +} + +/** + * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + params.CRm = (hsr >> 1) & 0xf; + params.Rt = (hsr >> 5) & 0xf; + params.is_write = ((hsr & 1) == 0); + params.CRn = (hsr >> 10) & 0xf; + params.Op0 = 0; + params.Op1 = (hsr >> 14) & 0x7; + params.Op2 = (hsr >> 17) & 0x7; + + emulate_cp15(vcpu, ¶ms); + return 1; +} + +static int emulate_sys_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + size_t num; + const struct sys_reg_desc *table, *r; + + table = get_target_table(vcpu->arch.target, true, &num); + + /* Search target-specific then generic table. */ + r = find_reg(params, table, num); + if (!r) + r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + if (likely(r)) { + /* + * Not having an accessor means that we have + * configured a trap that we don't know how to + * handle. This certainly qualifies as a gross bug + * that should be fixed right away. + */ + BUG_ON(!r->access); + + if (likely(r->access(vcpu, params, r))) { + /* Skip instruction, since it was emulated */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + /* If access function fails, it should complain. */ + } else { + kvm_err("Unsupported guest sys_reg access at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + } + kvm_inject_undefined(vcpu); + return 1; +} + +static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *table, size_t num) +{ + unsigned long i; + + for (i = 0; i < num; i++) + if (table[i].reset) + table[i].reset(vcpu, &table[i]); +} + +/** + * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct sys_reg_params params; + unsigned long esr = kvm_vcpu_get_hsr(vcpu); + + params.Op0 = (esr >> 20) & 3; + params.Op1 = (esr >> 14) & 0x7; + params.CRn = (esr >> 10) & 0xf; + params.CRm = (esr >> 1) & 0xf; + params.Op2 = (esr >> 17) & 0x7; + params.Rt = (esr >> 5) & 0x1f; + params.is_write = !(esr & 1); + + return emulate_sys_reg(vcpu, ¶ms); +} + +/****************************************************************************** + * Userspace API + *****************************************************************************/ + +static bool index_to_params(u64 id, struct sys_reg_params *params) +{ + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U64: + /* Any unused index bits means it's not valid. */ + if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK + | KVM_REG_ARM_COPROC_MASK + | KVM_REG_ARM64_SYSREG_OP0_MASK + | KVM_REG_ARM64_SYSREG_OP1_MASK + | KVM_REG_ARM64_SYSREG_CRN_MASK + | KVM_REG_ARM64_SYSREG_CRM_MASK + | KVM_REG_ARM64_SYSREG_OP2_MASK)) + return false; + params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK) + >> KVM_REG_ARM64_SYSREG_OP0_SHIFT); + params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK) + >> KVM_REG_ARM64_SYSREG_OP1_SHIFT); + params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK) + >> KVM_REG_ARM64_SYSREG_CRN_SHIFT); + params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK) + >> KVM_REG_ARM64_SYSREG_CRM_SHIFT); + params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK) + >> KVM_REG_ARM64_SYSREG_OP2_SHIFT); + return true; + default: + return false; + } +} + +/* Decode an index value, and find the sys_reg_desc entry. */ +static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, + u64 id) +{ + size_t num; + const struct sys_reg_desc *table, *r; + struct sys_reg_params params; + + /* We only do sys_reg for now. */ + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) + return NULL; + + if (!index_to_params(id, ¶ms)) + return NULL; + + table = get_target_table(vcpu->arch.target, true, &num); + r = find_reg(¶ms, table, num); + if (!r) + r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + /* Not saved in the sys_reg array? */ + if (r && !r->reg) + r = NULL; + + return r; +} + +/* + * These are the invariant sys_reg registers: we let the guest see the + * host versions of these, so they're part of the guest state. + * + * A future CPU may provide a mechanism to present different values to + * the guest, or a future kvm may trap them. + */ + +#define FUNCTION_INVARIANT(reg) \ + static void get_##reg(struct kvm_vcpu *v, \ + const struct sys_reg_desc *r) \ + { \ + u64 val; \ + \ + asm volatile("mrs %0, " __stringify(reg) "\n" \ + : "=r" (val)); \ + ((struct sys_reg_desc *)r)->val = val; \ + } + +FUNCTION_INVARIANT(midr_el1) +FUNCTION_INVARIANT(ctr_el0) +FUNCTION_INVARIANT(revidr_el1) +FUNCTION_INVARIANT(id_pfr0_el1) +FUNCTION_INVARIANT(id_pfr1_el1) +FUNCTION_INVARIANT(id_dfr0_el1) +FUNCTION_INVARIANT(id_afr0_el1) +FUNCTION_INVARIANT(id_mmfr0_el1) +FUNCTION_INVARIANT(id_mmfr1_el1) +FUNCTION_INVARIANT(id_mmfr2_el1) +FUNCTION_INVARIANT(id_mmfr3_el1) +FUNCTION_INVARIANT(id_isar0_el1) +FUNCTION_INVARIANT(id_isar1_el1) +FUNCTION_INVARIANT(id_isar2_el1) +FUNCTION_INVARIANT(id_isar3_el1) +FUNCTION_INVARIANT(id_isar4_el1) +FUNCTION_INVARIANT(id_isar5_el1) +FUNCTION_INVARIANT(clidr_el1) +FUNCTION_INVARIANT(aidr_el1) + +/* ->val is filled in by kvm_sys_reg_table_init() */ +static struct sys_reg_desc invariant_sys_regs[] = { + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000), + NULL, get_midr_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110), + NULL, get_revidr_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000), + NULL, get_id_pfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001), + NULL, get_id_pfr1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010), + NULL, get_id_dfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011), + NULL, get_id_afr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100), + NULL, get_id_mmfr0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101), + NULL, get_id_mmfr1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110), + NULL, get_id_mmfr2_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111), + NULL, get_id_mmfr3_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), + NULL, get_id_isar0_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001), + NULL, get_id_isar1_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), + NULL, get_id_isar2_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011), + NULL, get_id_isar3_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100), + NULL, get_id_isar4_el1 }, + { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101), + NULL, get_id_isar5_el1 }, + { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001), + NULL, get_clidr_el1 }, + { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111), + NULL, get_aidr_el1 }, + { Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001), + NULL, get_ctr_el0 }, +}; + +static int reg_from_user(void *val, const void __user *uaddr, u64 id) +{ + /* This Just Works because we are little endian. */ + if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0) + return -EFAULT; + return 0; +} + +static int reg_to_user(void __user *uaddr, const void *val, u64 id) +{ + /* This Just Works because we are little endian. */ + if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0) + return -EFAULT; + return 0; +} + +static int get_invariant_sys_reg(u64 id, void __user *uaddr) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + + if (!index_to_params(id, ¶ms)) + return -ENOENT; + + r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); + if (!r) + return -ENOENT; + + return reg_to_user(uaddr, &r->val, id); +} + +static int set_invariant_sys_reg(u64 id, void __user *uaddr) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + int err; + u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ + + if (!index_to_params(id, ¶ms)) + return -ENOENT; + r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); + if (!r) + return -ENOENT; + + err = reg_from_user(&val, uaddr, id); + if (err) + return err; + + /* This is what we mean by invariant: you can't change it. */ + if (r->val != val) + return -EINVAL; + + return 0; +} + +static bool is_valid_cache(u32 val) +{ + u32 level, ctype; + + if (val >= CSSELR_MAX) + return -ENOENT; + + /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ + level = (val >> 1); + ctype = (cache_levels >> (level * 3)) & 7; + + switch (ctype) { + case 0: /* No cache */ + return false; + case 1: /* Instruction cache only */ + return (val & 1); + case 2: /* Data cache only */ + case 4: /* Unified cache */ + return !(val & 1); + case 3: /* Separate instruction and data caches */ + return true; + default: /* Reserved: we can't know instruction or data. */ + return false; + } +} + +static int demux_c15_get(u64 id, void __user *uaddr) +{ + u32 val; + u32 __user *uval = uaddr; + + /* Fail if we have unknown bits set. */ + if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK + | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1))) + return -ENOENT; + + switch (id & KVM_REG_ARM_DEMUX_ID_MASK) { + case KVM_REG_ARM_DEMUX_ID_CCSIDR: + if (KVM_REG_SIZE(id) != 4) + return -ENOENT; + val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) + >> KVM_REG_ARM_DEMUX_VAL_SHIFT; + if (!is_valid_cache(val)) + return -ENOENT; + + return put_user(get_ccsidr(val), uval); + default: + return -ENOENT; + } +} + +static int demux_c15_set(u64 id, void __user *uaddr) +{ + u32 val, newval; + u32 __user *uval = uaddr; + + /* Fail if we have unknown bits set. */ + if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK + | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1))) + return -ENOENT; + + switch (id & KVM_REG_ARM_DEMUX_ID_MASK) { + case KVM_REG_ARM_DEMUX_ID_CCSIDR: + if (KVM_REG_SIZE(id) != 4) + return -ENOENT; + val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) + >> KVM_REG_ARM_DEMUX_VAL_SHIFT; + if (!is_valid_cache(val)) + return -ENOENT; + + if (get_user(newval, uval)) + return -EFAULT; + + /* This is also invariant: you can't change it. */ + if (newval != get_ccsidr(val)) + return -EINVAL; + return 0; + default: + return -ENOENT; + } +} + +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + const struct sys_reg_desc *r; + void __user *uaddr = (void __user *)(unsigned long)reg->addr; + + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return demux_c15_get(reg->id, uaddr); + + if (KVM_REG_SIZE(reg->id) != sizeof(__u64)) + return -ENOENT; + + r = index_to_sys_reg_desc(vcpu, reg->id); + if (!r) + return get_invariant_sys_reg(reg->id, uaddr); + + return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); +} + +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + const struct sys_reg_desc *r; + void __user *uaddr = (void __user *)(unsigned long)reg->addr; + + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return demux_c15_set(reg->id, uaddr); + + if (KVM_REG_SIZE(reg->id) != sizeof(__u64)) + return -ENOENT; + + r = index_to_sys_reg_desc(vcpu, reg->id); + if (!r) + return set_invariant_sys_reg(reg->id, uaddr); + + return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); +} + +static unsigned int num_demux_regs(void) +{ + unsigned int i, count = 0; + + for (i = 0; i < CSSELR_MAX; i++) + if (is_valid_cache(i)) + count++; + + return count; +} + +static int write_demux_regids(u64 __user *uindices) +{ + u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; + unsigned int i; + + val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; + for (i = 0; i < CSSELR_MAX; i++) { + if (!is_valid_cache(i)) + continue; + if (put_user(val | i, uindices)) + return -EFAULT; + uindices++; + } + return 0; +} + +static u64 sys_reg_to_index(const struct sys_reg_desc *reg) +{ + return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | + KVM_REG_ARM64_SYSREG | + (reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) | + (reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) | + (reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) | + (reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) | + (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT)); +} + +static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) +{ + if (!*uind) + return true; + + if (put_user(sys_reg_to_index(reg), *uind)) + return false; + + (*uind)++; + return true; +} + +/* Assumed ordered tables, see kvm_sys_reg_table_init. */ +static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) +{ + const struct sys_reg_desc *i1, *i2, *end1, *end2; + unsigned int total = 0; + size_t num; + + /* We check for duplicates here, to allow arch-specific overrides. */ + i1 = get_target_table(vcpu->arch.target, true, &num); + end1 = i1 + num; + i2 = sys_reg_descs; + end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); + + BUG_ON(i1 == end1 || i2 == end2); + + /* Walk carefully, as both tables may refer to the same register. */ + while (i1 || i2) { + int cmp = cmp_sys_reg(i1, i2); + /* target-specific overrides generic entry. */ + if (cmp <= 0) { + /* Ignore registers we trap but don't save. */ + if (i1->reg) { + if (!copy_reg_to_user(i1, &uind)) + return -EFAULT; + total++; + } + } else { + /* Ignore registers we trap but don't save. */ + if (i2->reg) { + if (!copy_reg_to_user(i2, &uind)) + return -EFAULT; + total++; + } + } + + if (cmp <= 0 && ++i1 == end1) + i1 = NULL; + if (cmp >= 0 && ++i2 == end2) + i2 = NULL; + } + return total; +} + +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu) +{ + return ARRAY_SIZE(invariant_sys_regs) + + num_demux_regs() + + walk_sys_regs(vcpu, (u64 __user *)NULL); +} + +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + int err; + + /* Then give them all the invariant registers' indices. */ + for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) { + if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) + return -EFAULT; + uindices++; + } + + err = walk_sys_regs(vcpu, uindices); + if (err < 0) + return err; + uindices += err; + + return write_demux_regids(uindices); +} + +void kvm_sys_reg_table_init(void) +{ + unsigned int i; + struct sys_reg_desc clidr; + + /* Make sure tables are unique and in order. */ + for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++) + BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0); + + /* We abuse the reset function to overwrite the table itself. */ + for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) + invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]); + + /* + * CLIDR format is awkward, so clean it up. See ARM B4.1.20: + * + * If software reads the Cache Type fields from Ctype1 + * upwards, once it has seen a value of 0b000, no caches + * exist at further-out levels of the hierarchy. So, for + * example, if Ctype3 is the first Cache Type field with a + * value of 0b000, the values of Ctype4 to Ctype7 must be + * ignored. + */ + get_clidr_el1(NULL, &clidr); /* Ugly... */ + cache_levels = clidr.val; + for (i = 0; i < 7; i++) + if (((cache_levels >> (i*3)) & 7) == 0) + break; + /* Clear all higher bits. */ + cache_levels &= (1 << (i*3))-1; +} + +/** + * kvm_reset_sys_regs - sets system registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architecturally defined reset values. + */ +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) +{ + size_t num; + const struct sys_reg_desc *table; + + /* Catch someone adding a register without putting in reset entry. */ + memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs)); + + /* Generic chip reset first (so target could override). */ + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + + table = get_target_table(vcpu->arch.target, true, &num); + reset_sys_reg_descs(vcpu, table, num); + + for (num = 1; num < NR_SYS_REGS; num++) + if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242) + panic("Didn't reset vcpu_sys_reg(%zi)", num); +} diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h new file mode 100644 index 000000000000..d50d3722998e --- /dev/null +++ b/arch/arm64/kvm/sys_regs.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/coproc.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__ +#define __ARM64_KVM_SYS_REGS_LOCAL_H__ + +struct sys_reg_params { + u8 Op0; + u8 Op1; + u8 CRn; + u8 CRm; + u8 Op2; + u8 Rt; + bool is_write; +}; + +struct sys_reg_desc { + /* MRS/MSR instruction which accesses it. */ + u8 Op0; + u8 Op1; + u8 CRn; + u8 CRm; + u8 Op2; + + /* Trapped access from guest, if non-NULL. */ + bool (*access)(struct kvm_vcpu *, + const struct sys_reg_params *, + const struct sys_reg_desc *); + + /* Initialization for vcpu. */ + void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *); + + /* Index into sys_reg[], or 0 if we don't need to save it. */ + int reg; + + /* Value (usually reset value) */ + u64 val; +}; + +static inline void print_sys_reg_instr(const struct sys_reg_params *p) +{ + /* Look, we even formatted it for you to paste into the table! */ + kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", + p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read"); +} + +static inline bool ignore_write(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p) +{ + return true; +} + +static inline bool read_zero(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p) +{ + *vcpu_reg(vcpu, p->Rt) = 0; + return true; +} + +static inline bool write_to_read_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + kvm_debug("sys_reg write to read-only register at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + return false; +} + +static inline bool read_from_write_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + kvm_debug("sys_reg read to write-only register at: %lx\n", + *vcpu_pc(vcpu)); + print_sys_reg_instr(params); + return false; +} + +/* Reset functions */ +static inline void reset_unknown(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; +} + +static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + BUG_ON(!r->reg); + BUG_ON(r->reg >= NR_SYS_REGS); + vcpu_sys_reg(vcpu, r->reg) = r->val; +} + +static inline int cmp_sys_reg(const struct sys_reg_desc *i1, + const struct sys_reg_desc *i2) +{ + BUG_ON(i1 == i2); + if (!i1) + return 1; + else if (!i2) + return -1; + if (i1->Op0 != i2->Op0) + return i1->Op0 - i2->Op0; + if (i1->Op1 != i2->Op1) + return i1->Op1 - i2->Op1; + if (i1->CRn != i2->CRn) + return i1->CRn - i2->CRn; + if (i1->CRm != i2->CRm) + return i1->CRm - i2->CRm; + return i1->Op2 - i2->Op2; +} + + +#define Op0(_x) .Op0 = _x +#define Op1(_x) .Op1 = _x +#define CRn(_x) .CRn = _x +#define CRm(_x) .CRm = _x +#define Op2(_x) .Op2 = _x + +#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c new file mode 100644 index 000000000000..4268ab9356b1 --- /dev/null +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Based on arch/arm/kvm/coproc_a15.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell <rusty@rustcorp.au> + * Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kvm_host.h> +#include <asm/cputype.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <linux/init.h> + +#include "sys_regs.h" + +static bool access_actlr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1); + return true; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 actlr; + + asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr)); + vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr; +} + +/* + * Implementation specific sys-reg registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + */ +static const struct sys_reg_desc genericv8_sys_regs[] = { + /* ACTLR_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), + access_actlr, reset_actlr, ACTLR_EL1 }, +}; + +static const struct sys_reg_desc genericv8_cp15_regs[] = { + /* ACTLR */ + { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), + access_actlr }, +}; + +static struct kvm_sys_reg_target_table genericv8_target_table = { + .table64 = { + .table = genericv8_sys_regs, + .num = ARRAY_SIZE(genericv8_sys_regs), + }, + .table32 = { + .table = genericv8_cp15_regs, + .num = ARRAY_SIZE(genericv8_cp15_regs), + }, +}; + +static int __init sys_reg_genericv8_init(void) +{ + unsigned int i; + + for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++) + BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], + &genericv8_sys_regs[i]) >= 0); + + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, + &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, + &genericv8_target_table); + return 0; +} +late_initcall(sys_reg_genericv8_init); diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 3140a2abcdc2..b51d36401d83 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o tlb.o proc.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1426468b77f3..0ecac8980aae 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -365,17 +365,6 @@ static int __kprobes do_translation_fault(unsigned long addr, } /* - * Some section permission faults need to be handled gracefully. They can - * happen due to a __{get,put}_user during an oops. - */ -static int do_sect_fault(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - do_bad_area(addr, esr, regs); - return 0; -} - -/* * This abort handler always returns "fault". */ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -398,12 +387,12 @@ static struct fault_info { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 88611c3a421a..e4193e3adc7f 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -70,23 +70,16 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, #endif } -void __flush_dcache_page(struct page *page) -{ - __flush_dcache_area(page_address(page), PAGE_SIZE); -} - void __sync_icache_dcache(pte_t pte, unsigned long addr) { - unsigned long pfn; - struct page *page; + struct page *page = pte_page(pte); - pfn = pte_pfn(pte); - if (!pfn_valid(pfn)) + /* no flushing needed for anonymous pages */ + if (!page_mapping(page)) return; - page = pfn_to_page(pfn); if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_page(page); + __flush_dcache_area(page_address(page), PAGE_SIZE); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); @@ -94,28 +87,14 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) } /* - * Ensure cache coherency between kernel mapping and userspace mapping of this - * page. + * This function is called when a page has been modified by the kernel. Mark + * it as dirty for later flushing when mapped in user space (if executable, + * see __sync_icache_dcache). */ void flush_dcache_page(struct page *page) { - struct address_space *mapping; - - /* - * The zero page is never written to, so never has any dirty cache - * lines, and therefore never needs to be flushed. - */ - if (page == ZERO_PAGE(0)) - return; - - mapping = page_mapping(page); - if (mapping && mapping_mapped(mapping)) { - __flush_dcache_page(page); - __flush_icache_all(); - set_bit(PG_dcache_clean, &page->flags); - } else { + if (test_bit(PG_dcache_clean, &page->flags)) clear_bit(PG_dcache_clean, &page->flags); - } } EXPORT_SYMBOL(flush_dcache_page); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c new file mode 100644 index 000000000000..2fc8258bab2d --- /dev/null +++ b/arch/arm64/mm/hugetlbpage.c @@ -0,0 +1,70 @@ +/* + * arch/arm64/mm/hugetlbpage.c + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/mm/hugetlbpage.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} +#endif + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !(pmd_val(pmd) & PMD_TABLE_BIT); +} + +int pud_huge(pud_t pud) +{ + return !(pud_val(pud) & PUD_TABLE_BIT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca77925a..67e8d7ce3fe7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -197,14 +197,6 @@ void __init bootmem_init(void) max_pfn = max_low_pfn = max; } -/* - * Poison init memory with an undefined instruction (0x0). - */ -static inline void poison_init_mem(void *s, size_t count) -{ - memset(s, 0, count); -} - #ifndef CONFIG_SPARSEMEM_VMEMMAP static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { @@ -280,59 +272,17 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - unsigned long reserved_pages, free_pages; - struct memblock_region *reg; - arm64_swiotlb_init(); max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; #ifndef CONFIG_SPARSEMEM_VMEMMAP - /* this will put all unused low memory onto the freelists */ free_unused_memmap(); #endif + /* this will put all unused low memory onto the freelists */ + free_all_bootmem(); - totalram_pages += free_all_bootmem(); - - reserved_pages = free_pages = 0; - - for_each_memblock(memory, reg) { - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = __phys_to_pfn(reg->base); - pfn2 = pfn1 + __phys_to_pfn(reg->size); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - if (PageReserved(page)) - reserved_pages++; - else if (!page_count(page)) - free_pages++; - page++; - } while (page < end); - } - - /* - * Since our memory may not be contiguous, calculate the real number - * of pages we have in this system. - */ - pr_info("Memory:"); - num_physpages = 0; - for_each_memblock(memory, reg) { - unsigned long pages = memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - num_physpages += pages; - printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); - } - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - - pr_notice("Memory: %luk/%luk available, %luk reserved\n", - nr_free_pages() << (PAGE_SHIFT-10), - free_pages << (PAGE_SHIFT-10), - reserved_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); #define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 @@ -374,7 +324,7 @@ void __init mem_init(void) BUILD_BUG_ON(TASK_SIZE_64 > MODULES_VADDR); BUG_ON(TASK_SIZE_64 > MODULES_VADDR); - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* * On a machine this small we won't get anywhere without @@ -386,7 +336,6 @@ void __init mem_init(void) void free_initmem(void) { - poison_init_mem(__init_begin, __init_end - __init_begin); free_initmem_default(0); } @@ -396,10 +345,8 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) { - poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area(start, end, 0, "initrd"); - } + if (!keep_initrd) + free_reserved_area((void *)start, (void *)end, 0, "initrd"); } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h index 916701e6d040..d519f4f50c8c 100644 --- a/arch/arm64/mm/mm.h +++ b/arch/arm64/mm/mm.h @@ -1,3 +1,2 @@ -extern void __flush_dcache_page(struct page *page); extern void __init bootmem_init(void); extern void __init arm64_swiotlb_init(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index eeecc9c8ed68..a8d1059b91b2 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -297,6 +297,16 @@ static void __init map_mem(void) { struct memblock_region *reg; + /* + * Temporarily limit the memblock range. We need to do this as + * create_mapping requires puds, pmds and ptes to be allocated from + * memory addressable from the initial direct kernel mapping. + * + * The initial direct kernel mapping, located at swapper_pg_dir, + * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned). + */ + memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); + /* map all the memory banks */ for_each_memblock(memory, reg) { phys_addr_t start = reg->base; @@ -307,6 +317,9 @@ static void __init map_mem(void) create_mapping(start, __phys_to_virt(start), end - start); } + + /* Limit no longer required. */ + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } /* @@ -317,12 +330,6 @@ void __init paging_init(void) { void *zero_page; - /* - * Maximum PGDIR_SIZE addressable via the initial direct kernel - * mapping in swapper_pg_dir. - */ - memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); - init_mem_pgprot(); map_mem(); @@ -339,7 +346,6 @@ void __init paging_init(void) bootmem_init(); empty_zero_page = virt_to_page(zero_page); - __flush_dcache_page(empty_zero_page); /* * TTBR0 is only used for the identity mapping at this stage. Make it diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile new file mode 100644 index 000000000000..be240404ba96 --- /dev/null +++ b/arch/arm64/xen/Makefile @@ -0,0 +1,2 @@ +xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o) +obj-y := xen-arm.o hypercall.o diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S new file mode 100644 index 000000000000..531342ec4bcf --- /dev/null +++ b/arch/arm64/xen/hypercall.S @@ -0,0 +1,93 @@ +/****************************************************************************** + * hypercall.S + * + * Xen hypercall wrappers + * + * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * The Xen hypercall calling convention is very similar to the procedure + * call standard for the ARM 64-bit architecture: the first parameter is + * passed in x0, the second in x1, the third in x2, the fourth in x3 and + * the fifth in x4. + * + * The hypercall number is passed in x16. + * + * The return value is in x0. + * + * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM + * hypercall tag. + * + * Parameter structs passed to hypercalls are laid out according to + * the ARM 64-bit EABI standard. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <xen/interface/xen.h> + + +#define XEN_IMM 0xEA1 + +#define HYPERCALL_SIMPLE(hypercall) \ +ENTRY(HYPERVISOR_##hypercall) \ + mov x16, #__HYPERVISOR_##hypercall; \ + hvc XEN_IMM; \ + ret; \ +ENDPROC(HYPERVISOR_##hypercall) + +#define HYPERCALL0 HYPERCALL_SIMPLE +#define HYPERCALL1 HYPERCALL_SIMPLE +#define HYPERCALL2 HYPERCALL_SIMPLE +#define HYPERCALL3 HYPERCALL_SIMPLE +#define HYPERCALL4 HYPERCALL_SIMPLE +#define HYPERCALL5 HYPERCALL_SIMPLE + + .text + +HYPERCALL2(xen_version); +HYPERCALL3(console_io); +HYPERCALL3(grant_table_op); +HYPERCALL2(sched_op); +HYPERCALL2(event_channel_op); +HYPERCALL2(hvm_op); +HYPERCALL2(memory_op); +HYPERCALL2(physdev_op); +HYPERCALL3(vcpu_op); +HYPERCALL1(tmem_op); + +ENTRY(privcmd_call) + mov x16, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + mov x4, x5 + hvc XEN_IMM + ret +ENDPROC(privcmd_call); diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 37401f535126..79b61798ebf8 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -74,4 +74,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c index b4247f478065..209ae5ad3495 100644 --- a/arch/avr32/kernel/setup.c +++ b/arch/avr32/kernel/setup.c @@ -555,7 +555,7 @@ void __init setup_arch (char **cmdline_p) { struct clk *cpu_clk; - init_mm.start_code = (unsigned long)_text; + init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long)_etext; init_mm.end_data = (unsigned long)_edata; init_mm.brk = (unsigned long)_end; diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 9cd2bd91d64a..a4589176bed5 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -23,7 +23,7 @@ SECTIONS { . = CONFIG_ENTRY_ADDRESS; .init : AT(ADDR(.init) - LOAD_OFFSET) { - _stext = .; + _text = .; __init_begin = .; _sinittext = .; *(.text.reset) @@ -46,7 +46,7 @@ SECTIONS .text : AT(ADDR(.text) - LOAD_OFFSET) { _evba = .; - _text = .; + _stext = .; *(.ex.text) *(.irq.text) KPROBES_TEXT diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 7c2f6685bf43..7f8759a8a92a 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1060,7 +1060,9 @@ struct platform_device *__init at32_add_device_usart(unsigned int id) void __init at32_setup_serial_console(unsigned int usart_id) { +#ifdef CONFIG_SERIAL_ATMEL atmel_default_console_device = at32_usarts[usart_id]; +#endif } /* -------------------------------------------------------------------- diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index e66e8406f992..def5391d927a 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -100,60 +100,26 @@ void __init paging_init(void) void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int nid, i; + pg_data_t *pgdat; - reservedpages = 0; high_memory = NULL; + for_each_online_pgdat(pgdat) + high_memory = max_t(void *, high_memory, + __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - /* this will put all low memory onto the freelists */ - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); - unsigned long node_pages = 0; - void *node_high_memory; - - num_physpages += pgdat->node_present_pages; - - if (pgdat->node_spanned_pages != 0) - node_pages = free_all_bootmem_node(pgdat); - - totalram_pages += node_pages; - - for (i = 0; i < node_pages; i++) - if (PageReserved(pgdat->node_mem_map + i)) - reservedpages++; - - node_high_memory = (void *)((pgdat->node_start_pfn - + pgdat->node_spanned_pages) - << PAGE_SHIFT); - if (node_high_memory > high_memory) - high_memory = node_high_memory; - } - - max_mapnr = MAP_NR(high_memory); - - codesize = (unsigned long)_etext - (unsigned long)_text; - datasize = (unsigned long)_edata - (unsigned long)_data; - initsize = (unsigned long)__init_end - (unsigned long)__init_begin; - - printk ("Memory: %luk/%luk available (%dk kernel code, " - "%dk reserved, %dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT - 10), - totalram_pages << (PAGE_SHIFT - 10), - codesize >> 10, - reservedpages << (PAGE_SHIFT - 10), - datasize >> 10, - initsize >> 10); + set_max_mapnr(MAP_NR(high_memory)); + free_all_bootmem(); + mem_init_print_info(NULL); } void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index b573827d0416..3b6abc54b015 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -41,6 +41,7 @@ config BLACKFIN select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA + select HAVE_DEBUG_STACKOVERFLOW config GENERIC_CSUM def_bool y @@ -282,7 +283,7 @@ config BF_REV_0_0 config BF_REV_0_1 bool "0.1" - depends on (BF51x || BF52x || (BF54x && !BF54xM)) + depends on (BF51x || BF52x || (BF54x && !BF54xM) || BF60x) config BF_REV_0_2 bool "0.2" diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug index 79594694ee90..f3337ee03621 100644 --- a/arch/blackfin/Kconfig.debug +++ b/arch/blackfin/Kconfig.debug @@ -2,13 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - help - This option will cause messages to be printed if free stack space - drops below a certain limit. - config DEBUG_VERBOSE bool "Verbose fault messages" default y diff --git a/arch/blackfin/include/asm/bfin6xx_spi.h b/arch/blackfin/include/asm/bfin_spi3.h index 89370b653dcd..0957e65a54be 100644 --- a/arch/blackfin/include/asm/bfin6xx_spi.h +++ b/arch/blackfin/include/asm/bfin_spi3.h @@ -240,7 +240,7 @@ struct bfin_spi_regs { #define MAX_CTRL_CS 8 /* cs in spi controller */ /* device.platform_data for SSP controller devices */ -struct bfin6xx_spi_master { +struct bfin_spi3_master { u16 num_chipselect; u16 pin_req[7]; }; @@ -248,7 +248,7 @@ struct bfin6xx_spi_master { /* spi_board_info.controller_data for SPI slave devices, * copied to spi_device.platform_data ... mostly for dma tuning */ -struct bfin6xx_spi_chip { +struct bfin_spi3_chip { u32 control; u16 cs_chg_udelay; /* Some devices require 16-bit delays */ u32 tx_dummy_val; /* tx value for rx only transfer */ diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c index b882ce22c347..fa53faeeb0e9 100644 --- a/arch/blackfin/kernel/kgdb.c +++ b/arch/blackfin/kernel/kgdb.c @@ -9,6 +9,7 @@ #include <linux/ptrace.h> /* for linux pt_regs struct */ #include <linux/kgdb.h> #include <linux/uaccess.h> +#include <asm/irq_regs.h> void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) { diff --git a/arch/blackfin/mach-bf527/boards/ad7160eval.c b/arch/blackfin/mach-bf527/boards/ad7160eval.c index d58f50e5aa4b..1e7be62fccb6 100644 --- a/arch/blackfin/mach-bf527/boards/ad7160eval.c +++ b/arch/blackfin/mach-bf527/boards/ad7160eval.c @@ -283,14 +283,6 @@ static struct platform_device bfin_i2s = { }; #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) -static struct platform_device bfin_tdm = { - .name = "bfin-tdm", - .id = CONFIG_SND_BF5XX_SPORT_NUM, - /* TODO: add platform data here */ -}; -#endif - static struct spi_board_info bfin_spi_board_info[] __initdata = { #if defined(CONFIG_MTD_M25P80) \ || defined(CONFIG_MTD_M25P80_MODULE) @@ -800,10 +792,6 @@ static struct platform_device *stamp_devices[] __initdata = { #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) &bfin_i2s, #endif - -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm, -#endif }; static int __init ad7160eval_init(void) diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c index 29f16e5c37b9..d0a0c5e527cd 100644 --- a/arch/blackfin/mach-bf527/boards/ezkit.c +++ b/arch/blackfin/mach-bf527/boards/ezkit.c @@ -493,8 +493,7 @@ static const struct ad7879_platform_data bfin_ad7879_ts_info = { }; #endif -#if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \ - defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) +#if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) static const u16 bfin_snd_pin[][7] = { {P_SPORT0_DTPRI, P_SPORT0_TSCLK, P_SPORT0_RFS, @@ -549,13 +548,6 @@ static struct platform_device bfin_i2s_pcm = { }; #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) -static struct platform_device bfin_tdm_pcm = { - .name = "bfin-tdm-pcm-audio", - .id = -1, -}; -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) static struct platform_device bfin_ac97_pcm = { .name = "bfin-ac97-pcm-audio", @@ -575,22 +567,10 @@ static struct platform_device bfin_i2s = { }; #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) -static struct platform_device bfin_tdm = { - .name = "bfin-tdm", - .id = CONFIG_SND_BF5XX_SPORT_NUM, - .num_resources = ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]), - .resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM], - .dev = { - .platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM], - }, -}; -#endif - #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \ || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE) static const char * const ad1836_link[] = { - "bfin-tdm.0", + "bfin-i2s.0", "spi0.4", }; static struct platform_device bfin_ad1836_machine = { @@ -1269,10 +1249,6 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_i2s_pcm, #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm_pcm, -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) &bfin_ac97_pcm, #endif @@ -1281,10 +1257,6 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_i2s, #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm, -#endif - #if defined(CONFIG_SND_BF5XX_SOC_AD1836) || \ defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE) &bfin_ad1836_machine, diff --git a/arch/blackfin/mach-bf533/boards/ezkit.c b/arch/blackfin/mach-bf533/boards/ezkit.c index 07811c209b9d..90fb0d14b147 100644 --- a/arch/blackfin/mach-bf533/boards/ezkit.c +++ b/arch/blackfin/mach-bf533/boards/ezkit.c @@ -450,14 +450,6 @@ static struct platform_device bfin_i2s = { }; #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) -static struct platform_device bfin_tdm = { - .name = "bfin-tdm", - .id = CONFIG_SND_BF5XX_SPORT_NUM, - /* TODO: add platform data here */ -}; -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) static struct platform_device bfin_ac97 = { .name = "bfin-ac97", @@ -516,10 +508,6 @@ static struct platform_device *ezkit_devices[] __initdata = { &bfin_i2s, #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm, -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) &bfin_ac97, #endif diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c index 6fca8698bf3b..4a8c2e3fd7e5 100644 --- a/arch/blackfin/mach-bf533/boards/stamp.c +++ b/arch/blackfin/mach-bf533/boards/stamp.c @@ -542,8 +542,7 @@ static struct platform_device bfin_dpmc = { }; #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \ - defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) \ - || defined(CONFIG_SND_BF5XX_AC97) || \ + defined(CONFIG_SND_BF5XX_AC97) || \ defined(CONFIG_SND_BF5XX_AC97_MODULE) #include <asm/bfin_sport.h> @@ -603,13 +602,6 @@ static struct platform_device bfin_i2s_pcm = { }; #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) -static struct platform_device bfin_tdm_pcm = { - .name = "bfin-tdm-pcm-audio", - .id = -1, -}; -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) static struct platform_device bfin_ac97_pcm = { .name = "bfin-ac97-pcm-audio", @@ -620,7 +612,7 @@ static struct platform_device bfin_ac97_pcm = { #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \ || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE) static const char * const ad1836_link[] = { - "bfin-tdm.0", + "bfin-i2s.0", "spi0.4", }; static struct platform_device bfin_ad1836_machine = { @@ -675,20 +667,6 @@ static struct platform_device bfin_i2s = { }; #endif -#if defined(CONFIG_SND_BF5XX_SOC_TDM) || \ - defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE) -static struct platform_device bfin_tdm = { - .name = "bfin-tdm", - .id = CONFIG_SND_BF5XX_SPORT_NUM, - .num_resources = - ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]), - .resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM], - .dev = { - .platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM], - }, -}; -#endif - #if defined(CONFIG_SND_BF5XX_SOC_AC97) || \ defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE) static struct platform_device bfin_ac97 = { @@ -761,10 +739,6 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_i2s_pcm, #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm_pcm, -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) &bfin_ac97_pcm, #endif @@ -792,11 +766,6 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_i2s, #endif -#if defined(CONFIG_SND_BF5XX_SOC_TDM) || \ - defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE) - &bfin_tdm, -#endif - #if defined(CONFIG_SND_BF5XX_SOC_AC97) || \ defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE) &bfin_ac97, diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index 6a3a14bcd3a1..44fd1d4682ac 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -2570,7 +2570,6 @@ static struct platform_device bfin_dpmc = { }; #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \ - defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) || \ defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) #define SPORT_REQ(x) \ @@ -2628,13 +2627,6 @@ static struct platform_device bfin_i2s_pcm = { }; #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) -static struct platform_device bfin_tdm_pcm = { - .name = "bfin-tdm-pcm-audio", - .id = -1, -}; -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) static struct platform_device bfin_ac97_pcm = { .name = "bfin-ac97-pcm-audio", @@ -2645,7 +2637,7 @@ static struct platform_device bfin_ac97_pcm = { #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \ || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE) static const char * const ad1836_link[] = { - "bfin-tdm.0", + "bfin-i2s.0", "spi0.4", }; static struct platform_device bfin_ad1836_machine = { @@ -2699,18 +2691,6 @@ static struct platform_device bfin_i2s = { }; #endif -#if defined(CONFIG_SND_BF5XX_SOC_TDM) || defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE) -static struct platform_device bfin_tdm = { - .name = "bfin-tdm", - .id = CONFIG_SND_BF5XX_SPORT_NUM, - .num_resources = ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]), - .resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM], - .dev = { - .platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM], - }, -}; -#endif - #if defined(CONFIG_SND_BF5XX_SOC_AC97) || defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE) static struct platform_device bfin_ac97 = { .name = "bfin-ac97", @@ -2935,10 +2915,6 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_i2s_pcm, #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm_pcm, -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) &bfin_ac97_pcm, #endif @@ -2961,10 +2937,6 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_i2s, #endif -#if defined(CONFIG_SND_BF5XX_SOC_TDM) || defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE) - &bfin_tdm, -#endif - #if defined(CONFIG_SND_BF5XX_SOC_AC97) || defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE) &bfin_ac97, #endif diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c index c4d07f040947..372eb54944ef 100644 --- a/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/arch/blackfin/mach-bf548/boards/ezkit.c @@ -1393,7 +1393,6 @@ static struct platform_device bfin_dpmc = { }; #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \ - defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) || \ defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) #define SPORT_REQ(x) \ @@ -1461,13 +1460,6 @@ static struct platform_device bfin_i2s_pcm = { }; #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) -static struct platform_device bfin_tdm_pcm = { - .name = "bfin-tdm-pcm-audio", - .id = -1, -}; -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) static struct platform_device bfin_ac97_pcm = { .name = "bfin-ac97-pcm-audio", @@ -1501,18 +1493,6 @@ static struct platform_device bfin_i2s = { }; #endif -#if defined(CONFIG_SND_BF5XX_SOC_TDM) || defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE) -static struct platform_device bfin_tdm = { - .name = "bfin-tdm", - .id = CONFIG_SND_BF5XX_SPORT_NUM, - .num_resources = ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]), - .resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM], - .dev = { - .platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM], - }, -}; -#endif - #if defined(CONFIG_SND_BF5XX_SOC_AC97) || defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE) static struct platform_device bfin_ac97 = { .name = "bfin-ac97", @@ -1646,9 +1626,7 @@ static struct platform_device *ezkit_devices[] __initdata = { #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) &bfin_i2s_pcm, #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm_pcm, -#endif + #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) &bfin_ac97_pcm, #endif @@ -1661,10 +1639,6 @@ static struct platform_device *ezkit_devices[] __initdata = { &bfin_i2s, #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm, -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) &bfin_ac97, #endif diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c index 551f866172cf..92938e79b9e3 100644 --- a/arch/blackfin/mach-bf561/boards/ezkit.c +++ b/arch/blackfin/mach-bf561/boards/ezkit.c @@ -523,14 +523,6 @@ static struct platform_device bfin_i2s = { }; #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) -static struct platform_device bfin_tdm = { - .name = "bfin-tdm", - .id = CONFIG_SND_BF5XX_SPORT_NUM, - /* TODO: add platform data here */ -}; -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) static struct platform_device bfin_ac97 = { .name = "bfin-ac97", @@ -542,7 +534,7 @@ static struct platform_device bfin_ac97 = { #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \ || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE) static const char * const ad1836_link[] = { - "bfin-tdm.0", + "bfin-i2s.0", "spi0.4", }; static struct platform_device bfin_ad1836_machine = { @@ -611,10 +603,6 @@ static struct platform_device *ezkit_devices[] __initdata = { &bfin_i2s, #endif -#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) - &bfin_tdm, -#endif - #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE) &bfin_ac97, #endif diff --git a/arch/blackfin/mach-bf561/smp.c b/arch/blackfin/mach-bf561/smp.c index ab1c617b9cfc..c77a23bc9de3 100644 --- a/arch/blackfin/mach-bf561/smp.c +++ b/arch/blackfin/mach-bf561/smp.c @@ -69,7 +69,6 @@ void __cpuinit platform_secondary_init(unsigned int cpu) SSYNC(); /* We are done with local CPU inits, unblock the boot CPU. */ - set_cpu_online(cpu, true); spin_lock(&boot_lock); spin_unlock(&boot_lock); } @@ -91,7 +90,9 @@ int __cpuinit platform_boot_secondary(unsigned int cpu, struct task_struct *idle SSYNC(); } - timeout = jiffies + 1 * HZ; + timeout = jiffies + HZ; + /* release the lock and let coreb run */ + spin_unlock(&boot_lock); while (time_before(jiffies, timeout)) { if (cpu_online(cpu)) break; @@ -100,8 +101,6 @@ int __cpuinit platform_boot_secondary(unsigned int cpu, struct task_struct *idle } if (cpu_online(cpu)) { - /* release the lock and let coreb run */ - spin_unlock(&boot_lock); return 0; } else panic("CPU%u: processor failed to boot\n", cpu); diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index 97d701639585..0bc47231540b 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -17,7 +17,7 @@ #include <linux/i2c.h> #include <linux/interrupt.h> #include <linux/usb/musb.h> -#include <asm/bfin6xx_spi.h> +#include <asm/bfin_spi3.h> #include <asm/dma.h> #include <asm/gpio.h> #include <asm/nand.h> @@ -108,7 +108,6 @@ static struct platform_device bfin_rotary_device = { static unsigned short pins[] = P_RMII0; static struct stmmac_mdio_bus_data phy_private_data = { - .bus_id = 0, .phy_mask = 1, }; @@ -745,13 +744,13 @@ static struct flash_platform_data bfin_spi_flash_data = { .type = "w25q32", }; -static struct bfin6xx_spi_chip spi_flash_chip_info = { +static struct bfin_spi3_chip spi_flash_chip_info = { .enable_dma = true, /* use dma transfer with this chip*/ }; #endif #if defined(CONFIG_SPI_SPIDEV) || defined(CONFIG_SPI_SPIDEV_MODULE) -static struct bfin6xx_spi_chip spidev_chip_info = { +static struct bfin_spi3_chip spidev_chip_info = { .enable_dma = true, }; #endif @@ -821,7 +820,7 @@ static struct platform_device bfin_i2s = { #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \ || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE) static const char * const ad1836_link[] = { - "bfin-tdm.0", + "bfin-i2s.0", "spi0.76", }; static struct platform_device bfin_ad1836_machine = { @@ -1296,7 +1295,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = { }, #endif }; -#if defined(CONFIG_SPI_BFIN6XX) || defined(CONFIG_SPI_BFIN6XX_MODULE) +#if IS_ENABLED(CONFIG_SPI_BFIN_V3) /* SPI (0) */ static struct resource bfin_spi0_resource[] = { { @@ -1337,13 +1336,13 @@ static struct resource bfin_spi1_resource[] = { }; /* SPI controller data */ -static struct bfin6xx_spi_master bf60x_spi_master_info0 = { +static struct bfin_spi3_master bf60x_spi_master_info0 = { .num_chipselect = MAX_CTRL_CS + MAX_BLACKFIN_GPIOS, .pin_req = {P_SPI0_SCK, P_SPI0_MISO, P_SPI0_MOSI, 0}, }; static struct platform_device bf60x_spi_master0 = { - .name = "bfin-spi", + .name = "bfin-spi3", .id = 0, /* Bus number */ .num_resources = ARRAY_SIZE(bfin_spi0_resource), .resource = bfin_spi0_resource, @@ -1352,13 +1351,13 @@ static struct platform_device bf60x_spi_master0 = { }, }; -static struct bfin6xx_spi_master bf60x_spi_master_info1 = { +static struct bfin_spi3_master bf60x_spi_master_info1 = { .num_chipselect = MAX_CTRL_CS + MAX_BLACKFIN_GPIOS, .pin_req = {P_SPI1_SCK, P_SPI1_MISO, P_SPI1_MOSI, 0}, }; static struct platform_device bf60x_spi_master1 = { - .name = "bfin-spi", + .name = "bfin-spi3", .id = 1, /* Bus number */ .num_resources = ARRAY_SIZE(bfin_spi1_resource), .resource = bfin_spi1_resource, @@ -1534,7 +1533,7 @@ static struct platform_device *ezkit_devices[] __initdata = { &bfin_sdh_device, #endif -#if defined(CONFIG_SPI_BFIN6XX) || defined(CONFIG_SPI_BFIN6XX_MODULE) +#if IS_ENABLED(CONFIG_SPI_BFIN_V3) &bf60x_spi_master0, &bf60x_spi_master1, #endif diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 1bc2ce6f3c94..961d8392e5e3 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -49,6 +49,7 @@ unsigned long blackfin_iflush_l1_entry[NR_CPUS]; struct blackfin_initial_pda __cpuinitdata initial_pda_coreb; enum ipi_message_type { + BFIN_IPI_NONE, BFIN_IPI_TIMER, BFIN_IPI_RESCHEDULE, BFIN_IPI_CALL_FUNC, @@ -72,8 +73,8 @@ static DEFINE_SPINLOCK(stop_lock); /* Simple FIFO buffer, overflow leads to panic */ struct ipi_data { - unsigned long count; - unsigned long bits; + atomic_t count; + atomic_t bits; }; static DEFINE_PER_CPU(struct ipi_data, bfin_ipi); @@ -146,7 +147,6 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) platform_clear_ipi(cpu, IRQ_SUPPLE_1); bfin_ipi_data = &__get_cpu_var(bfin_ipi); - smp_mb(); while ((pending = xchg(&bfin_ipi_data->bits, 0)) != 0) { msg = 0; do { @@ -170,9 +170,8 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) ipi_cpu_stop(cpu); break; } + atomic_dec(&bfin_ipi_data->count); } while (msg < BITS_PER_LONG); - - smp_mb(); } return IRQ_HANDLED; } @@ -195,12 +194,10 @@ void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg) unsigned long flags; local_irq_save(flags); - smp_mb(); for_each_cpu(cpu, cpumask) { bfin_ipi_data = &per_cpu(bfin_ipi, cpu); - smp_mb(); - set_bit(msg, &bfin_ipi_data->bits); - bfin_ipi_data->count++; + atomic_set_mask((1 << msg), &bfin_ipi_data->bits); + atomic_inc(&bfin_ipi_data->count); platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1); } @@ -319,7 +316,6 @@ void __cpuinit secondary_start_kernel(void) setup_secondary(cpu); platform_secondary_init(cpu); - /* setup local core timer */ bfin_local_timer_setup(); @@ -335,6 +331,8 @@ void __cpuinit secondary_start_kernel(void) */ calibrate_delay(); + /* We are done with local CPU inits, unblock the boot CPU. */ + set_cpu_online(cpu, true); cpu_startup_entry(CPUHP_ONLINE); } diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index 82d01a71207f..166842de3dc7 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -90,50 +90,24 @@ asmlinkage void __init init_pda(void) void __init mem_init(void) { - unsigned int codek = 0, datak = 0, initk = 0; - unsigned int reservedpages = 0, freepages = 0; - unsigned long tmp; - unsigned long start_mem = memory_start; - unsigned long end_mem = memory_end; + char buf[64]; - end_mem &= PAGE_MASK; - high_memory = (void *)end_mem; - - start_mem = PAGE_ALIGN(start_mem); - max_mapnr = num_physpages = MAP_NR(high_memory); - printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", num_physpages); + high_memory = (void *)(memory_end & PAGE_MASK); + max_mapnr = MAP_NR(high_memory); + printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", max_mapnr); /* This will put all low memory onto the freelists. */ - totalram_pages = free_all_bootmem(); - - reservedpages = 0; - for (tmp = ARCH_PFN_OFFSET; tmp < max_mapnr; tmp++) - if (PageReserved(pfn_to_page(tmp))) - reservedpages++; - freepages = max_mapnr - ARCH_PFN_OFFSET - reservedpages; - - /* do not count in kernel image between _rambase and _ramstart */ - reservedpages -= (_ramstart - _rambase) >> PAGE_SHIFT; -#if (defined(CONFIG_BFIN_EXTMEM_ICACHEABLE) && ANOMALY_05000263) - reservedpages += (_ramend - memory_end - DMA_UNCACHED_REGION) >> PAGE_SHIFT; -#endif - - codek = (_etext - _stext) >> 10; - initk = (__init_end - __init_begin) >> 10; - datak = ((_ramstart - _rambase) >> 10) - codek - initk; + free_all_bootmem(); - printk(KERN_INFO - "Memory available: %luk/%luk RAM, " - "(%uk init code, %uk kernel code, %uk data, %uk dma, %uk reserved)\n", - (unsigned long) freepages << (PAGE_SHIFT-10), (_ramend - CONFIG_PHY_RAM_BASE_ADDRESS) >> 10, - initk, codek, datak, DMA_UNCACHED_REGION >> 10, (reservedpages << (PAGE_SHIFT-10))); + snprintf(buf, sizeof(buf) - 1, "%uK DMA", DMA_UNCACHED_REGION >> 10); + mem_init_print_info(buf); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { #ifndef CONFIG_MPU - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); #endif } #endif @@ -141,7 +115,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) void __init_refok free_initmem(void) { #if defined CONFIG_RAMKERNEL && !defined CONFIG_MPU - free_initmem_default(0); + free_initmem_default(-1); if (memory_start == (unsigned long)(&__init_end)) memory_start = (unsigned long)(&__init_begin); #endif diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 4258b088aa93..e49f918531ad 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -55,3 +55,4 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += vga.h +generic-y += xor.h diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S index 1d81c4c129ec..279d80725128 100644 --- a/arch/c6x/kernel/vmlinux.lds.S +++ b/arch/c6x/kernel/vmlinux.lds.S @@ -54,16 +54,15 @@ SECTIONS } . = ALIGN(PAGE_SIZE); + __init_begin = .; .init : { - _stext = .; _sinittext = .; HEAD_TEXT INIT_TEXT _einittext = .; } - __init_begin = _stext; INIT_DATA_SECTION(16) PERCPU_SECTION(128) @@ -74,6 +73,7 @@ SECTIONS .text : { _text = .; + _stext = .; TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index a9fcd89b251b..63f5560d6eb2 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -18,6 +18,7 @@ #include <linux/initrd.h> #include <asm/sections.h> +#include <asm/uaccess.h> /* * ZERO_PAGE is a special page that is used for zero-initialized @@ -57,31 +58,22 @@ void __init paging_init(void) void __init mem_init(void) { - int codek, datak; - unsigned long tmp; - unsigned long len = memory_end - memory_start; - high_memory = (void *)(memory_end & PAGE_MASK); /* this will put all memory onto the freelists */ - totalram_pages = free_all_bootmem(); - - codek = (_etext - _stext) >> 10; - datak = (_end - _sdata) >> 10; + free_all_bootmem(); - tmp = nr_free_pages() << PAGE_SHIFT; - printk(KERN_INFO "Memory: %luk/%luk RAM (%dk kernel code, %dk data)\n", - tmp >> 10, len >> 10, codek, datak); + mem_init_print_info(NULL); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif void __init free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 8769a9045a54..3201ddb8da6a 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -134,11 +134,13 @@ config SVINTO_SIM config ETRAXFS bool "ETRAX-FS-V32" + select CPU_FREQ_TABLE if CPU_FREQ help Support CRIS V32. config CRIS_MACH_ARTPEC3 bool "ARTPEC-3" + select CPU_FREQ_TABLE if CPU_FREQ help Support Axis ARTPEC-3. @@ -637,40 +639,10 @@ endchoice endmenu -source "drivers/base/Kconfig" - -# standard linux drivers -source "drivers/mtd/Kconfig" - -source "drivers/parport/Kconfig" - -source "drivers/pnp/Kconfig" - -source "drivers/block/Kconfig" - -source "drivers/ide/Kconfig" - -source "drivers/net/Kconfig" - -source "drivers/i2c/Kconfig" - -source "drivers/rtc/Kconfig" - -# -# input before char - char/joystick depends on it. As does USB. -# -source "drivers/input/Kconfig" - -source "drivers/char/Kconfig" +source "drivers/Kconfig" source "fs/Kconfig" -source "drivers/usb/Kconfig" - -source "drivers/uwb/Kconfig" - -source "drivers/staging/Kconfig" - source "arch/cris/Kconfig.debug" source "security/Kconfig" diff --git a/arch/cris/arch-v10/drivers/Kconfig b/arch/cris/arch-v10/drivers/Kconfig index 5f2cdb3e428c..daf5f19b61a1 100644 --- a/arch/cris/arch-v10/drivers/Kconfig +++ b/arch/cris/arch-v10/drivers/Kconfig @@ -2,9 +2,7 @@ if ETRAX_ARCH_V10 config ETRAX_ETHERNET bool "Ethernet support" - depends on ETRAX_ARCH_V10 - select ETHERNET - select NET_CORE + depends on ETRAX_ARCH_V10 && NETDEVICES select MII help This option enables the ETRAX 100LX built-in 10/100Mbit Ethernet diff --git a/arch/cris/arch-v10/kernel/kgdb.c b/arch/cris/arch-v10/kernel/kgdb.c index 37e6d2c50b76..22d846bfc570 100644 --- a/arch/cris/arch-v10/kernel/kgdb.c +++ b/arch/cris/arch-v10/kernel/kgdb.c @@ -230,46 +230,6 @@ struct register_image unsigned int usp; /* 0x66 User mode stack pointer */ } registers; -/************** Prototypes for local library functions ***********************/ - -/* Copy of strcpy from libc. */ -static char *gdb_cris_strcpy (char *s1, const char *s2); - -/* Copy of strlen from libc. */ -static int gdb_cris_strlen (const char *s); - -/* Copy of memchr from libc. */ -static void *gdb_cris_memchr (const void *s, int c, int n); - -/* Copy of strtol from libc. Does only support base 16. */ -static int gdb_cris_strtol (const char *s, char **endptr, int base); - -/********************** Prototypes for local functions. **********************/ -/* Copy the content of a register image into another. The size n is - the size of the register image. Due to struct assignment generation of - memcpy in libc. */ -static void copy_registers (registers *dptr, registers *sptr, int n); - -/* Copy the stored registers from the stack. Put the register contents - of thread thread_id in the struct reg. */ -static void copy_registers_from_stack (int thread_id, registers *reg); - -/* Copy the registers to the stack. Put the register contents of thread - thread_id from struct reg to the stack. */ -static void copy_registers_to_stack (int thread_id, registers *reg); - -/* Write a value to a specified register regno in the register image - of the current thread. */ -static int write_register (int regno, char *val); - -/* Write a value to a specified register in the stack of a thread other - than the current thread. */ -static int write_stack_register(int thread_id, int regno, char *valptr); - -/* Read a value from a specified register in the register image. Returns the - status of the read operation. The register value is returned in valptr. */ -static int read_register (char regno, unsigned int *valptr); - /* Serial port, reads one character. ETRAX 100 specific. from debugport.c */ int getDebugChar (void); @@ -278,42 +238,6 @@ void putDebugChar (int val); void enableDebugIRQ (void); -/* Returns the integer equivalent of a hexadecimal character. */ -static int hex (char ch); - -/* Convert the memory, pointed to by mem into hexadecimal representation. - Put the result in buf, and return a pointer to the last character - in buf (null). */ -static char *mem2hex (char *buf, unsigned char *mem, int count); - -/* Convert the array, in hexadecimal representation, pointed to by buf into - binary representation. Put the result in mem, and return a pointer to - the character after the last byte written. */ -static unsigned char *hex2mem (unsigned char *mem, char *buf, int count); - -/* Put the content of the array, in binary representation, pointed to by buf - into memory pointed to by mem, and return a pointer to - the character after the last byte written. */ -static unsigned char *bin2mem (unsigned char *mem, unsigned char *buf, int count); - -/* Await the sequence $<data>#<checksum> and store <data> in the array buffer - returned. */ -static void getpacket (char *buffer); - -/* Send $<data>#<checksum> from the <data> in the array buffer. */ -static void putpacket (char *buffer); - -/* Build and send a response packet in order to inform the host the - stub is stopped. */ -static void stub_is_stopped (int sigval); - -/* All expected commands are sent from remote.c. Send a response according - to the description in remote.c. */ -static void handle_exception (int sigval); - -/* Performs a complete re-start from scratch. ETRAX specific. */ -static void kill_restart (void); - /******************** Prototypes for global functions. ***********************/ /* The string str is prepended with the GDB printout token and sent. */ @@ -336,10 +260,6 @@ extern unsigned char executing_task; /* The number of characters used for a 64 bit thread identifier. */ #define HEXCHARS_IN_THREAD_ID 16 -/* Avoid warning as the internal_stack is not used in the C-code. */ -#define USEDVAR(name) { if (name) { ; } } -#define USEDFUN(name) { void (*pf)(void) = (void *)name; USEDVAR(pf) } - /********************************** Packet I/O ******************************/ /* BUFMAX defines the maximum number of characters in inbound/outbound buffers */ @@ -405,7 +325,7 @@ static int register_size[] = /* Contains the register image of the executing thread in the assembler part of the code in order to avoid horrible addressing modes. */ -static registers reg; +registers cris_reg; /* FIXME: Should this be used? Delete otherwise. */ /* Contains the assumed consistency state of the register image. Uses the @@ -413,7 +333,7 @@ static registers reg; static int consistency_status = SUCCESS; /********************************** Handle exceptions ************************/ -/* The variable reg contains the register image associated with the +/* The variable cris_reg contains the register image associated with the current_thread_c variable. It is a complete register image created at entry. The reg_g contains a register image of a task where the general registers are taken from the stack and all special registers are taken @@ -421,18 +341,10 @@ static int consistency_status = SUCCESS; in order to provide access mainly for 'g', 'G' and 'P'. */ -/* Need two task id pointers in order to handle Hct and Hgt commands. */ -static int current_thread_c = 0; -static int current_thread_g = 0; - -/* Need two register images in order to handle Hct and Hgt commands. The - variable reg_g is in addition to reg above. */ -static registers reg_g; - /********************************** Breakpoint *******************************/ /* Use an internal stack in the breakpoint and interrupt response routines */ #define INTERNAL_STACK_SIZE 1024 -static char internal_stack[INTERNAL_STACK_SIZE]; +char internal_stack[INTERNAL_STACK_SIZE]; /* Due to the breakpoint return pointer, a state variable is needed to keep track of whether it is a static (compiled) or dynamic (gdb-invoked) @@ -500,164 +412,6 @@ gdb_cris_strtol (const char *s, char **endptr, int base) return x; } -/********************************* Register image ****************************/ -/* Copy the content of a register image into another. The size n is - the size of the register image. Due to struct assignment generation of - memcpy in libc. */ -static void -copy_registers (registers *dptr, registers *sptr, int n) -{ - unsigned char *dreg; - unsigned char *sreg; - - for (dreg = (unsigned char*)dptr, sreg = (unsigned char*)sptr; n > 0; n--) - *dreg++ = *sreg++; -} - -#ifdef PROCESS_SUPPORT -/* Copy the stored registers from the stack. Put the register contents - of thread thread_id in the struct reg. */ -static void -copy_registers_from_stack (int thread_id, registers *regptr) -{ - int j; - stack_registers *s = (stack_registers *)stack_list[thread_id]; - unsigned int *d = (unsigned int *)regptr; - - for (j = 13; j >= 0; j--) - *d++ = s->r[j]; - regptr->sp = (unsigned int)stack_list[thread_id]; - regptr->pc = s->pc; - regptr->dccr = s->dccr; - regptr->srp = s->srp; -} - -/* Copy the registers to the stack. Put the register contents of thread - thread_id from struct reg to the stack. */ -static void -copy_registers_to_stack (int thread_id, registers *regptr) -{ - int i; - stack_registers *d = (stack_registers *)stack_list[thread_id]; - unsigned int *s = (unsigned int *)regptr; - - for (i = 0; i < 14; i++) { - d->r[i] = *s++; - } - d->pc = regptr->pc; - d->dccr = regptr->dccr; - d->srp = regptr->srp; -} -#endif - -/* Write a value to a specified register in the register image of the current - thread. Returns status code SUCCESS, E02 or E05. */ -static int -write_register (int regno, char *val) -{ - int status = SUCCESS; - registers *current_reg = ® - - if (regno >= R0 && regno <= PC) { - /* 32-bit register with simple offset. */ - hex2mem ((unsigned char *)current_reg + regno * sizeof(unsigned int), - val, sizeof(unsigned int)); - } - else if (regno == P0 || regno == VR || regno == P4 || regno == P8) { - /* Do not support read-only registers. */ - status = E02; - } - else if (regno == CCR) { - /* 16 bit register with complex offset. (P4 is read-only, P6 is not implemented, - and P7 (MOF) is 32 bits in ETRAX 100LX. */ - hex2mem ((unsigned char *)&(current_reg->ccr) + (regno-CCR) * sizeof(unsigned short), - val, sizeof(unsigned short)); - } - else if (regno >= MOF && regno <= USP) { - /* 32 bit register with complex offset. (P8 has been taken care of.) */ - hex2mem ((unsigned char *)&(current_reg->ibr) + (regno-IBR) * sizeof(unsigned int), - val, sizeof(unsigned int)); - } - else { - /* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */ - status = E05; - } - return status; -} - -#ifdef PROCESS_SUPPORT -/* Write a value to a specified register in the stack of a thread other - than the current thread. Returns status code SUCCESS or E07. */ -static int -write_stack_register (int thread_id, int regno, char *valptr) -{ - int status = SUCCESS; - stack_registers *d = (stack_registers *)stack_list[thread_id]; - unsigned int val; - - hex2mem ((unsigned char *)&val, valptr, sizeof(unsigned int)); - if (regno >= R0 && regno < SP) { - d->r[regno] = val; - } - else if (regno == SP) { - stack_list[thread_id] = val; - } - else if (regno == PC) { - d->pc = val; - } - else if (regno == SRP) { - d->srp = val; - } - else if (regno == DCCR) { - d->dccr = val; - } - else { - /* Do not support registers in the current thread. */ - status = E07; - } - return status; -} -#endif - -/* Read a value from a specified register in the register image. Returns the - value in the register or -1 for non-implemented registers. - Should check consistency_status after a call which may be E05 after changes - in the implementation. */ -static int -read_register (char regno, unsigned int *valptr) -{ - registers *current_reg = ® - - if (regno >= R0 && regno <= PC) { - /* 32-bit register with simple offset. */ - *valptr = *(unsigned int *)((char *)current_reg + regno * sizeof(unsigned int)); - return SUCCESS; - } - else if (regno == P0 || regno == VR) { - /* 8 bit register with complex offset. */ - *valptr = (unsigned int)(*(unsigned char *) - ((char *)&(current_reg->p0) + (regno-P0) * sizeof(char))); - return SUCCESS; - } - else if (regno == P4 || regno == CCR) { - /* 16 bit register with complex offset. */ - *valptr = (unsigned int)(*(unsigned short *) - ((char *)&(current_reg->p4) + (regno-P4) * sizeof(unsigned short))); - return SUCCESS; - } - else if (regno >= MOF && regno <= USP) { - /* 32 bit register with complex offset. */ - *valptr = *(unsigned int *)((char *)&(current_reg->p8) - + (regno-P8) * sizeof(unsigned int)); - return SUCCESS; - } - else { - /* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */ - consistency_status = E05; - return E05; - } -} - /********************************** Packet I/O ******************************/ /* Returns the integer equivalent of a hexadecimal character. */ static int @@ -676,8 +430,6 @@ hex (char ch) Put the result in buf, and return a pointer to the last character in buf (null). */ -static int do_printk = 0; - static char * mem2hex(char *buf, unsigned char *mem, int count) { @@ -761,7 +513,7 @@ getpacket (char *buffer) xmitcsum = -1; count = 0; /* Read until a # or the end of the buffer is reached */ - while (count < BUFMAX) { + while (count < BUFMAX - 1) { ch = getDebugChar (); if (ch == '#') break; @@ -845,6 +597,81 @@ putDebugString (const unsigned char *str, int length) putpacket(remcomOutBuffer); } +/********************************* Register image ****************************/ +/* Write a value to a specified register in the register image of the current + thread. Returns status code SUCCESS, E02 or E05. */ +static int +write_register (int regno, char *val) +{ + int status = SUCCESS; + registers *current_reg = &cris_reg; + + if (regno >= R0 && regno <= PC) { + /* 32-bit register with simple offset. */ + hex2mem ((unsigned char *)current_reg + regno * sizeof(unsigned int), + val, sizeof(unsigned int)); + } + else if (regno == P0 || regno == VR || regno == P4 || regno == P8) { + /* Do not support read-only registers. */ + status = E02; + } + else if (regno == CCR) { + /* 16 bit register with complex offset. (P4 is read-only, P6 is not implemented, + and P7 (MOF) is 32 bits in ETRAX 100LX. */ + hex2mem ((unsigned char *)&(current_reg->ccr) + (regno-CCR) * sizeof(unsigned short), + val, sizeof(unsigned short)); + } + else if (regno >= MOF && regno <= USP) { + /* 32 bit register with complex offset. (P8 has been taken care of.) */ + hex2mem ((unsigned char *)&(current_reg->ibr) + (regno-IBR) * sizeof(unsigned int), + val, sizeof(unsigned int)); + } + else { + /* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */ + status = E05; + } + return status; +} + +/* Read a value from a specified register in the register image. Returns the + value in the register or -1 for non-implemented registers. + Should check consistency_status after a call which may be E05 after changes + in the implementation. */ +static int +read_register (char regno, unsigned int *valptr) +{ + registers *current_reg = &cris_reg; + + if (regno >= R0 && regno <= PC) { + /* 32-bit register with simple offset. */ + *valptr = *(unsigned int *)((char *)current_reg + regno * sizeof(unsigned int)); + return SUCCESS; + } + else if (regno == P0 || regno == VR) { + /* 8 bit register with complex offset. */ + *valptr = (unsigned int)(*(unsigned char *) + ((char *)&(current_reg->p0) + (regno-P0) * sizeof(char))); + return SUCCESS; + } + else if (regno == P4 || regno == CCR) { + /* 16 bit register with complex offset. */ + *valptr = (unsigned int)(*(unsigned short *) + ((char *)&(current_reg->p4) + (regno-P4) * sizeof(unsigned short))); + return SUCCESS; + } + else if (regno >= MOF && regno <= USP) { + /* 32 bit register with complex offset. */ + *valptr = *(unsigned int *)((char *)&(current_reg->p8) + + (regno-P8) * sizeof(unsigned int)); + return SUCCESS; + } + else { + /* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */ + consistency_status = E05; + return E05; + } +} + /********************************** Handle exceptions ************************/ /* Build and send a response packet in order to inform the host the stub is stopped. TAAn...:r...;n...:r...;n...:r...; @@ -891,26 +718,6 @@ stub_is_stopped(int sigval) } -#ifdef PROCESS_SUPPORT - /* Store the registers of the executing thread. Assume that both step, - continue, and register content requests are with respect to this - thread. The executing task is from the operating system scheduler. */ - - current_thread_c = executing_task; - current_thread_g = executing_task; - - /* A struct assignment translates into a libc memcpy call. Avoid - all libc functions in order to prevent recursive break points. */ - copy_registers (®_g, ®, sizeof(registers)); - - /* Store thread:r...; with the executing task TID. */ - gdb_cris_strcpy (&remcomOutBuffer[pos], "thread:"); - pos += gdb_cris_strlen ("thread:"); - remcomOutBuffer[pos++] = hex_asc_hi(executing_task); - remcomOutBuffer[pos++] = hex_asc_lo(executing_task); - gdb_cris_strcpy (&remcomOutBuffer[pos], ";"); -#endif - /* null-terminate and send it off */ *ptr = 0; @@ -918,16 +725,18 @@ stub_is_stopped(int sigval) putpacket (remcomOutBuffer); } +/* Performs a complete re-start from scratch. */ +static void +kill_restart (void) +{ + machine_restart(""); +} + /* All expected commands are sent from remote.c. Send a response according to the description in remote.c. */ -static void +void handle_exception (int sigval) { - /* Avoid warning of not used. */ - - USEDFUN(handle_exception); - USEDVAR(internal_stack[0]); - /* Send response. */ stub_is_stopped (sigval); @@ -943,19 +752,7 @@ handle_exception (int sigval) in a register are in the same order the machine uses. Failure: void. */ - { -#ifdef PROCESS_SUPPORT - /* Use the special register content in the executing thread. */ - copy_registers (®_g, ®, sizeof(registers)); - /* Replace the content available on the stack. */ - if (current_thread_g != executing_task) { - copy_registers_from_stack (current_thread_g, ®_g); - } - mem2hex ((unsigned char *)remcomOutBuffer, (unsigned char *)®_g, sizeof(registers)); -#else - mem2hex(remcomOutBuffer, (char *)®, sizeof(registers)); -#endif - } + mem2hex(remcomOutBuffer, (char *)&cris_reg, sizeof(registers)); break; case 'G': @@ -963,17 +760,7 @@ handle_exception (int sigval) Each byte of register data is described by two hex digits. Success: OK Failure: void. */ -#ifdef PROCESS_SUPPORT - hex2mem ((unsigned char *)®_g, &remcomInBuffer[1], sizeof(registers)); - if (current_thread_g == executing_task) { - copy_registers (®, ®_g, sizeof(registers)); - } - else { - copy_registers_to_stack(current_thread_g, ®_g); - } -#else - hex2mem((char *)®, &remcomInBuffer[1], sizeof(registers)); -#endif + hex2mem((char *)&cris_reg, &remcomInBuffer[1], sizeof(registers)); gdb_cris_strcpy (remcomOutBuffer, "OK"); break; @@ -989,12 +776,7 @@ handle_exception (int sigval) char *suffix; int regno = gdb_cris_strtol (&remcomInBuffer[1], &suffix, 16); int status; -#ifdef PROCESS_SUPPORT - if (current_thread_g != executing_task) - status = write_stack_register (current_thread_g, regno, suffix+1); - else -#endif - status = write_register (regno, suffix+1); + status = write_register (regno, suffix+1); switch (status) { case E02: @@ -1073,7 +855,7 @@ handle_exception (int sigval) Success: return to the executing thread. Failure: will never know. */ if (remcomInBuffer[1] != '\0') { - reg.pc = gdb_cris_strtol (&remcomInBuffer[1], 0, 16); + cris_reg.pc = gdb_cris_strtol (&remcomInBuffer[1], 0, 16); } enableDebugIRQ(); return; @@ -1129,119 +911,6 @@ handle_exception (int sigval) Not supported: E04 */ gdb_cris_strcpy (remcomOutBuffer, error_message[E04]); break; -#ifdef PROCESS_SUPPORT - - case 'T': - /* Thread alive. TXX - Is thread XX alive? - Success: OK, thread XX is alive. - Failure: E03, thread XX is dead. */ - { - int thread_id = (int)gdb_cris_strtol (&remcomInBuffer[1], 0, 16); - /* Cannot tell whether it is alive or not. */ - if (thread_id >= 0 && thread_id < number_of_tasks) - gdb_cris_strcpy (remcomOutBuffer, "OK"); - } - break; - - case 'H': - /* Set thread for subsequent operations: Hct - c = 'c' for thread used in step and continue; - t can be -1 for all threads. - c = 'g' for thread used in other operations. - t = 0 means pick any thread. - Success: OK - Failure: E01 */ - { - int thread_id = gdb_cris_strtol (&remcomInBuffer[2], 0, 16); - if (remcomInBuffer[1] == 'c') { - /* c = 'c' for thread used in step and continue */ - /* Do not change current_thread_c here. It would create a mess in - the scheduler. */ - gdb_cris_strcpy (remcomOutBuffer, "OK"); - } - else if (remcomInBuffer[1] == 'g') { - /* c = 'g' for thread used in other operations. - t = 0 means pick any thread. Impossible since the scheduler does - not allow that. */ - if (thread_id >= 0 && thread_id < number_of_tasks) { - current_thread_g = thread_id; - gdb_cris_strcpy (remcomOutBuffer, "OK"); - } - else { - /* Not expected - send an error message. */ - gdb_cris_strcpy (remcomOutBuffer, error_message[E01]); - } - } - else { - /* Not expected - send an error message. */ - gdb_cris_strcpy (remcomOutBuffer, error_message[E01]); - } - } - break; - - case 'q': - case 'Q': - /* Query of general interest. qXXXX - Set general value XXXX. QXXXX=yyyy */ - { - int pos; - int nextpos; - int thread_id; - - switch (remcomInBuffer[1]) { - case 'C': - /* Identify the remote current thread. */ - gdb_cris_strcpy (&remcomOutBuffer[0], "QC"); - remcomOutBuffer[2] = hex_asc_hi(current_thread_c); - remcomOutBuffer[3] = hex_asc_lo(current_thread_c); - remcomOutBuffer[4] = '\0'; - break; - case 'L': - gdb_cris_strcpy (&remcomOutBuffer[0], "QM"); - /* Reply with number of threads. */ - if (os_is_started()) { - remcomOutBuffer[2] = hex_asc_hi(number_of_tasks); - remcomOutBuffer[3] = hex_asc_lo(number_of_tasks); - } - else { - remcomOutBuffer[2] = hex_asc_hi(0); - remcomOutBuffer[3] = hex_asc_lo(1); - } - /* Done with the reply. */ - remcomOutBuffer[4] = hex_asc_lo(1); - pos = 5; - /* Expects the argument thread id. */ - for (; pos < (5 + HEXCHARS_IN_THREAD_ID); pos++) - remcomOutBuffer[pos] = remcomInBuffer[pos]; - /* Reply with the thread identifiers. */ - if (os_is_started()) { - /* Store the thread identifiers of all tasks. */ - for (thread_id = 0; thread_id < number_of_tasks; thread_id++) { - nextpos = pos + HEXCHARS_IN_THREAD_ID - 1; - for (; pos < nextpos; pos ++) - remcomOutBuffer[pos] = hex_asc_lo(0); - remcomOutBuffer[pos++] = hex_asc_lo(thread_id); - } - } - else { - /* Store the thread identifier of the boot task. */ - nextpos = pos + HEXCHARS_IN_THREAD_ID - 1; - for (; pos < nextpos; pos ++) - remcomOutBuffer[pos] = hex_asc_lo(0); - remcomOutBuffer[pos++] = hex_asc_lo(current_thread_c); - } - remcomOutBuffer[pos] = '\0'; - break; - default: - /* Not supported: "" */ - /* Request information about section offsets: qOffsets. */ - remcomOutBuffer[0] = 0; - break; - } - } - break; -#endif /* PROCESS_SUPPORT */ default: /* The stub should ignore other request and send an empty @@ -1254,13 +923,6 @@ handle_exception (int sigval) } } -/* Performs a complete re-start from scratch. */ -static void -kill_restart () -{ - machine_restart(""); -} - /********************************** Breakpoint *******************************/ /* The hook for both a static (compiled) and a dynamic breakpoint set by GDB. An internal stack is used by the stub. The register image of the caller is @@ -1270,93 +932,93 @@ kill_restart () void kgdb_handle_breakpoint(void); -asm (" - .global kgdb_handle_breakpoint -kgdb_handle_breakpoint: -;; -;; Response to the break-instruction -;; -;; Create a register image of the caller -;; - move $dccr,[reg+0x5E] ; Save the flags in DCCR before disable interrupts - di ; Disable interrupts - move.d $r0,[reg] ; Save R0 - move.d $r1,[reg+0x04] ; Save R1 - move.d $r2,[reg+0x08] ; Save R2 - move.d $r3,[reg+0x0C] ; Save R3 - move.d $r4,[reg+0x10] ; Save R4 - move.d $r5,[reg+0x14] ; Save R5 - move.d $r6,[reg+0x18] ; Save R6 - move.d $r7,[reg+0x1C] ; Save R7 - move.d $r8,[reg+0x20] ; Save R8 - move.d $r9,[reg+0x24] ; Save R9 - move.d $r10,[reg+0x28] ; Save R10 - move.d $r11,[reg+0x2C] ; Save R11 - move.d $r12,[reg+0x30] ; Save R12 - move.d $r13,[reg+0x34] ; Save R13 - move.d $sp,[reg+0x38] ; Save SP (R14) -;; Due to the old assembler-versions BRP might not be recognized - .word 0xE670 ; move brp,$r0 - subq 2,$r0 ; Set to address of previous instruction. - move.d $r0,[reg+0x3c] ; Save the address in PC (R15) - clear.b [reg+0x40] ; Clear P0 - move $vr,[reg+0x41] ; Save special register P1 - clear.w [reg+0x42] ; Clear P4 - move $ccr,[reg+0x44] ; Save special register CCR - move $mof,[reg+0x46] ; P7 - clear.d [reg+0x4A] ; Clear P8 - move $ibr,[reg+0x4E] ; P9, - move $irp,[reg+0x52] ; P10, - move $srp,[reg+0x56] ; P11, - move $dtp0,[reg+0x5A] ; P12, register BAR, assembler might not know BAR - ; P13, register DCCR already saved -;; Due to the old assembler-versions BRP might not be recognized - .word 0xE670 ; move brp,r0 -;; Static (compiled) breakpoints must return to the next instruction in order -;; to avoid infinite loops. Dynamic (gdb-invoked) must restore the instruction -;; in order to execute it when execution is continued. - test.b [is_dyn_brkp] ; Is this a dynamic breakpoint? - beq is_static ; No, a static breakpoint - nop - subq 2,$r0 ; rerun the instruction the break replaced -is_static: - moveq 1,$r1 - move.b $r1,[is_dyn_brkp] ; Set the state variable to dynamic breakpoint - move.d $r0,[reg+0x62] ; Save the return address in BRP - move $usp,[reg+0x66] ; USP -;; -;; Handle the communication -;; - move.d internal_stack+1020,$sp ; Use the internal stack which grows upward - moveq 5,$r10 ; SIGTRAP - jsr handle_exception ; Interactive routine -;; -;; Return to the caller -;; - move.d [reg],$r0 ; Restore R0 - move.d [reg+0x04],$r1 ; Restore R1 - move.d [reg+0x08],$r2 ; Restore R2 - move.d [reg+0x0C],$r3 ; Restore R3 - move.d [reg+0x10],$r4 ; Restore R4 - move.d [reg+0x14],$r5 ; Restore R5 - move.d [reg+0x18],$r6 ; Restore R6 - move.d [reg+0x1C],$r7 ; Restore R7 - move.d [reg+0x20],$r8 ; Restore R8 - move.d [reg+0x24],$r9 ; Restore R9 - move.d [reg+0x28],$r10 ; Restore R10 - move.d [reg+0x2C],$r11 ; Restore R11 - move.d [reg+0x30],$r12 ; Restore R12 - move.d [reg+0x34],$r13 ; Restore R13 -;; -;; FIXME: Which registers should be restored? -;; - move.d [reg+0x38],$sp ; Restore SP (R14) - move [reg+0x56],$srp ; Restore the subroutine return pointer. - move [reg+0x5E],$dccr ; Restore DCCR - move [reg+0x66],$usp ; Restore USP - jump [reg+0x62] ; A jump to the content in register BRP works. - nop ; -"); +asm ("\n" +" .global kgdb_handle_breakpoint\n" +"kgdb_handle_breakpoint:\n" +";;\n" +";; Response to the break-instruction\n" +";;\n" +";; Create a register image of the caller\n" +";;\n" +" move $dccr,[cris_reg+0x5E] ; Save the flags in DCCR before disable interrupts\n" +" di ; Disable interrupts\n" +" move.d $r0,[cris_reg] ; Save R0\n" +" move.d $r1,[cris_reg+0x04] ; Save R1\n" +" move.d $r2,[cris_reg+0x08] ; Save R2\n" +" move.d $r3,[cris_reg+0x0C] ; Save R3\n" +" move.d $r4,[cris_reg+0x10] ; Save R4\n" +" move.d $r5,[cris_reg+0x14] ; Save R5\n" +" move.d $r6,[cris_reg+0x18] ; Save R6\n" +" move.d $r7,[cris_reg+0x1C] ; Save R7\n" +" move.d $r8,[cris_reg+0x20] ; Save R8\n" +" move.d $r9,[cris_reg+0x24] ; Save R9\n" +" move.d $r10,[cris_reg+0x28] ; Save R10\n" +" move.d $r11,[cris_reg+0x2C] ; Save R11\n" +" move.d $r12,[cris_reg+0x30] ; Save R12\n" +" move.d $r13,[cris_reg+0x34] ; Save R13\n" +" move.d $sp,[cris_reg+0x38] ; Save SP (R14)\n" +";; Due to the old assembler-versions BRP might not be recognized\n" +" .word 0xE670 ; move brp,$r0\n" +" subq 2,$r0 ; Set to address of previous instruction.\n" +" move.d $r0,[cris_reg+0x3c] ; Save the address in PC (R15)\n" +" clear.b [cris_reg+0x40] ; Clear P0\n" +" move $vr,[cris_reg+0x41] ; Save special register P1\n" +" clear.w [cris_reg+0x42] ; Clear P4\n" +" move $ccr,[cris_reg+0x44] ; Save special register CCR\n" +" move $mof,[cris_reg+0x46] ; P7\n" +" clear.d [cris_reg+0x4A] ; Clear P8\n" +" move $ibr,[cris_reg+0x4E] ; P9,\n" +" move $irp,[cris_reg+0x52] ; P10,\n" +" move $srp,[cris_reg+0x56] ; P11,\n" +" move $dtp0,[cris_reg+0x5A] ; P12, register BAR, assembler might not know BAR\n" +" ; P13, register DCCR already saved\n" +";; Due to the old assembler-versions BRP might not be recognized\n" +" .word 0xE670 ; move brp,r0\n" +";; Static (compiled) breakpoints must return to the next instruction in order\n" +";; to avoid infinite loops. Dynamic (gdb-invoked) must restore the instruction\n" +";; in order to execute it when execution is continued.\n" +" test.b [is_dyn_brkp] ; Is this a dynamic breakpoint?\n" +" beq is_static ; No, a static breakpoint\n" +" nop\n" +" subq 2,$r0 ; rerun the instruction the break replaced\n" +"is_static:\n" +" moveq 1,$r1\n" +" move.b $r1,[is_dyn_brkp] ; Set the state variable to dynamic breakpoint\n" +" move.d $r0,[cris_reg+0x62] ; Save the return address in BRP\n" +" move $usp,[cris_reg+0x66] ; USP\n" +";;\n" +";; Handle the communication\n" +";;\n" +" move.d internal_stack+1020,$sp ; Use the internal stack which grows upward\n" +" moveq 5,$r10 ; SIGTRAP\n" +" jsr handle_exception ; Interactive routine\n" +";;\n" +";; Return to the caller\n" +";;\n" +" move.d [cris_reg],$r0 ; Restore R0\n" +" move.d [cris_reg+0x04],$r1 ; Restore R1\n" +" move.d [cris_reg+0x08],$r2 ; Restore R2\n" +" move.d [cris_reg+0x0C],$r3 ; Restore R3\n" +" move.d [cris_reg+0x10],$r4 ; Restore R4\n" +" move.d [cris_reg+0x14],$r5 ; Restore R5\n" +" move.d [cris_reg+0x18],$r6 ; Restore R6\n" +" move.d [cris_reg+0x1C],$r7 ; Restore R7\n" +" move.d [cris_reg+0x20],$r8 ; Restore R8\n" +" move.d [cris_reg+0x24],$r9 ; Restore R9\n" +" move.d [cris_reg+0x28],$r10 ; Restore R10\n" +" move.d [cris_reg+0x2C],$r11 ; Restore R11\n" +" move.d [cris_reg+0x30],$r12 ; Restore R12\n" +" move.d [cris_reg+0x34],$r13 ; Restore R13\n" +";;\n" +";; FIXME: Which registers should be restored?\n" +";;\n" +" move.d [cris_reg+0x38],$sp ; Restore SP (R14)\n" +" move [cris_reg+0x56],$srp ; Restore the subroutine return pointer.\n" +" move [cris_reg+0x5E],$dccr ; Restore DCCR\n" +" move [cris_reg+0x66],$usp ; Restore USP\n" +" jump [cris_reg+0x62] ; A jump to the content in register BRP works.\n" +" nop ;\n" +"\n"); /* The hook for an interrupt generated by GDB. An internal stack is used by the stub. The register image of the caller is stored in the structure @@ -1367,94 +1029,94 @@ is_static: void kgdb_handle_serial(void); -asm (" - .global kgdb_handle_serial -kgdb_handle_serial: -;; -;; Response to a serial interrupt -;; - - move $dccr,[reg+0x5E] ; Save the flags in DCCR - di ; Disable interrupts - move.d $r0,[reg] ; Save R0 - move.d $r1,[reg+0x04] ; Save R1 - move.d $r2,[reg+0x08] ; Save R2 - move.d $r3,[reg+0x0C] ; Save R3 - move.d $r4,[reg+0x10] ; Save R4 - move.d $r5,[reg+0x14] ; Save R5 - move.d $r6,[reg+0x18] ; Save R6 - move.d $r7,[reg+0x1C] ; Save R7 - move.d $r8,[reg+0x20] ; Save R8 - move.d $r9,[reg+0x24] ; Save R9 - move.d $r10,[reg+0x28] ; Save R10 - move.d $r11,[reg+0x2C] ; Save R11 - move.d $r12,[reg+0x30] ; Save R12 - move.d $r13,[reg+0x34] ; Save R13 - move.d $sp,[reg+0x38] ; Save SP (R14) - move $irp,[reg+0x3c] ; Save the address in PC (R15) - clear.b [reg+0x40] ; Clear P0 - move $vr,[reg+0x41] ; Save special register P1, - clear.w [reg+0x42] ; Clear P4 - move $ccr,[reg+0x44] ; Save special register CCR - move $mof,[reg+0x46] ; P7 - clear.d [reg+0x4A] ; Clear P8 - move $ibr,[reg+0x4E] ; P9, - move $irp,[reg+0x52] ; P10, - move $srp,[reg+0x56] ; P11, - move $dtp0,[reg+0x5A] ; P12, register BAR, assembler might not know BAR - ; P13, register DCCR already saved -;; Due to the old assembler-versions BRP might not be recognized - .word 0xE670 ; move brp,r0 - move.d $r0,[reg+0x62] ; Save the return address in BRP - move $usp,[reg+0x66] ; USP - -;; get the serial character (from debugport.c) and check if it is a ctrl-c - - jsr getDebugChar - cmp.b 3, $r10 - bne goback - nop - - move.d [reg+0x5E], $r10 ; Get DCCR - btstq 8, $r10 ; Test the U-flag. - bmi goback - nop - -;; -;; Handle the communication -;; - move.d internal_stack+1020,$sp ; Use the internal stack - moveq 2,$r10 ; SIGINT - jsr handle_exception ; Interactive routine - -goback: -;; -;; Return to the caller -;; - move.d [reg],$r0 ; Restore R0 - move.d [reg+0x04],$r1 ; Restore R1 - move.d [reg+0x08],$r2 ; Restore R2 - move.d [reg+0x0C],$r3 ; Restore R3 - move.d [reg+0x10],$r4 ; Restore R4 - move.d [reg+0x14],$r5 ; Restore R5 - move.d [reg+0x18],$r6 ; Restore R6 - move.d [reg+0x1C],$r7 ; Restore R7 - move.d [reg+0x20],$r8 ; Restore R8 - move.d [reg+0x24],$r9 ; Restore R9 - move.d [reg+0x28],$r10 ; Restore R10 - move.d [reg+0x2C],$r11 ; Restore R11 - move.d [reg+0x30],$r12 ; Restore R12 - move.d [reg+0x34],$r13 ; Restore R13 -;; -;; FIXME: Which registers should be restored? -;; - move.d [reg+0x38],$sp ; Restore SP (R14) - move [reg+0x56],$srp ; Restore the subroutine return pointer. - move [reg+0x5E],$dccr ; Restore DCCR - move [reg+0x66],$usp ; Restore USP - reti ; Return from the interrupt routine - nop -"); +asm ("\n" +" .global kgdb_handle_serial\n" +"kgdb_handle_serial:\n" +";;\n" +";; Response to a serial interrupt\n" +";;\n" +"\n" +" move $dccr,[cris_reg+0x5E] ; Save the flags in DCCR\n" +" di ; Disable interrupts\n" +" move.d $r0,[cris_reg] ; Save R0\n" +" move.d $r1,[cris_reg+0x04] ; Save R1\n" +" move.d $r2,[cris_reg+0x08] ; Save R2\n" +" move.d $r3,[cris_reg+0x0C] ; Save R3\n" +" move.d $r4,[cris_reg+0x10] ; Save R4\n" +" move.d $r5,[cris_reg+0x14] ; Save R5\n" +" move.d $r6,[cris_reg+0x18] ; Save R6\n" +" move.d $r7,[cris_reg+0x1C] ; Save R7\n" +" move.d $r8,[cris_reg+0x20] ; Save R8\n" +" move.d $r9,[cris_reg+0x24] ; Save R9\n" +" move.d $r10,[cris_reg+0x28] ; Save R10\n" +" move.d $r11,[cris_reg+0x2C] ; Save R11\n" +" move.d $r12,[cris_reg+0x30] ; Save R12\n" +" move.d $r13,[cris_reg+0x34] ; Save R13\n" +" move.d $sp,[cris_reg+0x38] ; Save SP (R14)\n" +" move $irp,[cris_reg+0x3c] ; Save the address in PC (R15)\n" +" clear.b [cris_reg+0x40] ; Clear P0\n" +" move $vr,[cris_reg+0x41] ; Save special register P1,\n" +" clear.w [cris_reg+0x42] ; Clear P4\n" +" move $ccr,[cris_reg+0x44] ; Save special register CCR\n" +" move $mof,[cris_reg+0x46] ; P7\n" +" clear.d [cris_reg+0x4A] ; Clear P8\n" +" move $ibr,[cris_reg+0x4E] ; P9,\n" +" move $irp,[cris_reg+0x52] ; P10,\n" +" move $srp,[cris_reg+0x56] ; P11,\n" +" move $dtp0,[cris_reg+0x5A] ; P12, register BAR, assembler might not know BAR\n" +" ; P13, register DCCR already saved\n" +";; Due to the old assembler-versions BRP might not be recognized\n" +" .word 0xE670 ; move brp,r0\n" +" move.d $r0,[cris_reg+0x62] ; Save the return address in BRP\n" +" move $usp,[cris_reg+0x66] ; USP\n" +"\n" +";; get the serial character (from debugport.c) and check if it is a ctrl-c\n" +"\n" +" jsr getDebugChar\n" +" cmp.b 3, $r10\n" +" bne goback\n" +" nop\n" +"\n" +" move.d [cris_reg+0x5E], $r10 ; Get DCCR\n" +" btstq 8, $r10 ; Test the U-flag.\n" +" bmi goback\n" +" nop\n" +"\n" +";;\n" +";; Handle the communication\n" +";;\n" +" move.d internal_stack+1020,$sp ; Use the internal stack\n" +" moveq 2,$r10 ; SIGINT\n" +" jsr handle_exception ; Interactive routine\n" +"\n" +"goback:\n" +";;\n" +";; Return to the caller\n" +";;\n" +" move.d [cris_reg],$r0 ; Restore R0\n" +" move.d [cris_reg+0x04],$r1 ; Restore R1\n" +" move.d [cris_reg+0x08],$r2 ; Restore R2\n" +" move.d [cris_reg+0x0C],$r3 ; Restore R3\n" +" move.d [cris_reg+0x10],$r4 ; Restore R4\n" +" move.d [cris_reg+0x14],$r5 ; Restore R5\n" +" move.d [cris_reg+0x18],$r6 ; Restore R6\n" +" move.d [cris_reg+0x1C],$r7 ; Restore R7\n" +" move.d [cris_reg+0x20],$r8 ; Restore R8\n" +" move.d [cris_reg+0x24],$r9 ; Restore R9\n" +" move.d [cris_reg+0x28],$r10 ; Restore R10\n" +" move.d [cris_reg+0x2C],$r11 ; Restore R11\n" +" move.d [cris_reg+0x30],$r12 ; Restore R12\n" +" move.d [cris_reg+0x34],$r13 ; Restore R13\n" +";;\n" +";; FIXME: Which registers should be restored?\n" +";;\n" +" move.d [cris_reg+0x38],$sp ; Restore SP (R14)\n" +" move [cris_reg+0x56],$srp ; Restore the subroutine return pointer.\n" +" move [cris_reg+0x5E],$dccr ; Restore DCCR\n" +" move [cris_reg+0x66],$usp ; Restore USP\n" +" reti ; Return from the interrupt routine\n" +" nop\n" +"\n"); /* Use this static breakpoint in the start-up only. */ diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index ab725edbc680..1d866d3ee2f8 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -2,9 +2,7 @@ if ETRAX_ARCH_V32 config ETRAX_ETHERNET bool "Ethernet support" - depends on ETRAX_ARCH_V32 - select ETHERNET - select NET_CORE + depends on ETRAX_ARCH_V32 && NETDEVICES select MII help This option enables the ETRAX FS built-in 10/100Mbit Ethernet @@ -640,8 +638,6 @@ config ETRAX_STREAMCOPROC This option enables a driver for the stream co-processor for cryptographic operations. -source drivers/mmc/Kconfig - config ETRAX_MMC_IOP tristate "MMC/SD host driver using IO-processor" depends on ETRAX_ARCH_V32 && MMC @@ -833,9 +829,4 @@ config ETRAX_SPI_MMC_WP_GPIO_PIN The pin to use for the SD/MMC write-protect signal for a memory card. If defined as " " (space), the card is considered writable. -# Avoid choices causing non-working configs by conditionalizing the inclusion. -if ETRAX_SPI_MMC -source drivers/spi/Kconfig -endif - endif diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index f1e79edc9dd2..c8325455520e 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -5,5 +5,9 @@ header-y += arch-v32/ generic-y += clkdev.h generic-y += exec.h +generic-y += kvm_para.h +generic-y += linkage.h generic-y += module.h generic-y += trace_clock.h +generic-y += vga.h +generic-y += xor.h diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h index ac12ae2b9286..5d3047e5563b 100644 --- a/arch/cris/include/asm/io.h +++ b/arch/cris/include/asm/io.h @@ -167,6 +167,9 @@ static inline void outsl(unsigned int port, const void *addr, cris_iops->write_io(port, (void *)addr, 4, count); } +#define inb_p(port) inb(port) +#define outb_p(val, port) outb((val), (port)) + /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem * access diff --git a/arch/cris/include/asm/linkage.h b/arch/cris/include/asm/linkage.h deleted file mode 100644 index 291c2d01c44f..000000000000 --- a/arch/cris/include/asm/linkage.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H - -/* Nothing to see here... */ - -#endif diff --git a/arch/cris/include/asm/page.h b/arch/cris/include/asm/page.h index be45ee366be9..dfc53f9b88ec 100644 --- a/arch/cris/include/asm/page.h +++ b/arch/cris/include/asm/page.h @@ -51,7 +51,6 @@ typedef struct page *pgtable_t; */ #define virt_to_page(kaddr) (mem_map + (((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT)) -#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid((unsigned)(kaddr) >> PAGE_SHIFT) /* convert a page (based on mem_map and forward) to a physical address diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index ba409c9947bc..47b1ec55092d 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -76,6 +76,8 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index 9ac80946dada..c81af5bd9167 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -19,9 +19,6 @@ unsigned long empty_zero_page; void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - unsigned long tmp; - BUG_ON(!mem_map); /* max/min_low_pfn was set by setup.c @@ -29,35 +26,9 @@ mem_init(void) * * high_memory was also set in setup.c */ - - max_mapnr = num_physpages = max_low_pfn - min_low_pfn; - - /* this will put all memory onto the freelists */ - totalram_pages = free_all_bootmem(); - - reservedpages = 0; - for (tmp = 0; tmp < max_mapnr; tmp++) { - /* - * Only count reserved RAM pages - */ - if (PageReserved(mem_map + tmp)) - reservedpages++; - } - - codesize = (unsigned long) &_etext - (unsigned long) &_stext; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO - "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, " - "%dk init)\n" , - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10 - ); + max_mapnr = max_low_pfn - min_low_pfn; + free_all_bootmem(); + mem_init_print_info(NULL); } /* free the pages occupied by initialization code */ @@ -65,5 +36,5 @@ mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 2ce731f9aa4d..4b6628ea381e 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -14,6 +14,7 @@ config FRV select ARCH_WANT_IPC_PARSE_VERSION select OLD_SIGSUSPEND3 select OLD_SIGACTION + select HAVE_DEBUG_STACKOVERFLOW config ZONE_DMA bool diff --git a/arch/frv/Kconfig.debug b/arch/frv/Kconfig.debug index 211f01bc4caa..98c99a3ed2be 100644 --- a/arch/frv/Kconfig.debug +++ b/arch/frv/Kconfig.debug @@ -2,10 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - config GDBSTUB bool "Remote GDB kernel debugging" depends on DEBUG_KERNEL diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 31dbb5d8e13d..dbc08520f22c 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -74,5 +74,7 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/kernel/head.S b/arch/frv/kernel/head.S index e9a8cc63ac94..a7d0bea9c036 100644 --- a/arch/frv/kernel/head.S +++ b/arch/frv/kernel/head.S @@ -479,11 +479,6 @@ __head_mmu_enabled: LEDS 0x000c - # initialise the processor and the peripherals - #call SYMBOL_NAME(processor_init) - #call SYMBOL_NAME(unit_init) - #LEDS 0x0aff - sethi.p #0xe5e5,gr3 setlo #0xe5e5,gr3 or.p gr3,gr0,gr4 diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index 0b579927439d..ac767d94a880 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len) /* * Send us to sleep. */ -static int sysctl_pm_do_suspend(ctl_table *ctl, int write, +static int sysctl_pm_do_suspend(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int mode; @@ -197,7 +197,7 @@ static int try_set_cmode(int new_cmode) } -static int cmode_procctl(ctl_table *ctl, int write, +static int cmode_procctl(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_cmode; @@ -269,7 +269,7 @@ static int try_set_cm(int new_cm) return 0; } -static int p0_procctl(ctl_table *ctl, int write, +static int p0_procctl(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_p0; @@ -282,7 +282,7 @@ static int p0_procctl(ctl_table *ctl, int write, return try_set_p0(new_p0)?:*lenp; } -static int cm_procctl(ctl_table *ctl, int write, +static int cm_procctl(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_cm; diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index a5136474c6fd..ae3a6706419b 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -735,7 +735,7 @@ static void __init parse_cmdline_early(char *cmdline) /* "mem=XXX[kKmM]" sets SDRAM size to <mem>, overriding the value we worked * out from the SDRAM controller mask register */ - if (!memcmp(cmdline, "mem=", 4)) { + if (!strncmp(cmdline, "mem=", 4)) { unsigned long long mem_size; mem_size = memparse(cmdline + 4, &cmdline); @@ -876,6 +876,7 @@ late_initcall(setup_arch_serial); static void __init setup_linux_memory(void) { unsigned long bootmap_size, low_top_pfn, kstart, kend, high_mem; + unsigned long physpages; kstart = (unsigned long) &__kernel_image_start - PAGE_OFFSET; kend = (unsigned long) &__kernel_image_end - PAGE_OFFSET; @@ -893,19 +894,19 @@ static void __init setup_linux_memory(void) ); /* pass the memory that the kernel can immediately use over to the bootmem allocator */ - max_mapnr = num_physpages = (memory_end - memory_start) >> PAGE_SHIFT; + max_mapnr = physpages = (memory_end - memory_start) >> PAGE_SHIFT; low_top_pfn = (KERNEL_LOWMEM_END - KERNEL_LOWMEM_START) >> PAGE_SHIFT; high_mem = 0; - if (num_physpages > low_top_pfn) { + if (physpages > low_top_pfn) { #ifdef CONFIG_HIGHMEM - high_mem = num_physpages - low_top_pfn; + high_mem = physpages - low_top_pfn; #else - max_mapnr = num_physpages = low_top_pfn; + max_mapnr = physpages = low_top_pfn; #endif } else { - low_top_pfn = num_physpages; + low_top_pfn = physpages; } min_low_pfn = memory_start >> PAGE_SHIFT; @@ -979,7 +980,7 @@ static void __init setup_uclinux_memory(void) free_bootmem(memory_start, memory_end - memory_start); high_memory = (void *) (memory_end & PAGE_MASK); - max_mapnr = num_physpages = ((unsigned long) high_memory - PAGE_OFFSET) >> PAGE_SHIFT; + max_mapnr = ((unsigned long) high_memory - PAGE_OFFSET) >> PAGE_SHIFT; min_low_pfn = memory_start >> PAGE_SHIFT; max_low_pfn = memory_end >> PAGE_SHIFT; diff --git a/arch/frv/kernel/sysctl.c b/arch/frv/kernel/sysctl.c index 6c155d69da29..f4dfae2c75ad 100644 --- a/arch/frv/kernel/sysctl.c +++ b/arch/frv/kernel/sysctl.c @@ -46,7 +46,7 @@ static void frv_change_dcache_mode(unsigned long newmode) /* * handle requests to dynamically switch the write caching mode delivered by /proc */ -static int procctl_frv_cachemode(ctl_table *table, int write, +static int procctl_frv_cachemode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -121,7 +121,7 @@ static int procctl_frv_cachemode(ctl_table *table, int write, * permit the mm_struct the nominated process is using have its MMU context ID pinned */ #ifdef CONFIG_MMU -static int procctl_frv_pin_cxnr(ctl_table *table, int write, +static int procctl_frv_pin_cxnr(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 4bff48c19d29..a6d105d61b26 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -523,7 +523,7 @@ void die_if_kernel(const char *str, ...) return; va_start(va, str); - vsprintf(buffer, str, va); + vsnprintf(buffer, sizeof(buffer), str, va); va_end(va); console_verbose(); diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index dee354fa6b64..88a159743528 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -78,7 +78,7 @@ void __init paging_init(void) memset((void *) empty_zero_page, 0, PAGE_SIZE); #ifdef CONFIG_HIGHMEM - if (num_physpages - num_mappedpages) { + if (get_num_physpages() - num_mappedpages) { pgd_t *pge; pud_t *pue; pmd_t *pme; @@ -96,7 +96,7 @@ void __init paging_init(void) */ zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = num_physpages - num_mappedpages; + zones_size[ZONE_HIGHMEM] = get_num_physpages() - num_mappedpages; #endif free_area_init(zones_size); @@ -114,45 +114,24 @@ void __init paging_init(void) */ void __init mem_init(void) { - unsigned long npages = (memory_end - memory_start) >> PAGE_SHIFT; - unsigned long tmp; -#ifdef CONFIG_MMU - unsigned long loop, pfn; - int datapages = 0; -#endif - int codek = 0, datak = 0; + unsigned long code_size = _etext - _stext; /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); - -#ifdef CONFIG_MMU - for (loop = 0 ; loop < npages ; loop++) - if (PageReserved(&mem_map[loop])) - datapages++; - -#ifdef CONFIG_HIGHMEM - for (pfn = num_physpages - 1; pfn >= num_mappedpages; pfn--) - free_highmem_page(&mem_map[pfn]); -#endif - - codek = ((unsigned long) &_etext - (unsigned long) &_stext) >> 10; - datak = datapages << (PAGE_SHIFT - 10); - -#else - codek = (_etext - _stext) >> 10; - datak = 0; //(__bss_stop - _sdata) >> 10; + free_all_bootmem(); +#if defined(CONFIG_MMU) && defined(CONFIG_HIGHMEM) + { + unsigned long pfn; + + for (pfn = get_num_physpages() - 1; + pfn >= num_mappedpages; pfn--) + free_highmem_page(&mem_map[pfn]); + } #endif - tmp = nr_free_pages() << PAGE_SHIFT; - printk("Memory available: %luKiB/%luKiB RAM, %luKiB/%luKiB ROM (%dKiB kernel code, %dKiB data)\n", - tmp >> 10, - npages << (PAGE_SHIFT - 10), - (rom_length > 0) ? ((rom_length >> 10) - codek) : 0, - rom_length >> 10, - codek, - datak - ); - + mem_init_print_info(NULL); + if (rom_length > 0 && rom_length >= code_size) + printk("Memory available: %luKiB/%luKiB ROM\n", + (rom_length - code_size) >> 10, rom_length >> 10); } /* end mem_init() */ /*****************************************************************************/ @@ -162,7 +141,7 @@ void __init mem_init(void) void free_initmem(void) { #if defined(CONFIG_RAMKERNEL) && !defined(CONFIG_PROTECT_KERNEL) - free_initmem_default(0); + free_initmem_default(-1); #endif } /* end free_initmem() */ @@ -173,6 +152,6 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } /* end free_initrd_mem() */ #endif diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 303e4f9a79d1..3d6759ee382f 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -94,126 +94,10 @@ endmenu source "net/Kconfig" -source "drivers/base/Kconfig" - -source "drivers/mtd/Kconfig" - -source "drivers/block/Kconfig" - -source "drivers/ide/Kconfig" +source "drivers/Kconfig" source "arch/h8300/Kconfig.ide" -source "drivers/net/Kconfig" - -# -# input - input/joystick depends on it. As does USB. -# -source "drivers/input/Kconfig" - -menu "Character devices" - -config VT - bool "Virtual terminal" - ---help--- - If you say Y here, you will get support for terminal devices with - display and keyboard devices. These are called "virtual" because you - can run several virtual terminals (also called virtual consoles) on - one physical terminal. This is rather useful, for example one - virtual terminal can collect system messages and warnings, another - one can be used for a text-mode user session, and a third could run - an X session, all in parallel. Switching between virtual terminals - is done with certain key combinations, usually Alt-<function key>. - - The setterm command ("man setterm") can be used to change the - properties (such as colors or beeping) of a virtual terminal. The - man page console_codes(4) ("man console_codes") contains the special - character sequences that can be used to change those properties - directly. The fonts used on virtual terminals can be changed with - the setfont ("man setfont") command and the key bindings are defined - with the loadkeys ("man loadkeys") command. - - You need at least one virtual terminal device in order to make use - of your keyboard and monitor. Therefore, only people configuring an - embedded system would want to say N here in order to save some - memory; the only way to log into such a system is then via a serial - or network connection. - - If unsure, say Y, or else you won't be able to do much with your new - shiny Linux system :-) - -config VT_CONSOLE - bool "Support for console on virtual terminal" - depends on VT - ---help--- - The system console is the device which receives all kernel messages - and warnings and which allows logins in single user mode. If you - answer Y here, a virtual terminal (the device used to interact with - a physical terminal) can be used as system console. This is the most - common mode of operations, so you should say Y here unless you want - the kernel messages be output only to a serial port (in which case - you should say Y to "Console on serial port", below). - - If you do say Y here, by default the currently visible virtual - terminal (/dev/tty0) will be used as system console. You can change - that with a kernel command line option such as "console=tty3" which - would use the third virtual terminal as system console. (Try "man - bootparam" or see the documentation of your boot loader (lilo or - loadlin) about how to pass options to the kernel at boot time.) - - If unsure, say Y. - -config HW_CONSOLE - bool - depends on VT - default y - -comment "Unix98 PTY support" - -config UNIX98_PTYS - bool "Unix98 PTY support" - ---help--- - A pseudo terminal (PTY) is a software device consisting of two - halves: a master and a slave. The slave device behaves identical to - a physical terminal; the master device is used by a process to - read data from and write data to the slave, thereby emulating a - terminal. Typical programs for the master side are telnet servers - and xterms. - - Linux has traditionally used the BSD-like names /dev/ptyxx for - masters and /dev/ttyxx for slaves of pseudo terminals. This scheme - has a number of problems. The GNU C library glibc 2.1 and later, - however, supports the Unix98 naming standard: in order to acquire a - pseudo terminal, a process opens /dev/ptmx; the number of the pseudo - terminal is then made available to the process and the pseudo - terminal slave can be accessed as /dev/pts/<number>. What was - traditionally /dev/ttyp2 will then be /dev/pts/2, for example. - - The entries in /dev/pts/ are created on the fly by a virtual - file system; therefore, if you say Y here you should say Y to - "/dev/pts file system for Unix98 PTYs" as well. - - If you want to say Y here, you need to have the C library glibc 2.1 - or later (equal to libc-6.1, check with "ls -l /lib/libc.so.*"). - Read the instructions in <file:Documentation/Changes> pertaining to - pseudo terminals. It's safe to say N. - -source "drivers/char/pcmcia/Kconfig" - -source "drivers/tty/serial/Kconfig" - -source "drivers/i2c/Kconfig" - -source "drivers/hwmon/Kconfig" - -source "drivers/usb/Kconfig" - -source "drivers/uwb/Kconfig" - -endmenu - -source "drivers/staging/Kconfig" - source "fs/Kconfig" source "arch/h8300/Kconfig.debug" diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index 321f3922728b..cdee771460ed 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -64,6 +64,7 @@ choice config H83002 bool "H8/3001,3002,3003" + depends on BROKEN select CPU_H8300H config H83007 @@ -72,6 +73,7 @@ config H83007 config H83048 bool "H8/3044,3045,3046,3047,3048,3052" + depends on BROKEN select CPU_H8300H config H83068 @@ -155,10 +157,12 @@ config H8300_TIMER16_CH config H8300_ITU_CH int "ITU channel" depends on H8300_ITU + range 0 4 config H8300_TPU_CH int "TPU channel" depends on H8300_TPU + range 0 4 source "kernel/Kconfig.preempt" diff --git a/arch/h8300/boot/compressed/Makefile b/arch/h8300/boot/compressed/Makefile index 6745cb1ffb4f..a6c98fe3bbc3 100644 --- a/arch/h8300/boot/compressed/Makefile +++ b/arch/h8300/boot/compressed/Makefile @@ -16,7 +16,7 @@ OBJECTS = $(obj)/head.o $(obj)/misc.o # CONFIG_MEMORY_START ?= 0x00400000 CONFIG_BOOT_LINK_OFFSET ?= 0x00140000 -IMAGE_OFFSET := $(shell printf "0x%08x" $$[$(CONFIG_MEMORY_START)+$(CONFIG_BOOT_LINK_OFFSET)]) +IMAGE_OFFSET := $(shell printf "0x%08x" $$(($(CONFIG_MEMORY_START)+$(CONFIG_BOOT_LINK_OFFSET)))) LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup $(obj)/vmlinux.lds diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c index 51ab6cbd030f..4a1e3dd43948 100644 --- a/arch/h8300/boot/compressed/misc.c +++ b/arch/h8300/boot/compressed/misc.c @@ -79,7 +79,6 @@ static void error(char *m); int puts(const char *); -extern int _text; /* Defined in vmlinux.lds.S */ extern int _end; static unsigned long free_mem_ptr; static unsigned long free_mem_end_ptr; diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 995eb47e01bb..8ada3cf0c98d 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -1,6 +1,8 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += linkage.h generic-y += mmu.h generic-y += module.h generic-y += trace_clock.h +generic-y += xor.h diff --git a/arch/h8300/include/asm/barrier.h b/arch/h8300/include/asm/barrier.h index c7283c343c55..9e0aa9fc195d 100644 --- a/arch/h8300/include/asm/barrier.h +++ b/arch/h8300/include/asm/barrier.h @@ -12,6 +12,8 @@ #define wmb() asm volatile ("" : : :"memory") #define set_mb(var, value) do { xchg(&var, value); } while (0) +#define read_barrier_depends() do { } while (0) + #ifdef CONFIG_SMP #define smp_mb() mb() #define smp_rmb() rmb() diff --git a/arch/h8300/include/asm/linkage.h b/arch/h8300/include/asm/linkage.h deleted file mode 100644 index 1d81604fb0ad..000000000000 --- a/arch/h8300/include/asm/linkage.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _H8300_LINKAGE_H -#define _H8300_LINKAGE_H - -#undef SYMBOL_NAME_LABEL -#define SYMBOL_NAME_LABEL(_name_) _##_name_##: -#endif diff --git a/arch/h8300/include/asm/tlb.h b/arch/h8300/include/asm/tlb.h index 3dea80ad9e6f..7f0743051ad5 100644 --- a/arch/h8300/include/asm/tlb.h +++ b/arch/h8300/include/asm/tlb.h @@ -1,16 +1,3 @@ -/* - include/asm-h8300/tlb.h -*/ - -#ifndef __H8300_TLB_H__ -#define __H8300_TLB_H__ - -#define tlb_flush(tlb) do { } while(0) - -/* - include/asm-h8300/tlb.h -*/ - #ifndef __H8300_TLB_H__ #define __H8300_TLB_H__ @@ -19,5 +6,3 @@ #include <asm-generic/tlb.h> #endif - -#endif diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index 5d1c6d0870e6..a38d38a6520b 100644 --- a/arch/h8300/include/uapi/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h @@ -74,4 +74,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S index 617a6878787f..94bd30f11df6 100644 --- a/arch/h8300/kernel/entry.S +++ b/arch/h8300/kernel/entry.S @@ -87,13 +87,13 @@ INTERRUPTS = 128 bne 5f /* user mode */ - mov.l sp,@SYMBOL_NAME(sw_usp) + mov.l sp,@_sw_usp mov.l @sp,er0 /* restore saved er0 */ orc #0x10,ccr /* switch kernel stack */ - mov.l @SYMBOL_NAME(sw_ksp),sp + mov.l @_sw_ksp,sp sub.l #(LRET-LORIG),sp /* allocate LORIG - LRET */ SAVEREGS - mov.l @SYMBOL_NAME(sw_usp),er0 + mov.l @_sw_usp,er0 mov.l @(USERRET:16,er0),er1 /* copy the RET addr */ mov.l er1,@(LRET-LER3:16,sp) SAVEEXR @@ -128,7 +128,7 @@ INTERRUPTS = 128 bne 7f orc #0x80,ccr - mov.l @SYMBOL_NAME(sw_usp),er0 + mov.l @_sw_usp,er0 mov.l @(LER0-LER1:16,sp),er1 /* restore ER0 */ mov.l er1,@er0 RESTOREEXR @@ -141,7 +141,7 @@ INTERRUPTS = 128 mov.l @sp+,er1 add.l #(LRET-LER1),sp /* remove LORIG - LRET */ - mov.l sp,@SYMBOL_NAME(sw_ksp) + mov.l sp,@_sw_ksp andc #0xef,ccr /* switch to user mode */ mov.l er0,sp bra 8f @@ -155,20 +155,20 @@ INTERRUPTS = 128 rte .endm -.globl SYMBOL_NAME(system_call) -.globl SYMBOL_NAME(ret_from_exception) -.globl SYMBOL_NAME(ret_from_fork) -.globl SYMBOL_NAME(ret_from_kernel_thread) -.globl SYMBOL_NAME(ret_from_interrupt) -.globl SYMBOL_NAME(interrupt_redirect_table) -.globl SYMBOL_NAME(sw_ksp),SYMBOL_NAME(sw_usp) -.globl SYMBOL_NAME(resume) -.globl SYMBOL_NAME(interrupt_entry) -.globl SYMBOL_NAME(trace_break) +.globl _system_call +.globl _ret_from_exception +.globl _ret_from_fork +.globl _ret_from_kernel_thread +.globl _ret_from_interrupt +.globl _interrupt_redirect_table +.globl _sw_ksp,_sw_usp +.globl _resume +.globl _interrupt_entry +.globl _trace_break #if defined(CONFIG_ROMKERNEL) .section .int_redirect,"ax" -SYMBOL_NAME_LABEL(interrupt_redirect_table) +_interrupt_redirect_table: #if defined(CONFIG_CPU_H8300H) .rept 7 .long 0 @@ -178,54 +178,54 @@ SYMBOL_NAME_LABEL(interrupt_redirect_table) .rept 5 .long 0 .endr - jmp @SYMBOL_NAME(trace_break) + jmp @_trace_break .long 0 #endif - jsr @SYMBOL_NAME(interrupt_entry) /* NMI */ - jmp @SYMBOL_NAME(system_call) /* TRAPA #0 (System call) */ + jsr @_interrupt_entry /* NMI */ + jmp @_system_call /* TRAPA #0 (System call) */ .long 0 .long 0 - jmp @SYMBOL_NAME(trace_break) /* TRAPA #3 (breakpoint) */ + jmp @_trace_break /* TRAPA #3 (breakpoint) */ .rept INTERRUPTS-12 - jsr @SYMBOL_NAME(interrupt_entry) + jsr @_interrupt_entry .endr #endif #if defined(CONFIG_RAMKERNEL) -.globl SYMBOL_NAME(interrupt_redirect_table) +.globl _interrupt_redirect_table .section .bss -SYMBOL_NAME_LABEL(interrupt_redirect_table) +_interrupt_redirect_table: .space 4 #endif .section .text .align 2 -SYMBOL_NAME_LABEL(interrupt_entry) +_interrupt_entry: SAVE_ALL mov.l sp,er0 add.l #LVEC,er0 btst #4,r1l bne 1f /* user LVEC */ - mov.l @SYMBOL_NAME(sw_usp),er0 + mov.l @_sw_usp,er0 adds #4,er0 1: mov.l @er0,er0 /* LVEC address */ #if defined(CONFIG_ROMKERNEL) - sub.l #SYMBOL_NAME(interrupt_redirect_table),er0 + sub.l #_interrupt_redirect_table,er0 #endif #if defined(CONFIG_RAMKERNEL) - mov.l @SYMBOL_NAME(interrupt_redirect_table),er1 + mov.l @_interrupt_redirect_table,er1 sub.l er1,er0 #endif SHLR2 er0 dec.l #1,er0 mov.l sp,er1 subs #4,er1 /* adjust ret_pc */ - jsr @SYMBOL_NAME(do_IRQ) - jmp @SYMBOL_NAME(ret_from_interrupt) + jsr @_do_IRQ + jmp @_ret_from_interrupt -SYMBOL_NAME_LABEL(system_call) +_system_call: subs #4,sp /* dummy LVEC */ SAVE_ALL andc #0x7f,ccr @@ -233,21 +233,21 @@ SYMBOL_NAME_LABEL(system_call) /* save top of frame */ mov.l sp,er0 - jsr @SYMBOL_NAME(set_esp0) + jsr @_set_esp0 mov.l sp,er2 and.w #0xe000,r2 mov.b @((TI_FLAGS+3-(TIF_SYSCALL_TRACE >> 3)):16,er2),r2l btst #(TIF_SYSCALL_TRACE & 7),r2l beq 1f - jsr @SYMBOL_NAME(do_syscall_trace) + jsr @_do_syscall_trace 1: cmp.l #NR_syscalls,er4 bcc badsys SHLL2 er4 - mov.l #SYMBOL_NAME(sys_call_table),er0 + mov.l #_sys_call_table,er0 add.l er4,er0 mov.l @er0,er4 - beq SYMBOL_NAME(ret_from_exception):16 + beq _ret_from_exception:16 mov.l @(LER1:16,sp),er0 mov.l @(LER2:16,sp),er1 mov.l @(LER3:16,sp),er2 @@ -258,10 +258,10 @@ SYMBOL_NAME_LABEL(system_call) mov.b @((TI_FLAGS+3-(TIF_SYSCALL_TRACE >> 3)):16,er2),r2l btst #(TIF_SYSCALL_TRACE & 7),r2l beq 2f - jsr @SYMBOL_NAME(do_syscall_trace) + jsr @_do_syscall_trace 2: #if defined(CONFIG_SYSCALL_PRINT) - jsr @SYMBOL_NAME(syscall_print) + jsr @_syscall_print #endif orc #0x80,ccr bra resume_userspace @@ -275,11 +275,11 @@ badsys: #define resume_kernel restore_all #endif -SYMBOL_NAME_LABEL(ret_from_exception) +_ret_from_exception: #if defined(CONFIG_PREEMPT) orc #0x80,ccr #endif -SYMBOL_NAME_LABEL(ret_from_interrupt) +_ret_from_interrupt: mov.b @(LCCR+1:16,sp),r0l btst #4,r0l bne resume_kernel:8 /* return from kernel */ @@ -296,12 +296,12 @@ work_pending: /* work notifysig */ mov.l sp,er0 subs #4,er0 /* er0: pt_regs */ - jsr @SYMBOL_NAME(do_notify_resume) + jsr @_do_notify_resume bra restore_all:8 work_resched: mov.l sp,er0 - jsr @SYMBOL_NAME(set_esp0) - jsr @SYMBOL_NAME(schedule) + jsr @_set_esp0 + jsr @_schedule bra resume_userspace:8 restore_all: RESTORE_ALL /* Does RTE */ @@ -320,26 +320,26 @@ need_resched: mov.l er0,@(TI_PRE_COUNT:16,er4) andc #0x7f,ccr mov.l sp,er0 - jsr @SYMBOL_NAME(set_esp0) - jsr @SYMBOL_NAME(schedule) + jsr @_set_esp0 + jsr @_schedule orc #0x80,ccr bra need_resched:8 #endif -SYMBOL_NAME_LABEL(ret_from_fork) +_ret_from_fork: mov.l er2,er0 - jsr @SYMBOL_NAME(schedule_tail) - jmp @SYMBOL_NAME(ret_from_exception) + jsr @_schedule_tail + jmp @_ret_from_exception -SYMBOL_NAME_LABEL(ret_from_kernel_thread) +_ret_from_kernel_thread: mov.l er2,er0 - jsr @SYMBOL_NAME(schedule_tail) + jsr @_schedule_tail mov.l @(LER4:16,sp),er0 mov.l @(LER5:16,sp),er1 jsr @er1 - jmp @SYMBOL_NAME(ret_from_exception) + jmp @_ret_from_exception -SYMBOL_NAME_LABEL(resume) +_resume: /* * Beware - when entering resume, offset of tss is in d1, * prev (the current task) is in a0, next (the new task) @@ -355,7 +355,7 @@ SYMBOL_NAME_LABEL(resume) /* disable interrupts */ orc #0x80,ccr - mov.l @SYMBOL_NAME(sw_usp),er3 + mov.l @_sw_usp,er3 mov.l er3,@(THREAD_USP:16,er0) mov.l sp,@(THREAD_KSP:16,er0) @@ -363,7 +363,7 @@ SYMBOL_NAME_LABEL(resume) /* FIXME: what did we hack out of here, this does nothing! */ mov.l @(THREAD_USP:16,er1),er0 - mov.l er0,@SYMBOL_NAME(sw_usp) + mov.l er0,@_sw_usp mov.l @(THREAD_KSP:16,er1),sp /* restore status register */ @@ -372,15 +372,15 @@ SYMBOL_NAME_LABEL(resume) ldc r3l,ccr rts -SYMBOL_NAME_LABEL(trace_break) +_trace_break: subs #4,sp SAVE_ALL sub.l er1,er1 dec.l #1,er1 mov.l er1,@(LORIG,sp) mov.l sp,er0 - jsr @SYMBOL_NAME(set_esp0) - mov.l @SYMBOL_NAME(sw_usp),er0 + jsr @_set_esp0 + mov.l @_sw_usp,er0 mov.l @er0,er1 mov.w @(-2:16,er1),r2 cmp.w #0x5730,r2 @@ -390,13 +390,13 @@ SYMBOL_NAME_LABEL(trace_break) 1: and.w #0xff,e1 mov.l er1,er0 - jsr @SYMBOL_NAME(trace_trap) - jmp @SYMBOL_NAME(ret_from_exception) + jsr @_trace_trap + jmp @_ret_from_exception .section .bss -SYMBOL_NAME_LABEL(sw_ksp) +_sw_ksp: .space 4 -SYMBOL_NAME_LABEL(sw_usp) +_sw_usp: .space 4 .end diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S index 5c2168fb9b9e..c55e0ed270d5 100644 --- a/arch/h8300/kernel/syscalls.S +++ b/arch/h8300/kernel/syscalls.S @@ -2,8 +2,10 @@ #include <linux/sys.h> #include <asm/linkage.h> #include <asm/unistd.h> - -.globl SYMBOL_NAME(sys_call_table) + +#define CALL(x) .long _ ## x + +.globl _sys_call_table #if defined(CONFIG_CPU_H8300H) .h8300h @@ -13,324 +15,324 @@ #endif .section .text .align 2 -SYMBOL_NAME_LABEL(sys_call_table) - .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ - .long SYMBOL_NAME(sys_exit) - .long SYMBOL_NAME(sys_fork) - .long SYMBOL_NAME(sys_read) - .long SYMBOL_NAME(sys_write) - .long SYMBOL_NAME(sys_open) /* 5 */ - .long SYMBOL_NAME(sys_close) - .long SYMBOL_NAME(sys_waitpid) - .long SYMBOL_NAME(sys_creat) - .long SYMBOL_NAME(sys_link) - .long SYMBOL_NAME(sys_unlink) /* 10 */ - .long SYMBOL_NAME(sys_execve) - .long SYMBOL_NAME(sys_chdir) - .long SYMBOL_NAME(sys_time) - .long SYMBOL_NAME(sys_mknod) - .long SYMBOL_NAME(sys_chmod) /* 15 */ - .long SYMBOL_NAME(sys_chown16) - .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ - .long SYMBOL_NAME(sys_stat) - .long SYMBOL_NAME(sys_lseek) - .long SYMBOL_NAME(sys_getpid) /* 20 */ - .long SYMBOL_NAME(sys_mount) - .long SYMBOL_NAME(sys_oldumount) - .long SYMBOL_NAME(sys_setuid16) - .long SYMBOL_NAME(sys_getuid16) - .long SYMBOL_NAME(sys_stime) /* 25 */ - .long SYMBOL_NAME(sys_ptrace) - .long SYMBOL_NAME(sys_alarm) - .long SYMBOL_NAME(sys_fstat) - .long SYMBOL_NAME(sys_pause) - .long SYMBOL_NAME(sys_utime) /* 30 */ - .long SYMBOL_NAME(sys_ni_syscall) /* old stty syscall holder */ - .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ - .long SYMBOL_NAME(sys_access) - .long SYMBOL_NAME(sys_nice) - .long SYMBOL_NAME(sys_ni_syscall) /* 35 old ftime syscall holder */ - .long SYMBOL_NAME(sys_sync) - .long SYMBOL_NAME(sys_kill) - .long SYMBOL_NAME(sys_rename) - .long SYMBOL_NAME(sys_mkdir) - .long SYMBOL_NAME(sys_rmdir) /* 40 */ - .long SYMBOL_NAME(sys_dup) - .long SYMBOL_NAME(sys_pipe) - .long SYMBOL_NAME(sys_times) - .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ - .long SYMBOL_NAME(sys_brk) /* 45 */ - .long SYMBOL_NAME(sys_setgid16) - .long SYMBOL_NAME(sys_getgid16) - .long SYMBOL_NAME(sys_signal) - .long SYMBOL_NAME(sys_geteuid16) - .long SYMBOL_NAME(sys_getegid16) /* 50 */ - .long SYMBOL_NAME(sys_acct) - .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ - .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ - .long SYMBOL_NAME(sys_ioctl) - .long SYMBOL_NAME(sys_fcntl) /* 55 */ - .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ - .long SYMBOL_NAME(sys_setpgid) - .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ - .long SYMBOL_NAME(sys_ni_syscall) - .long SYMBOL_NAME(sys_umask) /* 60 */ - .long SYMBOL_NAME(sys_chroot) - .long SYMBOL_NAME(sys_ustat) - .long SYMBOL_NAME(sys_dup2) - .long SYMBOL_NAME(sys_getppid) - .long SYMBOL_NAME(sys_getpgrp) /* 65 */ - .long SYMBOL_NAME(sys_setsid) - .long SYMBOL_NAME(sys_sigaction) - .long SYMBOL_NAME(sys_sgetmask) - .long SYMBOL_NAME(sys_ssetmask) - .long SYMBOL_NAME(sys_setreuid16) /* 70 */ - .long SYMBOL_NAME(sys_setregid16) - .long SYMBOL_NAME(sys_sigsuspend) - .long SYMBOL_NAME(sys_sigpending) - .long SYMBOL_NAME(sys_sethostname) - .long SYMBOL_NAME(sys_setrlimit) /* 75 */ - .long SYMBOL_NAME(sys_old_getrlimit) - .long SYMBOL_NAME(sys_getrusage) - .long SYMBOL_NAME(sys_gettimeofday) - .long SYMBOL_NAME(sys_settimeofday) - .long SYMBOL_NAME(sys_getgroups16) /* 80 */ - .long SYMBOL_NAME(sys_setgroups16) - .long SYMBOL_NAME(sys_old_select) - .long SYMBOL_NAME(sys_symlink) - .long SYMBOL_NAME(sys_lstat) - .long SYMBOL_NAME(sys_readlink) /* 85 */ - .long SYMBOL_NAME(sys_uselib) - .long SYMBOL_NAME(sys_swapon) - .long SYMBOL_NAME(sys_reboot) - .long SYMBOL_NAME(sys_old_readdir) - .long SYMBOL_NAME(sys_old_mmap) /* 90 */ - .long SYMBOL_NAME(sys_munmap) - .long SYMBOL_NAME(sys_truncate) - .long SYMBOL_NAME(sys_ftruncate) - .long SYMBOL_NAME(sys_fchmod) - .long SYMBOL_NAME(sys_fchown16) /* 95 */ - .long SYMBOL_NAME(sys_getpriority) - .long SYMBOL_NAME(sys_setpriority) - .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ - .long SYMBOL_NAME(sys_statfs) - .long SYMBOL_NAME(sys_fstatfs) /* 100 */ - .long SYMBOL_NAME(sys_ni_syscall) /* ioperm for i386 */ - .long SYMBOL_NAME(sys_socketcall) - .long SYMBOL_NAME(sys_syslog) - .long SYMBOL_NAME(sys_setitimer) - .long SYMBOL_NAME(sys_getitimer) /* 105 */ - .long SYMBOL_NAME(sys_newstat) - .long SYMBOL_NAME(sys_newlstat) - .long SYMBOL_NAME(sys_newfstat) - .long SYMBOL_NAME(sys_ni_syscall) - .long SYMBOL_NAME(sys_ni_syscall) /* iopl for i386 */ /* 110 */ - .long SYMBOL_NAME(sys_vhangup) - .long SYMBOL_NAME(sys_ni_syscall) /* obsolete idle() syscall */ - .long SYMBOL_NAME(sys_ni_syscall) /* vm86old for i386 */ - .long SYMBOL_NAME(sys_wait4) - .long SYMBOL_NAME(sys_swapoff) /* 115 */ - .long SYMBOL_NAME(sys_sysinfo) - .long SYMBOL_NAME(sys_ipc) - .long SYMBOL_NAME(sys_fsync) - .long SYMBOL_NAME(sys_sigreturn) - .long SYMBOL_NAME(sys_clone) /* 120 */ - .long SYMBOL_NAME(sys_setdomainname) - .long SYMBOL_NAME(sys_newuname) - .long SYMBOL_NAME(sys_cacheflush) /* modify_ldt for i386 */ - .long SYMBOL_NAME(sys_adjtimex) - .long SYMBOL_NAME(sys_ni_syscall) /* 125 sys_mprotect */ - .long SYMBOL_NAME(sys_sigprocmask) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_create_module */ - .long SYMBOL_NAME(sys_init_module) - .long SYMBOL_NAME(sys_delete_module) - .long SYMBOL_NAME(sys_ni_syscall) /* 130 sys_get_kernel_syms */ - .long SYMBOL_NAME(sys_quotactl) - .long SYMBOL_NAME(sys_getpgid) - .long SYMBOL_NAME(sys_fchdir) - .long SYMBOL_NAME(sys_bdflush) - .long SYMBOL_NAME(sys_sysfs) /* 135 */ - .long SYMBOL_NAME(sys_personality) - .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ - .long SYMBOL_NAME(sys_setfsuid16) - .long SYMBOL_NAME(sys_setfsgid16) - .long SYMBOL_NAME(sys_llseek) /* 140 */ - .long SYMBOL_NAME(sys_getdents) - .long SYMBOL_NAME(sys_select) - .long SYMBOL_NAME(sys_flock) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_msync */ - .long SYMBOL_NAME(sys_readv) /* 145 */ - .long SYMBOL_NAME(sys_writev) - .long SYMBOL_NAME(sys_getsid) - .long SYMBOL_NAME(sys_fdatasync) - .long SYMBOL_NAME(sys_sysctl) - .long SYMBOL_NAME(sys_ni_syscall) /* 150 sys_mlock */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_munlock */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_mlockall */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_munlockall */ - .long SYMBOL_NAME(sys_sched_setparam) - .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ - .long SYMBOL_NAME(sys_sched_setscheduler) - .long SYMBOL_NAME(sys_sched_getscheduler) - .long SYMBOL_NAME(sys_sched_yield) - .long SYMBOL_NAME(sys_sched_get_priority_max) - .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ - .long SYMBOL_NAME(sys_sched_rr_get_interval) - .long SYMBOL_NAME(sys_nanosleep) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_mremap */ - .long SYMBOL_NAME(sys_setresuid16) - .long SYMBOL_NAME(sys_getresuid16) /* 165 */ - .long SYMBOL_NAME(sys_ni_syscall) /* for vm86 */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_query_module */ - .long SYMBOL_NAME(sys_poll) - .long SYMBOL_NAME(sys_ni_syscall) /* old nfsservctl */ - .long SYMBOL_NAME(sys_setresgid16) /* 170 */ - .long SYMBOL_NAME(sys_getresgid16) - .long SYMBOL_NAME(sys_prctl) - .long SYMBOL_NAME(sys_rt_sigreturn) - .long SYMBOL_NAME(sys_rt_sigaction) - .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ - .long SYMBOL_NAME(sys_rt_sigpending) - .long SYMBOL_NAME(sys_rt_sigtimedwait) - .long SYMBOL_NAME(sys_rt_sigqueueinfo) - .long SYMBOL_NAME(sys_rt_sigsuspend) - .long SYMBOL_NAME(sys_pread64) /* 180 */ - .long SYMBOL_NAME(sys_pwrite64) - .long SYMBOL_NAME(sys_lchown16); - .long SYMBOL_NAME(sys_getcwd) - .long SYMBOL_NAME(sys_capget) - .long SYMBOL_NAME(sys_capset) /* 185 */ - .long SYMBOL_NAME(sys_sigaltstack) - .long SYMBOL_NAME(sys_sendfile) - .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ - .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ - .long SYMBOL_NAME(sys_vfork) /* 190 */ - .long SYMBOL_NAME(sys_getrlimit) - .long SYMBOL_NAME(sys_mmap_pgoff) - .long SYMBOL_NAME(sys_truncate64) - .long SYMBOL_NAME(sys_ftruncate64) - .long SYMBOL_NAME(sys_stat64) /* 195 */ - .long SYMBOL_NAME(sys_lstat64) - .long SYMBOL_NAME(sys_fstat64) - .long SYMBOL_NAME(sys_chown) - .long SYMBOL_NAME(sys_getuid) - .long SYMBOL_NAME(sys_getgid) /* 200 */ - .long SYMBOL_NAME(sys_geteuid) - .long SYMBOL_NAME(sys_getegid) - .long SYMBOL_NAME(sys_setreuid) - .long SYMBOL_NAME(sys_setregid) - .long SYMBOL_NAME(sys_getgroups) /* 205 */ - .long SYMBOL_NAME(sys_setgroups) - .long SYMBOL_NAME(sys_fchown) - .long SYMBOL_NAME(sys_setresuid) - .long SYMBOL_NAME(sys_getresuid) - .long SYMBOL_NAME(sys_setresgid) /* 210 */ - .long SYMBOL_NAME(sys_getresgid) - .long SYMBOL_NAME(sys_lchown) - .long SYMBOL_NAME(sys_setuid) - .long SYMBOL_NAME(sys_setgid) - .long SYMBOL_NAME(sys_setfsuid) /* 215 */ - .long SYMBOL_NAME(sys_setfsgid) - .long SYMBOL_NAME(sys_pivot_root) - .long SYMBOL_NAME(sys_ni_syscall) - .long SYMBOL_NAME(sys_ni_syscall) - .long SYMBOL_NAME(sys_getdents64) /* 220 */ - .long SYMBOL_NAME(sys_fcntl64) - .long SYMBOL_NAME(sys_ni_syscall) /* reserved TUX */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved Security */ - .long SYMBOL_NAME(sys_gettid) - .long SYMBOL_NAME(sys_readahead) /* 225 */ - .long SYMBOL_NAME(sys_setxattr) - .long SYMBOL_NAME(sys_lsetxattr) - .long SYMBOL_NAME(sys_fsetxattr) - .long SYMBOL_NAME(sys_getxattr) - .long SYMBOL_NAME(sys_lgetxattr) /* 230 */ - .long SYMBOL_NAME(sys_fgetxattr) - .long SYMBOL_NAME(sys_listxattr) - .long SYMBOL_NAME(sys_llistxattr) - .long SYMBOL_NAME(sys_flistxattr) - .long SYMBOL_NAME(sys_removexattr) /* 235 */ - .long SYMBOL_NAME(sys_lremovexattr) - .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - .long SYMBOL_NAME(sys_sendfile64) - .long SYMBOL_NAME(sys_futex) /* 240 */ - .long SYMBOL_NAME(sys_sched_setaffinity) - .long SYMBOL_NAME(sys_sched_getaffinity) - .long SYMBOL_NAME(sys_ni_syscall) - .long SYMBOL_NAME(sys_ni_syscall) - .long SYMBOL_NAME(sys_io_setup) /* 245 */ - .long SYMBOL_NAME(sys_io_destroy) - .long SYMBOL_NAME(sys_io_getevents) - .long SYMBOL_NAME(sys_io_submit) - .long SYMBOL_NAME(sys_io_cancel) - .long SYMBOL_NAME(sys_fadvise64) /* 250 */ - .long SYMBOL_NAME(sys_ni_syscall) - .long SYMBOL_NAME(sys_exit_group) - .long SYMBOL_NAME(sys_lookup_dcookie) - .long SYMBOL_NAME(sys_epoll_create) - .long SYMBOL_NAME(sys_epoll_ctl) /* 255 */ - .long SYMBOL_NAME(sys_epoll_wait) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_remap_file_pages */ - .long SYMBOL_NAME(sys_set_tid_address) - .long SYMBOL_NAME(sys_timer_create) - .long SYMBOL_NAME(sys_timer_settime) /* 260 */ - .long SYMBOL_NAME(sys_timer_gettime) - .long SYMBOL_NAME(sys_timer_getoverrun) - .long SYMBOL_NAME(sys_timer_delete) - .long SYMBOL_NAME(sys_clock_settime) - .long SYMBOL_NAME(sys_clock_gettime) /* 265 */ - .long SYMBOL_NAME(sys_clock_getres) - .long SYMBOL_NAME(sys_clock_nanosleep) - .long SYMBOL_NAME(sys_statfs64) - .long SYMBOL_NAME(sys_fstatfs64) - .long SYMBOL_NAME(sys_tgkill) /* 270 */ - .long SYMBOL_NAME(sys_utimes) - .long SYMBOL_NAME(sys_fadvise64_64) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_vserver */ - .long SYMBOL_NAME(sys_ni_syscall) - .long SYMBOL_NAME(sys_get_mempolicy) /* 275 */ - .long SYMBOL_NAME(sys_set_mempolicy) - .long SYMBOL_NAME(sys_mq_open) - .long SYMBOL_NAME(sys_mq_unlink) - .long SYMBOL_NAME(sys_mq_timedsend) - .long SYMBOL_NAME(sys_mq_timedreceive) /* 280 */ - .long SYMBOL_NAME(sys_mq_notify) - .long SYMBOL_NAME(sys_mq_getsetattr) - .long SYMBOL_NAME(sys_waitid) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_kexec_load */ - .long SYMBOL_NAME(sys_add_key) /* 285 */ - .long SYMBOL_NAME(sys_request_key) - .long SYMBOL_NAME(sys_keyctl) - .long SYMBOL_NAME(sys_ioprio_set) - .long SYMBOL_NAME(sys_ioprio_get) /* 290 */ - .long SYMBOL_NAME(sys_inotify_init) - .long SYMBOL_NAME(sys_inotify_add_watch) - .long SYMBOL_NAME(sys_inotify_rm_watch) - .long SYMBOL_NAME(sys_migrate_pages) - .long SYMBOL_NAME(sys_openat) /* 295 */ - .long SYMBOL_NAME(sys_mkdirat) - .long SYMBOL_NAME(sys_mknodat) - .long SYMBOL_NAME(sys_fchownat) - .long SYMBOL_NAME(sys_futimesat) - .long SYMBOL_NAME(sys_fstatat64) /* 300 */ - .long SYMBOL_NAME(sys_unlinkat) - .long SYMBOL_NAME(sys_renameat) - .long SYMBOL_NAME(sys_linkat) - .long SYMBOL_NAME(sys_symlinkat) - .long SYMBOL_NAME(sys_readlinkat) /* 305 */ - .long SYMBOL_NAME(sys_fchmodat) - .long SYMBOL_NAME(sys_faccessat) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_pselect6 */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_ppoll */ - .long SYMBOL_NAME(sys_unshare) /* 310 */ - .long SYMBOL_NAME(sys_set_robust_list) - .long SYMBOL_NAME(sys_get_robust_list) - .long SYMBOL_NAME(sys_splice) - .long SYMBOL_NAME(sys_sync_file_range) - .long SYMBOL_NAME(sys_tee) /* 315 */ - .long SYMBOL_NAME(sys_vmsplice) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_move_pages */ - .long SYMBOL_NAME(sys_getcpu) - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_pwait */ - .long SYMBOL_NAME(sys_setns) /* 320 */ +_sys_call_table: + CALL(sys_ni_syscall) /* 0 - old "setup()" system call*/ + CALL(sys_exit) + CALL(sys_fork) + CALL(sys_read) + CALL(sys_write) + CALL(sys_open) /* 5 */ + CALL(sys_close) + CALL(sys_waitpid) + CALL(sys_creat) + CALL(sys_link) + CALL(sys_unlink) /* 10 */ + CALL(sys_execve) + CALL(sys_chdir) + CALL(sys_time) + CALL(sys_mknod) + CALL(sys_chmod) /* 15 */ + CALL(sys_chown16) + CALL(sys_ni_syscall) /* old break syscall holder */ + CALL(sys_stat) + CALL(sys_lseek) + CALL(sys_getpid) /* 20 */ + CALL(sys_mount) + CALL(sys_oldumount) + CALL(sys_setuid16) + CALL(sys_getuid16) + CALL(sys_stime) /* 25 */ + CALL(sys_ptrace) + CALL(sys_alarm) + CALL(sys_fstat) + CALL(sys_pause) + CALL(sys_utime) /* 30 */ + CALL(sys_ni_syscall) /* old stty syscall holder */ + CALL(sys_ni_syscall) /* old gtty syscall holder */ + CALL(sys_access) + CALL(sys_nice) + CALL(sys_ni_syscall) /* 35 old ftime syscall holder */ + CALL(sys_sync) + CALL(sys_kill) + CALL(sys_rename) + CALL(sys_mkdir) + CALL(sys_rmdir) /* 40 */ + CALL(sys_dup) + CALL(sys_pipe) + CALL(sys_times) + CALL(sys_ni_syscall) /* old prof syscall holder */ + CALL(sys_brk) /* 45 */ + CALL(sys_setgid16) + CALL(sys_getgid16) + CALL(sys_signal) + CALL(sys_geteuid16) + CALL(sys_getegid16) /* 50 */ + CALL(sys_acct) + CALL(sys_umount) /* recycled never used phys() */ + CALL(sys_ni_syscall) /* old lock syscall holder */ + CALL(sys_ioctl) + CALL(sys_fcntl) /* 55 */ + CALL(sys_ni_syscall) /* old mpx syscall holder */ + CALL(sys_setpgid) + CALL(sys_ni_syscall) /* old ulimit syscall holder */ + CALL(sys_ni_syscall) + CALL(sys_umask) /* 60 */ + CALL(sys_chroot) + CALL(sys_ustat) + CALL(sys_dup2) + CALL(sys_getppid) + CALL(sys_getpgrp) /* 65 */ + CALL(sys_setsid) + CALL(sys_sigaction) + CALL(sys_sgetmask) + CALL(sys_ssetmask) + CALL(sys_setreuid16) /* 70 */ + CALL(sys_setregid16) + CALL(sys_sigsuspend) + CALL(sys_sigpending) + CALL(sys_sethostname) + CALL(sys_setrlimit) /* 75 */ + CALL(sys_old_getrlimit) + CALL(sys_getrusage) + CALL(sys_gettimeofday) + CALL(sys_settimeofday) + CALL(sys_getgroups16) /* 80 */ + CALL(sys_setgroups16) + CALL(sys_old_select) + CALL(sys_symlink) + CALL(sys_lstat) + CALL(sys_readlink) /* 85 */ + CALL(sys_uselib) + CALL(sys_swapon) + CALL(sys_reboot) + CALL(sys_old_readdir) + CALL(sys_old_mmap) /* 90 */ + CALL(sys_munmap) + CALL(sys_truncate) + CALL(sys_ftruncate) + CALL(sys_fchmod) + CALL(sys_fchown16) /* 95 */ + CALL(sys_getpriority) + CALL(sys_setpriority) + CALL(sys_ni_syscall) /* old profil syscall holder */ + CALL(sys_statfs) + CALL(sys_fstatfs) /* 100 */ + CALL(sys_ni_syscall) /* ioperm for i386 */ + CALL(sys_socketcall) + CALL(sys_syslog) + CALL(sys_setitimer) + CALL(sys_getitimer) /* 105 */ + CALL(sys_newstat) + CALL(sys_newlstat) + CALL(sys_newfstat) + CALL(sys_ni_syscall) + CALL(sys_ni_syscall) /* iopl for i386 */ /* 110 */ + CALL(sys_vhangup) + CALL(sys_ni_syscall) /* obsolete idle() syscall */ + CALL(sys_ni_syscall) /* vm86old for i386 */ + CALL(sys_wait4) + CALL(sys_swapoff) /* 115 */ + CALL(sys_sysinfo) + CALL(sys_ipc) + CALL(sys_fsync) + CALL(sys_sigreturn) + CALL(sys_clone) /* 120 */ + CALL(sys_setdomainname) + CALL(sys_newuname) + CALL(sys_cacheflush) /* modify_ldt for i386 */ + CALL(sys_adjtimex) + CALL(sys_ni_syscall) /* 125 sys_mprotect */ + CALL(sys_sigprocmask) + CALL(sys_ni_syscall) /* sys_create_module */ + CALL(sys_init_module) + CALL(sys_delete_module) + CALL(sys_ni_syscall) /* 130 sys_get_kernel_syms */ + CALL(sys_quotactl) + CALL(sys_getpgid) + CALL(sys_fchdir) + CALL(sys_bdflush) + CALL(sys_sysfs) /* 135 */ + CALL(sys_personality) + CALL(sys_ni_syscall) /* for afs_syscall */ + CALL(sys_setfsuid16) + CALL(sys_setfsgid16) + CALL(sys_llseek) /* 140 */ + CALL(sys_getdents) + CALL(sys_select) + CALL(sys_flock) + CALL(sys_ni_syscall) /* sys_msync */ + CALL(sys_readv) /* 145 */ + CALL(sys_writev) + CALL(sys_getsid) + CALL(sys_fdatasync) + CALL(sys_sysctl) + CALL(sys_ni_syscall) /* 150 sys_mlock */ + CALL(sys_ni_syscall) /* sys_munlock */ + CALL(sys_ni_syscall) /* sys_mlockall */ + CALL(sys_ni_syscall) /* sys_munlockall */ + CALL(sys_sched_setparam) + CALL(sys_sched_getparam) /* 155 */ + CALL(sys_sched_setscheduler) + CALL(sys_sched_getscheduler) + CALL(sys_sched_yield) + CALL(sys_sched_get_priority_max) + CALL(sys_sched_get_priority_min) /* 160 */ + CALL(sys_sched_rr_get_interval) + CALL(sys_nanosleep) + CALL(sys_ni_syscall) /* sys_mremap */ + CALL(sys_setresuid16) + CALL(sys_getresuid16) /* 165 */ + CALL(sys_ni_syscall) /* for vm86 */ + CALL(sys_ni_syscall) /* sys_query_module */ + CALL(sys_poll) + CALL(sys_ni_syscall) /* old nfsservctl */ + CALL(sys_setresgid16) /* 170 */ + CALL(sys_getresgid16) + CALL(sys_prctl) + CALL(sys_rt_sigreturn) + CALL(sys_rt_sigaction) + CALL(sys_rt_sigprocmask) /* 175 */ + CALL(sys_rt_sigpending) + CALL(sys_rt_sigtimedwait) + CALL(sys_rt_sigqueueinfo) + CALL(sys_rt_sigsuspend) + CALL(sys_pread64) /* 180 */ + CALL(sys_pwrite64) + CALL(sys_lchown16); + CALL(sys_getcwd) + CALL(sys_capget) + CALL(sys_capset) /* 185 */ + CALL(sys_sigaltstack) + CALL(sys_sendfile) + CALL(sys_ni_syscall) /* streams1 */ + CALL(sys_ni_syscall) /* streams2 */ + CALL(sys_vfork) /* 190 */ + CALL(sys_getrlimit) + CALL(sys_mmap_pgoff) + CALL(sys_truncate64) + CALL(sys_ftruncate64) + CALL(sys_stat64) /* 195 */ + CALL(sys_lstat64) + CALL(sys_fstat64) + CALL(sys_chown) + CALL(sys_getuid) + CALL(sys_getgid) /* 200 */ + CALL(sys_geteuid) + CALL(sys_getegid) + CALL(sys_setreuid) + CALL(sys_setregid) + CALL(sys_getgroups) /* 205 */ + CALL(sys_setgroups) + CALL(sys_fchown) + CALL(sys_setresuid) + CALL(sys_getresuid) + CALL(sys_setresgid) /* 210 */ + CALL(sys_getresgid) + CALL(sys_lchown) + CALL(sys_setuid) + CALL(sys_setgid) + CALL(sys_setfsuid) /* 215 */ + CALL(sys_setfsgid) + CALL(sys_pivot_root) + CALL(sys_ni_syscall) + CALL(sys_ni_syscall) + CALL(sys_getdents64) /* 220 */ + CALL(sys_fcntl64) + CALL(sys_ni_syscall) /* reserved TUX */ + CALL(sys_ni_syscall) /* reserved Security */ + CALL(sys_gettid) + CALL(sys_readahead) /* 225 */ + CALL(sys_setxattr) + CALL(sys_lsetxattr) + CALL(sys_fsetxattr) + CALL(sys_getxattr) + CALL(sys_lgetxattr) /* 230 */ + CALL(sys_fgetxattr) + CALL(sys_listxattr) + CALL(sys_llistxattr) + CALL(sys_flistxattr) + CALL(sys_removexattr) /* 235 */ + CALL(sys_lremovexattr) + CALL(sys_fremovexattr) + CALL(sys_tkill) + CALL(sys_sendfile64) + CALL(sys_futex) /* 240 */ + CALL(sys_sched_setaffinity) + CALL(sys_sched_getaffinity) + CALL(sys_ni_syscall) + CALL(sys_ni_syscall) + CALL(sys_io_setup) /* 245 */ + CALL(sys_io_destroy) + CALL(sys_io_getevents) + CALL(sys_io_submit) + CALL(sys_io_cancel) + CALL(sys_fadvise64) /* 250 */ + CALL(sys_ni_syscall) + CALL(sys_exit_group) + CALL(sys_lookup_dcookie) + CALL(sys_epoll_create) + CALL(sys_epoll_ctl) /* 255 */ + CALL(sys_epoll_wait) + CALL(sys_ni_syscall) /* sys_remap_file_pages */ + CALL(sys_set_tid_address) + CALL(sys_timer_create) + CALL(sys_timer_settime) /* 260 */ + CALL(sys_timer_gettime) + CALL(sys_timer_getoverrun) + CALL(sys_timer_delete) + CALL(sys_clock_settime) + CALL(sys_clock_gettime) /* 265 */ + CALL(sys_clock_getres) + CALL(sys_clock_nanosleep) + CALL(sys_statfs64) + CALL(sys_fstatfs64) + CALL(sys_tgkill) /* 270 */ + CALL(sys_utimes) + CALL(sys_fadvise64_64) + CALL(sys_ni_syscall) /* sys_vserver */ + CALL(sys_ni_syscall) + CALL(sys_get_mempolicy) /* 275 */ + CALL(sys_set_mempolicy) + CALL(sys_mq_open) + CALL(sys_mq_unlink) + CALL(sys_mq_timedsend) + CALL(sys_mq_timedreceive) /* 280 */ + CALL(sys_mq_notify) + CALL(sys_mq_getsetattr) + CALL(sys_waitid) + CALL(sys_ni_syscall) /* sys_kexec_load */ + CALL(sys_add_key) /* 285 */ + CALL(sys_request_key) + CALL(sys_keyctl) + CALL(sys_ioprio_set) + CALL(sys_ioprio_get) /* 290 */ + CALL(sys_inotify_init) + CALL(sys_inotify_add_watch) + CALL(sys_inotify_rm_watch) + CALL(sys_migrate_pages) + CALL(sys_openat) /* 295 */ + CALL(sys_mkdirat) + CALL(sys_mknodat) + CALL(sys_fchownat) + CALL(sys_futimesat) + CALL(sys_fstatat64) /* 300 */ + CALL(sys_unlinkat) + CALL(sys_renameat) + CALL(sys_linkat) + CALL(sys_symlinkat) + CALL(sys_readlinkat) /* 305 */ + CALL(sys_fchmodat) + CALL(sys_faccessat) + CALL(sys_ni_syscall) /* sys_pselect6 */ + CALL(sys_ni_syscall) /* sys_ppoll */ + CALL(sys_unshare) /* 310 */ + CALL(sys_set_robust_list) + CALL(sys_get_robust_list) + CALL(sys_splice) + CALL(sys_sync_file_range) + CALL(sys_tee) /* 315 */ + CALL(sys_vmsplice) + CALL(sys_ni_syscall) /* sys_move_pages */ + CALL(sys_getcpu) + CALL(sys_ni_syscall) /* sys_epoll_pwait */ + CALL(sys_setns) /* 320 */ diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 03d356d96e5d..3253fed42ac1 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -132,10 +132,12 @@ SECTIONS { . = ALIGN(0x4) ; __sbss = . ; + ___bss_start = . ; *(.bss*) . = ALIGN(0x4) ; *(COMMON) . = ALIGN(0x4) ; + ___bss_stop = . ; __ebss = . ; __end = . ; __ramstart = .; diff --git a/arch/h8300/lib/abs.S b/arch/h8300/lib/abs.S index cabdd46b41db..ddd1fb3d01ad 100644 --- a/arch/h8300/lib/abs.S +++ b/arch/h8300/lib/abs.S @@ -9,10 +9,10 @@ .h8300s #endif .text -.global SYMBOL_NAME(abs) +.global _abs ;;; int abs(int n) -SYMBOL_NAME_LABEL(abs) +_abs: mov.l er0,er0 bpl 1f neg.l er0 diff --git a/arch/h8300/lib/memcpy.S b/arch/h8300/lib/memcpy.S index fdcbc1ee673c..cad325e2c0e8 100644 --- a/arch/h8300/lib/memcpy.S +++ b/arch/h8300/lib/memcpy.S @@ -10,10 +10,10 @@ #endif .text -.global SYMBOL_NAME(memcpy) +.global _memcpy ;;; void *memcpy(void *to, void *from, size_t n) -SYMBOL_NAME_LABEL(memcpy) +_memcpy: mov.l er2,er2 bne 1f rts diff --git a/arch/h8300/lib/memset.S b/arch/h8300/lib/memset.S index 59abdf9485a5..4549a64c5b79 100644 --- a/arch/h8300/lib/memset.S +++ b/arch/h8300/lib/memset.S @@ -10,13 +10,13 @@ #endif .text -.global SYMBOL_NAME(memset) +.global _memset ;;void *memset(*ptr, int c, size_t count) ;; ptr = er0 ;; c = er1(r1l) ;; count = er2 -SYMBOL_NAME_LABEL(memset) +_memset: btst #0,r0l beq 2f diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index ff349d70a29b..6c1251e491af 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -121,47 +121,27 @@ void __init paging_init(void) void __init mem_init(void) { - int codek = 0, datak = 0, initk = 0; - /* DAVIDM look at setup memory map generically with reserved area */ - unsigned long tmp; - extern unsigned long _ramend, _ramstart; - unsigned long len = &_ramend - &_ramstart; - unsigned long start_mem = memory_start; /* DAVIDM - these must start at end of kernel */ - unsigned long end_mem = memory_end; /* DAVIDM - this must not include kernel stack at top */ + unsigned long codesize = _etext - _stext; -#ifdef DEBUG - printk(KERN_DEBUG "Mem_init: start=%lx, end=%lx\n", start_mem, end_mem); -#endif + pr_devel("Mem_init: start=%lx, end=%lx\n", memory_start, memory_end); - end_mem &= PAGE_MASK; - high_memory = (void *) end_mem; - - start_mem = PAGE_ALIGN(start_mem); - max_mapnr = num_physpages = MAP_NR(high_memory); + high_memory = (void *) (memory_end & PAGE_MASK); + max_mapnr = MAP_NR(high_memory); /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); - - codek = (_etext - _stext) >> 10; - datak = (__bss_stop - _sdata) >> 10; - initk = (__init_begin - __init_end) >> 10; - - tmp = nr_free_pages() << PAGE_SHIFT; - printk(KERN_INFO "Memory available: %luk/%luk RAM, %luk/%luk ROM (%dk kernel code, %dk data)\n", - tmp >> 10, - len >> 10, - (rom_length > 0) ? ((rom_length >> 10) - codek) : 0, - rom_length >> 10, - codek, - datak - ); + free_all_bootmem(); + + mem_init_print_info(NULL); + if (rom_length > 0 && rom_length > codesize) + pr_info("Memory available: %luK/%luK ROM\n", + (rom_length - codesize) >> 10, rom_length >> 10); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif @@ -169,7 +149,7 @@ void free_initmem(void) { #ifdef CONFIG_RAMKERNEL - free_initmem_default(0); + free_initmem_default(-1); #endif } diff --git a/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S b/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S index ecaeb31ae9a4..b2ad0f2d0417 100644 --- a/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S +++ b/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S @@ -22,10 +22,10 @@ #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS #endif - .global SYMBOL_NAME(_start) - .global SYMBOL_NAME(command_line) - .global SYMBOL_NAME(_platform_gpio_table) - .global SYMBOL_NAME(_target_name) + .global __start + .global _command_line + .global __platform_gpio_table + .global __target_name .h8300h @@ -33,7 +33,7 @@ .file "crt0_ram.S" /* CPU Reset entry */ -SYMBOL_NAME_LABEL(_start) +__start: mov.l #RAMEND,sp ldc #0x80,ccr @@ -59,13 +59,13 @@ SYMBOL_NAME_LABEL(_start) /* copy kernel commandline */ mov.l #COMMAND_START,er5 - mov.l #SYMBOL_NAME(command_line),er6 + mov.l #_command_line,er6 mov.w #512,r4 eepmov.w /* uClinux kernel start */ ldc #0x90,ccr /* running kernel */ - mov.l #SYMBOL_NAME(init_thread_union),sp + mov.l #_init_thread_union,sp add.l #0x2000,sp jsr @_start_kernel _exit: @@ -107,4 +107,4 @@ __target_name: .asciz "AE-3068" .section .bootvec,"ax" - jmp @SYMBOL_NAME(_start) + jmp @__start diff --git a/arch/h8300/platform/h8300h/generic/crt0_ram.S b/arch/h8300/platform/h8300h/generic/crt0_ram.S index 80d0e16a4499..5ab7d9c12910 100644 --- a/arch/h8300/platform/h8300h/generic/crt0_ram.S +++ b/arch/h8300/platform/h8300h/generic/crt0_ram.S @@ -22,10 +22,10 @@ #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS #endif - .global SYMBOL_NAME(_start) - .global SYMBOL_NAME(command_line) - .global SYMBOL_NAME(_platform_gpio_table) - .global SYMBOL_NAME(_target_name) + .global __start + .global _command_line + .global __platform_gpio_table + .global __target_name .h8300h @@ -33,7 +33,7 @@ .file "crt0_ram.S" /* CPU Reset entry */ -SYMBOL_NAME_LABEL(_start) +__start: mov.l #RAMEND,sp ldc #0x80,ccr @@ -59,13 +59,13 @@ SYMBOL_NAME_LABEL(_start) /* copy kernel commandline */ mov.l #COMMAND_START,er5 - mov.l #SYMBOL_NAME(command_line),er6 + mov.l #_command_line,er6 mov.w #512,r4 eepmov.w /* uClinux kernel start */ ldc #0x90,ccr /* running kernel */ - mov.l #SYMBOL_NAME(init_thread_union),sp + mov.l #_init_thread_union,sp add.l #0x2000,sp jsr @_start_kernel _exit: diff --git a/arch/h8300/platform/h8300h/generic/crt0_rom.S b/arch/h8300/platform/h8300h/generic/crt0_rom.S index 120add7ca832..dda1dfa15a5e 100644 --- a/arch/h8300/platform/h8300h/generic/crt0_rom.S +++ b/arch/h8300/platform/h8300h/generic/crt0_rom.S @@ -12,17 +12,17 @@ #include <asm/linkage.h> - .global SYMBOL_NAME(_start) - .global SYMBOL_NAME(_command_line) - .global SYMBOL_NAME(_platform_gpio_table) - .global SYMBOL_NAME(_target_name) + .global __start + .global __command_line + .global __platform_gpio_table + .global __target_name .h8300h .section .text .file "crt0_rom.S" /* CPU Reset entry */ -SYMBOL_NAME_LABEL(_start) +__start: mov.l #__ramend,sp ldc #0x80,ccr @@ -60,13 +60,13 @@ SYMBOL_NAME_LABEL(_start) /* copy kernel commandline */ mov.l #COMMAND_START,er5 - mov.l #SYMBOL_NAME(_command_line),er6 + mov.l #__command_line,er6 mov.w #512,r4 eepmov.w /* linux kernel start */ ldc #0x90,ccr /* running kernel */ - mov.l #SYMBOL_NAME(init_thread_union),sp + mov.l #_init_thread_union,sp add.l #0x2000,sp jsr @_start_kernel _exit: diff --git a/arch/h8300/platform/h8300h/h8max/crt0_ram.S b/arch/h8300/platform/h8300h/h8max/crt0_ram.S index efcbefb91b67..6a0d4e2d9ec6 100644 --- a/arch/h8300/platform/h8300h/h8max/crt0_ram.S +++ b/arch/h8300/platform/h8300h/h8max/crt0_ram.S @@ -22,10 +22,10 @@ #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS #endif - .global SYMBOL_NAME(_start) - .global SYMBOL_NAME(command_line) - .global SYMBOL_NAME(_platform_gpio_table) - .global SYMBOL_NAME(_target_name) + .global __start + .global _command_line + .global __platform_gpio_table + .global __target_name .h8300h @@ -33,7 +33,7 @@ .file "crt0_ram.S" /* CPU Reset entry */ -SYMBOL_NAME_LABEL(_start) +__start: mov.l #RAMEND,sp ldc #0x80,ccr @@ -59,13 +59,13 @@ SYMBOL_NAME_LABEL(_start) /* copy kernel commandline */ mov.l #COMMAND_START,er5 - mov.l #SYMBOL_NAME(command_line),er6 + mov.l #_command_line,er6 mov.w #512,r4 eepmov.w /* uClinux kernel start */ ldc #0x90,ccr /* running kernel */ - mov.l #SYMBOL_NAME(init_thread_union),sp + mov.l #_init_thread_union,sp add.l #0x2000,sp jsr @_start_kernel _exit: @@ -107,4 +107,4 @@ __target_name: .asciz "H8MAX" .section .bootvec,"ax" - jmp @SYMBOL_NAME(_start) + jmp @__start diff --git a/arch/h8300/platform/h8s/edosk2674/crt0_ram.S b/arch/h8300/platform/h8s/edosk2674/crt0_ram.S index d12b0debe478..5ed191b37cde 100644 --- a/arch/h8300/platform/h8s/edosk2674/crt0_ram.S +++ b/arch/h8300/platform/h8s/edosk2674/crt0_ram.S @@ -23,10 +23,10 @@ #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS #endif - .global SYMBOL_NAME(_start) - .global SYMBOL_NAME(_command_line) - .global SYMBOL_NAME(_platform_gpio_table) - .global SYMBOL_NAME(_target_name) + .global __start + .global __command_line + .global __platform_gpio_table + .global __target_name .h8300s @@ -34,7 +34,7 @@ .file "crt0_ram.S" /* CPU Reset entry */ -SYMBOL_NAME_LABEL(_start) +__start: mov.l #RAMEND,sp ldc #0x80,ccr ldc #0x00,exr @@ -66,13 +66,13 @@ SYMBOL_NAME_LABEL(_start) /* copy kernel commandline */ mov.l #COMMAND_START,er5 - mov.l #SYMBOL_NAME(command_line),er6 + mov.l #_command_line,er6 mov.w #512,r4 eepmov.w /* uClinux kernel start */ ldc #0x90,ccr /* running kernel */ - mov.l #SYMBOL_NAME(init_thread_union),sp + mov.l #_init_thread_union,sp add.l #0x2000,sp jsr @_start_kernel _exit: @@ -127,4 +127,4 @@ __target_name: .asciz "EDOSK-2674" .section .bootvec,"ax" - jmp @SYMBOL_NAME(_start) + jmp @__start diff --git a/arch/h8300/platform/h8s/edosk2674/crt0_rom.S b/arch/h8300/platform/h8s/edosk2674/crt0_rom.S index c03d23c6fe12..06d1d7f324ca 100644 --- a/arch/h8300/platform/h8s/edosk2674/crt0_rom.S +++ b/arch/h8300/platform/h8s/edosk2674/crt0_rom.S @@ -13,17 +13,17 @@ #include <asm/linkage.h> #include <asm/regs267x.h> - .global SYMBOL_NAME(_start) - .global SYMBOL_NAME(_command_line) - .global SYMBOL_NAME(_platform_gpio_table) - .global SYMBOL_NAME(_target_name) + .global __start + .global __command_line + .global __platform_gpio_table + .global __target_name .h8300s .section .text .file "crt0_rom.S" /* CPU Reset entry */ -SYMBOL_NAME_LABEL(_start) +__start: mov.l #__ramend,sp ldc #0x80,ccr ldc #0,exr @@ -82,13 +82,13 @@ SYMBOL_NAME_LABEL(_start) /* copy kernel commandline */ mov.l #COMMAND_START,er5 - mov.l #SYMBOL_NAME(_command_line),er6 + mov.l #__command_line,er6 mov.w #512,r4 eepmov.w /* linux kernel start */ ldc #0x90,ccr /* running kernel */ - mov.l #SYMBOL_NAME(init_thread_union),sp + mov.l #_init_thread_union,sp add.l #0x2000,sp jsr @_start_kernel _exit: diff --git a/arch/h8300/platform/h8s/generic/crt0_ram.S b/arch/h8300/platform/h8s/generic/crt0_ram.S index b04541069976..7018915de74f 100644 --- a/arch/h8300/platform/h8s/generic/crt0_ram.S +++ b/arch/h8300/platform/h8s/generic/crt0_ram.S @@ -23,10 +23,10 @@ #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS #endif - .global SYMBOL_NAME(_start) - .global SYMBOL_NAME(_command_line) - .global SYMBOL_NAME(_platform_gpio_table) - .global SYMBOL_NAME(_target_name) + .global __start + .global __command_line + .global __platform_gpio_table + .global __target_name .h8300s @@ -34,7 +34,7 @@ .file "crt0_ram.S" /* CPU Reset entry */ -SYMBOL_NAME_LABEL(_start) +__start: mov.l #RAMEND,sp ldc #0x80,ccr ldc #0x00,exr @@ -63,13 +63,13 @@ SYMBOL_NAME_LABEL(_start) /* copy kernel commandline */ mov.l #COMMAND_START,er5 - mov.l #SYMBOL_NAME(command_line),er6 + mov.l #_command_line,er6 mov.w #512,r4 eepmov.w /* uClinux kernel start */ ldc #0x90,ccr /* running kernel */ - mov.l #SYMBOL_NAME(init_thread_union),sp + mov.l #_init_thread_union,sp add.l #0x2000,sp jsr @_start_kernel _exit: @@ -124,4 +124,4 @@ __target_name: .asciz "generic" .section .bootvec,"ax" - jmp @SYMBOL_NAME(_start) + jmp @__start diff --git a/arch/h8300/platform/h8s/generic/crt0_rom.S b/arch/h8300/platform/h8s/generic/crt0_rom.S index 95b6f2898f52..623ba7828193 100644 --- a/arch/h8300/platform/h8s/generic/crt0_rom.S +++ b/arch/h8300/platform/h8s/generic/crt0_rom.S @@ -13,17 +13,17 @@ #include <asm/linkage.h> #include <asm/regs267x.h> - .global SYMBOL_NAME(_start) - .global SYMBOL_NAME(_command_line) - .global SYMBOL_NAME(_platform_gpio_table) - .global SYMBOL_NAME(_target_name) + .global __start + .global __command_line + .global __platform_gpio_table + .global __target_name .h8300s .section .text .file "crt0_rom.S" /* CPU Reset entry */ -SYMBOL_NAME_LABEL(_start) +__start: mov.l #__ramend,sp ldc #0x80,ccr ldc #0,exr @@ -61,7 +61,7 @@ SYMBOL_NAME_LABEL(_start) /* linux kernel start */ ldc #0x90,ccr /* running kernel */ - mov.l #SYMBOL_NAME(init_thread_union),sp + mov.l #_init_thread_union,sp add.l #0x2000,sp jsr @_start_kernel _exit: diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 2561d259a296..88977e42af0a 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -70,10 +70,8 @@ unsigned long long kmap_generation; void __init mem_init(void) { /* No idea where this is actually declared. Seems to evade LXR. */ - totalram_pages += free_all_bootmem(); - num_physpages = bootmem_lastpg-ARCH_PFN_OFFSET; - - printk(KERN_INFO "totalram_pages = %ld\n", totalram_pages); + free_all_bootmem(); + mem_init_print_info(NULL); /* * To-Do: someone somewhere should wipe out the bootmem map diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index bcda5b2d121a..d43daf192b21 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2042,7 +2042,8 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) #endif static int __init -acpi_sba_ioc_add(struct acpi_device *device) +acpi_sba_ioc_add(struct acpi_device *device, + const struct acpi_device_id *not_used) { struct ioc *ioc; acpi_status status; @@ -2090,14 +2091,18 @@ static const struct acpi_device_id hp_ioc_iommu_device_ids[] = { {"HWP0004", 0}, {"", 0}, }; -static struct acpi_driver acpi_sba_ioc_driver = { - .name = "IOC IOMMU Driver", - .ids = hp_ioc_iommu_device_ids, - .ops = { - .add = acpi_sba_ioc_add, - }, +static struct acpi_scan_handler acpi_sba_ioc_handler = { + .ids = hp_ioc_iommu_device_ids, + .attach = acpi_sba_ioc_add, }; +static int __init acpi_sba_ioc_init_acpi(void) +{ + return acpi_scan_add_handler(&acpi_sba_ioc_handler); +} +/* This has to run before acpi_scan_init(). */ +arch_initcall(acpi_sba_ioc_init_acpi); + extern struct dma_map_ops swiotlb_dma_ops; static int __init @@ -2122,7 +2127,10 @@ sba_init(void) } #endif - acpi_bus_register_driver(&acpi_sba_ioc_driver); + /* + * ioc_list should be populated by the acpi_sba_ioc_handler's .attach() + * routine, but that only happens if acpi_scan_init() has already run. + */ if (!ioc_list) { #ifdef CONFIG_IA64_GENERIC /* diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c index 271f412bda1a..87bf9ad8cf0f 100644 --- a/arch/ia64/hp/sim/boot/fw-emu.c +++ b/arch/ia64/hp/sim/boot/fw-emu.c @@ -290,16 +290,16 @@ sys_fw_init (const char *args, int arglen) efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); - efi_runtime->get_time = __pa(&fw_efi_get_time); - efi_runtime->set_time = __pa(&efi_unimplemented); - efi_runtime->get_wakeup_time = __pa(&efi_unimplemented); - efi_runtime->set_wakeup_time = __pa(&efi_unimplemented); - efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented); - efi_runtime->get_variable = __pa(&efi_unimplemented); - efi_runtime->get_next_variable = __pa(&efi_unimplemented); - efi_runtime->set_variable = __pa(&efi_unimplemented); - efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented); - efi_runtime->reset_system = __pa(&efi_reset_system); + efi_runtime->get_time = (void *)__pa(&fw_efi_get_time); + efi_runtime->set_time = (void *)__pa(&efi_unimplemented); + efi_runtime->get_wakeup_time = (void *)__pa(&efi_unimplemented); + efi_runtime->set_wakeup_time = (void *)__pa(&efi_unimplemented); + efi_runtime->set_virtual_address_map = (void *)__pa(&efi_unimplemented); + efi_runtime->get_variable = (void *)__pa(&efi_unimplemented); + efi_runtime->get_next_variable = (void *)__pa(&efi_unimplemented); + efi_runtime->set_variable = (void *)__pa(&efi_unimplemented); + efi_runtime->get_next_high_mono_count = (void *)__pa(&efi_unimplemented); + efi_runtime->reset_system = (void *)__pa(&efi_reset_system); efi_tables->guid = SAL_SYSTEM_TABLE_GUID; efi_tables->table = __pa(sal_systab); diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index c13064e422df..d1b04c4c95e3 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -268,7 +268,7 @@ static __inline__ int dev_is_ethdev(struct net_device *dev) static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct simeth_local *local; struct in_device *in_dev; struct in_ifaddr **ifap = NULL; diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index 331de723c676..3a428f19a001 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c @@ -88,8 +88,8 @@ simscsi_setup (char *s) if (strlen(s) > MAX_ROOT_LEN) { printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n", simscsi_root); - } - simscsi_root = s; + } else + simscsi_root = s; return 1; } diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 5e04b591e423..80775f55f03f 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -89,9 +89,9 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, #define pci_legacy_read platform_pci_legacy_read #define pci_legacy_write platform_pci_legacy_write -struct pci_window { - struct resource resource; - u64 offset; +struct iospace_resource { + struct list_head list; + struct resource res; }; struct pci_controller { @@ -100,12 +100,10 @@ struct pci_controller { int segment; int node; /* nearest node with memory or -1 for global allocation */ - unsigned int windows; - struct pci_window *window; - void *platform_data; }; + #define PCI_CONTROLLER(busdev) ((struct pci_controller *) busdev->sysdata) #define pci_domain_nr(busdev) (PCI_CONTROLLER(busdev)->segment) diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 6b4329f18b29..d3358b760681 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -83,4 +83,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 335eb07480fe..5eb71d22c3d5 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -807,7 +807,7 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) * ACPI based hotplug CPU support */ #ifdef CONFIG_ACPI_HOTPLUG_CPU -static __cpuinit +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA @@ -882,7 +882,7 @@ __init void prefill_possible_map(void) set_cpu_possible(i, true); } -static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) +static int _acpi_map_lsapic(acpi_handle handle, int *pcpu) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f034563aeae5..51bce594eb83 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1116,11 +1116,6 @@ efi_memmap_init(u64 *s, u64 *e) if (!is_memory_available(md)) continue; -#ifdef CONFIG_CRASH_DUMP - /* saved_max_pfn should ignore max_addr= command line arg */ - if (saved_max_pfn < (efi_md_end(md) >> PAGE_SHIFT)) - saved_max_pfn = (efi_md_end(md) >> PAGE_SHIFT); -#endif /* * Round ends inward to granule boundaries * Give trimmings to uncached allocator diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 2d67317a1ec2..f59c0b844e88 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -225,17 +225,17 @@ static struct attribute_group err_inject_attr_group = { .name = "err_inject" }; /* Add/Remove err_inject interface for CPU device */ -static int __cpuinit err_inject_add_dev(struct device * sys_dev) +static int err_inject_add_dev(struct device *sys_dev) { return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group); } -static int __cpuinit err_inject_remove_dev(struct device * sys_dev) +static int err_inject_remove_dev(struct device *sys_dev) { sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); return 0; } -static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, +static int err_inject_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -256,7 +256,7 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata err_inject_cpu_notifier = +static struct notifier_block err_inject_cpu_notifier = { .notifier_call = err_inject_cpu_callback, }; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index d7396dbb07bb..b8edfa75a83f 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -631,7 +631,7 @@ ia64_mca_register_cpev (int cpev) * Outputs * None */ -void __cpuinit +void ia64_mca_cmc_vector_setup (void) { cmcv_reg_t cmcv; @@ -1814,7 +1814,7 @@ static struct irqaction mca_cpep_irqaction = { * format most of the fields. */ -static void __cpuinit +static void format_mca_init_stack(void *mca_data, unsigned long offset, const char *type, int cpu) { @@ -1844,7 +1844,7 @@ static void * __init_refok mca_bootmem(void) } /* Do per-CPU MCA-related initialization. */ -void __cpuinit +void ia64_mca_cpu_init(void *cpu_data) { void *pal_vaddr; @@ -1896,7 +1896,7 @@ ia64_mca_cpu_init(void *cpu_data) PAGE_KERNEL)); } -static void __cpuinit ia64_mca_cmc_vector_adjust(void *dummy) +static void ia64_mca_cmc_vector_adjust(void *dummy) { unsigned long flags; @@ -1906,7 +1906,7 @@ static void __cpuinit ia64_mca_cmc_vector_adjust(void *dummy) local_irq_restore(flags); } -static int __cpuinit mca_cpu_callback(struct notifier_block *nfb, +static int mca_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -1922,7 +1922,7 @@ static int __cpuinit mca_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block mca_cpu_notifier __cpuinitdata = { +static struct notifier_block mca_cpu_notifier = { .notifier_call = mca_cpu_callback }; diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index c93420c97409..d288cde93606 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -30,7 +30,7 @@ EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; EXPORT_SYMBOL(node_to_cpu_mask); -void __cpuinit map_cpu_to_node(int cpu, int nid) +void map_cpu_to_node(int cpu, int nid) { int oldnid; if (nid < 0) { /* just initialize by zero */ @@ -51,7 +51,7 @@ void __cpuinit map_cpu_to_node(int cpu, int nid) return; } -void __cpuinit unmap_cpu_from_node(int cpu, int nid) +void unmap_cpu_from_node(int cpu, int nid) { WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid])); WARN_ON(cpu_to_node_map[cpu] != nid); diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 2b3c2d79256f..ab333284f4b2 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -932,7 +932,7 @@ static const struct file_operations proc_palinfo_fops = { .release = single_release, }; -static void __cpuinit +static void create_palinfo_proc_entries(unsigned int cpu) { pal_func_cpu_u_t f; @@ -962,7 +962,7 @@ remove_palinfo_proc_entries(unsigned int hcpu) remove_proc_subtree(cpustr, palinfo_dir); } -static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb, +static int palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int hotcpu = (unsigned long)hcpu; diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index 1ddcfe5ef353..992c1098c522 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -33,15 +33,6 @@ int force_iommu __read_mostly; int iommu_pass_through; -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to i386. */ -struct device fallback_dev = { - .init_name = "fallback device", - .coherent_dma_mask = DMA_BIT_MASK(32), - .dma_mask = &fallback_dev.coherent_dma_mask, -}; - extern struct dma_map_ops intel_dma_ops; static int __init pci_iommu_init(void) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9ea25fce06d5..5a9ff1c3c3e9 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -5647,24 +5647,8 @@ pfm_proc_show_header(struct seq_file *m) list_for_each(pos, &pfm_buffer_fmt_list) { entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); - seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", - entry->fmt_uuid[0], - entry->fmt_uuid[1], - entry->fmt_uuid[2], - entry->fmt_uuid[3], - entry->fmt_uuid[4], - entry->fmt_uuid[5], - entry->fmt_uuid[6], - entry->fmt_uuid[7], - entry->fmt_uuid[8], - entry->fmt_uuid[9], - entry->fmt_uuid[10], - entry->fmt_uuid[11], - entry->fmt_uuid[12], - entry->fmt_uuid[13], - entry->fmt_uuid[14], - entry->fmt_uuid[15], - entry->fmt_name); + seq_printf(m, "format : %16phD %s\n", + entry->fmt_uuid, entry->fmt_name); } spin_unlock(&pfm_buffer_fmt_lock); diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 4bc580af67b3..960a396f5929 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -568,7 +568,7 @@ static const struct file_operations salinfo_data_fops = { .llseek = default_llseek, }; -static int __cpuinit +static int salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { unsigned int i, cpu = (unsigned long)hcpu; @@ -609,7 +609,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu return NOTIFY_OK; } -static struct notifier_block salinfo_cpu_notifier __cpuinitdata = +static struct notifier_block salinfo_cpu_notifier = { .notifier_call = salinfo_cpu_callback, .priority = 0, diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 13bfdd22afc8..4fc2e9569bb2 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -748,7 +748,7 @@ const struct seq_operations cpuinfo_op = { #define MAX_BRANDS 8 static char brandname[MAX_BRANDS][128]; -static char * __cpuinit +static char * get_model_name(__u8 family, __u8 model) { static int overflow; @@ -778,7 +778,7 @@ get_model_name(__u8 family, __u8 model) return "Unknown"; } -static void __cpuinit +static void identify_cpu (struct cpuinfo_ia64 *c) { union { @@ -850,7 +850,7 @@ identify_cpu (struct cpuinfo_ia64 *c) * 2. the minimum of the i-cache stride sizes for "flush_icache_range()". * 3. the minimum of the cache stride sizes for "clflush_cache_range()". */ -static void __cpuinit +static void get_cache_info(void) { unsigned long line_size, max = 1; @@ -915,10 +915,10 @@ get_cache_info(void) * cpu_init() initializes state that is per-CPU. This function acts * as a 'CPU state barrier', nothing should get across. */ -void __cpuinit +void cpu_init (void) { - extern void __cpuinit ia64_mmu_init (void *); + extern void ia64_mmu_init(void *); static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG; unsigned long num_phys_stacked; pal_vm_info_2_u_t vmi; diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 8d87168d218d..547a48d78bd7 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -351,7 +351,7 @@ static inline void smp_setup_percpu_timer(void) { } -static void __cpuinit +static void smp_callin (void) { int cpuid, phys_id, itc_master; @@ -442,7 +442,7 @@ smp_callin (void) /* * Activate a secondary processor. head.S calls this. */ -int __cpuinit +int start_secondary (void *unused) { /* Early console may use I/O ports */ @@ -459,7 +459,7 @@ start_secondary (void *unused) return 0; } -static int __cpuinit +static int do_boot_cpu (int sapicid, int cpu, struct task_struct *idle) { int timeout; @@ -728,7 +728,7 @@ static inline void set_cpu_sibling_map(int cpu) } } -int __cpuinit +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { int ret; diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index dc00b2c1b42a..ca69a5a96dcc 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -135,11 +135,11 @@ struct cpu_cache_info { struct kobject kobj; }; -static struct cpu_cache_info all_cpu_cache_info[NR_CPUS] __cpuinitdata; +static struct cpu_cache_info all_cpu_cache_info[NR_CPUS]; #define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y]) #ifdef CONFIG_SMP -static void __cpuinit cache_shared_cpu_map_setup( unsigned int cpu, +static void cache_shared_cpu_map_setup(unsigned int cpu, struct cache_info * this_leaf) { pal_cache_shared_info_t csi; @@ -174,7 +174,7 @@ static void __cpuinit cache_shared_cpu_map_setup( unsigned int cpu, &csi) == PAL_STATUS_SUCCESS); } #else -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, +static void cache_shared_cpu_map_setup(unsigned int cpu, struct cache_info * this_leaf) { cpu_set(cpu, this_leaf->shared_cpu_map); @@ -298,7 +298,7 @@ static struct kobj_type cache_ktype_percpu_entry = { .sysfs_ops = &cache_sysfs_ops, }; -static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu) +static void cpu_cache_sysfs_exit(unsigned int cpu) { kfree(all_cpu_cache_info[cpu].cache_leaves); all_cpu_cache_info[cpu].cache_leaves = NULL; @@ -307,7 +307,7 @@ static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu) return; } -static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu) +static int cpu_cache_sysfs_init(unsigned int cpu) { unsigned long i, levels, unique_caches; pal_cache_config_info_t cci; @@ -351,7 +351,7 @@ static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu) } /* Add cache interface for CPU device */ -static int __cpuinit cache_add_dev(struct device * sys_dev) +static int cache_add_dev(struct device *sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; @@ -401,7 +401,7 @@ static int __cpuinit cache_add_dev(struct device * sys_dev) } /* Remove cache interface for CPU device */ -static int __cpuinit cache_remove_dev(struct device * sys_dev) +static int cache_remove_dev(struct device *sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i; @@ -425,7 +425,7 @@ static int __cpuinit cache_remove_dev(struct device * sys_dev) * When a cpu is hot-plugged, do a check and initiate * cache kobject if necessary */ -static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, +static int cache_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -445,7 +445,7 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cache_cpu_notifier = +static struct notifier_block cache_cpu_notifier = { .notifier_call = cache_cpu_callback }; diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index f7f9f9c6caf0..d3636e67a98e 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -630,7 +630,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", iip, ifa, isr); force_sig(SIGSEGV, current); - break; + return; case 46: printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 1a4053789d01..18e45ec49bbf 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file) ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ +KVM := ../../../virt/kvm -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o) +common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \ + $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) -common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) +common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 67c59ebec899..da5237d636d6 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -156,8 +156,7 @@ static void *cpu_data; * * Allocate and setup per-cpu data areas. */ -void * __cpuinit -per_cpu_init (void) +void *per_cpu_init(void) { static bool first_time = true; void *cpu0_data = __cpu0_per_cpu; @@ -295,14 +294,6 @@ find_memory (void) alloc_per_cpu_data(); } -static int count_pages(u64 start, u64 end, void *arg) -{ - unsigned long *count = arg; - - *count += (end - start) >> PAGE_SHIFT; - return 0; -} - /* * Set up the page tables. */ @@ -313,9 +304,6 @@ paging_init (void) unsigned long max_dma; unsigned long max_zone_pfns[MAX_NR_ZONES]; - num_physpages = 0; - efi_memmap_walk(count_pages, &num_physpages); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_ZONE_DMA max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index ae4db4bd6d97..2de08f4d9930 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -37,7 +37,6 @@ struct early_node_data { struct ia64_node_data *node_data; unsigned long pernode_addr; unsigned long pernode_size; - unsigned long num_physpages; #ifdef CONFIG_ZONE_DMA unsigned long num_dma_physpages; #endif @@ -593,7 +592,7 @@ void __init find_memory(void) * find_pernode_space() does most of this already, we just need to set * local_per_cpu_offset */ -void __cpuinit *per_cpu_init(void) +void *per_cpu_init(void) { int cpu; static int first_time = 1; @@ -732,7 +731,6 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n { unsigned long end = start + len; - mem_data[node].num_physpages += len >> PAGE_SHIFT; #ifdef CONFIG_ZONE_DMA if (start <= __pa(MAX_DMA_ADDRESS)) mem_data[node].num_dma_physpages += @@ -778,7 +776,6 @@ void __init paging_init(void) #endif for_each_online_node(node) { - num_physpages += mem_data[node].num_physpages; pfn_offset = mem_data[node].min_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index d1fe4b402601..b6f7f43424ec 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -154,9 +154,8 @@ ia64_init_addr_space (void) void free_initmem (void) { - free_reserved_area((unsigned long)ia64_imva(__init_begin), - (unsigned long)ia64_imva(__init_end), - 0, "unused kernel"); + free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end), + -1, "unused kernel"); } void __init @@ -546,19 +545,6 @@ int __init register_active_ranges(u64 start, u64 len, int nid) return 0; } -static int __init -count_reserved_pages(u64 start, u64 end, void *arg) -{ - unsigned long num_reserved = 0; - unsigned long *count = arg; - - for (; start < end; start += PAGE_SIZE) - if (PageReserved(virt_to_page(start))) - ++num_reserved; - *count += num_reserved; - return 0; -} - int find_max_min_low_pfn (u64 start, u64 end, void *arg) { @@ -597,8 +583,6 @@ __setup("nolwsys", nolwsys_setup); void __init mem_init (void) { - long reserved_pages, codesize, datasize, initsize; - pg_data_t *pgdat; int i; BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE); @@ -616,27 +600,12 @@ mem_init (void) #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); - max_mapnr = max_low_pfn; #endif + set_max_mapnr(max_low_pfn); high_memory = __va(max_low_pfn * PAGE_SIZE); - - for_each_online_pgdat(pgdat) - if (pgdat->bdata->node_bootmem_map) - totalram_pages += free_all_bootmem_node(pgdat); - - reserved_pages = 0; - efi_memmap_walk(count_reserved_pages, &reserved_pages); - - codesize = (unsigned long) _etext - (unsigned long) _stext; - datasize = (unsigned long) _edata - (unsigned long) _etext; - initsize = (unsigned long) __init_end - (unsigned long) __init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, " - "%luk data, %luk init)\n", nr_free_pages() << (PAGE_SHIFT - 10), - num_physpages << (PAGE_SHIFT - 10), codesize >> 10, - reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10); - + free_all_bootmem(); + mem_init_print_info(NULL); /* * For fsyscall entrpoints with no light-weight handler, use the ordinary diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 4248492b9321..ea21d4cad540 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -86,7 +86,7 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) return -1; } -void __cpuinit numa_clear_node(int cpu) +void numa_clear_node(int cpu) { unmap_cpu_from_node(cpu, NUMA_NO_NODE); } diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index de1474ff0bc5..2326790b7d8b 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -134,6 +134,10 @@ struct pci_root_info { struct acpi_device *bridge; struct pci_controller *controller; struct list_head resources; + struct resource *res; + resource_size_t *res_offset; + unsigned int res_num; + struct list_head io_resources; char *name; }; @@ -153,7 +157,7 @@ new_space (u64 phys_base, int sparse) return i; if (num_io_spaces == MAX_IO_SPACES) { - printk(KERN_ERR "PCI: Too many IO port spaces " + pr_err("PCI: Too many IO port spaces " "(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES); return ~0; } @@ -168,25 +172,22 @@ new_space (u64 phys_base, int sparse) static u64 add_io_space(struct pci_root_info *info, struct acpi_resource_address64 *addr) { + struct iospace_resource *iospace; struct resource *resource; char *name; unsigned long base, min, max, base_port; unsigned int sparse = 0, space_nr, len; - resource = kzalloc(sizeof(*resource), GFP_KERNEL); - if (!resource) { - printk(KERN_ERR "PCI: No memory for %s I/O port space\n", - info->name); + len = strlen(info->name) + 32; + iospace = kzalloc(sizeof(*iospace) + len, GFP_KERNEL); + if (!iospace) { + dev_err(&info->bridge->dev, + "PCI: No memory for %s I/O port space\n", + info->name); goto out; } - len = strlen(info->name) + 32; - name = kzalloc(len, GFP_KERNEL); - if (!name) { - printk(KERN_ERR "PCI: No memory for %s I/O port space name\n", - info->name); - goto free_resource; - } + name = (char *)(iospace + 1); min = addr->minimum; max = min + addr->address_length - 1; @@ -195,7 +196,7 @@ static u64 add_io_space(struct pci_root_info *info, space_nr = new_space(addr->translation_offset, sparse); if (space_nr == ~0) - goto free_name; + goto free_resource; base = __pa(io_space[space_nr].mmio_base); base_port = IO_SPACE_BASE(space_nr); @@ -210,18 +211,23 @@ static u64 add_io_space(struct pci_root_info *info, if (space_nr == 0) sparse = 1; + resource = &iospace->res; resource->name = name; resource->flags = IORESOURCE_MEM; resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min); resource->end = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max); - insert_resource(&iomem_resource, resource); + if (insert_resource(&iomem_resource, resource)) { + dev_err(&info->bridge->dev, + "can't allocate host bridge io space resource %pR\n", + resource); + goto free_resource; + } + list_add_tail(&iospace->list, &info->io_resources); return base_port; -free_name: - kfree(name); free_resource: - kfree(resource); + kfree(iospace); out: return ~0; } @@ -265,7 +271,7 @@ static acpi_status count_window(struct acpi_resource *resource, void *data) static acpi_status add_window(struct acpi_resource *res, void *data) { struct pci_root_info *info = data; - struct pci_window *window; + struct resource *resource; struct acpi_resource_address64 addr; acpi_status status; unsigned long flags, offset = 0; @@ -289,55 +295,146 @@ static acpi_status add_window(struct acpi_resource *res, void *data) } else return AE_OK; - window = &info->controller->window[info->controller->windows++]; - window->resource.name = info->name; - window->resource.flags = flags; - window->resource.start = addr.minimum + offset; - window->resource.end = window->resource.start + addr.address_length - 1; - window->offset = offset; + resource = &info->res[info->res_num]; + resource->name = info->name; + resource->flags = flags; + resource->start = addr.minimum + offset; + resource->end = resource->start + addr.address_length - 1; + info->res_offset[info->res_num] = offset; - if (insert_resource(root, &window->resource)) { + if (insert_resource(root, resource)) { dev_err(&info->bridge->dev, "can't allocate host bridge window %pR\n", - &window->resource); + resource); } else { if (offset) dev_info(&info->bridge->dev, "host bridge window %pR " "(PCI address [%#llx-%#llx])\n", - &window->resource, - window->resource.start - offset, - window->resource.end - offset); + resource, + resource->start - offset, + resource->end - offset); else dev_info(&info->bridge->dev, - "host bridge window %pR\n", - &window->resource); + "host bridge window %pR\n", resource); } - /* HP's firmware has a hack to work around a Windows bug. * Ignore these tiny memory ranges */ - if (!((window->resource.flags & IORESOURCE_MEM) && - (window->resource.end - window->resource.start < 16))) - pci_add_resource_offset(&info->resources, &window->resource, - window->offset); + if (!((resource->flags & IORESOURCE_MEM) && + (resource->end - resource->start < 16))) + pci_add_resource_offset(&info->resources, resource, + info->res_offset[info->res_num]); + info->res_num++; return AE_OK; } +static void free_pci_root_info_res(struct pci_root_info *info) +{ + struct iospace_resource *iospace, *tmp; + + list_for_each_entry_safe(iospace, tmp, &info->io_resources, list) + kfree(iospace); + + kfree(info->name); + kfree(info->res); + info->res = NULL; + kfree(info->res_offset); + info->res_offset = NULL; + info->res_num = 0; + kfree(info->controller); + info->controller = NULL; +} + +static void __release_pci_root_info(struct pci_root_info *info) +{ + int i; + struct resource *res; + struct iospace_resource *iospace; + + list_for_each_entry(iospace, &info->io_resources, list) + release_resource(&iospace->res); + + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + + if (!res->parent) + continue; + + if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) + continue; + + release_resource(res); + } + + free_pci_root_info_res(info); + kfree(info); +} + +static void release_pci_root_info(struct pci_host_bridge *bridge) +{ + struct pci_root_info *info = bridge->release_data; + + __release_pci_root_info(info); +} + +static int +probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, + int busnum, int domain) +{ + char *name; + + name = kmalloc(16, GFP_KERNEL); + if (!name) + return -ENOMEM; + + sprintf(name, "PCI Bus %04x:%02x", domain, busnum); + info->bridge = device; + info->name = name; + + acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, + &info->res_num); + if (info->res_num) { + info->res = + kzalloc_node(sizeof(*info->res) * info->res_num, + GFP_KERNEL, info->controller->node); + if (!info->res) { + kfree(name); + return -ENOMEM; + } + + info->res_offset = + kzalloc_node(sizeof(*info->res_offset) * info->res_num, + GFP_KERNEL, info->controller->node); + if (!info->res_offset) { + kfree(name); + kfree(info->res); + info->res = NULL; + return -ENOMEM; + } + + info->res_num = 0; + acpi_walk_resources(device->handle, METHOD_NAME__CRS, + add_window, info); + } else + kfree(name); + + return 0; +} + struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { struct acpi_device *device = root->device; int domain = root->segment; int bus = root->secondary.start; struct pci_controller *controller; - unsigned int windows = 0; - struct pci_root_info info; + struct pci_root_info *info = NULL; + int busnum = root->secondary.start; struct pci_bus *pbus; - char *name; - int pxm; + int pxm, ret; controller = alloc_pci_controller(domain); if (!controller) - goto out1; + return NULL; controller->acpi_handle = device->handle; @@ -347,29 +444,27 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) controller->node = pxm_to_node(pxm); #endif - INIT_LIST_HEAD(&info.resources); - /* insert busn resource at first */ - pci_add_resource(&info.resources, &root->secondary); - acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, - &windows); - if (windows) { - controller->window = - kzalloc_node(sizeof(*controller->window) * windows, - GFP_KERNEL, controller->node); - if (!controller->window) - goto out2; - - name = kmalloc(16, GFP_KERNEL); - if (!name) - goto out3; - - sprintf(name, "PCI Bus %04x:%02x", domain, bus); - info.bridge = device; - info.controller = controller; - info.name = name; - acpi_walk_resources(device->handle, METHOD_NAME__CRS, - add_window, &info); + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + dev_err(&device->dev, + "pci_bus %04x:%02x: ignored (out of memory)\n", + domain, busnum); + kfree(controller); + return NULL; } + + info->controller = controller; + INIT_LIST_HEAD(&info->io_resources); + INIT_LIST_HEAD(&info->resources); + + ret = probe_pci_root_info(info, device, busnum, domain); + if (ret) { + kfree(info->controller); + kfree(info); + return NULL; + } + /* insert busn resource at first */ + pci_add_resource(&info->resources, &root->secondary); /* * See arch/x86/pci/acpi.c. * The desired pci bus might already be scanned in a quirk. We @@ -377,21 +472,17 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) * such quirk. So we just ignore the case now. */ pbus = pci_create_root_bus(NULL, bus, &pci_root_ops, controller, - &info.resources); + &info->resources); if (!pbus) { - pci_free_resource_list(&info.resources); + pci_free_resource_list(&info->resources); + __release_pci_root_info(info); return NULL; } + pci_set_host_bridge_release(to_pci_host_bridge(pbus->bridge), + release_pci_root_info, info); pci_scan_child_bus(pbus); return pbus; - -out3: - kfree(controller->window); -out2: - kfree(controller); -out1: - return NULL; } int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) @@ -691,7 +782,7 @@ static void __init set_pci_dfl_cacheline_size(void) status = ia64_pal_cache_summary(&levels, &unique_caches); if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_summary() failed " + pr_err("%s: ia64_pal_cache_summary() failed " "(status=%ld)\n", __func__, status); return; } @@ -699,7 +790,7 @@ static void __init set_pci_dfl_cacheline_size(void) status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2, &cci); if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed " + pr_err("%s: ia64_pal_cache_config_info() failed " "(status=%ld)\n", __func__, status); return; } diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 238e2c511d94..0b5ce82d203d 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -118,76 +118,26 @@ static void __init sn_fixup_ionodes(void) } /* - * sn_pci_legacy_window_fixup - Create PCI controller windows for + * sn_pci_legacy_window_fixup - Setup PCI resources for * legacy IO and MEM space. This needs to * be done here, as the PROM does not have * ACPI support defining the root buses * and their resources (_CRS), */ static void -sn_legacy_pci_window_fixup(struct pci_controller *controller, - u64 legacy_io, u64 legacy_mem) +sn_legacy_pci_window_fixup(struct resource *res, + u64 legacy_io, u64 legacy_mem) { - controller->window = kcalloc(2, sizeof(struct pci_window), - GFP_KERNEL); - BUG_ON(controller->window == NULL); - controller->window[0].offset = legacy_io; - controller->window[0].resource.name = "legacy_io"; - controller->window[0].resource.flags = IORESOURCE_IO; - controller->window[0].resource.start = legacy_io; - controller->window[0].resource.end = - controller->window[0].resource.start + 0xffff; - controller->window[0].resource.parent = &ioport_resource; - controller->window[1].offset = legacy_mem; - controller->window[1].resource.name = "legacy_mem"; - controller->window[1].resource.flags = IORESOURCE_MEM; - controller->window[1].resource.start = legacy_mem; - controller->window[1].resource.end = - controller->window[1].resource.start + (1024 * 1024) - 1; - controller->window[1].resource.parent = &iomem_resource; - controller->windows = 2; -} - -/* - * sn_pci_window_fixup() - Create a pci_window for each device resource. - * It will setup pci_windows for use by - * pcibios_bus_to_resource(), pcibios_resource_to_bus(), - * etc. - */ -static void -sn_pci_window_fixup(struct pci_dev *dev, unsigned int count, - s64 * pci_addrs) -{ - struct pci_controller *controller = PCI_CONTROLLER(dev->bus); - unsigned int i; - unsigned int idx; - unsigned int new_count; - struct pci_window *new_window; - - if (count == 0) - return; - idx = controller->windows; - new_count = controller->windows + count; - new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL); - BUG_ON(new_window == NULL); - if (controller->window) { - memcpy(new_window, controller->window, - sizeof(struct pci_window) * controller->windows); - kfree(controller->window); - } - - /* Setup a pci_window for each device resource. */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - if (pci_addrs[i] == -1) - continue; - - new_window[idx].offset = dev->resource[i].start - pci_addrs[i]; - new_window[idx].resource = dev->resource[i]; - idx++; - } - - controller->windows = new_count; - controller->window = new_window; + res[0].name = "legacy_io"; + res[0].flags = IORESOURCE_IO; + res[0].start = legacy_io; + res[0].end = res[0].start + 0xffff; + res[0].parent = &ioport_resource; + res[1].name = "legacy_mem"; + res[1].flags = IORESOURCE_MEM; + res[1].start = legacy_mem; + res[1].end = res[1].start + (1024 * 1024) - 1; + res[1].parent = &iomem_resource; } /* @@ -199,9 +149,7 @@ sn_pci_window_fixup(struct pci_dev *dev, unsigned int count, void sn_io_slot_fixup(struct pci_dev *dev) { - unsigned int count = 0; int idx; - s64 pci_addrs[PCI_ROM_RESOURCE + 1]; unsigned long addr, end, size, start; struct pcidev_info *pcidev_info; struct sn_irq_info *sn_irq_info; @@ -229,7 +177,6 @@ sn_io_slot_fixup(struct pci_dev *dev) for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { if (!pcidev_info->pdi_pio_mapped_addr[idx]) { - pci_addrs[idx] = -1; continue; } @@ -237,11 +184,8 @@ sn_io_slot_fixup(struct pci_dev *dev) end = dev->resource[idx].end; size = end - start; if (size == 0) { - pci_addrs[idx] = -1; continue; } - pci_addrs[idx] = start; - count++; addr = pcidev_info->pdi_pio_mapped_addr[idx]; addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET; dev->resource[idx].start = addr; @@ -276,11 +220,6 @@ sn_io_slot_fixup(struct pci_dev *dev) IORESOURCE_ROM_BIOS_COPY; } } - /* Create a pci_window in the pci_controller struct for - * each device resource. - */ - if (count > 0) - sn_pci_window_fixup(dev, count, pci_addrs); sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); } @@ -297,8 +236,8 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) s64 status = 0; struct pci_controller *controller; struct pcibus_bussoft *prom_bussoft_ptr; + struct resource *res; LIST_HEAD(resources); - int i; status = sal_get_pcibus_info((u64) segment, (u64) busnum, (u64) ia64_tpa(&prom_bussoft_ptr)); @@ -310,32 +249,29 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) BUG_ON(!controller); controller->segment = segment; + res = kcalloc(2, sizeof(struct resource), GFP_KERNEL); + BUG_ON(!res); + /* * Temporarily save the prom_bussoft_ptr for use by sn_bus_fixup(). * (platform_data will be overwritten later in sn_common_bus_fixup()) */ controller->platform_data = prom_bussoft_ptr; - sn_legacy_pci_window_fixup(controller, - prom_bussoft_ptr->bs_legacy_io, - prom_bussoft_ptr->bs_legacy_mem); - for (i = 0; i < controller->windows; i++) - pci_add_resource_offset(&resources, - &controller->window[i].resource, - controller->window[i].offset); + sn_legacy_pci_window_fixup(res, + prom_bussoft_ptr->bs_legacy_io, + prom_bussoft_ptr->bs_legacy_mem); + pci_add_resource_offset(&resources, &res[0], + prom_bussoft_ptr->bs_legacy_io); + pci_add_resource_offset(&resources, &res[1], + prom_bussoft_ptr->bs_legacy_mem); + bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, controller, &resources); - if (bus == NULL) - goto error_return; /* error, or bus already scanned */ - - bus->sysdata = controller; - - return; - -error_return: - - kfree(controller); - return; + if (bus == NULL) { + kfree(res); + kfree(controller); + } } /* diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index f82e7b462b7b..53b01b8e2f19 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -192,7 +192,7 @@ void __init early_sn_setup(void) } extern int platform_intr_list[]; -static int __cpuinitdata shub_1_1_found; +static int shub_1_1_found; /* * sn_check_for_wars @@ -200,7 +200,7 @@ static int __cpuinitdata shub_1_1_found; * Set flag for enabling shub specific wars */ -static inline int __cpuinit is_shub_1_1(int nasid) +static inline int is_shub_1_1(int nasid) { unsigned long id; int rev; @@ -212,7 +212,7 @@ static inline int __cpuinit is_shub_1_1(int nasid) return rev <= 2; } -static void __cpuinit sn_check_for_wars(void) +static void sn_check_for_wars(void) { int cnode; @@ -558,7 +558,7 @@ static void __init sn_init_pdas(char **cmdline_p) * Also sets up a few fields in the nodepda. Also known as * platform_cpu_init() by the ia64 machvec code. */ -void __cpuinit sn_cpu_init(void) +void sn_cpu_init(void) { int cpuid; int cpuphyid; diff --git a/arch/ia64/xen/hypervisor.c b/arch/ia64/xen/hypervisor.c index 52172eee8591..fab62528a80b 100644 --- a/arch/ia64/xen/hypervisor.c +++ b/arch/ia64/xen/hypervisor.c @@ -74,7 +74,7 @@ void __init xen_setup_vcpu_info_placement(void) xen_vcpu_setup(cpu); } -void __cpuinit +void xen_cpu_init(void) { xen_smp_intr_init(); diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index bcd17b206571..29a7ef4e448b 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -16,6 +16,7 @@ config M32R select GENERIC_ATOMIC64 select ARCH_USES_GETTIMEOFFSET select MODULES_USE_ELF_RELA + select HAVE_DEBUG_STACKOVERFLOW config SBUS bool diff --git a/arch/m32r/Kconfig.debug b/arch/m32r/Kconfig.debug index bb1afc1a31cc..6c612b7691b0 100644 --- a/arch/m32r/Kconfig.debug +++ b/arch/m32r/Kconfig.debug @@ -2,13 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - help - This option will cause messages to be printed if free stack space - drops below a certain limit. - config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL && BROKEN diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 2a3b59e0e171..44aaf4639a4a 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -74,4 +74,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c index 2c468e8b5853..27196303ce36 100644 --- a/arch/m32r/mm/discontig.c +++ b/arch/m32r/mm/discontig.c @@ -129,11 +129,10 @@ unsigned long __init setup_memory(void) #define START_PFN(nid) (NODE_DATA(nid)->bdata->node_min_pfn) #define MAX_LOW_PFN(nid) (NODE_DATA(nid)->bdata->node_low_pfn) -unsigned long __init zone_sizes_init(void) +void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES], zholes_size[MAX_NR_ZONES]; unsigned long low, start_pfn; - unsigned long holes = 0; int nid, i; mem_prof_t *mp; @@ -147,7 +146,6 @@ unsigned long __init zone_sizes_init(void) low = MAX_LOW_PFN(nid); zones_size[ZONE_DMA] = low - start_pfn; zholes_size[ZONE_DMA] = mp->holes; - holes += zholes_size[ZONE_DMA]; node_set_state(nid, N_NORMAL_MEMORY); free_area_init_node(nid, zones_size, start_pfn, zholes_size); @@ -161,6 +159,4 @@ unsigned long __init zone_sizes_init(void) NODE_DATA(1)->node_zones->watermark[WMARK_MIN] = 0; NODE_DATA(1)->node_zones->watermark[WMARK_LOW] = 0; NODE_DATA(1)->node_zones->watermark[WMARK_HIGH] = 0; - - return holes; } diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index ab4cbce91a9b..0d4146f644dc 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -40,7 +40,6 @@ unsigned long mmu_context_cache_dat; #else unsigned long mmu_context_cache_dat[NR_CPUS]; #endif -static unsigned long hole_pages; /* * function prototype @@ -57,7 +56,7 @@ void free_initrd_mem(unsigned long, unsigned long); #define MAX_LOW_PFN(nid) (NODE_DATA(nid)->bdata->node_low_pfn) #ifndef CONFIG_DISCONTIGMEM -unsigned long __init zone_sizes_init(void) +void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long max_dma; @@ -83,11 +82,9 @@ unsigned long __init zone_sizes_init(void) #endif /* CONFIG_MMU */ free_area_init_node(0, zones_size, start_pfn, 0); - - return 0; } #else /* CONFIG_DISCONTIGMEM */ -extern unsigned long zone_sizes_init(void); +extern void zone_sizes_init(void); #endif /* CONFIG_DISCONTIGMEM */ /*======================================================================* @@ -105,24 +102,7 @@ void __init paging_init(void) for (i = 0 ; i < USER_PTRS_PER_PGD * 2 ; i++) pgd_val(pg_dir[i]) = 0; #endif /* CONFIG_MMU */ - hole_pages = zone_sizes_init(); -} - -int __init reservedpages_count(void) -{ - int reservedpages, nid, i; - - reservedpages = 0; - for_each_online_node(nid) { - unsigned long flags; - pgdat_resize_lock(NODE_DATA(nid), &flags); - for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) - if (PageReserved(nid_page_nr(nid, i))) - reservedpages++; - pgdat_resize_unlock(NODE_DATA(nid), &flags); - } - - return reservedpages; + zone_sizes_init(); } /*======================================================================* @@ -131,48 +111,20 @@ int __init reservedpages_count(void) *======================================================================*/ void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int nid; #ifndef CONFIG_MMU extern unsigned long memory_end; -#endif - - num_physpages = 0; - for_each_online_node(nid) - num_physpages += MAX_LOW_PFN(nid) - START_PFN(nid) + 1; - - num_physpages -= hole_pages; -#ifndef CONFIG_DISCONTIGMEM - max_mapnr = num_physpages; -#endif /* CONFIG_DISCONTIGMEM */ - -#ifdef CONFIG_MMU - high_memory = (void *)__va(PFN_PHYS(MAX_LOW_PFN(0))); -#else high_memory = (void *)(memory_end & PAGE_MASK); +#else + high_memory = (void *)__va(PFN_PHYS(MAX_LOW_PFN(0))); #endif /* CONFIG_MMU */ /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); - /* this will put all low memory onto the freelists */ - for_each_online_node(nid) - totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); - - reservedpages = reservedpages_count() - hole_pages; - codesize = (unsigned long) &_etext - (unsigned long)&_text; - datasize = (unsigned long) &_edata - (unsigned long)&_etext; - initsize = (unsigned long) &__init_end - (unsigned long)&__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " - "%dk reserved, %dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); + set_max_mapnr(get_num_physpages()); + free_all_bootmem(); + mem_init_print_info(NULL); } /*======================================================================* @@ -181,7 +133,7 @@ void __init mem_init(void) *======================================================================*/ void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -191,6 +143,6 @@ void free_initmem(void) *======================================================================*/ void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/m68k/Kconfig.debug b/arch/m68k/Kconfig.debug index fa12283d58fc..229682721240 100644 --- a/arch/m68k/Kconfig.debug +++ b/arch/m68k/Kconfig.debug @@ -11,9 +11,8 @@ config BOOTPARAM_STRING depends on BOOTPARAM config EARLY_PRINTK - bool "Early printk" if EMBEDDED + bool "Early printk" depends on MVME16x || MAC - default y help Write kernel log output directly to a serial port. diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 0f795d8e65fa..b17a8837f0e1 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -214,6 +214,7 @@ CONFIG_DEVTMPFS=y # CONFIG_FW_LOADER_USER_HELPER is not set CONFIG_CONNECTOR=m CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m CONFIG_PARPORT_AMIGA=m CONFIG_PARPORT_MFC3=m CONFIG_PARPORT_ATARI=m @@ -325,6 +326,7 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_PLIP=m CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 8982370e8b42..be1496ed9b66 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -199,6 +199,9 @@ CONFIG_DEVTMPFS=y # CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_FW_LOADER_USER_HELPER is not set CONFIG_CONNECTOR=m +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_PARPORT_1284=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m @@ -267,6 +270,7 @@ CONFIG_NE2000=m # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_PLIP=m CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -292,9 +296,11 @@ CONFIG_SERIO_Q40KBD=y CONFIG_VT_HW_CONSOLE_BINDING=y # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set +CONFIG_PRINTER=m # CONFIG_HW_RANDOM is not set CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m +CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y diff --git a/arch/m68k/include/asm/parport.h b/arch/m68k/include/asm/parport.h index 5ea75e6a7399..c85cece778e8 100644 --- a/arch/m68k/include/asm/parport.h +++ b/arch/m68k/include/asm/parport.h @@ -11,6 +11,8 @@ #ifndef _ASM_M68K_PARPORT_H #define _ASM_M68K_PARPORT_H 1 +#undef insl +#undef outsl #define insl(port,buf,len) isa_insb(port,buf,(len)<<2) #define outsl(port,buf,len) isa_outsb(port,buf,(len)<<2) diff --git a/arch/m68k/include/asm/string.h b/arch/m68k/include/asm/string.h index 9aea9f11fa25..c30c03d98581 100644 --- a/arch/m68k/include/asm/string.h +++ b/arch/m68k/include/asm/string.h @@ -4,20 +4,6 @@ #include <linux/types.h> #include <linux/compiler.h> -static inline char *__kernel_strcpy(char *dest, const char *src) -{ - char *xdest = dest; - - asm volatile ("\n" - "1: move.b (%1)+,(%0)+\n" - " jne 1b" - : "+a" (dest), "+a" (src) - : : "memory"); - return xdest; -} - -#ifndef __IN_STRING_C - #define __HAVE_ARCH_STRNLEN static inline size_t strnlen(const char *s, size_t count) { @@ -34,16 +20,6 @@ static inline size_t strnlen(const char *s, size_t count) return sc - s; } -#define __HAVE_ARCH_STRCPY -#if __GNUC__ >= 4 -#define strcpy(d, s) (__builtin_constant_p(s) && \ - __builtin_strlen(s) <= 32 ? \ - __builtin_strcpy(d, s) : \ - __kernel_strcpy(d, s)) -#else -#define strcpy(d, s) __kernel_strcpy(d, s) -#endif - #define __HAVE_ARCH_STRNCPY static inline char *strncpy(char *dest, const char *src, size_t n) { @@ -61,12 +37,6 @@ static inline char *strncpy(char *dest, const char *src, size_t n) return xdest; } -#define __HAVE_ARCH_STRCAT -#define strcat(d, s) ({ \ - char *__d = (d); \ - strcpy(__d + strlen(__d), (s)); \ -}) - #ifndef CONFIG_COLDFIRE #define __HAVE_ARCH_STRCMP static inline int strcmp(const char *cs, const char *ct) @@ -100,6 +70,4 @@ extern void *memset(void *, int, __kernel_size_t); extern void *memcpy(void *, const void *, __kernel_size_t); #define memcpy(d, s, n) __builtin_memcpy(d, s, n) -#endif - #endif /* _M68K_STRING_H_ */ diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h index 472c891a4aee..15901db435b9 100644 --- a/arch/m68k/include/asm/uaccess_mm.h +++ b/arch/m68k/include/asm/uaccess_mm.h @@ -90,7 +90,7 @@ asm volatile ("\n" \ __put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT); \ break; \ case 2: \ - __put_user_asm(__pu_err, __pu_val, ptr, w, d, -EFAULT); \ + __put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT); \ break; \ case 4: \ __put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT); \ @@ -158,7 +158,7 @@ asm volatile ("\n" \ __get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT); \ break; \ case 2: \ - __get_user_asm(__gu_err, x, ptr, u16, w, d, -EFAULT); \ + __get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT); \ break; \ case 4: \ __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \ @@ -245,7 +245,7 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n) __get_user_asm(res, *(u8 *)to, (u8 __user *)from, u8, b, d, 1); break; case 2: - __get_user_asm(res, *(u16 *)to, (u16 __user *)from, u16, w, d, 2); + __get_user_asm(res, *(u16 *)to, (u16 __user *)from, u16, w, r, 2); break; case 3: __constant_copy_from_user_asm(res, to, from, tmp, 3, w, b,); @@ -326,7 +326,7 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1); break; case 2: - __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, d, 2); + __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2); break; case 3: __constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,); diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c index a972b00cd77d..8b7b22846366 100644 --- a/arch/m68k/kernel/asm-offsets.c +++ b/arch/m68k/kernel/asm-offsets.c @@ -77,7 +77,7 @@ int main(void) DEFINE(BIR_SIZE, offsetof(struct bi_record, size)); DEFINE(BIR_DATA, offsetof(struct bi_record, data)); - /* offsets into font_desc (drivers/video/console/font.h) */ + /* offsets into the font_desc struct */ DEFINE(FONT_DESC_IDX, offsetof(struct font_desc, idx)); DEFINE(FONT_DESC_NAME, offsetof(struct font_desc, name)); DEFINE(FONT_DESC_WIDTH, offsetof(struct font_desc, width)); diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c index 6b32b64bac35..4d7da384eea0 100644 --- a/arch/m68k/kernel/ints.c +++ b/arch/m68k/kernel/ints.c @@ -101,7 +101,7 @@ void __init m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt) BUG_ON(IRQ_USER + cnt > NR_IRQS); m68k_first_user_vec = vec; for (i = 0; i < cnt; i++) - irq_set_chip(IRQ_USER + i, &user_irq_chip); + irq_set_chip_and_handler(i, &user_irq_chip, handle_simple_irq); *user_irqvec_fixup = vec - IRQ_USER; flush_icache(); } diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile index a9d782d34276..fcd8eb1d7c7d 100644 --- a/arch/m68k/lib/Makefile +++ b/arch/m68k/lib/Makefile @@ -6,7 +6,7 @@ lib-y := ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ memcpy.o memset.o memmove.o -lib-$(CONFIG_MMU) += string.o uaccess.o +lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_CPU_HAS_NO_MULDIV64) += mulsi3.o divsi3.o udivsi3.o lib-$(CONFIG_CPU_HAS_NO_MULDIV64) += modsi3.o umodsi3.o diff --git a/arch/m68k/lib/string.c b/arch/m68k/lib/string.c deleted file mode 100644 index 4d61fa8a112c..000000000000 --- a/arch/m68k/lib/string.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ - -#define __IN_STRING_C - -#include <linux/module.h> -#include <linux/string.h> - -char *strcpy(char *dest, const char *src) -{ - return __kernel_strcpy(dest, src); -} -EXPORT_SYMBOL(strcpy); - -char *strcat(char *dest, const char *src) -{ - return __kernel_strcpy(dest + strlen(dest), src); -} -EXPORT_SYMBOL(strcat); diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c index 5e97f2ee7c11..35d1442dee89 100644 --- a/arch/m68k/lib/uaccess.c +++ b/arch/m68k/lib/uaccess.c @@ -52,7 +52,7 @@ unsigned long __generic_copy_from_user(void *to, const void __user *from, " .long 3b,30b\n" " .long 5b,50b\n" " .previous" - : "=d" (res), "+a" (from), "+a" (to), "=&r" (tmp) + : "=d" (res), "+a" (from), "+a" (to), "=&d" (tmp) : "0" (n / 4), "d" (n & 3)); return res; @@ -96,7 +96,7 @@ unsigned long __generic_copy_to_user(void __user *to, const void *from, " .long 7b,50b\n" " .long 8b,50b\n" " .previous" - : "=d" (res), "+a" (from), "+a" (to), "=&r" (tmp) + : "=d" (res), "+a" (from), "+a" (to), "=&d" (tmp) : "0" (n / 4), "d" (n & 3)); return res; @@ -141,7 +141,7 @@ unsigned long __clear_user(void __user *to, unsigned long n) " .long 7b,40b\n" " .previous" : "=d" (res), "+a" (to) - : "r" (0), "0" (n / 4), "d" (n & 3)); + : "d" (0), "0" (n / 4), "d" (n & 3)); return res; } diff --git a/arch/m68k/math-emu/fp_arith.c b/arch/m68k/math-emu/fp_arith.c index 08f286db3c5a..239eb1990184 100644 --- a/arch/m68k/math-emu/fp_arith.c +++ b/arch/m68k/math-emu/fp_arith.c @@ -519,7 +519,7 @@ static void fp_roundint(struct fp_ext *dest, int mode) return; break; case 0x401e: - if (!(oldmant.m32[1] >= 0)) + if (oldmant.m32[1] & 0x80000000) return; if (oldmant.m32[0] & 1) break; diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 1af2ca3411f6..6b4baa6e4d31 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -110,7 +110,7 @@ void __init paging_init(void) void free_initmem(void) { #ifndef CONFIG_MMU_SUN3 - free_initmem_default(0); + free_initmem_default(-1); #endif /* CONFIG_MMU_SUN3 */ } @@ -146,38 +146,11 @@ void __init print_memmap(void) MLK_ROUNDUP(__bss_start, __bss_stop)); } -void __init mem_init(void) +static inline void init_pointer_tables(void) { - pg_data_t *pgdat; - int codepages = 0; - int datapages = 0; - int initpages = 0; +#if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) int i; - /* this will put all memory onto the freelists */ - totalram_pages = num_physpages = 0; - for_each_online_pgdat(pgdat) { - num_physpages += pgdat->node_present_pages; - - totalram_pages += free_all_bootmem_node(pgdat); - for (i = 0; i < pgdat->node_spanned_pages; i++) { - struct page *page = pgdat->node_mem_map + i; - char *addr = page_to_virt(page); - - if (!PageReserved(page)) - continue; - if (addr >= _text && - addr < _etext) - codepages++; - else if (addr >= __init_begin && - addr < __init_end) - initpages++; - else - datapages++; - } - } - -#if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) /* insert pointer tables allocated so far into the tablelist */ init_pointer_table((unsigned long)kernel_pg_dir); for (i = 0; i < PTRS_PER_PGD; i++) { @@ -189,19 +162,20 @@ void __init mem_init(void) if (zero_pgtable) init_pointer_table((unsigned long)zero_pgtable); #endif +} - pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - totalram_pages << (PAGE_SHIFT-10), - codepages << (PAGE_SHIFT-10), - datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10)); +void __init mem_init(void) +{ + /* this will put all memory onto the freelists */ + free_all_bootmem(); + init_pointer_tables(); + mem_init_print_info(NULL); print_memmap(); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/m68k/platform/coldfire/pci.c b/arch/m68k/platform/coldfire/pci.c index 8572246db84d..b33f97a13e6d 100644 --- a/arch/m68k/platform/coldfire/pci.c +++ b/arch/m68k/platform/coldfire/pci.c @@ -320,7 +320,6 @@ static int __init mcf_pci_init(void) pci_bus_size_bridges(rootbus); pci_bus_assign_resources(rootbus); pci_enable_bridges(rootbus); - pci_bus_add_devices(rootbus); return 0; } diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index ca0966cac72a..cab54482ca34 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -275,7 +275,7 @@ void dvma_init(void) } -inline unsigned long dvma_map_align(unsigned long kaddr, int len, int align) +unsigned long dvma_map_align(unsigned long kaddr, int len, int align) { unsigned long baddr; diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index dcd94406030e..cfd831c29824 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -30,6 +30,7 @@ config METAG select OF select OF_EARLY_FLATTREE select SPARSE_IRQ + select HAVE_DEBUG_STACKOVERFLOW config STACKTRACE_SUPPORT def_bool y diff --git a/arch/metag/Kconfig.debug b/arch/metag/Kconfig.debug index e45bbf6a7a5d..cb5c92860540 100644 --- a/arch/metag/Kconfig.debug +++ b/arch/metag/Kconfig.debug @@ -6,13 +6,6 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - help - This option will cause messages to be printed if free stack space - drops below a certain limit. - config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on DEBUG_KERNEL diff --git a/arch/metag/Kconfig.soc b/arch/metag/Kconfig.soc index ec079cfb7c6a..2a3c860c7525 100644 --- a/arch/metag/Kconfig.soc +++ b/arch/metag/Kconfig.soc @@ -14,6 +14,18 @@ config META21_FPGA help This is a Meta 2.1 FPGA bitstream, just a bare CPU. +config SOC_TZ1090 + bool "Toumaz Xenif TZ1090 SoC (Comet)" + select METAG_LNKGET_AROUND_CACHE + select METAG_META21 + select METAG_SMP_WRITE_REORDERING + select PINCTRL + select PINCTRL_TZ1090 + select PINCTRL_TZ1090_PDC + help + This is a Toumaz Technology Xenif TZ1090 (A.K.A. Comet) SoC containing + a 2-threaded HTP. + endchoice menu "SoC configuration" diff --git a/arch/metag/Makefile b/arch/metag/Makefile index b566116b171b..9739857bdedc 100644 --- a/arch/metag/Makefile +++ b/arch/metag/Makefile @@ -20,7 +20,7 @@ checkflags-$(CONFIG_METAG_META12) += -DMETAC_1_2 checkflags-$(CONFIG_METAG_META21) += -DMETAC_2_1 CHECKFLAGS += -D__metag__ $(checkflags-y) -KBUILD_DEFCONFIG := meta2_defconfig +KBUILD_DEFCONFIG := tz1090_defconfig sflags-$(CONFIG_METAG_META12) += -mmetac=1.2 ifeq ($(CONFIG_METAG_META12),y) diff --git a/arch/metag/boot/.gitignore b/arch/metag/boot/.gitignore index a021da201156..2d6c0c160884 100644 --- a/arch/metag/boot/.gitignore +++ b/arch/metag/boot/.gitignore @@ -1,4 +1,4 @@ vmlinux* uImage* ramdisk.* -*.dtb +*.dtb* diff --git a/arch/metag/boot/dts/Makefile b/arch/metag/boot/dts/Makefile index dbd95217733a..72c121879426 100644 --- a/arch/metag/boot/dts/Makefile +++ b/arch/metag/boot/dts/Makefile @@ -1,7 +1,9 @@ dtb-y += skeleton.dtb +dtb-y += tz1090_generic.dtb # Built-in dtb builtindtb-y := skeleton +builtindtb-$(CONFIG_SOC_TZ1090) := tz1090_generic ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"") builtindtb-y := $(patsubst "%",%,$(CONFIG_METAG_BUILTIN_DTB_NAME)) diff --git a/arch/metag/boot/dts/include/dt-bindings b/arch/metag/boot/dts/include/dt-bindings new file mode 120000 index 000000000000..08c00e4972fa --- /dev/null +++ b/arch/metag/boot/dts/include/dt-bindings @@ -0,0 +1 @@ +../../../../../include/dt-bindings
\ No newline at end of file diff --git a/arch/metag/boot/dts/skeleton.dts b/arch/metag/boot/dts/skeleton.dts index 7244d1f0d555..7a49aeb365d0 100644 --- a/arch/metag/boot/dts/skeleton.dts +++ b/arch/metag/boot/dts/skeleton.dts @@ -7,4 +7,4 @@ */ /dts-v1/; -/include/ "skeleton.dtsi" +#include "skeleton.dtsi" diff --git a/arch/metag/boot/dts/tz1090.dtsi b/arch/metag/boot/dts/tz1090.dtsi new file mode 100644 index 000000000000..853744652b93 --- /dev/null +++ b/arch/metag/boot/dts/tz1090.dtsi @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "skeleton.dtsi" + +/ { + compatible = "toumaz,tz1090", "img,meta"; + + interrupt-parent = <&intc>; + + intc: interrupt-controller { + compatible = "img,meta-intc"; + interrupt-controller; + #interrupt-cells = <2>; + num-banks = <2>; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + pinctrl: pinctrl@02005800 { + #gpio-range-cells = <3>; + compatible = "img,tz1090-pinctrl"; + reg = <0x02005800 0xe4>; + }; + + pdc_pinctrl: pinctrl@02006500 { + #gpio-range-cells = <3>; + compatible = "img,tz1090-pdc-pinctrl"; + reg = <0x02006500 0x100>; + }; + }; +}; diff --git a/arch/metag/boot/dts/tz1090_generic.dts b/arch/metag/boot/dts/tz1090_generic.dts new file mode 100644 index 000000000000..f96090955964 --- /dev/null +++ b/arch/metag/boot/dts/tz1090_generic.dts @@ -0,0 +1,10 @@ +/* + * Copyright (C) 2012 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +#include "tz1090.dtsi" diff --git a/arch/metag/configs/tz1090_defconfig b/arch/metag/configs/tz1090_defconfig new file mode 100644 index 000000000000..9f9316a6df27 --- /dev/null +++ b/arch/metag/configs/tz1090_defconfig @@ -0,0 +1,42 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_ELF_CORE is not set +CONFIG_SLAB=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_FLATMEM_MANUAL=y +CONFIG_SOC_TZ1090=y +CONFIG_METAG_HALT_ON_PANIC=y +# CONFIG_METAG_FPU is not set +CONFIG_METAG_DA=y +CONFIG_HZ_100=y +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=1 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_DA_TTY=y +CONFIG_DA_CONSOLE=y +# CONFIG_DEVKMEM is not set +# CONFIG_HW_RANDOM is not set +CONFIG_GPIOLIB=y +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_DNOTIFY is not set +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_INFO=y diff --git a/arch/metag/include/asm/bug.h b/arch/metag/include/asm/bug.h index d04b48cefecc..9f8967f10f8c 100644 --- a/arch/metag/include/asm/bug.h +++ b/arch/metag/include/asm/bug.h @@ -6,7 +6,7 @@ struct pt_regs; extern const char *trap_name(int trapno); -extern void die(const char *str, struct pt_regs *regs, long err, - unsigned long addr) __attribute__ ((noreturn)); +extern void __noreturn die(const char *str, struct pt_regs *regs, long err, + unsigned long addr); #endif diff --git a/arch/metag/include/asm/checksum.h b/arch/metag/include/asm/checksum.h index 999bf761a732..08dd1cc65799 100644 --- a/arch/metag/include/asm/checksum.h +++ b/arch/metag/include/asm/checksum.h @@ -64,7 +64,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __wsum sum) { unsigned long len_proto = (proto + len) << 8; - asm ("ADD %0, %0, %1\n" + asm ("ADDS %0, %0, %1\n" + "ADDCS %0, %0, #1\n" "ADDS %0, %0, %2\n" "ADDCS %0, %0, #1\n" "ADDS %0, %0, %3\n" diff --git a/arch/metag/include/asm/clock.h b/arch/metag/include/asm/clock.h index 3e2915a280c7..ded4ab2e1fd0 100644 --- a/arch/metag/include/asm/clock.h +++ b/arch/metag/include/asm/clock.h @@ -19,6 +19,8 @@ * core frequency will be determined like this: * Meta 1: based on loops_per_jiffy. * Meta 2: (EXPAND_TIMER_DIV + 1) MHz. + * If a "core" clock is provided by the device tree, it + * will override this function. */ struct meta_clock_desc { unsigned long (*get_core_freq)(void); @@ -27,6 +29,12 @@ struct meta_clock_desc { extern struct meta_clock_desc _meta_clock; /* + * Perform platform clock initialisation, reading clocks from device tree etc. + * Only accessible during boot. + */ +void init_metag_clocks(void); + +/* * Set up the default clock, ensuring all callbacks are valid - only accessible * during boot. */ diff --git a/arch/metag/include/asm/irq.h b/arch/metag/include/asm/irq.h index be0c8f3c5a5d..ad6bd0edbc3b 100644 --- a/arch/metag/include/asm/irq.h +++ b/arch/metag/include/asm/irq.h @@ -17,6 +17,7 @@ struct pt_regs; int tbisig_map(unsigned int hw); extern void do_IRQ(int irq, struct pt_regs *regs); +extern void init_IRQ(void); #ifdef CONFIG_METAG_SUSPEND_MEM int traps_save_context(void); diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index 9b029a7911c3..f16477d1f571 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -199,4 +199,6 @@ extern void (*soc_halt)(void); extern void show_trace(struct task_struct *tsk, unsigned long *sp, struct pt_regs *regs); +extern const struct seq_operations cpuinfo_op; + #endif diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c index 954548b1bea8..0a2385fa2a1d 100644 --- a/arch/metag/kernel/cachepart.c +++ b/arch/metag/kernel/cachepart.c @@ -100,22 +100,23 @@ void check_for_cache_aliasing(int thread_id) thread_cache_size = get_thread_cache_size(cache_type, thread_id); if (thread_cache_size < 0) - pr_emerg("Can't read %s cache size", \ + pr_emerg("Can't read %s cache size\n", cache_type ? "DCACHE" : "ICACHE"); else if (thread_cache_size == 0) /* Cache is off. No need to check for aliasing */ continue; if (thread_cache_size / CACHE_ASSOCIATIVITY > PAGE_SIZE) { - pr_emerg("Cache aliasing detected in %s on Thread %d", + pr_emerg("Potential cache aliasing detected in %s on Thread %d\n", cache_type ? "DCACHE" : "ICACHE", thread_id); - pr_warn("Total %s size: %u bytes", - cache_type ? "DCACHE" : "ICACHE ", + pr_warn("Total %s size: %u bytes\n", + cache_type ? "DCACHE" : "ICACHE", cache_type ? get_dcache_size() : get_icache_size()); - pr_warn("Thread %s size: %d bytes", + pr_warn("Thread %s size: %d bytes\n", cache_type ? "CACHE" : "ICACHE", thread_cache_size); - pr_warn("Page Size: %lu bytes", PAGE_SIZE); + pr_warn("Page Size: %lu bytes\n", PAGE_SIZE); + panic("Potential cache aliasing detected"); } } } diff --git a/arch/metag/kernel/clock.c b/arch/metag/kernel/clock.c index defc84056f18..6339c9c6d0ab 100644 --- a/arch/metag/kernel/clock.c +++ b/arch/metag/kernel/clock.c @@ -8,8 +8,10 @@ * published by the Free Software Foundation. */ +#include <linux/clk.h> #include <linux/delay.h> #include <linux/io.h> +#include <linux/of.h> #include <asm/param.h> #include <asm/clock.h> @@ -34,8 +36,63 @@ static unsigned long get_core_freq_default(void) #endif } +static struct clk *clk_core; + +/* Clk based get_core_freq callback. */ +static unsigned long get_core_freq_clk(void) +{ + return clk_get_rate(clk_core); +} + +/** + * init_metag_core_clock() - Set up core clock from devicetree. + * + * Checks to see if a "core" clock is provided in the device tree, and overrides + * the get_core_freq callback to use it. + */ +static void __init init_metag_core_clock(void) +{ + /* + * See if a core clock is provided by the devicetree (and + * registered by the init callback above). + */ + struct device_node *node; + node = of_find_compatible_node(NULL, NULL, "img,meta"); + if (!node) { + pr_warn("%s: no compatible img,meta DT node found\n", + __func__); + return; + } + + clk_core = of_clk_get_by_name(node, "core"); + if (IS_ERR(clk_core)) { + pr_warn("%s: no core clock found in DT\n", + __func__); + return; + } + + /* + * Override the core frequency callback to use + * this clk. + */ + _meta_clock.get_core_freq = get_core_freq_clk; +} + +/** + * init_metag_clocks() - Set up clocks from devicetree. + * + * Set up important clocks from device tree. In particular any needed for clock + * sources. + */ +void __init init_metag_clocks(void) +{ + init_metag_core_clock(); + + pr_info("Core clock frequency: %lu Hz\n", get_coreclock()); +} + /** - * setup_meta_clocks() - Set up the Meta clock. + * setup_meta_clocks() - Early set up of the Meta clock. * @desc: Clock descriptor usually provided by machine description * * Ensures all callbacks are valid. diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c index 87707efeb0a3..2a2c9d55187e 100644 --- a/arch/metag/kernel/irq.c +++ b/arch/metag/kernel/irq.c @@ -25,7 +25,7 @@ static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; #endif -struct irq_domain *root_domain; +static struct irq_domain *root_domain; static unsigned int startup_meta_irq(struct irq_data *data) { @@ -279,11 +279,12 @@ static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu) { struct irq_desc *desc = irq_to_desc(irq); struct irq_chip *chip = irq_data_get_irq_chip(data); + unsigned long flags; - raw_spin_lock_irq(&desc->lock); + raw_spin_lock_irqsave(&desc->lock, flags); if (chip->irq_set_affinity) chip->irq_set_affinity(data, cpumask_of(cpu), false); - raw_spin_unlock_irq(&desc->lock); + raw_spin_unlock_irqrestore(&desc->lock, flags); } /* diff --git a/arch/metag/kernel/kick.c b/arch/metag/kernel/kick.c index 50fcbec98cd2..beb377621322 100644 --- a/arch/metag/kernel/kick.c +++ b/arch/metag/kernel/kick.c @@ -26,6 +26,8 @@ * pass it as an argument. */ #include <linux/export.h> +#include <linux/hardirq.h> +#include <linux/irq.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/types.h> @@ -66,6 +68,7 @@ EXPORT_SYMBOL(kick_unregister_func); TBIRES kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI) { + struct pt_regs *old_regs; struct kick_irq_handler *kh; struct list_head *lh; int handled = 0; @@ -79,6 +82,9 @@ kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI) trace_hardirqs_off(); + old_regs = set_irq_regs((struct pt_regs *)State.Sig.pCtx); + irq_enter(); + /* * There is no need to disable interrupts here because we * can't nest KICK interrupts in a KICK interrupt handler. @@ -97,5 +103,8 @@ kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI) WARN_ON(!handled); + irq_exit(); + set_irq_regs(old_regs); + return tail_end(ret); } diff --git a/arch/metag/kernel/metag_ksyms.c b/arch/metag/kernel/metag_ksyms.c index ec872ef14eb1..215c94ad63ac 100644 --- a/arch/metag/kernel/metag_ksyms.c +++ b/arch/metag/kernel/metag_ksyms.c @@ -1,5 +1,7 @@ #include <linux/export.h> +#include <linux/types.h> +#include <asm/checksum.h> #include <asm/div64.h> #include <asm/ftrace.h> #include <asm/page.h> @@ -15,6 +17,9 @@ EXPORT_SYMBOL(max_pfn); EXPORT_SYMBOL(min_low_pfn); #endif +/* Network checksum functions */ +EXPORT_SYMBOL(csum_partial); + /* TBI symbols */ EXPORT_SYMBOL(__TBI); EXPORT_SYMBOL(__TBIFindSeg); diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c index 4f5726f1a55b..c396cd0b425f 100644 --- a/arch/metag/kernel/setup.c +++ b/arch/metag/kernel/setup.c @@ -20,6 +20,7 @@ #include <linux/memblock.h> #include <linux/mm.h> #include <linux/of_fdt.h> +#include <linux/of_platform.h> #include <linux/pfn.h> #include <linux/root_dev.h> #include <linux/sched.h> @@ -424,6 +425,9 @@ static int __init customize_machine(void) /* customizes platform devices, or adds new ones */ if (machine_desc->init_machine) machine_desc->init_machine(); + else + of_platform_populate(NULL, of_default_bus_match_table, NULL, + NULL); return 0; } arch_initcall(customize_machine); @@ -587,20 +591,20 @@ PTBI pTBI_get(unsigned int cpu) EXPORT_SYMBOL(pTBI_get); #if defined(CONFIG_METAG_DSP) && defined(CONFIG_METAG_FPU) -char capabilites[] = "dsp fpu"; +static char capabilities[] = "dsp fpu"; #elif defined(CONFIG_METAG_DSP) -char capabilites[] = "dsp"; +static char capabilities[] = "dsp"; #elif defined(CONFIG_METAG_FPU) -char capabilites[] = "fpu"; +static char capabilities[] = "fpu"; #else -char capabilites[] = ""; +static char capabilities[] = ""; #endif static struct ctl_table caps_kern_table[] = { { .procname = "capabilities", - .data = capabilites, - .maxlen = sizeof(capabilites), + .data = capabilities, + .maxlen = sizeof(capabilities), .mode = 0444, .proc_handler = proc_dostring, }, diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index f443ec9a7cbe..e413875cf6d2 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include <linux/atomic.h> +#include <linux/completion.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/spinlock.h> @@ -62,6 +63,8 @@ static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { static DEFINE_SPINLOCK(boot_lock); +static DECLARE_COMPLETION(cpu_running); + /* * "thread" is assumed to be a valid Meta hardware thread ID. */ @@ -235,20 +238,12 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) */ ret = boot_secondary(thread, idle); if (ret == 0) { - unsigned long timeout; - /* * CPU was successfully started, wait for it * to come online or time out. */ - timeout = jiffies + HZ; - while (time_before(jiffies, timeout)) { - if (cpu_online(cpu)) - break; - - udelay(10); - barrier(); - } + wait_for_completion_timeout(&cpu_running, + msecs_to_jiffies(1000)); if (!cpu_online(cpu)) ret = -EIO; @@ -276,7 +271,6 @@ static DECLARE_COMPLETION(cpu_killed); int __cpuexit __cpu_disable(void) { unsigned int cpu = smp_processor_id(); - struct task_struct *p; /* * Take this CPU offline. Once we clear this, we can't return, @@ -296,12 +290,7 @@ int __cpuexit __cpu_disable(void) flush_cache_all(); local_flush_tlb_all(); - read_lock(&tasklist_lock); - for_each_process(p) { - if (p->mm) - cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); - } - read_unlock(&tasklist_lock); + clear_tasks_mm_cpumask(cpu); return 0; } @@ -385,12 +374,7 @@ asmlinkage void secondary_start_kernel(void) setup_priv(); - /* - * Enable local interrupts. - */ - tbi_startup_interrupt(TBID_SIGNUM_TRT); notify_cpu_starting(cpu); - local_irq_enable(); pr_info("CPU%u (thread %u): Booted secondary processor\n", cpu, cpu_2_hwthread_id[cpu]); @@ -402,12 +386,13 @@ asmlinkage void secondary_start_kernel(void) * OK, now it's safe to let the boot CPU continue */ set_cpu_online(cpu, true); + complete(&cpu_running); /* - * Check for cache aliasing. - * Preemption is disabled + * Enable local interrupts. */ - check_for_cache_aliasing(cpu); + tbi_startup_interrupt(TBID_SIGNUM_TRT); + local_irq_enable(); /* * OK, it's off to the idle thread for us diff --git a/arch/metag/kernel/time.c b/arch/metag/kernel/time.c index 17dc10733b2f..f1c8c53dace7 100644 --- a/arch/metag/kernel/time.c +++ b/arch/metag/kernel/time.c @@ -5,11 +5,21 @@ * */ -#include <linux/init.h> - #include <clocksource/metag_generic.h> +#include <linux/clk-provider.h> +#include <linux/init.h> +#include <asm/clock.h> void __init time_init(void) { +#ifdef CONFIG_COMMON_CLK + /* Init clocks from device tree */ + of_clk_init(NULL); +#endif + + /* Init meta clocks, particularly the core clock */ + init_metag_clocks(); + + /* Set up the timer clock sources */ metag_generic_timer_init(); } diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 2ceeaae5b199..c00ade0228ef 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -33,6 +33,7 @@ #include <asm/siginfo.h> #include <asm/traps.h> #include <asm/hwthread.h> +#include <asm/setup.h> #include <asm/switch.h> #include <asm/user_gateway.h> #include <asm/syscall.h> @@ -87,8 +88,8 @@ const char *trap_name(int trapno) static DEFINE_SPINLOCK(die_lock); -void die(const char *str, struct pt_regs *regs, long err, - unsigned long addr) +void __noreturn die(const char *str, struct pt_regs *regs, + long err, unsigned long addr) { static int die_counter; diff --git a/arch/metag/lib/checksum.c b/arch/metag/lib/checksum.c index 44d2e1913560..5d6a98a05e9d 100644 --- a/arch/metag/lib/checksum.c +++ b/arch/metag/lib/checksum.c @@ -124,7 +124,6 @@ __wsum csum_partial(const void *buff, int len, __wsum wsum) result += 1; return (__force __wsum)result; } -EXPORT_SYMBOL(csum_partial); /* * this routine is used for miscellaneous IP-like checksums, mainly diff --git a/arch/metag/mm/cache.c b/arch/metag/mm/cache.c index b5d3b2e7c160..a62285284ab8 100644 --- a/arch/metag/mm/cache.c +++ b/arch/metag/mm/cache.c @@ -45,7 +45,7 @@ static volatile u32 lnkget_testdata[16] __initdata __aligned(64); #define LNKGET_CONSTANT 0xdeadbeef -void __init metag_lnkget_probe(void) +static void __init metag_lnkget_probe(void) { int temp; long flags; diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 2c75bf7357c5..8fddf46e6c62 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -224,8 +224,10 @@ do_sigbus: */ out_of_memory: up_read(&mm->mmap_sem); - if (user_mode(regs)) - do_group_exit(SIGKILL); + if (user_mode(regs)) { + pagefault_out_of_memory(); + return 1; + } no_context: /* Are we prepared to handle this kernel fault? */ diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index d05b8455c44c..28813f164730 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -376,34 +376,21 @@ void __init paging_init(unsigned long mem_end) void __init mem_init(void) { - int nid; - #ifdef CONFIG_HIGHMEM unsigned long tmp; + + /* + * Explicitly reset zone->managed_pages because highmem pages are + * freed before calling free_all_bootmem(); + */ + reset_all_zones_managed_pages(); for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) free_highmem_page(pfn_to_page(tmp)); - num_physpages += totalhigh_pages; #endif /* CONFIG_HIGHMEM */ - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); - unsigned long node_pages = 0; - - num_physpages += pgdat->node_present_pages; - - if (pgdat->node_spanned_pages) - node_pages = free_all_bootmem_node(pgdat); - - totalram_pages += node_pages; - } - - pr_info("Memory: %luk/%luk available\n", - (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10), - num_physpages << (PAGE_SHIFT - 10)); - + free_all_bootmem(); + mem_init_print_info(NULL); show_mem(0); - - return; } void free_initmem(void) @@ -414,7 +401,8 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 3649a8b150c0..deaf45ab6429 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y @@ -81,6 +80,9 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_SLAB=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_INFO=y +CONFIG_KGDB=y +CONFIG_KGDB_TESTS=y +CONFIG_KGDB_KDB=y CONFIG_EARLY_PRINTK=y CONFIG_KEYS=y CONFIG_ENCRYPTED_KEYS=y diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 85a5ae8e9bd0..fd850879854d 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -168,7 +168,6 @@ extern int page_is_ram(unsigned long pfn); # else /* CONFIG_MMU */ # define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) # define pfn_valid(pfn) ((pfn) < (max_mapnr + ARCH_PFN_OFFSET)) -# define VALID_PAGE(page) ((page - mem_map) < max_mapnr) # endif /* CONFIG_MMU */ # endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 6dece2d002dc..b14232b6878f 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -38,4 +38,7 @@ #define __ARCH_WANT_SYS_FORK #endif /* __ASSEMBLY__ */ + +#define __NR_syscalls 381 + #endif /* _ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/include/uapi/asm/unistd.h b/arch/microblaze/include/uapi/asm/unistd.h index 5f7fe7582f3a..20043b67d158 100644 --- a/arch/microblaze/include/uapi/asm/unistd.h +++ b/arch/microblaze/include/uapi/asm/unistd.h @@ -397,6 +397,4 @@ #define __NR_kcmp 379 #define __NR_finit_module 380 -#define __NR_syscalls 381 - #endif /* _UAPI_ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/kernel/kgdb.c b/arch/microblaze/kernel/kgdb.c index 8adc92443100..09a5e8286137 100644 --- a/arch/microblaze/kernel/kgdb.c +++ b/arch/microblaze/kernel/kgdb.c @@ -141,7 +141,7 @@ void kgdb_arch_exit(void) /* * Global data */ -const struct kgdb_arch arch_kgdb_ops = { +struct kgdb_arch arch_kgdb_ops = { #ifdef __MICROBLAZEEL__ .gdb_bpt_instr = {0x18, 0x00, 0x0c, 0xba}, /* brki r16, 0x18 */ #else diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index b38ae3acfeb4..74c7bcc1e82d 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -71,24 +71,17 @@ static void __init highmem_init(void) kmap_prot = PAGE_KERNEL; } -static unsigned long highmem_setup(void) +static void highmem_setup(void) { unsigned long pfn; - unsigned long reservedpages = 0; for (pfn = max_low_pfn; pfn < max_pfn; ++pfn) { struct page *page = pfn_to_page(pfn); /* FIXME not sure about */ - if (memblock_is_reserved(pfn << PAGE_SHIFT)) - continue; - free_highmem_page(page); - reservedpages++; + if (!memblock_is_reserved(pfn << PAGE_SHIFT)) + free_highmem_page(page); } - pr_info("High memory: %luk\n", - totalhigh_pages << (PAGE_SHIFT-10)); - - return reservedpages; } #endif /* CONFIG_HIGHMEM */ @@ -167,13 +160,12 @@ void __init setup_memory(void) * min_low_pfn - the first page (mm/bootmem.c - node_boot_start) * max_low_pfn * max_mapnr - the first unused page (mm/bootmem.c - node_low_pfn) - * num_physpages - number of all pages */ /* memory start is from the kernel end (aligned) to higher addr */ min_low_pfn = memory_start >> PAGE_SHIFT; /* minimum for allocation */ /* RAM is assumed contiguous */ - num_physpages = max_mapnr = memory_size >> PAGE_SHIFT; + max_mapnr = memory_size >> PAGE_SHIFT; max_low_pfn = ((u64)memory_start + (u64)lowmem_size) >> PAGE_SHIFT; max_pfn = ((u64)memory_start + (u64)memory_size) >> PAGE_SHIFT; @@ -235,57 +227,26 @@ void __init setup_memory(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } void __init mem_init(void) { - pg_data_t *pgdat; - unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - high_memory = (void *)__va(memory_start + lowmem_size - 1); /* this will put all memory onto the freelists */ - totalram_pages += free_all_bootmem(); - - for_each_online_pgdat(pgdat) { - unsigned long i; - struct page *page; - - for (i = 0; i < pgdat->node_spanned_pages; i++) { - if (!pfn_valid(pgdat->node_start_pfn + i)) - continue; - page = pgdat_page_nr(pgdat, i); - if (PageReserved(page)) - reservedpages++; - } - } - + free_all_bootmem(); #ifdef CONFIG_HIGHMEM - reservedpages -= highmem_setup(); + highmem_setup(); #endif - codesize = (unsigned long)&_sdata - (unsigned long)&_stext; - datasize = (unsigned long)&_edata - (unsigned long)&_sdata; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; - bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; - - pr_info("Memory: %luk/%luk available (%luk kernel code, ", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10); - pr_cont("%luk reserved, %luk data, %luk bss, %luk init)\n", - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - bsssize >> 10, - initsize >> 10); - + mem_init_print_info(NULL); #ifdef CONFIG_MMU pr_info("Kernel virtual memory layout:\n"); pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index e433b90507fb..beeff436b22f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -42,6 +42,7 @@ config MIPS select MODULES_USE_ELF_REL if MODULES select MODULES_USE_ELF_RELA if MODULES && 64BIT select CLONE_BACKWARDS + select HAVE_DEBUG_STACKOVERFLOW menu "Machine selection" diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 5a43aa0798ca..37871f0de15e 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -67,15 +67,6 @@ config CMDLINE_OVERRIDE Normally, you will choose 'N' here. -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - help - This option will cause messages to be printed if free stack space - drops below a certain limit(2GB on MIPS). The debugging option - provides another way to check stack overflow happened on kernel mode - stack usually caused by nested interruption. - config SMTC_IDLE_HOOK_DEBUG bool "Enable additional debug checks before going into CPU idle loop" depends on DEBUG_KERNEL && MIPS_MT_SMTC diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index a9505c4867e8..9c0ddafafb6c 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -845,6 +845,10 @@ int __init board_register_devices(void) !bcm63xx_nvram_get_mac_address(board.enet1.mac_addr)) bcm63xx_enet_register(1, &board.enet1); + if (board.has_enetsw && + !bcm63xx_nvram_get_mac_address(board.enetsw.mac_addr)) + bcm63xx_enetsw_register(&board.enetsw); + if (board.has_usbd) bcm63xx_usbd_register(&board.usbd); diff --git a/arch/mips/bcm63xx/dev-enet.c b/arch/mips/bcm63xx/dev-enet.c index 39c23366c5c7..52bc01df9bfe 100644 --- a/arch/mips/bcm63xx/dev-enet.c +++ b/arch/mips/bcm63xx/dev-enet.c @@ -9,16 +9,60 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/platform_device.h> +#include <linux/export.h> #include <bcm63xx_dev_enet.h> #include <bcm63xx_io.h> #include <bcm63xx_regs.h> +#ifdef BCMCPU_RUNTIME_DETECT +static const unsigned long bcm6348_regs_enetdmac[] = { + [ENETDMAC_CHANCFG] = ENETDMAC_CHANCFG_REG, + [ENETDMAC_IR] = ENETDMAC_IR_REG, + [ENETDMAC_IRMASK] = ENETDMAC_IRMASK_REG, + [ENETDMAC_MAXBURST] = ENETDMAC_MAXBURST_REG, +}; + +static const unsigned long bcm6345_regs_enetdmac[] = { + [ENETDMAC_CHANCFG] = ENETDMA_6345_CHANCFG_REG, + [ENETDMAC_IR] = ENETDMA_6345_IR_REG, + [ENETDMAC_IRMASK] = ENETDMA_6345_IRMASK_REG, + [ENETDMAC_MAXBURST] = ENETDMA_6345_MAXBURST_REG, + [ENETDMAC_BUFALLOC] = ENETDMA_6345_BUFALLOC_REG, + [ENETDMAC_RSTART] = ENETDMA_6345_RSTART_REG, + [ENETDMAC_FC] = ENETDMA_6345_FC_REG, + [ENETDMAC_LEN] = ENETDMA_6345_LEN_REG, +}; + +const unsigned long *bcm63xx_regs_enetdmac; +EXPORT_SYMBOL(bcm63xx_regs_enetdmac); + +static __init void bcm63xx_enetdmac_regs_init(void) +{ + if (BCMCPU_IS_6345()) + bcm63xx_regs_enetdmac = bcm6345_regs_enetdmac; + else + bcm63xx_regs_enetdmac = bcm6348_regs_enetdmac; +} +#else +static __init void bcm63xx_enetdmac_regs_init(void) { } +#endif + static struct resource shared_res[] = { { .start = -1, /* filled at runtime */ .end = -1, /* filled at runtime */ .flags = IORESOURCE_MEM, }, + { + .start = -1, /* filled at runtime */ + .end = -1, /* filled at runtime */ + .flags = IORESOURCE_MEM, + }, + { + .start = -1, /* filled at runtime */ + .end = -1, /* filled at runtime */ + .flags = IORESOURCE_MEM, + }, }; static struct platform_device bcm63xx_enet_shared_device = { @@ -94,6 +138,71 @@ static struct platform_device bcm63xx_enet1_device = { }, }; +static struct resource enetsw_res[] = { + { + /* start & end filled at runtime */ + .flags = IORESOURCE_MEM, + }, + { + /* start filled at runtime */ + .flags = IORESOURCE_IRQ, + }, + { + /* start filled at runtime */ + .flags = IORESOURCE_IRQ, + }, +}; + +static struct bcm63xx_enetsw_platform_data enetsw_pd; + +static struct platform_device bcm63xx_enetsw_device = { + .name = "bcm63xx_enetsw", + .num_resources = ARRAY_SIZE(enetsw_res), + .resource = enetsw_res, + .dev = { + .platform_data = &enetsw_pd, + }, +}; + +static int __init register_shared(void) +{ + int ret, chan_count; + + if (shared_device_registered) + return 0; + + bcm63xx_enetdmac_regs_init(); + + shared_res[0].start = bcm63xx_regset_address(RSET_ENETDMA); + shared_res[0].end = shared_res[0].start; + if (BCMCPU_IS_6345()) + shared_res[0].end += (RSET_6345_ENETDMA_SIZE) - 1; + else + shared_res[0].end += (RSET_ENETDMA_SIZE) - 1; + + if (BCMCPU_IS_6328() || BCMCPU_IS_6362() || BCMCPU_IS_6368()) + chan_count = 32; + else if (BCMCPU_IS_6345()) + chan_count = 8; + else + chan_count = 16; + + shared_res[1].start = bcm63xx_regset_address(RSET_ENETDMAC); + shared_res[1].end = shared_res[1].start; + shared_res[1].end += RSET_ENETDMAC_SIZE(chan_count) - 1; + + shared_res[2].start = bcm63xx_regset_address(RSET_ENETDMAS); + shared_res[2].end = shared_res[2].start; + shared_res[2].end += RSET_ENETDMAS_SIZE(chan_count) - 1; + + ret = platform_device_register(&bcm63xx_enet_shared_device); + if (ret) + return ret; + shared_device_registered = 1; + + return 0; +} + int __init bcm63xx_enet_register(int unit, const struct bcm63xx_enet_platform_data *pd) { @@ -104,22 +213,12 @@ int __init bcm63xx_enet_register(int unit, if (unit > 1) return -ENODEV; - if (unit == 1 && BCMCPU_IS_6338()) + if (unit == 1 && (BCMCPU_IS_6338() || BCMCPU_IS_6345())) return -ENODEV; - if (!shared_device_registered) { - shared_res[0].start = bcm63xx_regset_address(RSET_ENETDMA); - shared_res[0].end = shared_res[0].start; - if (BCMCPU_IS_6338()) - shared_res[0].end += (RSET_ENETDMA_SIZE / 2) - 1; - else - shared_res[0].end += (RSET_ENETDMA_SIZE) - 1; - - ret = platform_device_register(&bcm63xx_enet_shared_device); - if (ret) - return ret; - shared_device_registered = 1; - } + ret = register_shared(); + if (ret) + return ret; if (unit == 0) { enet0_res[0].start = bcm63xx_regset_address(RSET_ENET0); @@ -155,8 +254,62 @@ int __init bcm63xx_enet_register(int unit, dpd->phy_interrupt = bcm63xx_get_irq_number(IRQ_ENET_PHY); } + dpd->dma_chan_en_mask = ENETDMAC_CHANCFG_EN_MASK; + dpd->dma_chan_int_mask = ENETDMAC_IR_PKTDONE_MASK; + if (BCMCPU_IS_6345()) { + dpd->dma_chan_en_mask |= ENETDMAC_CHANCFG_CHAINING_MASK; + dpd->dma_chan_en_mask |= ENETDMAC_CHANCFG_WRAP_EN_MASK; + dpd->dma_chan_en_mask |= ENETDMAC_CHANCFG_FLOWC_EN_MASK; + dpd->dma_chan_int_mask |= ENETDMA_IR_BUFDONE_MASK; + dpd->dma_chan_int_mask |= ENETDMA_IR_NOTOWNER_MASK; + dpd->dma_chan_width = ENETDMA_6345_CHAN_WIDTH; + dpd->dma_desc_shift = ENETDMA_6345_DESC_SHIFT; + } else { + dpd->dma_has_sram = true; + dpd->dma_chan_width = ENETDMA_CHAN_WIDTH; + } + ret = platform_device_register(pdev); if (ret) return ret; return 0; } + +int __init +bcm63xx_enetsw_register(const struct bcm63xx_enetsw_platform_data *pd) +{ + int ret; + + if (!BCMCPU_IS_6328() && !BCMCPU_IS_6362() && !BCMCPU_IS_6368()) + return -ENODEV; + + ret = register_shared(); + if (ret) + return ret; + + enetsw_res[0].start = bcm63xx_regset_address(RSET_ENETSW); + enetsw_res[0].end = enetsw_res[0].start; + enetsw_res[0].end += RSET_ENETSW_SIZE - 1; + enetsw_res[1].start = bcm63xx_get_irq_number(IRQ_ENETSW_RXDMA0); + enetsw_res[2].start = bcm63xx_get_irq_number(IRQ_ENETSW_TXDMA0); + if (!enetsw_res[2].start) + enetsw_res[2].start = -1; + + memcpy(bcm63xx_enetsw_device.dev.platform_data, pd, sizeof(*pd)); + + if (BCMCPU_IS_6328()) + enetsw_pd.num_ports = ENETSW_PORTS_6328; + else if (BCMCPU_IS_6362() || BCMCPU_IS_6368()) + enetsw_pd.num_ports = ENETSW_PORTS_6368; + + enetsw_pd.dma_has_sram = true; + enetsw_pd.dma_chan_width = ENETDMA_CHAN_WIDTH; + enetsw_pd.dma_chan_en_mask = ENETDMAC_CHANCFG_EN_MASK; + enetsw_pd.dma_chan_int_mask = ENETDMAC_IR_PKTDONE_MASK; + + ret = platform_device_register(&bcm63xx_enetsw_device); + if (ret) + return ret; + + return 0; +} diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h index 336228990808..e6e65dc7d502 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h @@ -173,7 +173,10 @@ enum bcm63xx_regs_set { #define BCM_6358_RSET_SPI_SIZE 1804 #define BCM_6368_RSET_SPI_SIZE 1804 #define RSET_ENET_SIZE 2048 -#define RSET_ENETDMA_SIZE 2048 +#define RSET_ENETDMA_SIZE 256 +#define RSET_6345_ENETDMA_SIZE 64 +#define RSET_ENETDMAC_SIZE(chans) (16 * (chans)) +#define RSET_ENETDMAS_SIZE(chans) (16 * (chans)) #define RSET_ENETSW_SIZE 65536 #define RSET_UART_SIZE 24 #define RSET_UDC_SIZE 256 @@ -298,7 +301,7 @@ enum bcm63xx_regs_set { #define BCM_6345_USBDMA_BASE (0xfffe2800) #define BCM_6345_ENET0_BASE (0xfffe1800) #define BCM_6345_ENETDMA_BASE (0xfffe2800) -#define BCM_6345_ENETDMAC_BASE (0xfffe2900) +#define BCM_6345_ENETDMAC_BASE (0xfffe2840) #define BCM_6345_ENETDMAS_BASE (0xfffe2a00) #define BCM_6345_ENETSW_BASE (0xdeadbeef) #define BCM_6345_PCMCIA_BASE (0xfffe2028) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_enet.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_enet.h index d53f611184b9..753953e86242 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_enet.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_enet.h @@ -4,6 +4,8 @@ #include <linux/if_ether.h> #include <linux/init.h> +#include <bcm63xx_regs.h> + /* * on board ethernet platform data */ @@ -37,9 +39,129 @@ struct bcm63xx_enet_platform_data { int phy_id, int reg), void (*mii_write)(struct net_device *dev, int phy_id, int reg, int val)); + + /* DMA channel enable mask */ + u32 dma_chan_en_mask; + + /* DMA channel interrupt mask */ + u32 dma_chan_int_mask; + + /* DMA engine has internal SRAM */ + bool dma_has_sram; + + /* DMA channel register width */ + unsigned int dma_chan_width; + + /* DMA descriptor shift */ + unsigned int dma_desc_shift; +}; + +/* + * on board ethernet switch platform data + */ +#define ENETSW_MAX_PORT 8 +#define ENETSW_PORTS_6328 5 /* 4 FE PHY + 1 RGMII */ +#define ENETSW_PORTS_6368 6 /* 4 FE PHY + 2 RGMII */ + +#define ENETSW_RGMII_PORT0 4 + +struct bcm63xx_enetsw_port { + int used; + int phy_id; + + int bypass_link; + int force_speed; + int force_duplex_full; + + const char *name; +}; + +struct bcm63xx_enetsw_platform_data { + char mac_addr[ETH_ALEN]; + int num_ports; + struct bcm63xx_enetsw_port used_ports[ENETSW_MAX_PORT]; + + /* DMA channel enable mask */ + u32 dma_chan_en_mask; + + /* DMA channel interrupt mask */ + u32 dma_chan_int_mask; + + /* DMA channel register width */ + unsigned int dma_chan_width; + + /* DMA engine has internal SRAM */ + bool dma_has_sram; }; int __init bcm63xx_enet_register(int unit, const struct bcm63xx_enet_platform_data *pd); +int bcm63xx_enetsw_register(const struct bcm63xx_enetsw_platform_data *pd); + +enum bcm63xx_regs_enetdmac { + ENETDMAC_CHANCFG, + ENETDMAC_IR, + ENETDMAC_IRMASK, + ENETDMAC_MAXBURST, + ENETDMAC_BUFALLOC, + ENETDMAC_RSTART, + ENETDMAC_FC, + ENETDMAC_LEN, +}; + +static inline unsigned long bcm63xx_enetdmacreg(enum bcm63xx_regs_enetdmac reg) +{ +#ifdef BCMCPU_RUNTIME_DETECT + extern const unsigned long *bcm63xx_regs_enetdmac; + + return bcm63xx_regs_enetdmac[reg]; +#else +#ifdef CONFIG_BCM63XX_CPU_6345 + switch (reg) { + case ENETDMAC_CHANCFG: + return ENETDMA_6345_CHANCFG_REG; + case ENETDMAC_IR: + return ENETDMA_6345_IR_REG; + case ENETDMAC_IRMASK: + return ENETDMA_6345_IRMASK_REG; + case ENETDMAC_MAXBURST: + return ENETDMA_6345_MAXBURST_REG; + case ENETDMAC_BUFALLOC: + return ENETDMA_6345_BUFALLOC_REG; + case ENETDMAC_RSTART: + return ENETDMA_6345_RSTART_REG; + case ENETDMAC_FC: + return ENETDMA_6345_FC_REG; + case ENETDMAC_LEN: + return ENETDMA_6345_LEN_REG; + } +#endif +#if defined(CONFIG_BCM63XX_CPU_6328) || \ + defined(CONFIG_BCM63XX_CPU_6338) || \ + defined(CONFIG_BCM63XX_CPU_6348) || \ + defined(CONFIG_BCM63XX_CPU_6358) || \ + defined(CONFIG_BCM63XX_CPU_6362) || \ + defined(CONFIG_BCM63XX_CPU_6368) + switch (reg) { + case ENETDMAC_CHANCFG: + return ENETDMAC_CHANCFG_REG; + case ENETDMAC_IR: + return ENETDMAC_IR_REG; + case ENETDMAC_IRMASK: + return ENETDMAC_IRMASK_REG; + case ENETDMAC_MAXBURST: + return ENETDMAC_MAXBURST_REG; + case ENETDMAC_BUFALLOC: + case ENETDMAC_RSTART: + case ENETDMAC_FC: + case ENETDMAC_LEN: + return 0; + } +#endif +#endif + return 0; +} + + #endif /* ! BCM63XX_DEV_ENET_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index 3203fe49b34d..eff7ca7d12b0 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h @@ -727,6 +727,8 @@ /************************************************************************* * _REG relative to RSET_ENETDMA *************************************************************************/ +#define ENETDMA_CHAN_WIDTH 0x10 +#define ENETDMA_6345_CHAN_WIDTH 0x40 /* Controller Configuration Register */ #define ENETDMA_CFG_REG (0x0) @@ -782,31 +784,56 @@ /* State Ram Word 4 */ #define ENETDMA_SRAM4_REG(x) (0x20c + (x) * 0x10) +/* Broadcom 6345 ENET DMA definitions */ +#define ENETDMA_6345_CHANCFG_REG (0x00) + +#define ENETDMA_6345_MAXBURST_REG (0x40) + +#define ENETDMA_6345_RSTART_REG (0x08) + +#define ENETDMA_6345_LEN_REG (0x0C) + +#define ENETDMA_6345_IR_REG (0x14) + +#define ENETDMA_6345_IRMASK_REG (0x18) + +#define ENETDMA_6345_FC_REG (0x1C) + +#define ENETDMA_6345_BUFALLOC_REG (0x20) + +/* Shift down for EOP, SOP and WRAP bits */ +#define ENETDMA_6345_DESC_SHIFT (3) /************************************************************************* * _REG relative to RSET_ENETDMAC *************************************************************************/ /* Channel Configuration register */ -#define ENETDMAC_CHANCFG_REG(x) ((x) * 0x10) +#define ENETDMAC_CHANCFG_REG (0x0) #define ENETDMAC_CHANCFG_EN_SHIFT 0 #define ENETDMAC_CHANCFG_EN_MASK (1 << ENETDMAC_CHANCFG_EN_SHIFT) #define ENETDMAC_CHANCFG_PKTHALT_SHIFT 1 #define ENETDMAC_CHANCFG_PKTHALT_MASK (1 << ENETDMAC_CHANCFG_PKTHALT_SHIFT) #define ENETDMAC_CHANCFG_BUFHALT_SHIFT 2 #define ENETDMAC_CHANCFG_BUFHALT_MASK (1 << ENETDMAC_CHANCFG_BUFHALT_SHIFT) +#define ENETDMAC_CHANCFG_CHAINING_SHIFT 2 +#define ENETDMAC_CHANCFG_CHAINING_MASK (1 << ENETDMAC_CHANCFG_CHAINING_SHIFT) +#define ENETDMAC_CHANCFG_WRAP_EN_SHIFT 3 +#define ENETDMAC_CHANCFG_WRAP_EN_MASK (1 << ENETDMAC_CHANCFG_WRAP_EN_SHIFT) +#define ENETDMAC_CHANCFG_FLOWC_EN_SHIFT 4 +#define ENETDMAC_CHANCFG_FLOWC_EN_MASK (1 << ENETDMAC_CHANCFG_FLOWC_EN_SHIFT) /* Interrupt Control/Status register */ -#define ENETDMAC_IR_REG(x) (0x4 + (x) * 0x10) +#define ENETDMAC_IR_REG (0x4) #define ENETDMAC_IR_BUFDONE_MASK (1 << 0) #define ENETDMAC_IR_PKTDONE_MASK (1 << 1) #define ENETDMAC_IR_NOTOWNER_MASK (1 << 2) /* Interrupt Mask register */ -#define ENETDMAC_IRMASK_REG(x) (0x8 + (x) * 0x10) +#define ENETDMAC_IRMASK_REG (0x8) /* Maximum Burst Length */ -#define ENETDMAC_MAXBURST_REG(x) (0xc + (x) * 0x10) +#define ENETDMAC_MAXBURST_REG (0xc) /************************************************************************* @@ -814,26 +841,76 @@ *************************************************************************/ /* Ring Start Address register */ -#define ENETDMAS_RSTART_REG(x) ((x) * 0x10) +#define ENETDMAS_RSTART_REG (0x0) /* State Ram Word 2 */ -#define ENETDMAS_SRAM2_REG(x) (0x4 + (x) * 0x10) +#define ENETDMAS_SRAM2_REG (0x4) /* State Ram Word 3 */ -#define ENETDMAS_SRAM3_REG(x) (0x8 + (x) * 0x10) +#define ENETDMAS_SRAM3_REG (0x8) /* State Ram Word 4 */ -#define ENETDMAS_SRAM4_REG(x) (0xc + (x) * 0x10) +#define ENETDMAS_SRAM4_REG (0xc) /************************************************************************* * _REG relative to RSET_ENETSW *************************************************************************/ +/* Port traffic control */ +#define ENETSW_PTCTRL_REG(x) (0x0 + (x)) +#define ENETSW_PTCTRL_RXDIS_MASK (1 << 0) +#define ENETSW_PTCTRL_TXDIS_MASK (1 << 1) + +/* Switch mode register */ +#define ENETSW_SWMODE_REG (0xb) +#define ENETSW_SWMODE_FWD_EN_MASK (1 << 1) + +/* IMP override Register */ +#define ENETSW_IMPOV_REG (0xe) +#define ENETSW_IMPOV_FORCE_MASK (1 << 7) +#define ENETSW_IMPOV_TXFLOW_MASK (1 << 5) +#define ENETSW_IMPOV_RXFLOW_MASK (1 << 4) +#define ENETSW_IMPOV_1000_MASK (1 << 3) +#define ENETSW_IMPOV_100_MASK (1 << 2) +#define ENETSW_IMPOV_FDX_MASK (1 << 1) +#define ENETSW_IMPOV_LINKUP_MASK (1 << 0) + +/* Port override Register */ +#define ENETSW_PORTOV_REG(x) (0x58 + (x)) +#define ENETSW_PORTOV_ENABLE_MASK (1 << 6) +#define ENETSW_PORTOV_TXFLOW_MASK (1 << 5) +#define ENETSW_PORTOV_RXFLOW_MASK (1 << 4) +#define ENETSW_PORTOV_1000_MASK (1 << 3) +#define ENETSW_PORTOV_100_MASK (1 << 2) +#define ENETSW_PORTOV_FDX_MASK (1 << 1) +#define ENETSW_PORTOV_LINKUP_MASK (1 << 0) + +/* MDIO control register */ +#define ENETSW_MDIOC_REG (0xb0) +#define ENETSW_MDIOC_EXT_MASK (1 << 16) +#define ENETSW_MDIOC_REG_SHIFT 20 +#define ENETSW_MDIOC_PHYID_SHIFT 25 +#define ENETSW_MDIOC_RD_MASK (1 << 30) +#define ENETSW_MDIOC_WR_MASK (1 << 31) + +/* MDIO data register */ +#define ENETSW_MDIOD_REG (0xb4) + +/* Global Management Configuration Register */ +#define ENETSW_GMCR_REG (0x200) +#define ENETSW_GMCR_RST_MIB_MASK (1 << 0) + /* MIB register */ #define ENETSW_MIB_REG(x) (0x2800 + (x) * 4) #define ENETSW_MIB_REG_COUNT 47 +/* Jumbo control register port mask register */ +#define ENETSW_JMBCTL_PORT_REG (0x4004) + +/* Jumbo control mib good frame register */ +#define ENETSW_JMBCTL_MAXSIZE_REG (0x4008) + /************************************************************************* * _REG relative to RSET_OHCI_PRIV diff --git a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h index 682bcf3b492a..d9aee1a833f3 100644 --- a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h +++ b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h @@ -24,6 +24,7 @@ struct board_info { /* enabled feature/device */ unsigned int has_enet0:1; unsigned int has_enet1:1; + unsigned int has_enetsw:1; unsigned int has_pci:1; unsigned int has_pccard:1; unsigned int has_ohci0:1; @@ -36,6 +37,7 @@ struct board_info { /* ethernet config */ struct bcm63xx_enet_platform_data enet0; struct bcm63xx_enet_platform_data enet1; + struct bcm63xx_enetsw_platform_data enetsw; /* USB config */ struct bcm63xx_usbd_platform_data usbd; diff --git a/arch/mips/include/asm/mach-jz4740/dma.h b/arch/mips/include/asm/mach-jz4740/dma.h index 98b4e7c0dbae..509cd5828044 100644 --- a/arch/mips/include/asm/mach-jz4740/dma.h +++ b/arch/mips/include/asm/mach-jz4740/dma.h @@ -16,8 +16,6 @@ #ifndef __ASM_MACH_JZ4740_DMA_H__ #define __ASM_MACH_JZ4740_DMA_H__ -struct jz4740_dma_chan; - enum jz4740_dma_request_type { JZ4740_DMA_TYPE_AUTO_REQUEST = 8, JZ4740_DMA_TYPE_UART_TRANSMIT = 20, @@ -33,58 +31,4 @@ enum jz4740_dma_request_type { JZ4740_DMA_TYPE_SLCD = 30, }; -enum jz4740_dma_width { - JZ4740_DMA_WIDTH_32BIT = 0, - JZ4740_DMA_WIDTH_8BIT = 1, - JZ4740_DMA_WIDTH_16BIT = 2, -}; - -enum jz4740_dma_transfer_size { - JZ4740_DMA_TRANSFER_SIZE_4BYTE = 0, - JZ4740_DMA_TRANSFER_SIZE_1BYTE = 1, - JZ4740_DMA_TRANSFER_SIZE_2BYTE = 2, - JZ4740_DMA_TRANSFER_SIZE_16BYTE = 3, - JZ4740_DMA_TRANSFER_SIZE_32BYTE = 4, -}; - -enum jz4740_dma_flags { - JZ4740_DMA_SRC_AUTOINC = 0x2, - JZ4740_DMA_DST_AUTOINC = 0x1, -}; - -enum jz4740_dma_mode { - JZ4740_DMA_MODE_SINGLE = 0, - JZ4740_DMA_MODE_BLOCK = 1, -}; - -struct jz4740_dma_config { - enum jz4740_dma_width src_width; - enum jz4740_dma_width dst_width; - enum jz4740_dma_transfer_size transfer_size; - enum jz4740_dma_request_type request_type; - enum jz4740_dma_flags flags; - enum jz4740_dma_mode mode; -}; - -typedef void (*jz4740_dma_complete_callback_t)(struct jz4740_dma_chan *, int, void *); - -struct jz4740_dma_chan *jz4740_dma_request(void *dev, const char *name); -void jz4740_dma_free(struct jz4740_dma_chan *dma); - -void jz4740_dma_configure(struct jz4740_dma_chan *dma, - const struct jz4740_dma_config *config); - - -void jz4740_dma_enable(struct jz4740_dma_chan *dma); -void jz4740_dma_disable(struct jz4740_dma_chan *dma); - -void jz4740_dma_set_src_addr(struct jz4740_dma_chan *dma, dma_addr_t src); -void jz4740_dma_set_dst_addr(struct jz4740_dma_chan *dma, dma_addr_t dst); -void jz4740_dma_set_transfer_count(struct jz4740_dma_chan *dma, uint32_t count); - -uint32_t jz4740_dma_get_residue(const struct jz4740_dma_chan *dma); - -void jz4740_dma_set_complete_cb(struct jz4740_dma_chan *dma, - jz4740_dma_complete_callback_t cb); - #endif /* __ASM_JZ4740_DMA_H__ */ diff --git a/arch/mips/include/asm/mach-jz4740/platform.h b/arch/mips/include/asm/mach-jz4740/platform.h index 72cfebdb5a47..05988c2d6565 100644 --- a/arch/mips/include/asm/mach-jz4740/platform.h +++ b/arch/mips/include/asm/mach-jz4740/platform.h @@ -32,6 +32,7 @@ extern struct platform_device jz4740_codec_device; extern struct platform_device jz4740_adc_device; extern struct platform_device jz4740_wdt_device; extern struct platform_device jz4740_pwm_device; +extern struct platform_device jz4740_dma_device; void jz4740_serial_device_register(void); diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 3b211507be7f..6a07992ba6c6 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -92,4 +92,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile index 63bad0e491d0..28e5535dfa9e 100644 --- a/arch/mips/jz4740/Makefile +++ b/arch/mips/jz4740/Makefile @@ -4,7 +4,7 @@ # Object file lists. -obj-y += prom.o irq.o time.o reset.o setup.o dma.o \ +obj-y += prom.o irq.o time.o reset.o setup.o \ gpio.o clock.o platform.o timer.o serial.o obj-$(CONFIG_DEBUG_FS) += clock-debugfs.o diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index be2b3deeef1d..8a5ec0eedeb0 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -438,6 +438,7 @@ static struct platform_device *jz_platform_devices[] __initdata = { &jz4740_rtc_device, &jz4740_adc_device, &jz4740_pwm_device, + &jz4740_dma_device, &qi_lb60_gpio_keys, &qi_lb60_pwm_beeper, &qi_lb60_charger_device, diff --git a/arch/mips/jz4740/clock.c b/arch/mips/jz4740/clock.c index 484d38a0864f..1b5f55426cad 100644 --- a/arch/mips/jz4740/clock.c +++ b/arch/mips/jz4740/clock.c @@ -687,7 +687,7 @@ static struct clk jz4740_clock_simple_clks[] = { [3] = { .name = "dma", .parent = &jz_clk_high_speed_peripheral.clk, - .gate_bit = JZ_CLOCK_GATE_UART0, + .gate_bit = JZ_CLOCK_GATE_DMAC, .ops = &jz_clk_simple_ops, }, [4] = { diff --git a/arch/mips/jz4740/dma.c b/arch/mips/jz4740/dma.c deleted file mode 100644 index 317ec6fffb12..000000000000 --- a/arch/mips/jz4740/dma.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 SoC DMA support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> - -#include <linux/dma-mapping.h> -#include <asm/mach-jz4740/dma.h> -#include <asm/mach-jz4740/base.h> - -#define JZ_REG_DMA_SRC_ADDR(x) (0x00 + (x) * 0x20) -#define JZ_REG_DMA_DST_ADDR(x) (0x04 + (x) * 0x20) -#define JZ_REG_DMA_TRANSFER_COUNT(x) (0x08 + (x) * 0x20) -#define JZ_REG_DMA_REQ_TYPE(x) (0x0C + (x) * 0x20) -#define JZ_REG_DMA_STATUS_CTRL(x) (0x10 + (x) * 0x20) -#define JZ_REG_DMA_CMD(x) (0x14 + (x) * 0x20) -#define JZ_REG_DMA_DESC_ADDR(x) (0x18 + (x) * 0x20) - -#define JZ_REG_DMA_CTRL 0x300 -#define JZ_REG_DMA_IRQ 0x304 -#define JZ_REG_DMA_DOORBELL 0x308 -#define JZ_REG_DMA_DOORBELL_SET 0x30C - -#define JZ_DMA_STATUS_CTRL_NO_DESC BIT(31) -#define JZ_DMA_STATUS_CTRL_DESC_INV BIT(6) -#define JZ_DMA_STATUS_CTRL_ADDR_ERR BIT(4) -#define JZ_DMA_STATUS_CTRL_TRANSFER_DONE BIT(3) -#define JZ_DMA_STATUS_CTRL_HALT BIT(2) -#define JZ_DMA_STATUS_CTRL_COUNT_TERMINATE BIT(1) -#define JZ_DMA_STATUS_CTRL_ENABLE BIT(0) - -#define JZ_DMA_CMD_SRC_INC BIT(23) -#define JZ_DMA_CMD_DST_INC BIT(22) -#define JZ_DMA_CMD_RDIL_MASK (0xf << 16) -#define JZ_DMA_CMD_SRC_WIDTH_MASK (0x3 << 14) -#define JZ_DMA_CMD_DST_WIDTH_MASK (0x3 << 12) -#define JZ_DMA_CMD_INTERVAL_LENGTH_MASK (0x7 << 8) -#define JZ_DMA_CMD_BLOCK_MODE BIT(7) -#define JZ_DMA_CMD_DESC_VALID BIT(4) -#define JZ_DMA_CMD_DESC_VALID_MODE BIT(3) -#define JZ_DMA_CMD_VALID_IRQ_ENABLE BIT(2) -#define JZ_DMA_CMD_TRANSFER_IRQ_ENABLE BIT(1) -#define JZ_DMA_CMD_LINK_ENABLE BIT(0) - -#define JZ_DMA_CMD_FLAGS_OFFSET 22 -#define JZ_DMA_CMD_RDIL_OFFSET 16 -#define JZ_DMA_CMD_SRC_WIDTH_OFFSET 14 -#define JZ_DMA_CMD_DST_WIDTH_OFFSET 12 -#define JZ_DMA_CMD_TRANSFER_SIZE_OFFSET 8 -#define JZ_DMA_CMD_MODE_OFFSET 7 - -#define JZ_DMA_CTRL_PRIORITY_MASK (0x3 << 8) -#define JZ_DMA_CTRL_HALT BIT(3) -#define JZ_DMA_CTRL_ADDRESS_ERROR BIT(2) -#define JZ_DMA_CTRL_ENABLE BIT(0) - - -static void __iomem *jz4740_dma_base; -static spinlock_t jz4740_dma_lock; - -static inline uint32_t jz4740_dma_read(size_t reg) -{ - return readl(jz4740_dma_base + reg); -} - -static inline void jz4740_dma_write(size_t reg, uint32_t val) -{ - writel(val, jz4740_dma_base + reg); -} - -static inline void jz4740_dma_write_mask(size_t reg, uint32_t val, uint32_t mask) -{ - uint32_t val2; - val2 = jz4740_dma_read(reg); - val2 &= ~mask; - val2 |= val; - jz4740_dma_write(reg, val2); -} - -struct jz4740_dma_chan { - unsigned int id; - void *dev; - const char *name; - - enum jz4740_dma_flags flags; - uint32_t transfer_shift; - - jz4740_dma_complete_callback_t complete_cb; - - unsigned used:1; -}; - -#define JZ4740_DMA_CHANNEL(_id) { .id = _id } - -struct jz4740_dma_chan jz4740_dma_channels[] = { - JZ4740_DMA_CHANNEL(0), - JZ4740_DMA_CHANNEL(1), - JZ4740_DMA_CHANNEL(2), - JZ4740_DMA_CHANNEL(3), - JZ4740_DMA_CHANNEL(4), - JZ4740_DMA_CHANNEL(5), -}; - -struct jz4740_dma_chan *jz4740_dma_request(void *dev, const char *name) -{ - unsigned int i; - struct jz4740_dma_chan *dma = NULL; - - spin_lock(&jz4740_dma_lock); - - for (i = 0; i < ARRAY_SIZE(jz4740_dma_channels); ++i) { - if (!jz4740_dma_channels[i].used) { - dma = &jz4740_dma_channels[i]; - dma->used = 1; - break; - } - } - - spin_unlock(&jz4740_dma_lock); - - if (!dma) - return NULL; - - dma->dev = dev; - dma->name = name; - - return dma; -} -EXPORT_SYMBOL_GPL(jz4740_dma_request); - -void jz4740_dma_configure(struct jz4740_dma_chan *dma, - const struct jz4740_dma_config *config) -{ - uint32_t cmd; - - switch (config->transfer_size) { - case JZ4740_DMA_TRANSFER_SIZE_2BYTE: - dma->transfer_shift = 1; - break; - case JZ4740_DMA_TRANSFER_SIZE_4BYTE: - dma->transfer_shift = 2; - break; - case JZ4740_DMA_TRANSFER_SIZE_16BYTE: - dma->transfer_shift = 4; - break; - case JZ4740_DMA_TRANSFER_SIZE_32BYTE: - dma->transfer_shift = 5; - break; - default: - dma->transfer_shift = 0; - break; - } - - cmd = config->flags << JZ_DMA_CMD_FLAGS_OFFSET; - cmd |= config->src_width << JZ_DMA_CMD_SRC_WIDTH_OFFSET; - cmd |= config->dst_width << JZ_DMA_CMD_DST_WIDTH_OFFSET; - cmd |= config->transfer_size << JZ_DMA_CMD_TRANSFER_SIZE_OFFSET; - cmd |= config->mode << JZ_DMA_CMD_MODE_OFFSET; - cmd |= JZ_DMA_CMD_TRANSFER_IRQ_ENABLE; - - jz4740_dma_write(JZ_REG_DMA_CMD(dma->id), cmd); - jz4740_dma_write(JZ_REG_DMA_STATUS_CTRL(dma->id), 0); - jz4740_dma_write(JZ_REG_DMA_REQ_TYPE(dma->id), config->request_type); -} -EXPORT_SYMBOL_GPL(jz4740_dma_configure); - -void jz4740_dma_set_src_addr(struct jz4740_dma_chan *dma, dma_addr_t src) -{ - jz4740_dma_write(JZ_REG_DMA_SRC_ADDR(dma->id), src); -} -EXPORT_SYMBOL_GPL(jz4740_dma_set_src_addr); - -void jz4740_dma_set_dst_addr(struct jz4740_dma_chan *dma, dma_addr_t dst) -{ - jz4740_dma_write(JZ_REG_DMA_DST_ADDR(dma->id), dst); -} -EXPORT_SYMBOL_GPL(jz4740_dma_set_dst_addr); - -void jz4740_dma_set_transfer_count(struct jz4740_dma_chan *dma, uint32_t count) -{ - count >>= dma->transfer_shift; - jz4740_dma_write(JZ_REG_DMA_TRANSFER_COUNT(dma->id), count); -} -EXPORT_SYMBOL_GPL(jz4740_dma_set_transfer_count); - -void jz4740_dma_set_complete_cb(struct jz4740_dma_chan *dma, - jz4740_dma_complete_callback_t cb) -{ - dma->complete_cb = cb; -} -EXPORT_SYMBOL_GPL(jz4740_dma_set_complete_cb); - -void jz4740_dma_free(struct jz4740_dma_chan *dma) -{ - dma->dev = NULL; - dma->complete_cb = NULL; - dma->used = 0; -} -EXPORT_SYMBOL_GPL(jz4740_dma_free); - -void jz4740_dma_enable(struct jz4740_dma_chan *dma) -{ - jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), - JZ_DMA_STATUS_CTRL_NO_DESC | JZ_DMA_STATUS_CTRL_ENABLE, - JZ_DMA_STATUS_CTRL_HALT | JZ_DMA_STATUS_CTRL_NO_DESC | - JZ_DMA_STATUS_CTRL_ENABLE); - - jz4740_dma_write_mask(JZ_REG_DMA_CTRL, - JZ_DMA_CTRL_ENABLE, - JZ_DMA_CTRL_HALT | JZ_DMA_CTRL_ENABLE); -} -EXPORT_SYMBOL_GPL(jz4740_dma_enable); - -void jz4740_dma_disable(struct jz4740_dma_chan *dma) -{ - jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0, - JZ_DMA_STATUS_CTRL_ENABLE); -} -EXPORT_SYMBOL_GPL(jz4740_dma_disable); - -uint32_t jz4740_dma_get_residue(const struct jz4740_dma_chan *dma) -{ - uint32_t residue; - residue = jz4740_dma_read(JZ_REG_DMA_TRANSFER_COUNT(dma->id)); - return residue << dma->transfer_shift; -} -EXPORT_SYMBOL_GPL(jz4740_dma_get_residue); - -static void jz4740_dma_chan_irq(struct jz4740_dma_chan *dma) -{ - (void) jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id)); - - jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0, - JZ_DMA_STATUS_CTRL_ENABLE | JZ_DMA_STATUS_CTRL_TRANSFER_DONE); - - if (dma->complete_cb) - dma->complete_cb(dma, 0, dma->dev); -} - -static irqreturn_t jz4740_dma_irq(int irq, void *dev_id) -{ - uint32_t irq_status; - unsigned int i; - - irq_status = readl(jz4740_dma_base + JZ_REG_DMA_IRQ); - - for (i = 0; i < 6; ++i) { - if (irq_status & (1 << i)) - jz4740_dma_chan_irq(&jz4740_dma_channels[i]); - } - - return IRQ_HANDLED; -} - -static int jz4740_dma_init(void) -{ - unsigned int ret; - - jz4740_dma_base = ioremap(JZ4740_DMAC_BASE_ADDR, 0x400); - - if (!jz4740_dma_base) - return -EBUSY; - - spin_lock_init(&jz4740_dma_lock); - - ret = request_irq(JZ4740_IRQ_DMAC, jz4740_dma_irq, 0, "DMA", NULL); - - if (ret) - printk(KERN_ERR "JZ4740 DMA: Failed to request irq: %d\n", ret); - - return ret; -} -arch_initcall(jz4740_dma_init); diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c index e9348fd26a35..df65677f3d0b 100644 --- a/arch/mips/jz4740/platform.c +++ b/arch/mips/jz4740/platform.c @@ -329,3 +329,24 @@ struct platform_device jz4740_pwm_device = { .name = "jz4740-pwm", .id = -1, }; + +/* DMA */ +static struct resource jz4740_dma_resources[] = { + { + .start = JZ4740_DMAC_BASE_ADDR, + .end = JZ4740_DMAC_BASE_ADDR + 0x400 - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = JZ4740_IRQ_DMAC, + .end = JZ4740_IRQ_DMAC, + .flags = IORESOURCE_IRQ, + }, +}; + +struct platform_device jz4740_dma_device = { + .name = "jz4740-dma", + .id = -1, + .num_resources = ARRAY_SIZE(jz4740_dma_resources), + .resource = jz4740_dma_resources, +}; diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c index 3be9e7bb30ff..f291cf99b03a 100644 --- a/arch/mips/kernel/crash_dump.c +++ b/arch/mips/kernel/crash_dump.c @@ -4,16 +4,6 @@ #include <asm/uaccess.h> #include <linux/slab.h> -static int __init parse_savemaxmem(char *p) -{ - if (p) - saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; - - return 1; -} -__setup("savemaxmem=", parse_savemaxmem); - - static void *kdump_buf_page; /** diff --git a/arch/mips/loongson/lemote-2f/clock.c b/arch/mips/loongson/lemote-2f/clock.c index bc739d4bab2e..4dc2f5fa3f67 100644 --- a/arch/mips/loongson/lemote-2f/clock.c +++ b/arch/mips/loongson/lemote-2f/clock.c @@ -121,7 +121,8 @@ int clk_set_rate(struct clk *clk, unsigned long rate) clk->rate = rate; regval = LOONGSON_CHIPCFG0; - regval = (regval & ~0x7) | (loongson2_clockmod_table[i].index - 1); + regval = (regval & ~0x7) | + (loongson2_clockmod_table[i].driver_data - 1); LOONGSON_CHIPCFG0 = regval; return ret; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9b973e0af9cb..4e73f10a7519 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -359,11 +359,24 @@ void __init paging_init(void) static struct kcore_list kcore_kseg0; #endif -void __init mem_init(void) +static inline void mem_init_free_highmem(void) { - unsigned long codesize, reservedpages, datasize, initsize; - unsigned long tmp, ram; +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { + struct page *page = pfn_to_page(tmp); + + if (!page_is_ram(tmp)) + SetPageReserved(page); + else + free_highmem_page(page); + } +#endif +} + +void __init mem_init(void) +{ #ifdef CONFIG_HIGHMEM #ifdef CONFIG_DISCONTIGMEM #error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet" @@ -374,34 +387,10 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ - - reservedpages = ram = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - if (page_is_ram(tmp) && pfn_valid(tmp)) { - ram++; - if (PageReserved(pfn_to_page(tmp))) - reservedpages++; - } - num_physpages = ram; - -#ifdef CONFIG_HIGHMEM - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - if (!page_is_ram(tmp)) { - SetPageReserved(page); - continue; - } - free_highmem_page(page); - } - num_physpages += totalhigh_pages; -#endif - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + mem_init_free_highmem(); + mem_init_print_info(NULL); #ifdef CONFIG_64BIT if ((unsigned long) &_text > (unsigned long) CKSEG0) @@ -410,16 +399,6 @@ void __init mem_init(void) kclist_add(&kcore_kseg0, (void *) CKSEG0, 0x80000000 - 4, KCORE_TEXT); #endif - - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - ram << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ @@ -440,7 +419,8 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 879077b01155..cb1ef9984069 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -89,7 +89,7 @@ static inline u32 ltq_calc_bar11mask(void) u32 mem, bar11mask; /* BAR11MASK value depends on available memory on system. */ - mem = num_physpages * PAGE_SIZE; + mem = get_num_physpages() * PAGE_SIZE; bar11mask = (0x0ffffff0 & ~((1 << (fls(mem) - 1)) - 1)) | 8; return bar11mask; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 1230f56429d7..a95c00f5fb96 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -357,8 +357,6 @@ static void __init szmem(void) int slot; cnodeid_t node; - num_physpages = 0; - for_each_online_node(node) { nodebytes = 0; for (slot = 0; slot < MAX_MEM_SLOTS; slot++) { @@ -381,7 +379,6 @@ static void __init szmem(void) slot = MAX_MEM_SLOTS; continue; } - num_physpages += slot_psize; memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)), PFN_PHYS(slot_psize), node); } @@ -480,32 +477,8 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, datasize, initsize, tmp; - unsigned node; - - high_memory = (void *) __va(num_physpages << PAGE_SHIFT); - - for_each_online_node(node) { - /* - * This will free up the bootmem, ie, slot 0 memory. - */ - totalram_pages += free_all_bootmem_node(NODE_DATA(node)); - } - + high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); + free_all_bootmem(); setup_zero_pages(); /* This comes from node 0 */ - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - tmp = nr_free_pages(); - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", - tmp << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - (num_physpages - tmp) << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); } diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index 729a50991780..b7eccbd17bf7 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -331,7 +331,8 @@ static int tx4939_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + if (event == NETDEV_CHANGE && netif_carrier_ok(dev)) { __u64 bit = 0; if (dev->irq == TXX9_IRQ_BASE + TX4939_IR_ETH(0)) diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 428da175d073..70e4f663ebd2 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -13,6 +13,7 @@ config MN10300 select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION + select HAVE_DEBUG_STACKOVERFLOW config AM33_2 def_bool n diff --git a/arch/mn10300/Kconfig.debug b/arch/mn10300/Kconfig.debug index bdbfd444a9ff..94efb3ed223f 100644 --- a/arch/mn10300/Kconfig.debug +++ b/arch/mn10300/Kconfig.debug @@ -2,10 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - config DEBUG_DECOMPRESS_KERNEL bool "Using serial port during decompressing kernel" depends on DEBUG_KERNEL diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index b4ce844c9391..db80fd3e398b 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -74,4 +74,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index d48a84fd7fae..8a2e6ded9a44 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -345,9 +345,10 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - printk(KERN_ALERT "VM: killing process %s\n", tsk->comm); - if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) - do_exit(SIGKILL); + if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) { + pagefault_out_of_memory(); + return; + } goto no_context; do_sigbus: diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index 5a8ace63a6b4..97a1ec0beeec 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -99,43 +99,21 @@ void __init paging_init(void) */ void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int tmp; - BUG_ON(!mem_map); #define START_PFN (contig_page_data.bdata->node_min_pfn) #define MAX_LOW_PFN (contig_page_data.bdata->node_low_pfn) - max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN; + max_mapnr = MAX_LOW_PFN - START_PFN; high_memory = (void *) __va(MAX_LOW_PFN * PAGE_SIZE); /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - - reservedpages = 0; - for (tmp = 0; tmp < num_physpages; tmp++) - if (PageReserved(&mem_map[tmp])) - reservedpages++; - - codesize = (unsigned long) &_etext - (unsigned long) &_stext; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO - "Memory: %luk/%luk available" - " (%dk kernel code, %dk reserved, %dk data, %dk init," - " %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT - 10), - max_mapnr << (PAGE_SHIFT - 10), - codesize >> 10, - reservedpages << (PAGE_SHIFT - 10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT - 10)); + free_all_bootmem(); + + mem_init_print_info(NULL); } /* @@ -152,6 +130,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 1072bfd18c50..99dbab1c59ac 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -22,6 +22,7 @@ config OPENRISC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select MODULES_USE_ELF_RELA + select HAVE_DEBUG_STACKOVERFLOW config MMU def_bool y @@ -128,16 +129,6 @@ config CMDLINE menu "Debugging options" -config DEBUG_STACKOVERFLOW - bool "Check for kernel stack overflow" - default y - help - Make extra checks for space available on stack in some - critical functions. This will cause kernel to run a bit slower, - but will catch most of kernel stack overruns and exit gracefully. - - Say Y if you are unsure. - config JUMP_UPON_UNHANDLED_EXCEPTION bool "Try to die gracefully" default y diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index f20d01d9aaf9..195653e851da 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -66,3 +66,4 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += word-at-a-time.h +generic-y += xor.h diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index e2bfafce66c5..4a41f8493ab0 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -267,10 +267,10 @@ out_of_memory: __asm__ __volatile__("l.nop 1"); up_read(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); - if (user_mode(regs)) - do_exit(SIGKILL); - goto no_context; + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; do_sigbus: up_read(&mm->mmap_sem); diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index b3cbc6703837..7f94652311d7 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -202,56 +202,20 @@ void __init paging_init(void) /* References to section boundaries */ -static int __init free_pages_init(void) -{ - int reservedpages, pfn; - - /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); - - reservedpages = 0; - for (pfn = 0; pfn < max_low_pfn; pfn++) { - /* - * Only count reserved RAM pages - */ - if (PageReserved(mem_map + pfn)) - reservedpages++; - } - - return reservedpages; -} - -static void __init set_max_mapnr_init(void) -{ - max_mapnr = num_physpages = max_low_pfn; -} - void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - BUG_ON(!mem_map); - set_max_mapnr_init(); - + max_mapnr = max_low_pfn; high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* clear the zero-page */ memset((void *)empty_zero_page, 0, PAGE_SIZE); - reservedpages = free_pages_init(); - - codesize = (unsigned long)&_etext - (unsigned long)&_stext; - datasize = (unsigned long)&_edata - (unsigned long)&_etext; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; + /* this will put all low memory onto the freelists */ + free_all_bootmem(); - printk(KERN_INFO - "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10), - max_mapnr << (PAGE_SHIFT - 10), codesize >> 10, - reservedpages << (PAGE_SHIFT - 10), datasize >> 10, - initsize >> 10, (unsigned long)(0 << (PAGE_SHIFT - 10)) - ); + mem_init_print_info(NULL); printk("mem_init_done ...........................................\n"); mem_init_done = 1; @@ -261,11 +225,11 @@ void __init mem_init(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 2a2aea5aae5b..aa399a5259b6 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -27,6 +27,7 @@ config PARISC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS select TTY # Needed for pdc_cons.c + select HAVE_DEBUG_STACKOVERFLOW help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index 08a332f6ee87..bc989e522a04 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -13,14 +13,3 @@ config DEBUG_RODATA If in doubt, say "N". endmenu - -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - default y - depends on DEBUG_KERNEL - ---help--- - Say Y here if you want to check the overflows of kernel, IRQ - and exception stacks. This option will cause messages of the - stacks in detail when free stack space drops below a certain - limit. - If in doubt, say "N". diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 96ec3982be8d..e02f665f804a 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -17,6 +17,8 @@ # Mike Shaver, Helge Deller and Martin K. Petersen # +KBUILD_IMAGE := vmlinuz + KBUILD_DEFCONFIG := default_defconfig NM = sh $(srctree)/arch/parisc/nm @@ -92,7 +94,7 @@ PALOCONF := $(shell if [ -f $(src)/palo.conf ]; then echo $(src)/palo.conf; \ else echo $(obj)/palo.conf; \ fi) -palo: vmlinux +palo: vmlinuz @if test ! -x "$(PALO)"; then \ echo 'ERROR: Please install palo first (apt-get install palo)';\ echo 'or build it from source and install it somewhere in your $$PATH';\ @@ -107,10 +109,14 @@ palo: vmlinux fi $(PALO) -f $(PALOCONF) -# Shorthands for known targets not supported by parisc, use vmlinux as default -Image zImage bzImage: vmlinux +# Shorthands for known targets not supported by parisc, use vmlinux/vmlinuz as default +Image: vmlinux +zImage bzImage: vmlinuz + +vmlinuz: vmlinux + @gzip -cf -9 $< > $@ -install: vmlinux +install: vmlinuz sh $(src)/arch/parisc/install.sh \ $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)" @@ -119,6 +125,7 @@ MRPROPER_FILES += palo.conf define archhelp @echo '* vmlinux - Uncompressed kernel image (./vmlinux)' + @echo ' vmlinuz - Compressed kernel image (./vmlinuz)' @echo ' palo - Bootable image (./lifimage)' @echo ' install - Install kernel using' @echo ' (your) ~/bin/$(INSTALLKERNEL) or' diff --git a/arch/parisc/defpalo.conf b/arch/parisc/defpalo.conf index 4e1ae25b08d1..208ff3b41487 100644 --- a/arch/parisc/defpalo.conf +++ b/arch/parisc/defpalo.conf @@ -4,7 +4,7 @@ # Most people using 'make palo' want a bootable file, usable for # network or tape booting for example. --init-tape=lifimage ---recoverykernel=vmlinux +--recoverykernel=vmlinuz ########## Pick your ROOT here! ########## # You need at least one 'root='! @@ -12,10 +12,10 @@ # If you want a root ramdisk, use the next 2 lines # (Edit the ramdisk image name!!!!) --ramdisk=ram-disk-image-file ---commandline=0/vmlinux HOME=/ root=/dev/ram initrd=0/ramdisk +--commandline=0/vmlinuz HOME=/ root=/dev/ram initrd=0/ramdisk panic_timeout=60 panic=-1 # If you want NFS root, use the following command line (Edit the HOSTNAME!!!) -#--commandline=0/vmlinux HOME=/ root=/dev/nfs nfsroot=HOSTNAME ip=bootp +#--commandline=0/vmlinuz HOME=/ root=/dev/nfs nfsroot=HOSTNAME ip=bootp # If you have root on a disk partition, use this (Edit the partition name!!!) -#--commandline=0/vmlinux HOME=/ root=/dev/sda1 +#--commandline=0/vmlinuz HOME=/ root=/dev/sda1 diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index d306b75bc77f..e1509308899f 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -32,9 +32,12 @@ static inline void set_eiem(unsigned long val) cr; \ }) -#define mtsp(gr, cr) \ - __asm__ __volatile__("mtsp %0,%1" \ +#define mtsp(val, cr) \ + { if (__builtin_constant_p(val) && ((val) == 0)) \ + __asm__ __volatile__("mtsp %%r0,%0" : : "i" (cr) : "memory"); \ + else \ + __asm__ __volatile__("mtsp %0,%1" \ : /* no outputs */ \ - : "r" (gr), "i" (cr) : "memory") + : "r" (val), "i" (cr) : "memory"); } #endif /* __PARISC_SPECIAL_INSNS_H */ diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 5273da991e06..9d086a599fa0 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -63,13 +63,14 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - unsigned long flags; + unsigned long flags, sid; /* For one page, it's not worth testing the split_tlb variable */ mb(); - mtsp(vma->vm_mm->context,1); + sid = vma->vm_mm->context; purge_tlb_start(flags); + mtsp(sid, 1); pdtlb(addr); pitlb(addr); purge_tlb_end(flags); diff --git a/arch/parisc/include/uapi/asm/fcntl.h b/arch/parisc/include/uapi/asm/fcntl.h index 0304b92ccfea..cc61c475f277 100644 --- a/arch/parisc/include/uapi/asm/fcntl.h +++ b/arch/parisc/include/uapi/asm/fcntl.h @@ -20,6 +20,7 @@ #define O_INVISIBLE 004000000 /* invisible I/O, for DMAPI/XDSM */ #define O_PATH 020000000 +#define O_TMPFILE 040000000 #define F_GETLK64 8 #define F_SETLK64 9 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 70c512a386f7..f866fff9a004 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -73,6 +73,8 @@ #define SO_SELECT_ERR_QUEUE 0x4026 +#define SO_LL 0x4027 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh index e593fc8d58bc..4da682b466d0 100644 --- a/arch/parisc/install.sh +++ b/arch/parisc/install.sh @@ -26,13 +26,13 @@ if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install -if [ -f $4/vmlinux ]; then - mv $4/vmlinux $4/vmlinux.old +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old fi if [ -f $4/System.map ]; then mv $4/System.map $4/System.old fi -cat $2 > $4/vmlinux +cat $2 > $4/vmlinuz cp $3 $4/System.map diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 65fb4cbc3a0f..2e65aa54bd10 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -440,8 +440,8 @@ void __flush_tlb_range(unsigned long sid, unsigned long start, else { unsigned long flags; - mtsp(sid, 1); purge_tlb_start(flags); + mtsp(sid, 1); if (split_tlb) { while (npages--) { pdtlb(start); diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index c8fb61ed32f4..8a96c8ab9fe6 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -371,10 +371,23 @@ show_cpuinfo (struct seq_file *m, void *v) seq_printf(m, "capabilities\t:"); if (boot_cpu_data.pdc.capabilities & PDC_MODEL_OS32) - seq_printf(m, " os32"); + seq_puts(m, " os32"); if (boot_cpu_data.pdc.capabilities & PDC_MODEL_OS64) - seq_printf(m, " os64"); - seq_printf(m, "\n"); + seq_puts(m, " os64"); + if (boot_cpu_data.pdc.capabilities & PDC_MODEL_IOPDIR_FDC) + seq_puts(m, " iopdir_fdc"); + switch (boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) { + case PDC_MODEL_NVA_SUPPORTED: + seq_puts(m, " nva_supported"); + break; + case PDC_MODEL_NVA_SLOW: + seq_puts(m, " nva_slow"); + break; + case PDC_MODEL_NVA_UNSUPPORTED: + seq_puts(m, " needs_equivalent_aliasing"); + break; + } + seq_printf(m, " (0x%02lx)\n", boot_cpu_data.pdc.capabilities); seq_printf(m, "model\t\t: %s\n" "model name\t: %s\n", diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index a49cc812df8a..ac4370b1ca40 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -2,6 +2,7 @@ * Optimized memory copy routines. * * Copyright (C) 2004 Randolph Chung <tausq@debian.org> + * Copyright (C) 2013 Helge Deller <deller@gmx.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -153,17 +154,21 @@ static inline void prefetch_dst(const void *addr) #define prefetch_dst(addr) do { } while(0) #endif +#define PA_MEMCPY_OK 0 +#define PA_MEMCPY_LOAD_ERROR 1 +#define PA_MEMCPY_STORE_ERROR 2 + /* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words * per loop. This code is derived from glibc. */ -static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len) +static inline unsigned long copy_dstaligned(unsigned long dst, + unsigned long src, unsigned long len) { /* gcc complains that a2 and a3 may be uninitialized, but actually * they cannot be. Initialize a2/a3 to shut gcc up. */ register unsigned int a0, a1, a2 = 0, a3 = 0; int sh_1, sh_2; - struct exception_data *d; /* prefetch_src((const void *)src); */ @@ -197,7 +202,7 @@ static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src goto do2; case 0: if (len == 0) - return 0; + return PA_MEMCPY_OK; /* a3 = ((unsigned int *) src)[0]; a0 = ((unsigned int *) src)[1]; */ ldw(s_space, 0, src, a3, cda_ldw_exc); @@ -256,42 +261,35 @@ do0: preserve_branch(handle_load_error); preserve_branch(handle_store_error); - return 0; + return PA_MEMCPY_OK; handle_load_error: __asm__ __volatile__ ("cda_ldw_exc:\n"); - d = &__get_cpu_var(exception_data); - DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n", - o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src); - return o_len * 4 - d->fault_addr + o_src; + return PA_MEMCPY_LOAD_ERROR; handle_store_error: __asm__ __volatile__ ("cda_stw_exc:\n"); - d = &__get_cpu_var(exception_data); - DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n", - o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst); - return o_len * 4 - d->fault_addr + o_dst; + return PA_MEMCPY_STORE_ERROR; } -/* Returns 0 for success, otherwise, returns number of bytes not transferred. */ -static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) +/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR. + * In case of an access fault the faulty address can be read from the per_cpu + * exception data struct. */ +static unsigned long pa_memcpy_internal(void *dstp, const void *srcp, + unsigned long len) { register unsigned long src, dst, t1, t2, t3; register unsigned char *pcs, *pcd; register unsigned int *pws, *pwd; register double *pds, *pdd; - unsigned long ret = 0; - unsigned long o_dst, o_src, o_len; - struct exception_data *d; + unsigned long ret; src = (unsigned long)srcp; dst = (unsigned long)dstp; pcs = (unsigned char *)srcp; pcd = (unsigned char *)dstp; - o_dst = dst; o_src = src; o_len = len; - /* prefetch_src((const void *)srcp); */ if (len < THRESHOLD) @@ -401,7 +399,7 @@ byte_copy: len--; } - return 0; + return PA_MEMCPY_OK; unaligned_copy: /* possibly we are aligned on a word, but not on a double... */ @@ -438,8 +436,7 @@ unaligned_copy: src = (unsigned long)pcs; } - ret = copy_dstaligned(dst, src, len / sizeof(unsigned int), - o_dst, o_src, o_len); + ret = copy_dstaligned(dst, src, len / sizeof(unsigned int)); if (ret) return ret; @@ -454,17 +451,41 @@ unaligned_copy: handle_load_error: __asm__ __volatile__ ("pmc_load_exc:\n"); - d = &__get_cpu_var(exception_data); - DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n", - o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src); - return o_len - d->fault_addr + o_src; + return PA_MEMCPY_LOAD_ERROR; handle_store_error: __asm__ __volatile__ ("pmc_store_exc:\n"); + return PA_MEMCPY_STORE_ERROR; +} + + +/* Returns 0 for success, otherwise, returns number of bytes not transferred. */ +static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) +{ + unsigned long ret, fault_addr, reference; + struct exception_data *d; + + ret = pa_memcpy_internal(dstp, srcp, len); + if (likely(ret == PA_MEMCPY_OK)) + return 0; + + /* if a load or store fault occured we can get the faulty addr */ d = &__get_cpu_var(exception_data); - DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n", - o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst); - return o_len - d->fault_addr + o_dst; + fault_addr = d->fault_addr; + + /* error in load or store? */ + if (ret == PA_MEMCPY_LOAD_ERROR) + reference = (unsigned long) srcp; + else + reference = (unsigned long) dstp; + + DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n", + ret, len, fault_addr, reference); + + if (fault_addr >= reference) + return len - (fault_addr - reference); + else + return len; } #ifdef __KERNEL__ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 505b56c6b9b9..b0f96c0e6316 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -214,7 +214,6 @@ static void __init setup_bootmem(void) mem_limit_func(); /* check for "mem=" argument */ mem_max = 0; - num_physpages = 0; for (i = 0; i < npmem_ranges; i++) { unsigned long rsize; @@ -229,10 +228,8 @@ static void __init setup_bootmem(void) npmem_ranges = i + 1; mem_max = mem_limit; } - num_physpages += pmem_ranges[i].pages; break; } - num_physpages += pmem_ranges[i].pages; mem_max += rsize; } @@ -532,7 +529,7 @@ void free_initmem(void) * pages are no-longer executable */ flush_icache_range(init_begin, init_end); - num_physpages += free_initmem_default(0); + free_initmem_default(-1); /* set up a new led state on systems shipped LED State panel */ pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE); @@ -580,8 +577,6 @@ unsigned long pcxl_dma_start __read_mostly; void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - /* Do sanity checks on page table constants */ BUILD_BUG_ON(PTE_ENTRY_SIZE != sizeof(pte_t)); BUILD_BUG_ON(PMD_ENTRY_SIZE != sizeof(pmd_t)); @@ -590,45 +585,8 @@ void __init mem_init(void) > BITS_PER_LONG); high_memory = __va((max_pfn << PAGE_SHIFT)); - -#ifndef CONFIG_DISCONTIGMEM - max_mapnr = page_to_pfn(virt_to_page(high_memory - 1)) + 1; - totalram_pages += free_all_bootmem(); -#else - { - int i; - - for (i = 0; i < npmem_ranges; i++) - totalram_pages += free_all_bootmem_node(NODE_DATA(i)); - } -#endif - - codesize = (unsigned long)_etext - (unsigned long)_text; - datasize = (unsigned long)_edata - (unsigned long)_etext; - initsize = (unsigned long)__init_end - (unsigned long)__init_begin; - - reservedpages = 0; -{ - unsigned long pfn; -#ifdef CONFIG_DISCONTIGMEM - int i; - - for (i = 0; i < npmem_ranges; i++) { - for (pfn = node_start_pfn(i); pfn < node_end_pfn(i); pfn++) { - if (PageReserved(pfn_to_page(pfn))) - reservedpages++; - } - } -#else /* !CONFIG_DISCONTIGMEM */ - for (pfn = 0; pfn < max_pfn; pfn++) { - /* - * Only count reserved RAM pages - */ - if (PageReserved(pfn_to_page(pfn))) - reservedpages++; - } -#endif -} + set_max_mapnr(page_to_pfn(virt_to_page(high_memory - 1)) + 1); + free_all_bootmem(); #ifdef CONFIG_PA11 if (hppa_dma_ops == &pcxl_dma_ops) { @@ -643,15 +601,7 @@ void __init mem_init(void) parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START); #endif - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10 - ); - + mem_init_print_info(NULL); #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ printk("virtual kernel memory layout:\n" " vmalloc : 0x%p - 0x%p (%4ld MB)\n" @@ -1101,6 +1051,6 @@ void flush_tlb_all(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 508e3fe934d2..3bf72cd2c8fc 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -138,6 +138,7 @@ config PPC select ARCH_USE_BUILTIN_BSWAP select OLD_SIGSUSPEND select OLD_SIGACTION if PPC32 + select HAVE_DEBUG_STACKOVERFLOW config EARLY_PRINTK bool @@ -298,7 +299,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE config MATH_EMULATION bool "Math emulation" - depends on 4xx || 8xx || E200 || PPC_MPC832x || E500 + depends on 4xx || 8xx || PPC_MPC832x || BOOKE ---help--- Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the @@ -307,6 +308,10 @@ config MATH_EMULATION unit, which will allow programs that use floating-point instructions to run. + This is also useful to emulate missing (optional) instructions + such as fsqrt on cores that do have an FPU but do not implement + them (such as Freescale BookE). + config PPC_TRANSACTIONAL_MEM bool "Transactional Memory support for POWERPC" depends on PPC_BOOK3S_64 @@ -315,17 +320,6 @@ config PPC_TRANSACTIONAL_MEM ---help--- Support user-mode Transactional Memory on POWERPC. -config 8XX_MINIMAL_FPEMU - bool "Minimal math emulation for 8xx" - depends on 8xx && !MATH_EMULATION - help - Older arch/ppc kernels still emulated a few floating point - instructions such as load and store, even when full math - emulation is disabled. Say "Y" here if you want to preserve - this behavior. - - It is recommended that you build a soft-float userspace instead. - config IOMMU_HELPER def_bool PPC64 @@ -674,7 +668,6 @@ config SBUS config FSL_SOC bool - select HAVE_CAN_FLEXCAN if NET && CAN config FSL_PCI bool diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 863d877e0b5f..21c9f304e96c 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -28,13 +28,6 @@ config PRINT_STACK_DEPTH too small and stack traces cause important information to scroll off the screen. -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - help - This option will cause messages to be printed if free stack space - drops below a certain limit. - config HCALL_STATS bool "Hypervisor call instrumentation" depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS @@ -147,6 +140,13 @@ choice enable debugging for the wrong type of machine your kernel _will not boot_. +config PPC_EARLY_DEBUG_BOOTX + bool "BootX or OpenFirmware" + depends on BOOTX_TEXT + help + Select this to enable early debugging for a machine using BootX + or OpenFirmware. + config PPC_EARLY_DEBUG_LPAR bool "LPAR HV Console" depends on PPC_PSERIES diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index b801dd06e573..d2c8a872308e 100644 --- a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts @@ -103,6 +103,11 @@ interrupts = <34 2>; }; + FPGA0: fpga@50000000 { + compatible = "ibm,currituck-fpga"; + reg = <0x50000000 0x4>; + }; + IIC0: i2c@00000000 { compatible = "ibm,iic-currituck", "ibm,iic"; reg = <0x0 0x00000014>; diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi new file mode 100644 index 000000000000..9cffccf4e07e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi @@ -0,0 +1,156 @@ +/* T4240 Interlaken LAC Portal device tree stub with 24 portals. + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#address-cells = <0x1>; +#size-cells = <0x1>; +compatible = "fsl,interlaken-lac-portals"; + +lportal0: lac-portal@0 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x0 0x1000>; +}; + +lportal1: lac-portal@1000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x1000 0x1000>; +}; + +lportal2: lac-portal@2000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x2000 0x1000>; +}; + +lportal3: lac-portal@3000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x3000 0x1000>; +}; + +lportal4: lac-portal@4000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x4000 0x1000>; +}; + +lportal5: lac-portal@5000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x5000 0x1000>; +}; + +lportal6: lac-portal@6000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x6000 0x1000>; +}; + +lportal7: lac-portal@7000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x7000 0x1000>; +}; + +lportal8: lac-portal@8000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x8000 0x1000>; +}; + +lportal9: lac-portal@9000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x9000 0x1000>; +}; + +lportal10: lac-portal@A000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0xA000 0x1000>; +}; + +lportal11: lac-portal@B000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0xB000 0x1000>; +}; + +lportal12: lac-portal@C000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0xC000 0x1000>; +}; + +lportal13: lac-portal@D000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0xD000 0x1000>; +}; + +lportal14: lac-portal@E000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0xE000 0x1000>; +}; + +lportal15: lac-portal@F000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0xF000 0x1000>; +}; + +lportal16: lac-portal@10000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x10000 0x1000>; +}; + +lportal17: lac-portal@11000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x11000 0x1000>; +}; + +lportal18: lac-portal@1200 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x12000 0x1000>; +}; + +lportal19: lac-portal@13000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x13000 0x1000>; +}; + +lportal20: lac-portal@14000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x14000 0x1000>; +}; + +lportal21: lac-portal@15000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x15000 0x1000>; +}; + +lportal22: lac-portal@16000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x16000 0x1000>; +}; + +lportal23: lac-portal@17000 { + compatible = "fsl,interlaken-lac-portal-v1.0"; + reg = <0x17000 0x1000>; +}; diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi new file mode 100644 index 000000000000..e8208720ac0e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi @@ -0,0 +1,45 @@ +/* + * T4 Interlaken Look-aside Controller (LAC) device tree stub + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +lac: lac@229000 { + compatible = "fsl,interlaken-lac"; + reg = <0x229000 0x1000>; + interrupts = <16 2 1 18>; +}; + +lac-hv@228000 { + compatible = "fsl,interlaken-lac-hv"; + reg = <0x228000 0x1000>; + fsl,non-hv-node = <&lac>; +}; diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 2a84fd7f631c..671a8f960afa 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -423,6 +423,8 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 07b7f2af2dca..1ea22fc24ea8 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -284,6 +284,8 @@ CONFIG_DEBUG_MUTEXES=y CONFIG_LATENCYTOP=y CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_ECB=m diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 02ac96b679b8..2a5afac29861 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -138,6 +138,8 @@ CONFIG_DEBUG_STACK_USAGE=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index 0d0d981442fd..ee853a1b1b2c 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -1,7 +1,6 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y -CONFIG_SPARSE_IRQ=y +CONFIG_NO_HZ=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set @@ -9,6 +8,7 @@ CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_IOSCHED_CFQ is not set # CONFIG_PPC_CHRP is not set CONFIG_PPC_MPC512x=y @@ -16,9 +16,7 @@ CONFIG_MPC5121_ADS=y CONFIG_MPC512x_GENERIC=y CONFIG_PDM360NG=y # CONFIG_PPC_PMAC is not set -CONFIG_NO_HZ=y CONFIG_HZ_1000=y -# CONFIG_MIGRATION is not set # CONFIG_SECCOMP is not set # CONFIG_PCI is not set CONFIG_NET=y @@ -33,8 +31,6 @@ CONFIG_IP_PNP=y # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_CAN=y -CONFIG_CAN_RAW=y -CONFIG_CAN_BCM=y CONFIG_CAN_VCAN=y CONFIG_CAN_MSCAN=y CONFIG_CAN_DEBUG_DEVICES=y @@ -46,7 +42,6 @@ CONFIG_DEVTMPFS_MOUNT=y # CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -60,7 +55,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=1 CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_BLK_DEV_XIP=y -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_AT25=y CONFIG_SCSI=y @@ -68,6 +62,7 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_NETDEVICES=y +CONFIG_FS_ENET=y CONFIG_MARVELL_PHY=y CONFIG_DAVICOM_PHY=y CONFIG_QSEMI_PHY=y @@ -83,10 +78,6 @@ CONFIG_STE10XP=y CONFIG_LSI_ET1011C_PHY=y CONFIG_FIXED_PHY=y CONFIG_MDIO_BITBANG=y -CONFIG_NET_ETHERNET=y -CONFIG_FS_ENET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y @@ -106,14 +97,18 @@ CONFIG_GPIO_SYSFS=y CONFIG_GPIO_MPC8XXX=y # CONFIG_HWMON is not set CONFIG_MEDIA_SUPPORT=y -CONFIG_VIDEO_DEV=y CONFIG_VIDEO_ADV_DEBUG=y -# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set -CONFIG_VIDEO_SAA711X=y CONFIG_FB=y CONFIG_FB_FSL_DIU=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_FSL=y +# CONFIG_USB_EHCI_HCD_PPC_OF is not set +CONFIG_USB_STORAGE=y +CONFIG_USB_GADGET=y +CONFIG_USB_FSL_USB2=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y CONFIG_RTC_DRV_MPC5121=y @@ -129,9 +124,7 @@ CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 165e6b32baef..152fa05b15e4 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -131,6 +131,7 @@ CONFIG_DUMMY=y CONFIG_FS_ENET=y CONFIG_UCC_GETH=y CONFIG_GIANFAR=y +CONFIG_E1000E=y CONFIG_MARVELL_PHY=y CONFIG_DAVICOM_PHY=y CONFIG_CICADA_PHY=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 29767a8dfea5..a73626b09051 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -350,6 +350,8 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index aef3f71de5ad..c86fcb92358e 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -398,6 +398,8 @@ CONFIG_FTR_FIXUP_SELFTEST=y CONFIG_MSI_BITMAP_SELFTEST=y CONFIG_XMON=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index be1cb6ea3a36..20ebfaf7234b 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1264,6 +1264,8 @@ CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_XMON=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index c4dfbaf8b192..bea8587c3af5 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -296,6 +296,7 @@ CONFIG_SQUASHFS=m CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y +CONFIG_PSTORE=y CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index a80e32b46c11..09a8743143f3 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -24,6 +24,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/string.h> +#include <linux/time.h> struct pci_dev; struct pci_bus; @@ -52,6 +53,7 @@ struct device_node; #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ +#define EEH_PE_PHB_DEAD (1 << 2) /* Dead PHB */ struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ @@ -59,8 +61,10 @@ struct eeh_pe { int config_addr; /* Traditional PCI address */ int addr; /* PE configuration address */ struct pci_controller *phb; /* Associated PHB */ + struct pci_bus *bus; /* Top PCI bus for bus PE */ int check_count; /* Times of ignored error */ int freeze_count; /* Times of froze up */ + struct timeval tstamp; /* Time on first-time freeze */ int false_positives; /* Times of reported #ff's */ struct eeh_pe *parent; /* Parent PE */ struct list_head child_list; /* Link PE to the child list */ @@ -95,12 +99,12 @@ struct eeh_dev { static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev) { - return edev->dn; + return edev ? edev->dn : NULL; } static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) { - return edev->pdev; + return edev ? edev->pdev : NULL; } /* @@ -130,8 +134,9 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) struct eeh_ops { char *name; int (*init)(void); + int (*post_init)(void); void* (*of_probe)(struct device_node *dn, void *flag); - void* (*dev_probe)(struct pci_dev *dev, void *flag); + int (*dev_probe)(struct pci_dev *dev, void *flag); int (*set_option)(struct eeh_pe *pe, int option); int (*get_pe_addr)(struct eeh_pe *pe); int (*get_state)(struct eeh_pe *pe, int *state); @@ -141,11 +146,12 @@ struct eeh_ops { int (*configure_bridge)(struct eeh_pe *pe); int (*read_config)(struct device_node *dn, int where, int size, u32 *val); int (*write_config)(struct device_node *dn, int where, int size, u32 val); + int (*next_error)(struct eeh_pe **pe); }; extern struct eeh_ops *eeh_ops; extern int eeh_subsystem_enabled; -extern struct mutex eeh_mutex; +extern raw_spinlock_t confirm_error_lock; extern int eeh_probe_mode; #define EEH_PROBE_MODE_DEV (1<<0) /* From PCI device */ @@ -166,14 +172,14 @@ static inline int eeh_probe_mode_dev(void) return (eeh_probe_mode == EEH_PROBE_MODE_DEV); } -static inline void eeh_lock(void) +static inline void eeh_serialize_lock(unsigned long *flags) { - mutex_lock(&eeh_mutex); + raw_spin_lock_irqsave(&confirm_error_lock, *flags); } -static inline void eeh_unlock(void) +static inline void eeh_serialize_unlock(unsigned long flags) { - mutex_unlock(&eeh_mutex); + raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } /* @@ -184,8 +190,11 @@ static inline void eeh_unlock(void) typedef void *(*eeh_traverse_func)(void *data, void *flag); int eeh_phb_pe_create(struct pci_controller *phb); +struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); +struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); int eeh_add_to_parent_pe(struct eeh_dev *edev); int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe); +void eeh_pe_update_time_stamp(struct eeh_pe *pe); void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); @@ -193,12 +202,13 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); void *eeh_dev_init(struct device_node *dn, void *data); void eeh_dev_phb_init_dynamic(struct pci_controller *phb); +int eeh_init(void); int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val); int eeh_dev_check_failure(struct eeh_dev *edev); -void __init eeh_addr_cache_build(void); +void eeh_addr_cache_build(void); void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); @@ -221,6 +231,11 @@ void eeh_remove_bus_device(struct pci_dev *, int); #else /* !CONFIG_EEH */ +static inline int eeh_init(void) +{ + return 0; +} + static inline void *eeh_dev_init(struct device_node *dn, void *data) { return NULL; @@ -245,9 +260,6 @@ static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { } -static inline void eeh_lock(void) { } -static inline void eeh_unlock(void) { } - #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) #endif /* CONFIG_EEH */ diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index de67d830151b..89d5670b2eeb 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -31,7 +31,9 @@ struct eeh_event { struct eeh_pe *pe; /* EEH PE */ }; +int eeh_event_init(void); int eeh_send_failure_event(struct eeh_pe *pe); +void eeh_remove_event(struct eeh_pe *pe); void eeh_handle_event(struct eeh_pe *pe); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 46793b58a761..07ca627e52c0 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -358,12 +358,12 @@ label##_relon_pSeries: \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_STD, KVMTEST_PR, vec) + EXC_STD, NOTEST, vec) #define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ .globl label##_relon_pSeries; \ label##_relon_pSeries: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD) #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ @@ -374,12 +374,12 @@ label##_relon_hv: \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_HV, KVMTEST, vec) + EXC_HV, NOTEST, vec) #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ .globl label##_relon_hv; \ label##_relon_hv: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV) /* This associate vector numbers with bits in paca->irq_happened */ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index f2498c8e595d..d750336b171d 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -191,8 +191,14 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { } -#endif /* CONFIG_HUGETLB_PAGE */ +#define hugepd_shift(x) 0 +static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, + unsigned pdshift) +{ + return 0; +} +#endif /* CONFIG_HUGETLB_PAGE */ /* * FSL Book3E platforms require special gpage handling - the gpages diff --git a/arch/powerpc/include/asm/ibmebus.h b/arch/powerpc/include/asm/ibmebus.h index 1a9d9aea21fa..088f95b2e14f 100644 --- a/arch/powerpc/include/asm/ibmebus.h +++ b/arch/powerpc/include/asm/ibmebus.h @@ -48,8 +48,8 @@ extern struct bus_type ibmebus_bus_type; -int ibmebus_register_driver(struct of_platform_driver *drv); -void ibmebus_unregister_driver(struct of_platform_driver *drv); +int ibmebus_register_driver(struct platform_driver *drv); +void ibmebus_unregister_driver(struct platform_driver *drv); int ibmebus_request_irq(u32 ist, irq_handler_t handler, unsigned long irq_flags, const char *devname, diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index cbfe678e3dbe..c34656a8925e 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -76,6 +76,9 @@ struct iommu_table { struct iommu_pool large_pool; struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ +#ifdef CONFIG_IOMMU_API + struct iommu_group *it_group; +#endif }; struct scatterlist; @@ -98,6 +101,8 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); +extern void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, unsigned long pe_num); extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, @@ -125,13 +130,6 @@ extern void iommu_init_early_pSeries(void); extern void iommu_init_early_dart(void); extern void iommu_init_early_pasemi(void); -#ifdef CONFIG_PCI -extern void pci_iommu_init(void); -extern void pci_direct_iommu_init(void); -#else -static inline void pci_iommu_init(void) { } -#endif - extern void alloc_dart_table(void); #if defined(CONFIG_PPC64) && defined(CONFIG_PM) static inline void iommu_save(void) @@ -147,5 +145,26 @@ static inline void iommu_restore(void) } #endif +/* The API to support IOMMU operations for VFIO */ +extern int iommu_tce_clear_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce_value, + unsigned long npages); +extern int iommu_tce_put_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce); +extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, + unsigned long hwaddr, enum dma_data_direction direction); +extern unsigned long iommu_clear_tce(struct iommu_table *tbl, + unsigned long entry); +extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, + unsigned long entry, unsigned long pages); +extern int iommu_put_tce_user_mode(struct iommu_table *tbl, + unsigned long entry, unsigned long tce); + +extern void iommu_flush_tce(struct iommu_table *tbl); +extern int iommu_take_ownership(struct iommu_table *tbl); +extern void iommu_release_ownership(struct iommu_table *tbl); + +extern enum dma_data_direction iommu_tce_direction(unsigned long tce); + #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 349ed85c7d61..08891d07aeb6 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -107,8 +107,9 @@ struct kvmppc_vcpu_book3s { #define CONTEXT_GUEST 1 #define CONTEXT_GUEST_END 2 -#define VSID_REAL 0x1fffffffffc00000ULL -#define VSID_BAT 0x1fffffffffb00000ULL +#define VSID_REAL 0x0fffffffffc00000ULL +#define VSID_BAT 0x0fffffffffb00000ULL +#define VSID_1T 0x1000000000000000ULL #define VSID_REAL_DR 0x2000000000000000ULL #define VSID_REAL_IR 0x4000000000000000ULL #define VSID_PR 0x8000000000000000ULL @@ -123,6 +124,7 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); +extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long addr, diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 9c1ff330c805..a1ecb14e4442 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -159,36 +159,46 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) } /* - * Lock and read a linux PTE. If it's present and writable, atomically - * set dirty and referenced bits and return the PTE, otherwise return 0. + * If it's present and writable, atomically set dirty and referenced bits and + * return the PTE, otherwise return 0. If we find a transparent hugepage + * and if it is marked splitting we return 0; */ -static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing) +static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing, + unsigned int hugepage) { - pte_t pte, tmp; - - /* wait until _PAGE_BUSY is clear then set it atomically */ - __asm__ __volatile__ ( - "1: ldarx %0,0,%3\n" - " andi. %1,%0,%4\n" - " bne- 1b\n" - " ori %1,%0,%4\n" - " stdcx. %1,0,%3\n" - " bne- 1b" - : "=&r" (pte), "=&r" (tmp), "=m" (*p) - : "r" (p), "i" (_PAGE_BUSY) - : "cc"); - - if (pte_present(pte)) { - pte = pte_mkyoung(pte); - if (writing && pte_write(pte)) - pte = pte_mkdirty(pte); - } + pte_t old_pte, new_pte = __pte(0); + + while (1) { + old_pte = pte_val(*ptep); + /* + * wait until _PAGE_BUSY is clear then set it atomically + */ + if (unlikely(old_pte & _PAGE_BUSY)) { + cpu_relax(); + continue; + } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + /* If hugepage and is trans splitting return None */ + if (unlikely(hugepage && + pmd_trans_splitting(pte_pmd(old_pte)))) + return __pte(0); +#endif + /* If pte is not present return None */ + if (unlikely(!(old_pte & _PAGE_PRESENT))) + return __pte(0); - *p = pte; /* clears _PAGE_BUSY */ + new_pte = pte_mkyoung(old_pte); + if (writing && pte_write(old_pte)) + new_pte = pte_mkdirty(new_pte); - return pte; + if (old_pte == __cmpxchg_u64((unsigned long *)ptep, old_pte, + new_pte)) + break; + } + return new_pte; } + /* Return HPTE cache control bits corresponding to Linux pte bits */ static inline unsigned long hpte_cache_bits(unsigned long pte_val) { diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index b1e7f2af1016..9b12f88d4adb 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -66,7 +66,8 @@ struct lppaca { u8 reserved6[48]; u8 cede_latency_hint; - u8 reserved7[7]; + u8 ebb_regs_in_use; + u8 reserved7[6]; u8 dtl_enable_mask; /* Dispatch Trace Log mask */ u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */ u8 fpregs_in_use; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 92386fc4e82a..8b480901165a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -36,13 +36,13 @@ struct machdep_calls { #ifdef CONFIG_PPC64 void (*hpte_invalidate)(unsigned long slot, unsigned long vpn, - int psize, int ssize, - int local); + int bpsize, int apsize, + int ssize, int local); long (*hpte_updatepp)(unsigned long slot, unsigned long newpp, unsigned long vpn, - int psize, int ssize, - int local); + int bpsize, int apsize, + int ssize, int local); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea, int psize, int ssize); @@ -57,6 +57,9 @@ struct machdep_calls { void (*hpte_removebolted)(unsigned long ea, int psize, int ssize); void (*flush_hash_range)(unsigned long number, int local); + void (*hugepage_invalidate)(struct mm_struct *mm, + unsigned char *hpte_slot_array, + unsigned long addr, int psize); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 2accc9611248..c4cf01197273 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -340,6 +340,20 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, unsigned int shift, unsigned int mmu_psize); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern int __hash_page_thp(unsigned long ea, unsigned long access, + unsigned long vsid, pmd_t *pmdp, unsigned long trap, + int local, int ssize, unsigned int psize); +#else +static inline int __hash_page_thp(unsigned long ea, unsigned long access, + unsigned long vsid, pmd_t *pmdp, + unsigned long trap, int local, + int ssize, unsigned int psize) +{ + BUG(); + return -1; +} +#endif extern void hash_failure_debug(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long trap, int ssize, int psize, int lpsize, diff --git a/arch/powerpc/include/asm/mpc5121.h b/arch/powerpc/include/asm/mpc5121.h index 885c040d6194..8ae133eaf9fa 100644 --- a/arch/powerpc/include/asm/mpc5121.h +++ b/arch/powerpc/include/asm/mpc5121.h @@ -68,6 +68,5 @@ struct mpc512x_lpc { }; int mpc512x_cs_config(unsigned int cs, u32 val); -int __init mpc5121_clk_init(void); #endif /* __ASM_POWERPC_MPC5121_H__ */ diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index c0f9ef90f0b8..4a1ac9fbf186 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -339,6 +339,8 @@ struct mpic #endif }; +extern struct bus_type mpic_subsys; + /* * MPIC flags (passed to mpic_alloc) * @@ -393,6 +395,9 @@ struct mpic #define MPIC_REGSET_STANDARD MPIC_REGSET(0) /* Original MPIC */ #define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ +/* Get the version of primary MPIC */ +extern u32 fsl_mpic_primary_get_version(void); + /* Allocate the controller structure and setup the linux irq descs * for the range if interrupts passed in. No HW initialization is * actually performed. diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h new file mode 100644 index 000000000000..0e23cd4ac8aa --- /dev/null +++ b/arch/powerpc/include/asm/mpic_timer.h @@ -0,0 +1,46 @@ +/* + * arch/powerpc/include/asm/mpic_timer.h + * + * Header file for Mpic Global Timer + * + * Copyright 2013 Freescale Semiconductor, Inc. + * + * Author: Wang Dongsheng <Dongsheng.Wang@freescale.com> + * Li Yang <leoli@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __MPIC_TIMER__ +#define __MPIC_TIMER__ + +#include <linux/interrupt.h> +#include <linux/time.h> + +struct mpic_timer { + void *dev; + struct cascade_priv *cascade_handle; + unsigned int num; + unsigned int irq; +}; + +#ifdef CONFIG_MPIC_TIMER +struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, + const struct timeval *time); +void mpic_start_timer(struct mpic_timer *handle); +void mpic_stop_timer(struct mpic_timer *handle); +void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time); +void mpic_free_timer(struct mpic_timer *handle); +#else +struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, + const struct timeval *time) { return NULL; } +void mpic_start_timer(struct mpic_timer *handle) { } +void mpic_stop_timer(struct mpic_timer *handle) { } +void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { } +void mpic_free_timer(struct mpic_timer *handle) { } +#endif + +#endif diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index cbb9305ab15a..029fe85722aa 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -117,7 +117,13 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_SET_SLOT_LED_STATUS 55 #define OPAL_GET_EPOW_STATUS 56 #define OPAL_SET_SYSTEM_ATTENTION_LED 57 +#define OPAL_RESERVED1 58 +#define OPAL_RESERVED2 59 +#define OPAL_PCI_NEXT_ERROR 60 +#define OPAL_PCI_EEH_FREEZE_STATUS2 61 +#define OPAL_PCI_POLL 62 #define OPAL_PCI_MSI_EOI 63 +#define OPAL_PCI_GET_PHB_DIAG_DATA2 64 #ifndef __ASSEMBLY__ @@ -125,6 +131,7 @@ extern int opal_enter_rtas(struct rtas_args *args, enum OpalVendorApiTokens { OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999 }; + enum OpalFreezeState { OPAL_EEH_STOPPED_NOT_FROZEN = 0, OPAL_EEH_STOPPED_MMIO_FREEZE = 1, @@ -134,55 +141,69 @@ enum OpalFreezeState { OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5, OPAL_EEH_STOPPED_PERM_UNAVAIL = 6 }; + enum OpalEehFreezeActionToken { OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1, OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3 }; + enum OpalPciStatusToken { - OPAL_EEH_PHB_NO_ERROR = 0, - OPAL_EEH_PHB_FATAL = 1, - OPAL_EEH_PHB_RECOVERABLE = 2, - OPAL_EEH_PHB_BUS_ERROR = 3, - OPAL_EEH_PCI_NO_DEVSEL = 4, - OPAL_EEH_PCI_TA = 5, - OPAL_EEH_PCIEX_UR = 6, - OPAL_EEH_PCIEX_CA = 7, - OPAL_EEH_PCI_MMIO_ERROR = 8, - OPAL_EEH_PCI_DMA_ERROR = 9 + OPAL_EEH_NO_ERROR = 0, + OPAL_EEH_IOC_ERROR = 1, + OPAL_EEH_PHB_ERROR = 2, + OPAL_EEH_PE_ERROR = 3, + OPAL_EEH_PE_MMIO_ERROR = 4, + OPAL_EEH_PE_DMA_ERROR = 5 }; + +enum OpalPciErrorSeverity { + OPAL_EEH_SEV_NO_ERROR = 0, + OPAL_EEH_SEV_IOC_DEAD = 1, + OPAL_EEH_SEV_PHB_DEAD = 2, + OPAL_EEH_SEV_PHB_FENCED = 3, + OPAL_EEH_SEV_PE_ER = 4, + OPAL_EEH_SEV_INF = 5 +}; + enum OpalShpcAction { OPAL_SHPC_GET_LINK_STATE = 0, OPAL_SHPC_GET_SLOT_STATE = 1 }; + enum OpalShpcLinkState { OPAL_SHPC_LINK_DOWN = 0, OPAL_SHPC_LINK_UP = 1 }; + enum OpalMmioWindowType { OPAL_M32_WINDOW_TYPE = 1, OPAL_M64_WINDOW_TYPE = 2, OPAL_IO_WINDOW_TYPE = 3 }; + enum OpalShpcSlotState { OPAL_SHPC_DEV_NOT_PRESENT = 0, OPAL_SHPC_DEV_PRESENT = 1 }; + enum OpalExceptionHandler { OPAL_MACHINE_CHECK_HANDLER = 1, OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2, OPAL_SOFTPATCH_HANDLER = 3 }; + enum OpalPendingState { - OPAL_EVENT_OPAL_INTERNAL = 0x1, - OPAL_EVENT_NVRAM = 0x2, - OPAL_EVENT_RTC = 0x4, - OPAL_EVENT_CONSOLE_OUTPUT = 0x8, - OPAL_EVENT_CONSOLE_INPUT = 0x10, - OPAL_EVENT_ERROR_LOG_AVAIL = 0x20, - OPAL_EVENT_ERROR_LOG = 0x40, - OPAL_EVENT_EPOW = 0x80, - OPAL_EVENT_LED_STATUS = 0x100 + OPAL_EVENT_OPAL_INTERNAL = 0x1, + OPAL_EVENT_NVRAM = 0x2, + OPAL_EVENT_RTC = 0x4, + OPAL_EVENT_CONSOLE_OUTPUT = 0x8, + OPAL_EVENT_CONSOLE_INPUT = 0x10, + OPAL_EVENT_ERROR_LOG_AVAIL = 0x20, + OPAL_EVENT_ERROR_LOG = 0x40, + OPAL_EVENT_EPOW = 0x80, + OPAL_EVENT_LED_STATUS = 0x100, + OPAL_EVENT_PCI_ERROR = 0x200 }; /* Machine check related definitions */ @@ -364,15 +385,80 @@ struct opal_machine_check_event { } u; }; +enum { + OPAL_P7IOC_DIAG_TYPE_NONE = 0, + OPAL_P7IOC_DIAG_TYPE_RGC = 1, + OPAL_P7IOC_DIAG_TYPE_BI = 2, + OPAL_P7IOC_DIAG_TYPE_CI = 3, + OPAL_P7IOC_DIAG_TYPE_MISC = 4, + OPAL_P7IOC_DIAG_TYPE_I2C = 5, + OPAL_P7IOC_DIAG_TYPE_LAST = 6 +}; + +struct OpalIoP7IOCErrorData { + uint16_t type; + + /* GEM */ + uint64_t gemXfir; + uint64_t gemRfir; + uint64_t gemRirqfir; + uint64_t gemMask; + uint64_t gemRwof; + + /* LEM */ + uint64_t lemFir; + uint64_t lemErrMask; + uint64_t lemAction0; + uint64_t lemAction1; + uint64_t lemWof; + + union { + struct OpalIoP7IOCRgcErrorData { + uint64_t rgcStatus; /* 3E1C10 */ + uint64_t rgcLdcp; /* 3E1C18 */ + }rgc; + struct OpalIoP7IOCBiErrorData { + uint64_t biLdcp0; /* 3C0100, 3C0118 */ + uint64_t biLdcp1; /* 3C0108, 3C0120 */ + uint64_t biLdcp2; /* 3C0110, 3C0128 */ + uint64_t biFenceStatus; /* 3C0130, 3C0130 */ + + uint8_t biDownbound; /* BI Downbound or Upbound */ + }bi; + struct OpalIoP7IOCCiErrorData { + uint64_t ciPortStatus; /* 3Dn008 */ + uint64_t ciPortLdcp; /* 3Dn010 */ + + uint8_t ciPort; /* Index of CI port: 0/1 */ + }ci; + }; +}; + /** * This structure defines the overlay which will be used to store PHB error * data upon request. */ enum { + OPAL_PHB_ERROR_DATA_VERSION_1 = 1, +}; + +enum { + OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1, +}; + +enum { OPAL_P7IOC_NUM_PEST_REGS = 128, }; +struct OpalIoPhbErrorCommon { + uint32_t version; + uint32_t ioType; + uint32_t len; +}; + struct OpalIoP7IOCPhbErrorData { + struct OpalIoPhbErrorCommon common; + uint32_t brdgCtl; // P7IOC utl regs @@ -530,14 +616,21 @@ int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number, uint64_t pci_mem_size); int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state); -int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer, uint64_t diag_buffer_len); -int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer, uint64_t diag_buffer_len); +int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer, + uint64_t diag_buffer_len); +int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer, + uint64_t diag_buffer_len); +int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, void *diag_buffer, + uint64_t diag_buffer_len); int64_t opal_pci_fence_phb(uint64_t phb_id); int64_t opal_pci_reinit(uint64_t phb_id, uint8_t reinit_scope); int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action); int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action); int64_t opal_get_epow_status(uint64_t *status); int64_t opal_set_system_attention_led(uint8_t led_action); +int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe, + uint16_t *pci_error_type, uint16_t *severity); +int64_t opal_pci_poll(uint64_t phb_id); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); @@ -551,6 +644,11 @@ extern void hvc_opal_init_early(void); extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); +extern int opal_notifier_register(struct notifier_block *nb); +extern void opal_notifier_enable(void); +extern void opal_notifier_disable(void); +extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val); + extern int opal_get_chars(uint32_t vtermno, char *buf, int count); extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index f265049dd7d6..2dd7bfc459be 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -60,6 +60,7 @@ struct power_pmu { #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */ #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ #define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */ +#define PPMU_EBB 0x00000100 /* supports event based branch */ /* * Values for flags to get_alternatives() @@ -68,6 +69,11 @@ struct power_pmu { #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ +/* + * We use the event config bit 63 as a flag to request EBB. + */ +#define EVENT_CONFIG_EBB_SHIFT 63 + extern int register_power_pmu(struct power_pmu *); struct pt_regs; diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index b66ae722a8e9..f65e27b09bd3 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -221,17 +221,17 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE), + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL|__GFP_REPEAT); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd); + kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd); } #define __pmd_free_tlb(tlb, pmd, addr) \ - pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE) + pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) #ifndef CONFIG_PPC_64K_PAGES #define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h index 45142d640720..a56b82fb0609 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h @@ -33,7 +33,8 @@ #define PGDIR_MASK (~(PGDIR_SIZE-1)) /* Bits to mask out from a PMD to get to the PTE page */ -#define PMD_MASKED_BITS 0x1ff +/* PMDs point to PTE table fragments which are 4K aligned. */ +#define PMD_MASKED_BITS 0xfff /* Bits to mask out from a PGD/PUD to get to the PMD page */ #define PUD_MASKED_BITS 0x1ff diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index e3d55f6f24fe..46db09414a10 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -10,6 +10,7 @@ #else #include <asm/pgtable-ppc64-4k.h> #endif +#include <asm/barrier.h> #define FIRST_USER_ADDRESS 0 @@ -20,7 +21,11 @@ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) - +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) +#else +#define PMD_CACHE_INDEX PMD_INDEX_SIZE +#endif /* * Define the address range of the kernel non-linear virtual area */ @@ -150,7 +155,7 @@ #define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) -#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) +extern struct page *pmd_page(pmd_t pmd); #define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) #define pud_none(pud) (!pud_val(pud)) @@ -339,43 +344,217 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); +#endif /* __ASSEMBLY__ */ + +/* + * THP pages can't be special. So use the _PAGE_SPECIAL + */ +#define _PAGE_SPLITTING _PAGE_SPECIAL + +/* + * We need to differentiate between explicit huge page and THP huge + * page, since THP huge page also need to track real subpage details + */ +#define _PAGE_THP_HUGE _PAGE_4K_PFN /* - * find_linux_pte returns the address of a linux pte for a given - * effective address and directory. If not found, it returns zero. + * set of bits not changed in pmd_modify. */ -static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea) +#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \ + _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \ + _PAGE_THP_HUGE) + +#ifndef __ASSEMBLY__ +/* + * The linux hugepage PMD now include the pmd entries followed by the address + * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. + * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per + * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and + * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. + * + * The last three bits are intentionally left to zero. This memory location + * are also used as normal page PTE pointers. So if we have any pointers + * left around while we collapse a hugepage, we need to make sure + * _PAGE_PRESENT and _PAGE_FILE bits of that are zero when we look at them + */ +static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) { - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - pte_t *pt = NULL; - - pg = pgdir + pgd_index(ea); - if (!pgd_none(*pg)) { - pu = pud_offset(pg, ea); - if (!pud_none(*pu)) { - pm = pmd_offset(pu, ea); - if (pmd_present(*pm)) - pt = pte_offset_kernel(pm, ea); - } - } - return pt; + return (hpte_slot_array[index] >> 3) & 0x1; } -#ifdef CONFIG_HUGETLB_PAGE -pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - unsigned *shift); -#else -static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - unsigned *shift) +static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array, + int index) { - if (shift) - *shift = 0; - return find_linux_pte(pgdir, ea); + return hpte_slot_array[index] >> 4; } -#endif /* !CONFIG_HUGETLB_PAGE */ -#endif /* __ASSEMBLY__ */ +static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, + unsigned int index, unsigned int hidx) +{ + hpte_slot_array[index] = hidx << 4 | 0x1 << 3; +} +static inline char *get_hpte_slot_array(pmd_t *pmdp) +{ + /* + * The hpte hindex is stored in the pgtable whose address is in the + * second half of the PMD + * + * Order this load with the test for pmd_trans_huge in the caller + */ + smp_rmb(); + return *(char **)(pmdp + PTRS_PER_PMD); + + +} + +extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); +extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); +extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd); +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd); + +static inline int pmd_trans_huge(pmd_t pmd) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); +} + +static inline int pmd_large(pmd_t pmd) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + if (pmd_trans_huge(pmd)) + return pmd_val(pmd) & _PAGE_PRESENT; + return 0; +} + +static inline int pmd_trans_splitting(pmd_t pmd) +{ + if (pmd_trans_huge(pmd)) + return pmd_val(pmd) & _PAGE_SPLITTING; + return 0; +} + +extern int has_transparent_hugepage(void); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} + +static inline pte_t *pmdp_ptep(pmd_t *pmd) +{ + return (pte_t *)pmd; +} + +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + /* Do nothing, mk_pmd() does this part. */ + return pmd; +} + +static inline pmd_t pmd_mknotpresent(pmd_t pmd) +{ + pmd_val(pmd) &= ~_PAGE_PRESENT; + return pmd; +} + +static inline pmd_t pmd_mksplitting(pmd_t pmd) +{ + pmd_val(pmd) |= _PAGE_SPLITTING; + return pmd; +} + +#define __HAVE_ARCH_PMD_SAME +static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) +{ + return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0); +} + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); + +extern unsigned long pmd_hugepage_update(struct mm_struct *mm, + unsigned long addr, + pmd_t *pmdp, unsigned long clr); + +static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + unsigned long old; + + if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) + return 0; + old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED); + return ((old & _PAGE_ACCESSED) != 0); +} + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +extern pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_CLEAR_FLUSH +extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + + if ((pmd_val(*pmdp) & _PAGE_RW) == 0) + return; + + pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW); +} + +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_INVALIDATE +extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp); +#endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index b6293d26bd39..7d6eacf249cf 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -217,6 +217,12 @@ extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr, extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr); +#ifndef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_large(pmd) 0 +#define has_transparent_hugepage() 0 +#endif +pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, + unsigned *shift); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 5f1e15b68704..3421637cfd7b 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h @@ -38,5 +38,30 @@ typedef u32 ppc_opcode_t; #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#define MSR_SINGLESTEP (MSR_DE) +#else +#define MSR_SINGLESTEP (MSR_SE) +#endif + +/* Enable single stepping for the current task */ +static inline void enable_single_step(struct pt_regs *regs) +{ + regs->msr |= MSR_SINGLESTEP; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * We turn off Critical Input Exception(CE) to ensure that the single + * step will be for the instruction we have the probe on; if we don't, + * it is possible we'd get the single step reported for CE. + */ + regs->msr &= ~MSR_CE; + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#ifdef CONFIG_PPC_47x + isync(); +#endif +#endif +} + + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_PROBES_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 14a658363698..47a35b08b963 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -168,10 +168,10 @@ struct thread_struct { * The following help to manage the use of Debug Control Registers * om the BookE platforms. */ - unsigned long dbcr0; - unsigned long dbcr1; + uint32_t dbcr0; + uint32_t dbcr1; #ifdef CONFIG_BOOKE - unsigned long dbcr2; + uint32_t dbcr2; #endif /* * The stored value of the DBSR register will be the value at the @@ -179,7 +179,7 @@ struct thread_struct { * user (will never be written to) and has value while helping to * describe the reason for the last debug trap. Torez */ - unsigned long dbsr; + uint32_t dbsr; /* * The following will contain addresses used by debug applications * to help trace and trap on particular address locations. @@ -200,7 +200,7 @@ struct thread_struct { #endif #endif /* FP and VSX 0-31 register set */ - double fpr[32][TS_FPRWIDTH]; + double fpr[32][TS_FPRWIDTH] __attribute__((aligned(16))); struct { unsigned int pad; @@ -287,9 +287,9 @@ struct thread_struct { unsigned long siar; unsigned long sdar; unsigned long sier; - unsigned long mmcr0; unsigned long mmcr2; - unsigned long mmcra; + unsigned mmcr0; + unsigned used_ebb; #endif }; @@ -404,9 +404,7 @@ static inline void prefetchw(const void *x) #define spin_lock_prefetch(x) prefetchw(x) -#ifdef CONFIG_PPC64 #define HAVE_ARCH_PICK_MMAP_LAYOUT -#endif #ifdef CONFIG_PPC64 static inline unsigned long get_clean_sp(unsigned long sp, int is_32) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 4a9e408644fe..5d7d9c2a5473 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -621,11 +621,15 @@ #define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ #define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ #define MMCR0_TBEE 0x00400000UL /* time base exception enable */ +#define MMCR0_EBE 0x00100000UL /* Event based branch enable */ +#define MMCR0_PMCC 0x000c0000UL /* PMC control */ +#define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */ #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCjCE 0x00004000UL /* PMCj count enable*/ #define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ #define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */ #define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */ +#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ #define MMCR0_FCTI 0x00000008UL /* freeze counters in tags inactive mode */ #define MMCR0_FCTA 0x00000004UL /* freeze counters in tags active mode */ #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */ @@ -673,6 +677,11 @@ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ +/* When EBB is enabled, some of MMCR0/MMCR2/SIER are user accessible */ +#define MMCR0_USER_MASK (MMCR0_FC | MMCR0_PMXE | MMCR0_PMAO) +#define MMCR2_USER_MASK 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */ +#define SIER_USER_MASK 0x7fffffUL + #define SPRN_PA6T_MMCR0 795 #define PA6T_MMCR0_EN0 0x0000000000000001UL #define PA6T_MMCR0_EN1 0x0000000000000002UL diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 34fd70488d83..c7a8bfc9f6f5 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -350,8 +350,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg) (devfn << 8) | (reg & 0xff); } -extern void __cpuinit rtas_give_timebase(void); -extern void __cpuinit rtas_take_timebase(void); +extern void rtas_give_timebase(void); +extern void rtas_take_timebase(void); #ifdef CONFIG_PPC_RTAS static inline int page_is_rtas_user_buf(unsigned long pfn) diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 200d763a0a67..49a13e0ef234 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -67,4 +67,18 @@ static inline void flush_spe_to_thread(struct task_struct *t) } #endif +static inline void clear_task_ebb(struct task_struct *t) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + /* EBB perf events are not inherited, so clear all EBB state. */ + t->thread.bescr = 0; + t->thread.mmcr2 = 0; + t->thread.mmcr0 = 0; + t->thread.siar = 0; + t->thread.sdar = 0; + t->thread.sier = 0; + t->thread.used_ebb = 0; +#endif +} + #endif /* _ASM_POWERPC_SWITCH_TO_H */ diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 61a59271665b..2def01ed0cb2 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -165,7 +165,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, /* Private function for use by PCI IO mapping code */ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, unsigned long end); - +extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr); #else #error Unsupported MMU type #endif diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 50f261bc3e95..0d9cecddf8a4 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -22,7 +22,7 @@ extern unsigned long vdso64_rt_sigtramp; extern unsigned long vdso32_sigtramp; extern unsigned long vdso32_rt_sigtramp; -int __cpuinit vdso_getcpu_init(void); +int vdso_getcpu_init(void); #else /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index a36daf3c6f9a..405fb09bda94 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -81,4 +81,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f960a7944553..a8619bfe879e 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -58,6 +58,8 @@ obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o +obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ + eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_FA_DUMP) += fadump.o @@ -100,7 +102,7 @@ obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o -pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o +pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6f16ffafa6f0..c7e8afc2ead0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -105,9 +105,6 @@ int main(void) DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); #else /* CONFIG_PPC64 */ DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); -#endif #ifdef CONFIG_SPE DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); @@ -115,6 +112,9 @@ int main(void) DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); +#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); #endif @@ -132,7 +132,6 @@ int main(void) DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); - DEFINE(THREAD_MMCRA, offsetof(struct thread_struct, mmcra)); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 92c6b008dd2b..9262cf2bec4b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -131,7 +131,8 @@ static const char *cache_type_string(const struct cache *cache) return cache_type_info[cache->type].name; } -static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode) +static void cache_init(struct cache *cache, int type, int level, + struct device_node *ofnode) { cache->type = type; cache->level = level; @@ -140,7 +141,7 @@ static void __cpuinit cache_init(struct cache *cache, int type, int level, struc list_add(&cache->list, &cache_list); } -static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode) +static struct cache *new_cache(int type, int level, struct device_node *ofnode) { struct cache *cache; @@ -324,7 +325,8 @@ static bool cache_node_is_unified(const struct device_node *np) return of_get_property(np, "cache-unified", NULL); } -static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level) +static struct cache *cache_do_one_devnode_unified(struct device_node *node, + int level) { struct cache *cache; @@ -335,7 +337,8 @@ static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node * return cache; } -static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level) +static struct cache *cache_do_one_devnode_split(struct device_node *node, + int level) { struct cache *dcache, *icache; @@ -357,7 +360,7 @@ err: return NULL; } -static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level) +static struct cache *cache_do_one_devnode(struct device_node *node, int level) { struct cache *cache; @@ -369,7 +372,8 @@ static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, in return cache; } -static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level) +static struct cache *cache_lookup_or_instantiate(struct device_node *node, + int level) { struct cache *cache; @@ -385,7 +389,7 @@ static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *n return cache; } -static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger) +static void link_cache_lists(struct cache *smaller, struct cache *bigger) { while (smaller->next_local) { if (smaller->next_local == bigger) @@ -396,13 +400,13 @@ static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigg smaller->next_local = bigger; } -static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache) +static void do_subsidiary_caches_debugcheck(struct cache *cache) { WARN_ON_ONCE(cache->level != 1); WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu")); } -static void __cpuinit do_subsidiary_caches(struct cache *cache) +static void do_subsidiary_caches(struct cache *cache) { struct device_node *subcache_node; int level = cache->level; @@ -423,7 +427,7 @@ static void __cpuinit do_subsidiary_caches(struct cache *cache) } } -static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id) +static struct cache *cache_chain_instantiate(unsigned int cpu_id) { struct device_node *cpu_node; struct cache *cpu_cache = NULL; @@ -448,7 +452,7 @@ out: return cpu_cache; } -static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) +static struct cache_dir *cacheinfo_create_cache_dir(unsigned int cpu_id) { struct cache_dir *cache_dir; struct device *dev; @@ -653,7 +657,7 @@ static struct kobj_type cache_index_type = { .default_attrs = cache_index_default_attrs, }; -static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) +static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) { const char *cache_name; const char *cache_type; @@ -696,7 +700,8 @@ static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *d kfree(buf); } -static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir) +static void cacheinfo_create_index_dir(struct cache *cache, int index, + struct cache_dir *cache_dir) { struct cache_index_dir *index_dir; int rc; @@ -722,7 +727,8 @@ err: kfree(index_dir); } -static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list) +static void cacheinfo_sysfs_populate(unsigned int cpu_id, + struct cache *cache_list) { struct cache_dir *cache_dir; struct cache *cache; @@ -740,7 +746,7 @@ static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache } } -void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id) +void cacheinfo_cpu_online(unsigned int cpu_id) { struct cache *cache; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 9ec3fe174cba..779a78c26435 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -69,16 +69,6 @@ void __init setup_kdump_trampoline(void) } #endif /* CONFIG_NONSTATIC_KERNEL */ -static int __init parse_savemaxmem(char *p) -{ - if (p) - saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; - - return 1; -} -__setup("savemaxmem=", parse_savemaxmem); - - static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize, unsigned long offset, int userbuf) { diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/kernel/eeh.c index 6b73d6c44f51..39954fe941b8 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -103,11 +103,8 @@ EXPORT_SYMBOL(eeh_subsystem_enabled); */ int eeh_probe_mode; -/* Global EEH mutex */ -DEFINE_MUTEX(eeh_mutex); - /* Lock to avoid races due to multiple reports of an error */ -static DEFINE_RAW_SPINLOCK(confirm_error_lock); +DEFINE_RAW_SPINLOCK(confirm_error_lock); /* Buffer for reporting pci register dumps. Its here in BSS, and * not dynamically alloced, so that it ends up in RMO where RTAS @@ -235,16 +232,30 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; struct eeh_dev *edev; + bool valid_cfg_log = true; - eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - eeh_ops->configure_bridge(pe); - eeh_pe_restore_bars(pe); - - pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev) { - loglen += eeh_gather_pci_data(edev, pci_regs_buf, - EEH_PCI_REGS_LOG_LEN); - } + /* + * When the PHB is fenced or dead, it's pointless to collect + * the data from PCI config space because it should return + * 0xFF's. For ER, we still retrieve the data from the PCI + * config space. + */ + if (eeh_probe_mode_dev() && + (pe->type & EEH_PE_PHB) && + (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) + valid_cfg_log = false; + + if (valid_cfg_log) { + eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + eeh_ops->configure_bridge(pe); + eeh_pe_restore_bars(pe); + + pci_regs_buf[0] = 0; + eeh_pe_for_each_dev(pe, edev) { + loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, + EEH_PCI_REGS_LOG_LEN - loglen); + } + } eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); } @@ -260,15 +271,74 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) { pte_t *ptep; unsigned long pa; + int hugepage_shift; - ptep = find_linux_pte(init_mm.pgd, token); + /* + * We won't find hugepages here, iomem + */ + ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); if (!ptep) return token; + WARN_ON(hugepage_shift); pa = pte_pfn(*ptep) << PAGE_SHIFT; return pa | (token & (PAGE_SIZE-1)); } +/* + * On PowerNV platform, we might already have fenced PHB there. + * For that case, it's meaningless to recover frozen PE. Intead, + * We have to handle fenced PHB firstly. + */ +static int eeh_phb_check_failure(struct eeh_pe *pe) +{ + struct eeh_pe *phb_pe; + unsigned long flags; + int ret; + + if (!eeh_probe_mode_dev()) + return -EPERM; + + /* Find the PHB PE */ + phb_pe = eeh_phb_pe_get(pe->phb); + if (!phb_pe) { + pr_warning("%s Can't find PE for PHB#%d\n", + __func__, pe->phb->global_number); + return -EEXIST; + } + + /* If the PHB has been in problematic state */ + eeh_serialize_lock(&flags); + if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { + ret = 0; + goto out; + } + + /* Check PHB state */ + ret = eeh_ops->get_state(phb_pe, NULL); + if ((ret < 0) || + (ret == EEH_STATE_NOT_SUPPORT) || + (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + ret = 0; + goto out; + } + + /* Isolate the PHB and send event */ + eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); + eeh_serialize_unlock(flags); + eeh_send_failure_event(phb_pe); + + pr_err("EEH: PHB#%x failure detected\n", + phb_pe->phb->global_number); + dump_stack(); + + return 1; +out: + eeh_serialize_unlock(flags); + return ret; +} + /** * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze * @edev: eeh device @@ -319,13 +389,21 @@ int eeh_dev_check_failure(struct eeh_dev *edev) return 0; } + /* + * On PowerNV platform, we might already have fenced PHB + * there and we need take care of that firstly. + */ + ret = eeh_phb_check_failure(pe); + if (ret > 0) + return ret; + /* If we already have a pending isolation event for this * slot, we know it's bad already, we don't need to check. * Do this checking under a lock; as multiple PCI devices * in one slot might report errors simultaneously, and we * only want one error recovery routine running. */ - raw_spin_lock_irqsave(&confirm_error_lock, flags); + eeh_serialize_lock(&flags); rc = 1; if (pe->state & EEH_PE_ISOLATED) { pe->check_count++; @@ -368,13 +446,13 @@ int eeh_dev_check_failure(struct eeh_dev *edev) } eeh_stats.slot_resets++; - + /* Avoid repeated reports of this failure, including problems * with other functions on this device, and functions under * bridges. */ eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - raw_spin_unlock_irqrestore(&confirm_error_lock, flags); + eeh_serialize_unlock(flags); eeh_send_failure_event(pe); @@ -382,11 +460,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - WARN(1, "EEH: failure detected\n"); + pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", + pe->addr, pe->phb->global_number); + dump_stack(); + return 1; dn_unlock: - raw_spin_unlock_irqrestore(&confirm_error_lock, flags); + eeh_serialize_unlock(flags); return rc; } @@ -525,7 +606,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) * or a fundamental reset (3). * A fundamental reset required by any device under * Partitionable Endpoint trumps hot-reset. - */ + */ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); if (freset) @@ -538,8 +619,8 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) */ #define PCI_BUS_RST_HOLD_TIME_MSEC 250 msleep(PCI_BUS_RST_HOLD_TIME_MSEC); - - /* We might get hit with another EEH freeze as soon as the + + /* We might get hit with another EEH freeze as soon as the * pci slot reset line is dropped. Make sure we don't miss * these, and clear the flag now. */ @@ -565,6 +646,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) */ int eeh_reset_pe(struct eeh_pe *pe) { + int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); int i, rc; /* Take three shots at resetting the bus */ @@ -572,7 +654,7 @@ int eeh_reset_pe(struct eeh_pe *pe) eeh_reset_pe_once(pe); rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) + if ((rc & flags) == flags) return 0; if (rc < 0) { @@ -604,7 +686,7 @@ void eeh_save_bars(struct eeh_dev *edev) if (!edev) return; dn = eeh_dev_to_of_node(edev); - + for (i = 0; i < 16; i++) eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); } @@ -674,11 +756,21 @@ int __exit eeh_ops_unregister(const char *name) * Even if force-off is set, the EEH hardware is still enabled, so that * newer systems can boot. */ -static int __init eeh_init(void) +int eeh_init(void) { struct pci_controller *hose, *tmp; struct device_node *phb; - int ret; + static int cnt = 0; + int ret = 0; + + /* + * We have to delay the initialization on PowerNV after + * the PCI hierarchy tree has been built because the PEs + * are figured out based on PCI devices instead of device + * tree nodes + */ + if (machine_is(powernv) && cnt++ <= 0) + return ret; /* call platform initialization function */ if (!eeh_ops) { @@ -691,7 +783,10 @@ static int __init eeh_init(void) return ret; } - raw_spin_lock_init(&confirm_error_lock); + /* Initialize EEH event */ + ret = eeh_event_init(); + if (ret) + return ret; /* Enable EEH for all adapters */ if (eeh_probe_mode_devtree()) { @@ -700,6 +795,25 @@ static int __init eeh_init(void) phb = hose->dn; traverse_pci_devices(phb, eeh_ops->of_probe, NULL); } + } else if (eeh_probe_mode_dev()) { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) + pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); + } else { + pr_warning("%s: Invalid probe mode %d\n", + __func__, eeh_probe_mode); + return -EINVAL; + } + + /* + * Call platform post-initialization. Actually, It's good chance + * to inform platform that EEH is ready to supply service if the + * I/O cache stuff has been built up. + */ + if (eeh_ops->post_init) { + ret = eeh_ops->post_init(); + if (ret) + return ret; } if (eeh_subsystem_enabled) @@ -728,6 +842,14 @@ static void eeh_add_device_early(struct device_node *dn) { struct pci_controller *phb; + /* + * If we're doing EEH probe based on PCI device, we + * would delay the probe until late stage because + * the PCI device isn't available this moment. + */ + if (!eeh_probe_mode_devtree()) + return; + if (!of_node_to_eeh_dev(dn)) return; phb = of_node_to_eeh_dev(dn)->phb; @@ -736,7 +858,6 @@ static void eeh_add_device_early(struct device_node *dn) if (NULL == phb || 0 == phb->buid) return; - /* FIXME: hotplug support on POWERNV */ eeh_ops->of_probe(dn, NULL); } @@ -787,6 +908,13 @@ static void eeh_add_device_late(struct pci_dev *dev) edev->pdev = dev; dev->dev.archdata.edev = edev; + /* + * We have to do the EEH probe here because the PCI device + * hasn't been created yet in the early stage. + */ + if (eeh_probe_mode_dev()) + eeh_ops->dev_probe(dev, NULL); + eeh_addr_cache_insert_dev(dev); } @@ -803,12 +931,12 @@ void eeh_add_device_tree_late(struct pci_bus *bus) struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_add_device_late(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_device_tree_late(subbus); - } + eeh_add_device_late(dev); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + struct pci_bus *subbus = dev->subordinate; + if (subbus) + eeh_add_device_tree_late(subbus); + } } } EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 5ce3ba7ad137..f9ac1232a746 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -194,7 +194,7 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) } /* Skip any devices for which EEH is not enabled. */ - if (!edev->pe) { + if (!eeh_probe_mode_dev() && !edev->pe) { #ifdef DEBUG pr_info("PCI: skip building address cache for=%s - %s\n", pci_name(dev), dn->full_name); @@ -285,7 +285,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev) * Must be run late in boot process, after the pci controllers * have been scanned for devices (after all device resources are known). */ -void __init eeh_addr_cache_build(void) +void eeh_addr_cache_build(void) { struct device_node *dn; struct eeh_dev *edev; @@ -316,4 +316,3 @@ void __init eeh_addr_cache_build(void) eeh_addr_cache_print(&pci_io_addr_cache_root); #endif } - diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index 1efa28f5fc54..1efa28f5fc54 100644 --- a/arch/powerpc/platforms/pseries/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index a3fefb61097c..2b1ce17cae50 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -154,9 +154,9 @@ static void eeh_enable_irq(struct pci_dev *dev) * eeh_report_error - Report pci error to each device driver * @data: eeh device * @userdata: return value - * - * Report an EEH error to each device driver, collect up and - * merge the device driver responses. Cumulative response + * + * Report an EEH error to each device driver, collect up and + * merge the device driver responses. Cumulative response * passed back in "userdata". */ static void *eeh_report_error(void *data, void *userdata) @@ -349,10 +349,12 @@ static void *eeh_report_failure(void *data, void *userdata) */ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) { + struct timeval tstamp; int cnt, rc; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; + tstamp = pe->tstamp; /* * We don't remove the corresponding PE instances because @@ -376,15 +378,17 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) eeh_pe_restore_bars(pe); /* Give the system 5 seconds to finish running the user-space - * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, - * this is a hack, but if we don't do this, and try to bring - * the device up before the scripts have taken it down, + * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, + * this is a hack, but if we don't do this, and try to bring + * the device up before the scripts have taken it down, * potentially weird things happen. */ if (bus) { ssleep(5); pcibios_add_pci_devices(bus); } + + pe->tstamp = tstamp; pe->freeze_count = cnt; return 0; @@ -395,24 +399,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ #define MAX_WAIT_FOR_RECOVERY 150 -/** - * eeh_handle_event - Reset a PCI device after hard lockup. - * @pe: EEH PE - * - * While PHB detects address or data parity errors on particular PCI - * slot, the associated PE will be frozen. Besides, DMA's occurring - * to wild addresses (which usually happen due to bugs in device - * drivers or in PCI adapter firmware) can cause EEH error. #SERR, - * #PERR or other misc PCI-related errors also can trigger EEH errors. - * - * Recovery process consists of unplugging the device driver (which - * generated hotplug events to userspace), then issuing a PCI #RST to - * the device, then reconfiguring the PCI config space for all bridges - * & devices under this slot, and then finally restarting the device - * drivers (which cause a second set of hotplug events to go out to - * userspace). - */ -void eeh_handle_event(struct eeh_pe *pe) +static void eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; int rc = 0; @@ -425,6 +412,7 @@ void eeh_handle_event(struct eeh_pe *pe) return; } + eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; @@ -437,6 +425,7 @@ void eeh_handle_event(struct eeh_pe *pe) * status ... if any child can't handle the reset, then the entire * slot is dlpar removed and added. */ + pr_info("EEH: Notify device drivers to shutdown\n"); eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Get the current PCI slot state. This can take a long time, @@ -444,7 +433,7 @@ void eeh_handle_event(struct eeh_pe *pe) */ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - printk(KERN_WARNING "EEH: Permanent failure\n"); + pr_warning("EEH: Permanent failure\n"); goto hard_fail; } @@ -452,6 +441,7 @@ void eeh_handle_event(struct eeh_pe *pe) * don't post the error log until after all dev drivers * have been informed. */ + pr_info("EEH: Collect temporary log\n"); eeh_slot_error_detail(pe, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut @@ -459,15 +449,18 @@ void eeh_handle_event(struct eeh_pe *pe) * go down willingly, without panicing the system. */ if (result == PCI_ERS_RESULT_NONE) { + pr_info("EEH: Reset with hotplug activity\n"); rc = eeh_reset_device(pe, frozen_bus); if (rc) { - printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); + pr_warning("%s: Unable to reset, err=%d\n", + __func__, rc); goto hard_fail; } } /* If all devices reported they can proceed, then re-enable MMIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { + pr_info("EEH: Enable I/O for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); if (rc < 0) @@ -475,6 +468,7 @@ void eeh_handle_event(struct eeh_pe *pe) if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { + pr_info("EEH: Notify device drivers to resume I/O\n"); result = PCI_ERS_RESULT_NONE; eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); } @@ -482,6 +476,7 @@ void eeh_handle_event(struct eeh_pe *pe) /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { + pr_info("EEH: Enabled DMA for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); if (rc < 0) @@ -494,17 +489,22 @@ void eeh_handle_event(struct eeh_pe *pe) /* If any device has a hard failure, then shut off everything. */ if (result == PCI_ERS_RESULT_DISCONNECT) { - printk(KERN_WARNING "EEH: Device driver gave up\n"); + pr_warning("EEH: Device driver gave up\n"); goto hard_fail; } /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { + pr_info("EEH: Reset without hotplug activity\n"); rc = eeh_reset_device(pe, NULL); if (rc) { - printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); + pr_warning("%s: Cannot reset, err=%d\n", + __func__, rc); goto hard_fail; } + + pr_info("EEH: Notify device drivers " + "the completion of reset\n"); result = PCI_ERS_RESULT_NONE; eeh_pe_dev_traverse(pe, eeh_report_reset, &result); } @@ -512,15 +512,16 @@ void eeh_handle_event(struct eeh_pe *pe) /* All devices should claim they have recovered by now. */ if ((result != PCI_ERS_RESULT_RECOVERED) && (result != PCI_ERS_RESULT_NONE)) { - printk(KERN_WARNING "EEH: Not recovered\n"); + pr_warning("EEH: Not recovered\n"); goto hard_fail; } /* Tell all device drivers that they can resume operations */ + pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); return; - + excess_failures: /* * About 90% of all real-life EEH failures in the field @@ -550,3 +551,111 @@ perm_error: pcibios_remove_pci_devices(frozen_bus); } +static void eeh_handle_special_event(void) +{ + struct eeh_pe *pe, *phb_pe; + struct pci_bus *bus; + struct pci_controller *hose, *tmp; + unsigned long flags; + int rc = 0; + + /* + * The return value from next_error() has been classified as follows. + * It might be good to enumerate them. However, next_error() is only + * supported by PowerNV platform for now. So it would be fine to use + * integer directly: + * + * 4 - Dead IOC 3 - Dead PHB + * 2 - Fenced PHB 1 - Frozen PE + * 0 - No error found + * + */ + rc = eeh_ops->next_error(&pe); + if (rc <= 0) + return; + + switch (rc) { + case 4: + /* Mark all PHBs in dead state */ + eeh_serialize_lock(&flags); + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) continue; + + eeh_pe_state_mark(phb_pe, + EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + } + eeh_serialize_unlock(flags); + + /* Purge all events */ + eeh_remove_event(NULL); + break; + case 3: + case 2: + case 1: + /* Mark the PE in fenced state */ + eeh_serialize_lock(&flags); + if (rc == 3) + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + else + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_RECOVERING); + eeh_serialize_unlock(flags); + + /* Purge all events of the PHB */ + eeh_remove_event(pe); + break; + default: + pr_err("%s: Invalid value %d from next_error()\n", + __func__, rc); + return; + } + + /* + * For fenced PHB and frozen PE, it's handled as normal + * event. We have to remove the affected PHBs for dead + * PHB and IOC + */ + if (rc == 2 || rc == 1) + eeh_handle_normal_event(pe); + else { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD)) + continue; + + bus = eeh_pe_bus_get(phb_pe); + /* Notify all devices that they're about to go down. */ + eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); + pcibios_remove_pci_devices(bus); + } + } +} + +/** + * eeh_handle_event - Reset a PCI device after hard lockup. + * @pe: EEH PE + * + * While PHB detects address or data parity errors on particular PCI + * slot, the associated PE will be frozen. Besides, DMA's occurring + * to wild addresses (which usually happen due to bugs in device + * drivers or in PCI adapter firmware) can cause EEH error. #SERR, + * #PERR or other misc PCI-related errors also can trigger EEH errors. + * + * Recovery process consists of unplugging the device driver (which + * generated hotplug events to userspace), then issuing a PCI #RST to + * the device, then reconfiguring the PCI config space for all bridges + * & devices under this slot, and then finally restarting the device + * drivers (which cause a second set of hotplug events to go out to + * userspace). + */ +void eeh_handle_event(struct eeh_pe *pe) +{ + if (pe) + eeh_handle_normal_event(pe); + else + eeh_handle_special_event(); +} diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 185bedd926df..d27c5afc90ae 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -18,11 +18,10 @@ #include <linux/delay.h> #include <linux/list.h> -#include <linux/mutex.h> #include <linux/sched.h> +#include <linux/semaphore.h> #include <linux/pci.h> #include <linux/slab.h> -#include <linux/workqueue.h> #include <linux/kthread.h> #include <asm/eeh_event.h> #include <asm/ppc-pci.h> @@ -35,14 +34,9 @@ * work-queue, where a worker thread can drive recovery. */ -/* EEH event workqueue setup. */ static DEFINE_SPINLOCK(eeh_eventlist_lock); +static struct semaphore eeh_eventlist_sem; LIST_HEAD(eeh_eventlist); -static void eeh_thread_launcher(struct work_struct *); -DECLARE_WORK(eeh_event_wq, eeh_thread_launcher); - -/* Serialize reset sequences for a given pci device */ -DEFINE_MUTEX(eeh_event_mutex); /** * eeh_event_handler - Dispatch EEH events. @@ -60,55 +54,63 @@ static int eeh_event_handler(void * dummy) struct eeh_event *event; struct eeh_pe *pe; - spin_lock_irqsave(&eeh_eventlist_lock, flags); - event = NULL; - - /* Unqueue the event, get ready to process. */ - if (!list_empty(&eeh_eventlist)) { - event = list_entry(eeh_eventlist.next, struct eeh_event, list); - list_del(&event->list); - } - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - - if (event == NULL) - return 0; - - /* Serialize processing of EEH events */ - mutex_lock(&eeh_event_mutex); - pe = event->pe; - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); - pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", - pe->phb->global_number, pe->addr); - - set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ - eeh_handle_event(pe); - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); - - kfree(event); - mutex_unlock(&eeh_event_mutex); - - /* If there are no new errors after an hour, clear the counter. */ - if (pe && pe->freeze_count > 0) { - msleep_interruptible(3600*1000); - if (pe->freeze_count > 0) - pe->freeze_count--; - + while (!kthread_should_stop()) { + if (down_interruptible(&eeh_eventlist_sem)) + break; + + /* Fetch EEH event from the queue */ + spin_lock_irqsave(&eeh_eventlist_lock, flags); + event = NULL; + if (!list_empty(&eeh_eventlist)) { + event = list_entry(eeh_eventlist.next, + struct eeh_event, list); + list_del(&event->list); + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + if (!event) + continue; + + /* We might have event without binding PE */ + pe = event->pe; + if (pe) { + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", + pe->phb->global_number, pe->addr); + eeh_handle_event(pe); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + } else { + eeh_handle_event(NULL); + } + + kfree(event); } return 0; } /** - * eeh_thread_launcher - Start kernel thread to handle EEH events - * @dummy - unused + * eeh_event_init - Start kernel thread to handle EEH events * * This routine is called to start the kernel thread for processing * EEH event. */ -static void eeh_thread_launcher(struct work_struct *dummy) +int eeh_event_init(void) { - if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) - printk(KERN_ERR "Failed to start EEH daemon\n"); + struct task_struct *t; + int ret = 0; + + /* Initialize semaphore */ + sema_init(&eeh_eventlist_sem, 0); + + t = kthread_run(eeh_event_handler, NULL, "eehd"); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + pr_err("%s: Failed to start EEH daemon (%d)\n", + __func__, ret); + return ret; + } + + return 0; } /** @@ -136,7 +138,45 @@ int eeh_send_failure_event(struct eeh_pe *pe) list_add(&event->list, &eeh_eventlist); spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - schedule_work(&eeh_event_wq); + /* For EEH deamon to knick in */ + up(&eeh_eventlist_sem); return 0; } + +/** + * eeh_remove_event - Remove EEH event from the queue + * @pe: Event binding to the PE + * + * On PowerNV platform, we might have subsequent coming events + * is part of the former one. For that case, those subsequent + * coming events are totally duplicated and unnecessary, thus + * they should be removed. + */ +void eeh_remove_event(struct eeh_pe *pe) +{ + unsigned long flags; + struct eeh_event *event, *tmp; + + spin_lock_irqsave(&eeh_eventlist_lock, flags); + list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) { + /* + * If we don't have valid PE passed in, that means + * we already have event corresponding to dead IOC + * and all events should be purged. + */ + if (!pe) { + list_del(&event->list); + kfree(event); + } else if (pe->type & EEH_PE_PHB) { + if (event->pe && event->pe->phb == pe->phb) { + list_del(&event->list); + kfree(event); + } + } else if (event->pe == pe) { + list_del(&event->list); + kfree(event); + } + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); +} diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 9d4a9e8562b2..016588a6f5ed 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -22,6 +22,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/delay.h> #include <linux/export.h> #include <linux/gfp.h> #include <linux/init.h> @@ -78,9 +79,7 @@ int eeh_phb_pe_create(struct pci_controller *phb) } /* Put it into the list */ - eeh_lock(); list_add_tail(&pe->child, &eeh_phb_pe); - eeh_unlock(); pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); @@ -95,7 +94,7 @@ int eeh_phb_pe_create(struct pci_controller *phb) * hierarchy tree is composed of PHB PEs. The function is used * to retrieve the corresponding PHB PE according to the given PHB. */ -static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) +struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) { struct eeh_pe *pe; @@ -185,21 +184,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, return NULL; } - eeh_lock(); - /* Traverse root PE */ for (pe = root; pe; pe = eeh_pe_next(pe, root)) { eeh_pe_for_each_dev(pe, edev) { ret = fn(edev, flag); - if (ret) { - eeh_unlock(); + if (ret) return ret; - } } } - eeh_unlock(); - return NULL; } @@ -228,7 +221,7 @@ static void *__eeh_pe_get(void *data, void *flag) return pe; /* Try BDF address */ - if (edev->pe_config_addr && + if (edev->config_addr && (edev->config_addr == pe->config_addr)) return pe; @@ -246,7 +239,7 @@ static void *__eeh_pe_get(void *data, void *flag) * which is composed of PCI bus/device/function number, or unified * PE address. */ -static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) +struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) { struct eeh_pe *root = eeh_phb_pe_get(edev->phb); struct eeh_pe *pe; @@ -305,8 +298,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent; - eeh_lock(); - /* * Search the PE has been existing or not according * to the PE address. If that has been existing, the @@ -316,7 +307,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pe = eeh_pe_get(edev); if (pe && !(pe->type & EEH_PE_INVALID)) { if (!edev->pe_config_addr) { - eeh_unlock(); pr_err("%s: PE with addr 0x%x already exists\n", __func__, edev->config_addr); return -EEXIST; @@ -328,7 +318,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Put the edev to PE */ list_add_tail(&edev->list, &pe->edevs); - eeh_unlock(); pr_debug("EEH: Add %s to Bus PE#%x\n", edev->dn->full_name, pe->addr); @@ -347,7 +336,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) parent->type &= ~EEH_PE_INVALID; parent = parent->parent; } - eeh_unlock(); pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", edev->dn->full_name, pe->addr, pe->parent->addr); @@ -357,7 +345,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Create a new EEH PE */ pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); if (!pe) { - eeh_unlock(); pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } @@ -365,6 +352,17 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pe->config_addr = edev->config_addr; /* + * While doing PE reset, we probably hot-reset the + * upstream bridge. However, the PCI devices including + * the associated EEH devices might be removed when EEH + * core is doing recovery. So that won't safe to retrieve + * the bridge through downstream EEH device. We have to + * trace the parent PCI bus, then the upstream bridge. + */ + if (eeh_probe_mode_dev()) + pe->bus = eeh_dev_to_pci_dev(edev)->bus; + + /* * Put the new EEH PE into hierarchy tree. If the parent * can't be found, the newly created PE will be attached * to PHB directly. Otherwise, we have to associate the @@ -374,7 +372,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) if (!parent) { parent = eeh_phb_pe_get(edev->phb); if (!parent) { - eeh_unlock(); pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", __func__, edev->phb->global_number); edev->pe = NULL; @@ -391,7 +388,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) list_add_tail(&pe->child, &parent->child_list); list_add_tail(&edev->list, &pe->edevs); edev->pe = pe; - eeh_unlock(); pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", edev->dn->full_name, pe->addr, pe->parent->addr); @@ -419,8 +415,6 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) return -EEXIST; } - eeh_lock(); - /* Remove the EEH device */ pe = edev->pe; edev->pe = NULL; @@ -465,12 +459,37 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) pe = parent; } - eeh_unlock(); - return 0; } /** + * eeh_pe_update_time_stamp - Update PE's frozen time stamp + * @pe: EEH PE + * + * We have time stamp for each PE to trace its time of getting + * frozen in last hour. The function should be called to update + * the time stamp on first error of the specific PE. On the other + * handle, we needn't account for errors happened in last hour. + */ +void eeh_pe_update_time_stamp(struct eeh_pe *pe) +{ + struct timeval tstamp; + + if (!pe) return; + + if (pe->freeze_count <= 0) { + pe->freeze_count = 0; + do_gettimeofday(&pe->tstamp); + } else { + do_gettimeofday(&tstamp); + if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) { + pe->tstamp = tstamp; + pe->freeze_count = 0; + } + } +} + +/** * __eeh_pe_state_mark - Mark the state for the PE * @data: EEH PE * @flag: state @@ -512,9 +531,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag) */ void eeh_pe_state_mark(struct eeh_pe *pe, int state) { - eeh_lock(); eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); - eeh_unlock(); } /** @@ -548,35 +565,135 @@ static void *__eeh_pe_state_clear(void *data, void *flag) */ void eeh_pe_state_clear(struct eeh_pe *pe, int state) { - eeh_lock(); eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); - eeh_unlock(); } -/** - * eeh_restore_one_device_bars - Restore the Base Address Registers for one device - * @data: EEH device - * @flag: Unused +/* + * Some PCI bridges (e.g. PLX bridges) have primary/secondary + * buses assigned explicitly by firmware, and we probably have + * lost that after reset. So we have to delay the check until + * the PCI-CFG registers have been restored for the parent + * bridge. * - * Loads the PCI configuration space base address registers, - * the expansion ROM base address, the latency timer, and etc. - * from the saved values in the device node. + * Don't use normal PCI-CFG accessors, which probably has been + * blocked on normal path during the stage. So we need utilize + * eeh operations, which is always permitted. */ -static void *eeh_restore_one_device_bars(void *data, void *flag) +static void eeh_bridge_check_link(struct pci_dev *pdev, + struct device_node *dn) +{ + int cap; + uint32_t val; + int timeout = 0; + + /* + * We only check root port and downstream ports of + * PCIe switches + */ + if (!pci_is_pcie(pdev) || + (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT && + pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)) + return; + + pr_debug("%s: Check PCIe link for %s ...\n", + __func__, pci_name(pdev)); + + /* Check slot status */ + cap = pdev->pcie_cap; + eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val); + if (!(val & PCI_EXP_SLTSTA_PDS)) { + pr_debug(" No card in the slot (0x%04x) !\n", val); + return; + } + + /* Check power status if we have the capability */ + eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val); + if (val & PCI_EXP_SLTCAP_PCP) { + eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val); + if (val & PCI_EXP_SLTCTL_PCC) { + pr_debug(" In power-off state, power it on ...\n"); + val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC); + val |= (0x0100 & PCI_EXP_SLTCTL_PIC); + eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val); + msleep(2 * 1000); + } + } + + /* Enable link */ + eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val); + val &= ~PCI_EXP_LNKCTL_LD; + eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val); + + /* Check link */ + eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val); + if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { + pr_debug(" No link reporting capability (0x%08x) \n", val); + msleep(1000); + return; + } + + /* Wait the link is up until timeout (5s) */ + timeout = 0; + while (timeout < 5000) { + msleep(20); + timeout += 20; + + eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val); + if (val & PCI_EXP_LNKSTA_DLLLA) + break; + } + + if (val & PCI_EXP_LNKSTA_DLLLA) + pr_debug(" Link up (%s)\n", + (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB"); + else + pr_debug(" Link not ready (0x%04x)\n", val); +} + +#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) +#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) + +static void eeh_restore_bridge_bars(struct pci_dev *pdev, + struct eeh_dev *edev, + struct device_node *dn) +{ + int i; + + /* + * Device BARs: 0x10 - 0x18 + * Bus numbers and windows: 0x18 - 0x30 + */ + for (i = 4; i < 13; i++) + eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); + /* Rom: 0x38 */ + eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]); + + /* Cache line & Latency timer: 0xC 0xD */ + eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, + SAVED_BYTE(PCI_CACHE_LINE_SIZE)); + eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); + /* Max latency, min grant, interrupt ping and line: 0x3C */ + eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); + + /* PCI Command: 0x4 */ + eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]); + + /* Check the PCIe link is ready */ + eeh_bridge_check_link(pdev, dn); +} + +static void eeh_restore_device_bars(struct eeh_dev *edev, + struct device_node *dn) { int i; u32 cmd; - struct eeh_dev *edev = (struct eeh_dev *)data; - struct device_node *dn = eeh_dev_to_of_node(edev); for (i = 4; i < 10; i++) eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); /* 12 == Expansion ROM Address */ eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); -#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) -#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) - eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, @@ -599,6 +716,34 @@ static void *eeh_restore_one_device_bars(void *data, void *flag) else cmd &= ~PCI_COMMAND_SERR; eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); +} + +/** + * eeh_restore_one_device_bars - Restore the Base Address Registers for one device + * @data: EEH device + * @flag: Unused + * + * Loads the PCI configuration space base address registers, + * the expansion ROM base address, the latency timer, and etc. + * from the saved values in the device node. + */ +static void *eeh_restore_one_device_bars(void *data, void *flag) +{ + struct pci_dev *pdev = NULL; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct device_node *dn = eeh_dev_to_of_node(edev); + + /* Trace the PCI bridge */ + if (eeh_probe_mode_dev()) { + pdev = eeh_dev_to_pci_dev(edev); + if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE) + pdev = NULL; + } + + if (pdev) + eeh_restore_bridge_bars(pdev, edev, dn); + else + eeh_restore_device_bars(edev, dn); return NULL; } @@ -635,19 +780,21 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) struct eeh_dev *edev; struct pci_dev *pdev; - eeh_lock(); - if (pe->type & EEH_PE_PHB) { bus = pe->phb->bus; } else if (pe->type & EEH_PE_BUS || pe->type & EEH_PE_DEVICE) { + if (pe->bus) { + bus = pe->bus; + goto out; + } + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); pdev = eeh_dev_to_pci_dev(edev); if (pdev) bus = pdev->bus; } - eeh_unlock(); - +out: return bus; } diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index d37708360f2e..e7ae3484918c 100644 --- a/arch/powerpc/platforms/pseries/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -72,4 +72,3 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); } - diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 8741c854e03d..ab15b8d057ad 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -629,21 +629,43 @@ _GLOBAL(ret_from_except_lite) CURRENT_THREAD_INFO(r9, r1) ld r3,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3E + ld r10,PACACURRENT(r13) +#endif /* CONFIG_PPC_BOOK3E */ ld r4,TI_FLAGS(r9) andi. r3,r3,MSR_PR beq resume_kernel +#ifdef CONFIG_PPC_BOOK3E + lwz r3,(THREAD+THREAD_DBCR0)(r10) +#endif /* CONFIG_PPC_BOOK3E */ /* Check current_thread_info()->flags */ andi. r0,r4,_TIF_USER_WORK_MASK +#ifdef CONFIG_PPC_BOOK3E + bne 1f + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + andis. r0,r3,DBCR0_IDM@h beq restore - - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f + mfmsr r0 + rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ + mtmsr r0 + mtspr SPRN_DBCR0,r3 + li r10, -1 + mtspr SPRN_DBSR,r10 + b restore +#else + beq restore +#endif +1: andi. r0,r4,_TIF_NEED_RESCHED + beq 2f bl .restore_interrupts SCHEDULE_USER b .ret_from_except_lite -1: bl .save_nvgprs +2: bl .save_nvgprs bl .restore_interrupts addi r3,r1,STACK_FRAME_OVERHEAD bl .do_notify_resume diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 40e4a17c8ba0..4e00d223b2e3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -341,10 +341,17 @@ vsx_unavailable_pSeries_1: EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_pSeries +facility_unavailable_trampoline: . = 0xf60 SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b tm_unavailable_pSeries + b facility_unavailable_pSeries + +hv_facility_unavailable_trampoline: + . = 0xf80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b facility_unavailable_hv #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) @@ -522,8 +529,10 @@ denorm_done: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) - STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) + STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -793,14 +802,10 @@ system_call_relon_pSeries: STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) . = 0x4e00 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_data_storage_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e20 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_instr_storage_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e40 SET_SCRATCH0(r13) @@ -808,9 +813,7 @@ system_call_relon_pSeries: b emulation_assist_relon_hv . = 0x4e60 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e80 SET_SCRATCH0(r13) @@ -835,11 +838,17 @@ vsx_unavailable_relon_pSeries_1: EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_relon_pSeries -tm_unavailable_relon_pSeries_1: +facility_unavailable_relon_trampoline: . = 0x4f60 SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b tm_unavailable_relon_pSeries + b facility_unavailable_relon_pSeries + +hv_facility_unavailable_relon_trampoline: + . = 0x4f80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b facility_unavailable_relon_hv STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION @@ -1165,36 +1174,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl .vsx_unavailable_exception b .ret_from_except - .align 7 - .globl tm_unavailable_common -tm_unavailable_common: - EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN) - bl .save_nvgprs - DISABLE_INTS - addi r3,r1,STACK_FRAME_OVERHEAD - bl .tm_unavailable_exception - b .ret_from_except + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) .align 7 .globl __end_handlers __end_handlers: /* Equivalents to the above handlers for relocation-on interrupt vectors */ - STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00) - STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20) STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) - STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) + STD_RELON_EXCEPTION_HV_OOL(0xf80, facility_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index a949bdfc9623..f0b47d1a6b0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -176,7 +176,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ if ((bp->attr.bp_addr >> 10) != - ((bp->attr.bp_addr + bp->attr.bp_len) >> 10)) + ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10)) return -EINVAL; } if (info->len > @@ -250,6 +250,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) * we still need to single-step the instruction, but we don't * generate an event. */ + info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; if (!((bp->attr.bp_addr <= dar) && (dar - bp->attr.bp_addr < bp->attr.bp_len))) info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 8220baa46faf..16a7c2326d48 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -205,7 +205,7 @@ static int ibmebus_create_devices(const struct of_device_id *matches) return ret; } -int ibmebus_register_driver(struct of_platform_driver *drv) +int ibmebus_register_driver(struct platform_driver *drv) { /* If the driver uses devices that ibmebus doesn't know, add them */ ibmebus_create_devices(drv->driver.of_match_table); @@ -215,7 +215,7 @@ int ibmebus_register_driver(struct of_platform_driver *drv) } EXPORT_SYMBOL(ibmebus_register_driver); -void ibmebus_unregister_driver(struct of_platform_driver *drv) +void ibmebus_unregister_driver(struct platform_driver *drv) { driver_unregister(&drv->driver); } @@ -338,11 +338,10 @@ static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) static int ibmebus_bus_device_probe(struct device *dev) { int error = -ENODEV; - struct of_platform_driver *drv; + struct platform_driver *drv; struct platform_device *of_dev; - const struct of_device_id *match; - drv = to_of_platform_driver(dev->driver); + drv = to_platform_driver(dev->driver); of_dev = to_platform_device(dev); if (!drv->probe) @@ -350,9 +349,8 @@ static int ibmebus_bus_device_probe(struct device *dev) of_dev_get(of_dev); - match = of_match_device(drv->driver.of_match_table, dev); - if (match) - error = drv->probe(of_dev, match); + if (of_driver_match_device(dev, dev->driver)) + error = drv->probe(of_dev); if (error) of_dev_put(of_dev); @@ -362,7 +360,7 @@ static int ibmebus_bus_device_probe(struct device *dev) static int ibmebus_bus_device_remove(struct device *dev) { struct platform_device *of_dev = to_platform_device(dev); - struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + struct platform_driver *drv = to_platform_driver(dev->driver); if (dev->driver && drv->remove) drv->remove(of_dev); @@ -372,7 +370,7 @@ static int ibmebus_bus_device_remove(struct device *dev) static void ibmebus_bus_device_shutdown(struct device *dev) { struct platform_device *of_dev = to_platform_device(dev); - struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + struct platform_driver *drv = to_platform_driver(dev->driver); if (dev->driver && drv->shutdown) drv->shutdown(of_dev); @@ -419,7 +417,7 @@ struct device_attribute ibmebus_bus_device_attrs[] = { static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) { struct platform_device *of_dev = to_platform_device(dev); - struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + struct platform_driver *drv = to_platform_driver(dev->driver); int ret = 0; if (dev->driver && drv->suspend) @@ -430,7 +428,7 @@ static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) static int ibmebus_bus_legacy_resume(struct device *dev) { struct platform_device *of_dev = to_platform_device(dev); - struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + struct platform_driver *drv = to_platform_driver(dev->driver); int ret = 0; if (dev->driver && drv->resume) diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 939ea7ef0dc8..d7216c9abda1 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -85,7 +85,7 @@ int powersave_nap; /* * Register the sysctl to set/clear powersave_nap. */ -static ctl_table powersave_nap_ctl_table[]={ +static struct ctl_table powersave_nap_ctl_table[] = { { .procname = "powersave-nap", .data = &powersave_nap, @@ -95,7 +95,7 @@ static ctl_table powersave_nap_ctl_table[]={ }, {} }; -static ctl_table powersave_nap_sysctl_root[] = { +static struct ctl_table powersave_nap_sysctl_root[] = { { .procname = "kernel", .mode = 0555, diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 50e90b7e7139..fa0b54b2a362 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -55,6 +55,7 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { + unsigned hugepage_shift; struct iowa_bus *bus; int token; @@ -70,11 +71,17 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) return NULL; - ptep = find_linux_pte(init_mm.pgd, vaddr); + ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr, + &hugepage_shift); if (ptep == NULL) paddr = 0; - else + else { + /* + * we don't have hugepages backing iomem + */ + WARN_ON(hugepage_shift); paddr = pte_pfn(*ptep) << PAGE_SHIFT; + } bus = iowa_pci_find(vaddr, paddr); if (bus == NULL) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c0d0dbddfba1..b20ff173a671 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -36,6 +36,8 @@ #include <linux/hash.h> #include <linux/fault-inject.h> #include <linux/pci.h> +#include <linux/iommu.h> +#include <linux/sched.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/iommu.h> @@ -44,6 +46,7 @@ #include <asm/kdump.h> #include <asm/fadump.h> #include <asm/vio.h> +#include <asm/tce.h> #define DBG(...) @@ -724,6 +727,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) if (tbl->it_offset == 0) clear_bit(0, tbl->it_map); +#ifdef CONFIG_IOMMU_API + if (tbl->it_group) { + iommu_group_put(tbl->it_group); + BUG_ON(tbl->it_group); + } +#endif + /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); @@ -860,3 +870,316 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } } + +#ifdef CONFIG_IOMMU_API +/* + * SPAPR TCE API + */ +static void group_release(void *iommu_data) +{ + struct iommu_table *tbl = iommu_data; + tbl->it_group = NULL; +} + +void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, unsigned long pe_num) +{ + struct iommu_group *grp; + char *name; + + grp = iommu_group_alloc(); + if (IS_ERR(grp)) { + pr_warn("powerpc iommu api: cannot create new group, err=%ld\n", + PTR_ERR(grp)); + return; + } + tbl->it_group = grp; + iommu_group_set_iommudata(grp, tbl, group_release); + name = kasprintf(GFP_KERNEL, "domain%d-pe%lx", + pci_domain_number, pe_num); + if (!name) + return; + iommu_group_set_name(grp, name); + kfree(name); +} + +enum dma_data_direction iommu_tce_direction(unsigned long tce) +{ + if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE)) + return DMA_BIDIRECTIONAL; + else if (tce & TCE_PCI_READ) + return DMA_TO_DEVICE; + else if (tce & TCE_PCI_WRITE) + return DMA_FROM_DEVICE; + else + return DMA_NONE; +} +EXPORT_SYMBOL_GPL(iommu_tce_direction); + +void iommu_flush_tce(struct iommu_table *tbl) +{ + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + /* Make sure updates are seen by hardware */ + mb(); +} +EXPORT_SYMBOL_GPL(iommu_flush_tce); + +int iommu_tce_clear_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce_value, + unsigned long npages) +{ + /* ppc_md.tce_free() does not support any value but 0 */ + if (tce_value) + return -EINVAL; + + if (ioba & ~IOMMU_PAGE_MASK) + return -EINVAL; + + ioba >>= IOMMU_PAGE_SHIFT; + if (ioba < tbl->it_offset) + return -EINVAL; + + if ((ioba + npages) > (tbl->it_offset + tbl->it_size)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); + +int iommu_tce_put_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce) +{ + if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) + return -EINVAL; + + if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ)) + return -EINVAL; + + if (ioba & ~IOMMU_PAGE_MASK) + return -EINVAL; + + ioba >>= IOMMU_PAGE_SHIFT; + if (ioba < tbl->it_offset) + return -EINVAL; + + if ((ioba + 1) > (tbl->it_offset + tbl->it_size)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); + +unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) +{ + unsigned long oldtce; + struct iommu_pool *pool = get_pool(tbl, entry); + + spin_lock(&(pool->lock)); + + oldtce = ppc_md.tce_get(tbl, entry); + if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) + ppc_md.tce_free(tbl, entry, 1); + else + oldtce = 0; + + spin_unlock(&(pool->lock)); + + return oldtce; +} +EXPORT_SYMBOL_GPL(iommu_clear_tce); + +int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + unsigned long oldtce; + struct page *page; + + for ( ; pages; --pages, ++entry) { + oldtce = iommu_clear_tce(tbl, entry); + if (!oldtce) + continue; + + page = pfn_to_page(oldtce >> PAGE_SHIFT); + WARN_ON(!page); + if (page) { + if (oldtce & TCE_PCI_WRITE) + SetPageDirty(page); + put_page(page); + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages); + +/* + * hwaddr is a kernel virtual address here (0xc... bazillion), + * tce_build converts it to a physical address. + */ +int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, + unsigned long hwaddr, enum dma_data_direction direction) +{ + int ret = -EBUSY; + unsigned long oldtce; + struct iommu_pool *pool = get_pool(tbl, entry); + + spin_lock(&(pool->lock)); + + oldtce = ppc_md.tce_get(tbl, entry); + /* Add new entry if it is not busy */ + if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))) + ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL); + + spin_unlock(&(pool->lock)); + + /* if (unlikely(ret)) + pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", + __func__, hwaddr, entry << IOMMU_PAGE_SHIFT, + hwaddr, ret); */ + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_tce_build); + +int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, + unsigned long tce) +{ + int ret; + struct page *page = NULL; + unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK; + enum dma_data_direction direction = iommu_tce_direction(tce); + + ret = get_user_pages_fast(tce & PAGE_MASK, 1, + direction != DMA_TO_DEVICE, &page); + if (unlikely(ret != 1)) { + /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", + tce, entry << IOMMU_PAGE_SHIFT, ret); */ + return -EFAULT; + } + hwaddr = (unsigned long) page_address(page) + offset; + + ret = iommu_tce_build(tbl, entry, hwaddr, direction); + if (ret) + put_page(page); + + if (ret < 0) + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", + __func__, entry << IOMMU_PAGE_SHIFT, tce, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode); + +int iommu_take_ownership(struct iommu_table *tbl) +{ + unsigned long sz = (tbl->it_size + 7) >> 3; + + if (tbl->it_offset == 0) + clear_bit(0, tbl->it_map); + + if (!bitmap_empty(tbl->it_map, tbl->it_size)) { + pr_err("iommu_tce: it_map is not empty"); + return -EBUSY; + } + + memset(tbl->it_map, 0xff, sz); + iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_take_ownership); + +void iommu_release_ownership(struct iommu_table *tbl) +{ + unsigned long sz = (tbl->it_size + 7) >> 3; + + iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); + memset(tbl->it_map, 0, sz); + + /* Restore bit#0 set by iommu_init_table() */ + if (tbl->it_offset == 0) + set_bit(0, tbl->it_map); +} +EXPORT_SYMBOL_GPL(iommu_release_ownership); + +static int iommu_add_device(struct device *dev) +{ + struct iommu_table *tbl; + int ret = 0; + + if (WARN_ON(dev->iommu_group)) { + pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n", + dev_name(dev), + iommu_group_id(dev->iommu_group)); + return -EBUSY; + } + + tbl = get_iommu_table_base(dev); + if (!tbl || !tbl->it_group) { + pr_debug("iommu_tce: skipping device %s with no tbl\n", + dev_name(dev)); + return 0; + } + + pr_debug("iommu_tce: adding %s to iommu group %d\n", + dev_name(dev), iommu_group_id(tbl->it_group)); + + ret = iommu_group_add_device(tbl->it_group, dev); + if (ret < 0) + pr_err("iommu_tce: %s has not been added, ret=%d\n", + dev_name(dev), ret); + + return ret; +} + +static void iommu_del_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static int iommu_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + return iommu_add_device(dev); + case BUS_NOTIFY_DEL_DEVICE: + iommu_del_device(dev); + return 0; + default: + return 0; + } +} + +static struct notifier_block tce_iommu_bus_nb = { + .notifier_call = iommu_bus_notifier, +}; + +static int __init tce_iommu_init(void) +{ + struct pci_dev *pdev = NULL; + + BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); + + for_each_pci_dev(pdev) + iommu_add_device(&pdev->dev); + + bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); + return 0; +} + +subsys_initcall_sync(tce_iommu_init); + +#else + +void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, unsigned long pe_num) +{ +} + +#endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ea185e0b3cae..2e51cde616d2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -116,8 +116,6 @@ static inline notrace int decrementer_check_overflow(void) u64 now = get_tb_or_rtc(); u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - if (now >= *next_tb) - set_dec(1); return now >= *next_tb; } diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 11f5b03a0b06..2156ea90eb54 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -36,12 +36,6 @@ #include <asm/sstep.h> #include <asm/uaccess.h> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -#define MSR_SINGLESTEP (MSR_DE) -#else -#define MSR_SINGLESTEP (MSR_SE) -#endif - DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -104,19 +98,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - /* We turn off async exceptions to ensure that the single step will - * be for the instruction we have the kprobe on, if we dont its - * possible we'd get the single step reported for an exception handler - * like Decrementer or External Interrupt */ - regs->msr &= ~MSR_EE; - regs->msr |= MSR_SINGLESTEP; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - regs->msr &= ~MSR_CE; - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); -#ifdef CONFIG_PPC_47x - isync(); -#endif -#endif + enable_single_step(regs); /* * On powerpc we should single step on the original diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 6782221d49bd..db28032e320e 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -750,13 +750,8 @@ EXPORT_SYMBOL_GPL(kvm_hypercall); static __init void kvm_free_tmp(void) { - unsigned long start, end; - - start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; - end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; - - /* Free the tmp space we don't need */ - free_reserved_area(start, end, 0, NULL); + free_reserved_area(&kvm_tmp[kvm_tmp_index], + &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL); } static int __init kvm_guest_init(void) diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 48fbc2b97e95..8213ee1eb05a 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -84,22 +84,30 @@ static ssize_t dev_nvram_read(struct file *file, char __user *buf, char *tmp = NULL; ssize_t size; - ret = -ENODEV; - if (!ppc_md.nvram_size) + if (!ppc_md.nvram_size) { + ret = -ENODEV; goto out; + } - ret = 0; size = ppc_md.nvram_size(); - if (*ppos >= size || size < 0) + if (size < 0) { + ret = size; + goto out; + } + + if (*ppos >= size) { + ret = 0; goto out; + } count = min_t(size_t, count, size - *ppos); count = min(count, PAGE_SIZE); - ret = -ENOMEM; tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) + if (!tmp) { + ret = -ENOMEM; goto out; + } ret = ppc_md.nvram_read(tmp, count, ppos); if (ret <= 0) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c new file mode 100644 index 000000000000..3f608800c06b --- /dev/null +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -0,0 +1,111 @@ +/* + * Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c" + * + * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com> + * Copyright (C) 2005 International Business Machines + * + * Updates, 2005, John Rose <johnrose@austin.ibm.com> + * Updates, 2005, Linas Vepstas <linas@austin.ibm.com> + * Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/pci.h> +#include <linux/export.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> +#include <asm/firmware.h> +#include <asm/eeh.h> + +/** + * __pcibios_remove_pci_devices - remove all devices under this bus + * @bus: the indicated PCI bus + * @purge_pe: destroy the PE on removal of PCI devices + * + * Remove all of the PCI devices under this bus both from the + * linux pci device tree, and from the powerpc EEH address cache. + * By default, the corresponding PE will be destroied during the + * normal PCI hotplug path. For PCI hotplug during EEH recovery, + * the corresponding PE won't be destroied and deallocated. + */ +void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) +{ + struct pci_dev *dev, *tmp; + struct pci_bus *child_bus; + + /* First go down child busses */ + list_for_each_entry(child_bus, &bus->children, node) + __pcibios_remove_pci_devices(child_bus, purge_pe); + + pr_debug("PCI: Removing devices on bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); + list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + pr_debug(" * Removing %s...\n", pci_name(dev)); + eeh_remove_bus_device(dev, purge_pe); + pci_stop_and_remove_bus_device(dev); + } +} + +/** + * pcibios_remove_pci_devices - remove all devices under this bus + * @bus: the indicated PCI bus + * + * Remove all of the PCI devices under this bus both from the + * linux pci device tree, and from the powerpc EEH address cache. + */ +void pcibios_remove_pci_devices(struct pci_bus *bus) +{ + __pcibios_remove_pci_devices(bus, 1); +} +EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); + +/** + * pcibios_add_pci_devices - adds new pci devices to bus + * @bus: the indicated PCI bus + * + * This routine will find and fixup new pci devices under + * the indicated bus. This routine presumes that there + * might already be some devices under this bridge, so + * it carefully tries to add only new devices. (And that + * is how this routine differs from other, similar pcibios + * routines.) + */ +void pcibios_add_pci_devices(struct pci_bus * bus) +{ + int slotno, num, mode, pass, max; + struct pci_dev *dev; + struct device_node *dn = pci_bus_to_OF_node(bus); + + eeh_add_device_tree_early(dn); + + mode = PCI_PROBE_NORMAL; + if (ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + + if (mode == PCI_PROBE_DEVTREE) { + /* use ofdt-based probe */ + of_rescan_bus(dn, bus); + } else if (mode == PCI_PROBE_NORMAL) { + /* use legacy probe */ + slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); + num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + if (!num) + return; + pcibios_setup_bus_devices(bus); + max = bus->busn_res.start; + for (pass = 0; pass < 2; pass++) { + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + max = pci_scan_bridge(bus, dev, + max, pass); + } + } + } + pcibios_finish_adding_to_bus(bus); +} +EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 2a67e9baa59f..6b0ba5854d99 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -128,7 +128,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, const char *type; struct pci_slot *slot; - dev = alloc_pci_dev(); + dev = pci_alloc_dev(bus); if (!dev) return NULL; type = of_get_property(node, "device_type", NULL); @@ -137,7 +137,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pr_debug(" create device, devfn: %x, type: %s\n", devfn, type); - dev->bus = bus; dev->dev.of_node = of_node_get(node); dev->dev.parent = bus->bridge; dev->dev.bus = &pci_bus_type; @@ -165,7 +164,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pr_debug(" class: 0x%x\n", dev->class); pr_debug(" revision: 0x%x\n", dev->revision); - dev->current_state = 4; /* unknown power state */ + dev->current_state = PCI_UNKNOWN; /* unknown power state */ dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index feb8580fdc84..c30612aad68e 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -29,25 +29,9 @@ #ifdef CONFIG_PPC64 -static loff_t page_map_seek( struct file *file, loff_t off, int whence) +static loff_t page_map_seek(struct file *file, loff_t off, int whence) { - loff_t new; - switch(whence) { - case 0: - new = off; - break; - case 1: - new = file->f_pos + off; - break; - case 2: - new = PAGE_SIZE + off; - break; - default: - return -EINVAL; - } - if ( new < 0 || new > PAGE_SIZE ) - return -EINVAL; - return (file->f_pos = new); + return fixed_size_llseek(file, off, whence, PAGE_SIZE); } static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 076d1242507a..c517dbe705fd 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -916,7 +916,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) flush_altivec_to_thread(src); flush_vsx_to_thread(src); flush_spe_to_thread(src); + *dst = *src; + + clear_task_ebb(dst); + return 0; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce2..eb23ac92abb9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -559,6 +559,35 @@ void __init early_init_dt_setup_initrd_arch(unsigned long start, } #endif +static void __init early_reserve_mem_dt(void) +{ + unsigned long i, len, dt_root; + const __be32 *prop; + + dt_root = of_get_flat_dt_root(); + + prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len); + + if (!prop) + return; + + DBG("Found new-style reserved-ranges\n"); + + /* Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. */ + for (i = 0; i < len / (sizeof(*prop) * 4); i++) { + u64 base, size; + + base = of_read_number(prop + (i * 4) + 0, 2); + size = of_read_number(prop + (i * 4) + 2, 2); + + if (size) { + DBG("reserving: %llx -> %llx\n", base, size); + memblock_reserve(base, size); + } + } +} + static void __init early_reserve_mem(void) { u64 base, size; @@ -574,12 +603,16 @@ static void __init early_reserve_mem(void) self_size = initial_boot_params->totalsize; memblock_reserve(self_base, self_size); + /* Look for the new "reserved-regions" property in the DT */ + early_reserve_mem_dt(); + #ifdef CONFIG_BLK_DEV_INITRD - /* then reserve the initrd, if any */ - if (initrd_start && (initrd_end > initrd_start)) + /* Then reserve the initrd, if any */ + if (initrd_start && (initrd_end > initrd_start)) { memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), _ALIGN_UP(initrd_end, PAGE_SIZE) - _ALIGN_DOWN(initrd_start, PAGE_SIZE)); + } #endif /* CONFIG_BLK_DEV_INITRD */ #ifdef CONFIG_PPC32 @@ -591,6 +624,8 @@ static void __init early_reserve_mem(void) u32 base_32, size_32; u32 *reserve_map_32 = (u32 *)reserve_map; + DBG("Found old 32-bit reserve map\n"); + while (1) { base_32 = *(reserve_map_32++); size_32 = *(reserve_map_32++); @@ -605,6 +640,9 @@ static void __init early_reserve_mem(void) return; } #endif + DBG("Processing reserve map\n"); + + /* Handle the reserve map in the fdt blob if it exists */ while (1) { base = *(reserve_map++); size = *(reserve_map++); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 98c2fc198712..9a0d24c390a3 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -975,16 +975,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; hw_brk.len = 8; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(task) < 0) - return -ESRCH; - bp = thread->ptrace_bps[0]; if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } - ptrace_put_breakpoints(task); return 0; } if (bp) { @@ -997,11 +993,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ret = modify_user_hw_breakpoint(bp, &attr); if (ret) { - ptrace_put_breakpoints(task); return ret; } thread->ptrace_bps[0] = bp; - ptrace_put_breakpoints(task); thread->hw_brk = hw_brk; return 0; } @@ -1016,12 +1010,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ptrace_triggered, NULL, task); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; - ptrace_put_breakpoints(task); return PTR_ERR(bp); } - ptrace_put_breakpoints(task); - #endif /* CONFIG_HAVE_HW_BREAKPOINT */ task->thread.hw_brk = hw_brk; #else /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -1440,24 +1431,19 @@ static long ppc_set_hwdebug(struct task_struct *child, if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) brk.type |= HW_BRK_TYPE_WRITE; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - /* * Check if the request is for 'range' breakpoints. We can * support it if range < 8 bytes. */ - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) { + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) len = bp_info->addr2 - bp_info->addr; - } else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { - ptrace_put_breakpoints(child); + else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + len = 1; + else return -EINVAL; - } bp = thread->ptrace_bps[0]; - if (bp) { - ptrace_put_breakpoints(child); + if (bp) return -ENOSPC; - } /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); @@ -1469,11 +1455,9 @@ static long ppc_set_hwdebug(struct task_struct *child, ptrace_triggered, NULL, child); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; - ptrace_put_breakpoints(child); return PTR_ERR(bp); } - ptrace_put_breakpoints(child); return 1; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ @@ -1517,16 +1501,12 @@ static long ppc_del_hwdebug(struct task_struct *child, long data) return -EINVAL; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - bp = thread->ptrace_bps[0]; if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } else ret = -ENOENT; - ptrace_put_breakpoints(child); return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ if (child->thread.hw_brk.address == 0) diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S index ef46ba6e094f..f366fedb0872 100644 --- a/arch/powerpc/kernel/reloc_32.S +++ b/arch/powerpc/kernel/reloc_32.S @@ -166,7 +166,7 @@ ha16: /* R_PPC_ADDR16_LO */ lo16: cmpwi r4, R_PPC_ADDR16_LO - bne nxtrela + bne unknown_type lwz r4, 0(r9) /* r_offset */ lwz r0, 8(r9) /* r_addend */ add r0, r0, r3 @@ -191,6 +191,7 @@ nxtrela: dcbst r4,r7 sync /* Ensure the data is flushed before icbi */ icbi r4,r7 +unknown_type: cmpwi r8, 0 /* relasz = 0 ? */ ble done add r9, r9, r6 /* move to next entry in the .rela table */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 52add6f3e201..80b5ef403f68 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1172,7 +1172,7 @@ int __init early_init_dt_scan_rtas(unsigned long node, static arch_spinlock_t timebase_lock; static u64 timebase = 0; -void __cpuinit rtas_give_timebase(void) +void rtas_give_timebase(void) { unsigned long flags; @@ -1189,7 +1189,7 @@ void __cpuinit rtas_give_timebase(void) local_irq_restore(flags); } -void __cpuinit rtas_take_timebase(void) +void rtas_take_timebase(void) { while (!timebase) barrier(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e379d3fd1694..389fb8077cc9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -76,7 +76,7 @@ #endif int boot_cpuid = 0; -int __initdata spinning_secondaries; +int spinning_secondaries; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 201385c3a1ae..0f83122e6676 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, * altivec/spe instructions at some point. */ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - int sigret, int ctx_has_vsx_region) + struct mcontext __user *tm_frame, int sigret, + int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -475,6 +476,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (__put_user(msr, &frame->mc_gregs[PT_MSR])) return 1; + /* We need to write 0 the MSR top 32 bits in the tm frame so that we + * can check it on the restore to see if TM is active + */ + if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) + return 1; + if (sigret) { /* Set up the sigreturn trampoline: li r0,sigret; sc */ if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) @@ -747,7 +754,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { long err; - unsigned long msr; + unsigned long msr, msr_hi; #ifdef CONFIG_VSX int i; #endif @@ -852,8 +859,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; + /* Get the top half of the MSR */ + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + return 1; + /* Pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { @@ -952,6 +962,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; + struct mcontext __user *tm_frame = NULL; void __user *addr; unsigned long newsp = 0; int sigret; @@ -985,23 +996,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_frame = &rt_sf->uc_transact.uc_mcontext; if (MSR_TM_ACTIVE(regs->msr)) { - if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, - &rt_sf->uc_transact.uc_mcontext, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret)) goto badframe; } else #endif - if (save_user_regs(regs, frame, sigret, 1)) + { + if (save_user_regs(regs, frame, tm_frame, sigret, 1)) goto badframe; + } regs->link = tramp; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) - || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), - &rt_sf->uc_transact.uc_regs)) + || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; } else @@ -1170,7 +1182,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) - || save_user_regs(regs, mctx, 0, ctx_has_vsx_region) + || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) || put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked) || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) return -EFAULT; @@ -1233,7 +1245,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) goto bad; - if (MSR_TM_SUSPENDED(msr_hi<<32)) { + if (MSR_TM_ACTIVE(msr_hi<<32)) { /* We only recheckpoint on return if we're * transaction. */ @@ -1392,6 +1404,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, { struct sigcontext __user *sc; struct sigframe __user *frame; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; int sigret; unsigned long tramp; @@ -1425,6 +1438,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; if (MSR_TM_ACTIVE(regs->msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, sigret)) @@ -1432,8 +1446,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } else #endif - if (save_user_regs(regs, &frame->mctx, sigret, 1)) + { + if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) goto badframe; + } regs->link = tramp; @@ -1481,16 +1497,22 @@ badframe: long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, struct pt_regs *regs) { + struct sigframe __user *sf; struct sigcontext __user *sc; struct sigcontext sigctx; struct mcontext __user *sr; void __user *addr; sigset_t set; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct mcontext __user *mcp, *tm_mcp; + unsigned long msr_hi; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sc = &sf->sctx; addr = sc; if (copy_from_user(&sigctx, sc, sizeof(sigctx))) goto badframe; @@ -1507,11 +1529,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, #endif set_current_blocked(&set); - sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); - addr = sr; - if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) - || restore_user_regs(regs, sr, 1)) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + mcp = (struct mcontext __user *)&sf->mctx; + tm_mcp = (struct mcontext __user *)&sf->mctx_transact; + if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) goto badframe; + if (MSR_TM_ACTIVE(msr_hi<<32)) { + if (!cpu_has_feature(CPU_FTR_TM)) + goto badframe; + if (restore_tm_user_regs(regs, mcp, tm_mcp)) + goto badframe; + } else +#endif + { + sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); + addr = sr; + if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) + || restore_user_regs(regs, sr, 1)) + goto badframe; + } set_thread_flag(TIF_RESTOREALL); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 345947367ec0..887e99d85bc2 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -410,6 +410,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + /* pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); /* The following non-GPR non-FPR non-VR state is also checkpointed: */ @@ -505,8 +509,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this: */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { @@ -654,7 +656,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; - if (MSR_TM_SUSPENDED(msr)) { + if (MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; if (__get_user(uc_transact, &uc->uc_link)) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ee7ac5e6e28a..38b0ba65a735 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -480,7 +480,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) secondary_ti = current_set[cpu] = ti; } -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { int rc, c; @@ -610,7 +610,7 @@ static struct device_node *cpu_to_l2cache(int cpu) } /* Activate a secondary processor. */ -__cpuinit void start_secondary(void *unused) +void start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); struct device_node *l2_cache; @@ -637,12 +637,10 @@ __cpuinit void start_secondary(void *unused) vdso_getcpu_init(); #endif - notify_cpu_starting(cpu); - set_cpu_online(cpu, true); /* Update sibling maps */ base = cpu_first_thread_sibling(cpu); for (i = 0; i < threads_per_core; i++) { - if (cpu_is_offline(base + i)) + if (cpu_is_offline(base + i) && (cpu != base + i)) continue; cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); @@ -667,6 +665,10 @@ __cpuinit void start_secondary(void *unused) } of_node_put(l2_cache); + smp_wmb(); + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); + local_irq_enable(); cpu_startup_entry(CPUHP_ONLINE); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e68a84568b8b..27a90b99ef67 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -341,7 +341,7 @@ static struct device_attribute pa6t_attrs[] = { #endif /* HAS_PPC_PMC_PA6T */ #endif /* HAS_PPC_PMC_CLASSIC */ -static void __cpuinit register_cpu_online(unsigned int cpu) +static void register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; @@ -502,7 +502,7 @@ ssize_t arch_cpu_release(const char *buf, size_t count) #endif /* CONFIG_HOTPLUG_CPU */ -static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, +static int sysfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; @@ -522,7 +522,7 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata sysfs_cpu_nb = { +static struct notifier_block sysfs_cpu_nb = { .notifier_call = sysfs_cpu_notify, }; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 5fc29ad7e26f..65ab9e909377 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -631,7 +631,6 @@ static int __init get_freq(char *name, int cells, unsigned long *val) return found; } -/* should become __cpuinit when secondary_cpu_time_init also is */ void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2da67e7a16d5..51be8fb24803 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -112,9 +112,18 @@ _GLOBAL(tm_reclaim) std r3, STACK_PARAM(0)(r1) SAVE_NVGPRS(r1) + /* We need to setup MSR for VSX register save instructions. Here we + * also clear the MSR RI since when we do the treclaim, we won't have a + * valid kernel pointer for a while. We clear RI here as it avoids + * adding another mtmsr closer to the treclaim. This makes the region + * maked as non-recoverable wider than it needs to be but it saves on + * inserting another mtmsrd later. + */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP + li r16, MSR_RI + andc r15, r15, r16 oris r15, r15, MSR_VEC@h #ifdef CONFIG_VSX BEGIN_FTR_SECTION @@ -349,9 +358,10 @@ restore_gprs: mtcr r5 mtxer r6 - /* MSR and flags: We don't change CRs, and we don't need to alter - * MSR. + /* Clear the MSR RI since we are about to change R1. EE is already off */ + li r4, 0 + mtmsrd r4, 1 REST_4GPRS(0, r7) /* GPR0-3 */ REST_GPR(4, r7) /* GPR4-6 */ @@ -377,6 +387,10 @@ restore_gprs: GET_PACA(r13) GET_SCRATCH0(r1) + /* R1 is restored, so we are recoverable again. EE is still off */ + li r4, MSR_RI + mtmsrd r4, 1 + REST_NVGPRS(r1) addi r1, r1, TM_FRAME_SIZE diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c0e5caf8ccc7..bf33c22e38a4 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -866,6 +866,10 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword) u8 val; u32 shift = 8 * (3 - (pos & 0x3)); + /* if process is 32-bit, clear upper 32 bits of EA */ + if ((regs->msr & MSR_64BIT) == 0) + EA &= 0xFFFFFFFF; + switch ((instword & PPC_INST_STRING_MASK)) { case PPC_INST_LSWX: case PPC_INST_LSWI: @@ -1125,7 +1129,17 @@ void __kprobes program_check_exception(struct pt_regs *regs) * ESR_DST (!?) or 0. In the process of chasing this with the * hardware people - not sure if it can happen on any illegal * instruction or only on FP instructions, whether there is a - * pattern to occurrences etc. -dgibson 31/Mar/2003 */ + * pattern to occurrences etc. -dgibson 31/Mar/2003 + */ + + /* + * If we support a HW FPU, we need to ensure the FP state + * if flushed into the thread_struct before attempting + * emulation + */ +#ifdef CONFIG_PPC_FPU + flush_fp_to_thread(current); +#endif switch (do_mathemu(regs)) { case 0: emulate_single_step(regs); @@ -1282,25 +1296,50 @@ void vsx_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } -void tm_unavailable_exception(struct pt_regs *regs) +void facility_unavailable_exception(struct pt_regs *regs) { + static char *facility_strings[] = { + "FPU", + "VMX/VSX", + "DSCR", + "PMU SPRs", + "BHRB", + "TM", + "AT", + "EBB", + "TAR", + }; + char *facility, *prefix; + u64 value; + + if (regs->trap == 0xf60) { + value = mfspr(SPRN_FSCR); + prefix = ""; + } else { + value = mfspr(SPRN_HFSCR); + prefix = "Hypervisor "; + } + + value = value >> 56; + /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - /* Currently we never expect a TMU exception. Catch - * this and kill the process! - */ - printk(KERN_EMERG "Unexpected TM unavailable exception at %lx " - "(msr %lx)\n", - regs->nip, regs->msr); + if (value < ARRAY_SIZE(facility_strings)) + facility = facility_strings[value]; + else + facility = "unknown"; + + pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", + prefix, facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return; } - die("Unexpected TM unavailable exception", regs, SIGABRT); + die("Unexpected facility unavailable exception", regs, SIGABRT); } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1396,8 +1435,7 @@ void performance_monitor_exception(struct pt_regs *regs) void SoftwareEmulation(struct pt_regs *regs) { extern int do_mathemu(struct pt_regs *); - extern int Soft_emulate_8xx(struct pt_regs *); -#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU) +#if defined(CONFIG_MATH_EMULATION) int errcode; #endif @@ -1430,23 +1468,6 @@ void SoftwareEmulation(struct pt_regs *regs) _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return; } - -#elif defined(CONFIG_8XX_MINIMAL_FPEMU) - errcode = Soft_emulate_8xx(regs); - if (errcode >= 0) - PPC_WARN_EMULATED(8xx, regs); - - switch (errcode) { - case 0: - emulate_single_step(regs); - return; - case 1: - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - } #else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); #endif @@ -1796,8 +1817,6 @@ struct ppc_emulated ppc_emulated = { WARN_EMULATED_SETUP(unaligned), #ifdef CONFIG_MATH_EMULATION WARN_EMULATED_SETUP(math), -#elif defined(CONFIG_8XX_MINIMAL_FPEMU) - WARN_EMULATED_SETUP(8xx), #endif #ifdef CONFIG_VSX WARN_EMULATED_SETUP(vsx), diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 9d3fdcd66290..a15837519dca 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -50,7 +50,7 @@ void __init udbg_early_init(void) udbg_init_debug_beat(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); -#elif defined(CONFIG_BOOTX_TEXT) +#elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) udbg_init_btext(); #elif defined(CONFIG_PPC_EARLY_DEBUG_44x) /* PPC44x debug */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d4f463ac65b1..1d9c92621b36 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -711,7 +711,7 @@ static void __init vdso_setup_syscall_map(void) } #ifdef CONFIG_PPC64 -int __cpuinit vdso_getcpu_init(void) +int vdso_getcpu_init(void) { unsigned long cpu, node, val; diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 422de3f4d46c..008cd856c5b5 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -5,9 +5,10 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm +KVM := ../../../virt/kvm -common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ - eventfd.o) +common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ + $(KVM)/eventfd.o CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. @@ -53,7 +54,7 @@ kvm-e500mc-objs := \ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ - ../../../virt/kvm/coalesced_mmio.o \ + $(KVM)/coalesced_mmio.o \ fpu.o \ book3s_paired_singles.o \ book3s_pr.o \ @@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o kvm-book3s_64-module-objs := \ - ../../../virt/kvm/kvm_main.o \ - ../../../virt/kvm/eventfd.o \ + $(KVM)/kvm_main.o \ + $(KVM)/eventfd.o \ powerpc.o \ emulate.o \ book3s.o \ @@ -111,7 +112,7 @@ kvm-book3s_32-objs := \ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o -kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index b871721c0050..739bfbadb85e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -26,6 +26,7 @@ #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> +#include <asm/mmu-hash64.h> /* #define DEBUG_MMU */ @@ -76,6 +77,24 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( return NULL; } +static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe) +{ + return slbe->tb ? SID_SHIFT_1T : SID_SHIFT; +} + +static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe) +{ + return (1ul << kvmppc_slb_sid_shift(slbe)) - 1; +} + +static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr) +{ + eaddr &= kvmppc_slb_offset_mask(slb); + + return (eaddr >> VPN_SHIFT) | + ((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT)); +} + static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, bool data) { @@ -85,11 +104,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, if (!slb) return 0; - if (slb->tb) - return (((u64)eaddr >> 12) & 0xfffffff) | - (((u64)slb->vsid) << 28); - - return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16); + return kvmppc_slb_calc_vpn(slb, eaddr); } static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) @@ -100,7 +115,8 @@ static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) { int p = kvmppc_mmu_book3s_64_get_pagesize(slbe); - return ((eaddr & 0xfffffff) >> p); + + return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p); } static hva_t kvmppc_mmu_book3s_64_get_pteg( @@ -109,13 +125,15 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg( bool second) { u64 hash, pteg, htabsize; - u32 page; + u32 ssize; hva_t r; + u64 vpn; - page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1); - hash = slbe->vsid ^ page; + vpn = kvmppc_slb_calc_vpn(slbe, eaddr); + ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M; + hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize); if (second) hash = ~hash; hash &= ((1ULL << 39ULL) - 1ULL); @@ -146,7 +164,7 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr) u64 avpn; avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); - avpn |= slbe->vsid << (28 - p); + avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p); if (p < 24) avpn >>= ((80 - p) - 56) - 8; @@ -167,7 +185,6 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, int i; u8 key = 0; bool found = false; - bool perm_err = false; int second = 0; ulong mp_ea = vcpu->arch.magic_page_ea; @@ -190,13 +207,15 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, if (!slbe) goto no_seg_found; + avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); + if (slbe->tb) + avpn |= SLB_VSID_B_1T; + do_second: ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); if (kvm_is_error_hva(ptegp)) goto no_page_found; - avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); - if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp); goto no_page_found; @@ -219,7 +238,7 @@ do_second: continue; /* AVPN compare */ - if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) { + if (HPTE_V_COMPARE(avpn, v)) { u8 pp = (r & HPTE_R_PP) | key; int eaddr_mask = 0xFFF; @@ -248,11 +267,6 @@ do_second: break; } - if (!gpte->may_read) { - perm_err = true; - continue; - } - dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " "-> 0x%lx\n", eaddr, avpn, gpte->vpage, gpte->raddr); @@ -281,6 +295,8 @@ do_second: if (pteg[i+1] != oldr) copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); + if (!gpte->may_read) + return -EPERM; return 0; } else { dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " @@ -296,13 +312,7 @@ do_second: } } - no_page_found: - - - if (perm_err) - return -EPERM; - return -ENOENT; no_seg_found: @@ -334,7 +344,7 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) slbe->large = (rs & SLB_VSID_L) ? 1 : 0; slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; slbe->esid = slbe->tb ? esid_1t : esid; - slbe->vsid = rs >> 12; + slbe->vsid = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16); slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0; @@ -375,6 +385,7 @@ static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr) static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) { struct kvmppc_slb *slbe; + u64 seg_size; dprintk("KVM MMU: slbie(0x%llx)\n", ea); @@ -386,8 +397,11 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid); slbe->valid = false; + slbe->orige = 0; + slbe->origv = 0; - kvmppc_mmu_map_segment(vcpu, ea); + seg_size = 1ull << kvmppc_slb_sid_shift(slbe); + kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size); } static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) @@ -396,8 +410,11 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) dprintk("KVM MMU: slbia()\n"); - for (i = 1; i < vcpu->arch.slb_nr; i++) + for (i = 1; i < vcpu->arch.slb_nr; i++) { vcpu->arch.slb[i].valid = false; + vcpu->arch.slb[i].orige = 0; + vcpu->arch.slb[i].origv = 0; + } if (vcpu->arch.shared->msr & MSR_IR) { kvmppc_mmu_flush_segments(vcpu); @@ -467,8 +484,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); - if (slb) + if (slb) { gvsid = slb->vsid; + if (slb->tb) { + gvsid <<= SID_SHIFT_1T - SID_SHIFT; + gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1); + gvsid |= VSID_1T; + } + } } switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 3a9a1aceb14f..e5240524bf6c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -34,7 +34,7 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, - MMU_PAGE_4K, MMU_SEGSIZE_256M, + MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M, false); } @@ -301,6 +301,23 @@ out: return r; } +void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size) +{ + struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + ulong seg_mask = -seg_size; + int i; + + for (i = 1; i < svcpu->slb_max; i++) { + if ((svcpu->slb[i].esid & SLB_ESID_V) && + (svcpu->slb[i].esid & seg_mask) == ea) { + /* Invalidate this entry */ + svcpu->slb[i].esid = 0; + } + } + + svcpu_put(svcpu); +} + void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) { struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); @@ -325,9 +342,9 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) return -1; vcpu3s->context_id[0] = err; - vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) + vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1) << ESID_BITS) - 1; - vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS; + vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS; vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; kvmppc_mmu_hpte_init(vcpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 5880dfb31074..710d31317d81 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } /* if the guest wants write access, see if that is OK */ if (!writing && hpte_is_writable(r)) { + unsigned int hugepage_shift; pte_t *ptep, pte; /* @@ -683,9 +684,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ rcu_read_lock_sched(); ptep = find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL); - if (ptep && pte_present(*ptep)) { - pte = kvmppc_read_update_linux_pte(ptep, 1); + hva, &hugepage_shift); + if (ptep) { + pte = kvmppc_read_update_linux_pte(ptep, 1, + hugepage_shift); if (pte_write(pte)) write_ok = 1; } diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 56b983e7b738..4f0caecc0f9d 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -66,10 +66,6 @@ slb_exit_skip_ ## num: ld r12, PACA_SLBSHADOWPTR(r13) - /* Save off the first entry so we can slbie it later */ - ld r10, SHADOW_SLB_ESID(0)(r12) - ld r11, SHADOW_SLB_VSID(0)(r12) - /* Remove bolted entries */ UNBOLT_SLB_ENTRY(0) UNBOLT_SLB_ENTRY(1) @@ -81,15 +77,10 @@ slb_exit_skip_ ## num: /* Flush SLB */ + li r10, 0 + slbmte r10, r10 slbia - /* r0 = esid & ESID_MASK */ - rldicr r10, r10, 0, 35 - /* r0 |= CLASS_BIT(VSID) */ - rldic r12, r11, 56 - 36, 36 - or r10, r10, r12 - slbie r10 - /* Fill SLB with our shadow */ lbz r12, SVCPU_SLB_MAX(r3) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6dcbb49105a4..fc25689a9f35 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -27,7 +27,7 @@ static void *real_vmalloc_addr(void *x) unsigned long addr = (unsigned long) x; pte_t *p; - p = find_linux_pte(swapper_pg_dir, addr); + p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL); if (!p || !pte_present(*p)) return NULL; /* assume we don't have huge pages in vmalloc space... */ @@ -139,20 +139,18 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva, { pte_t *ptep; unsigned long ps = *pte_sizep; - unsigned int shift; + unsigned int hugepage_shift; - ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift); + ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift); if (!ptep) return __pte(0); - if (shift) - *pte_sizep = 1ul << shift; + if (hugepage_shift) + *pte_sizep = 1ul << hugepage_shift; else *pte_sizep = PAGE_SIZE; if (ps > *pte_sizep) return __pte(0); - if (!pte_present(*ptep)) - return __pte(0); - return kvmppc_read_update_linux_pte(ptep, writing); + return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); } static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bdc40b8e77d9..19498a567a81 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1239,8 +1239,7 @@ out: #ifdef CONFIG_PPC64 int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) { - /* No flags */ - info->flags = 0; + info->flags = KVM_PPC_1T_SEGMENTS; /* SLB is always 64 entries */ info->slb_size = 64; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1a1b51189773..dcc94f016007 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -796,7 +796,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, kvmppc_fill_pt_regs(®s); timer_interrupt(®s); break; -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) +#if defined(CONFIG_PPC_DOORBELL) case BOOKE_INTERRUPT_DOORBELL: kvmppc_fill_pt_regs(®s); doorbell_exception(®s); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 631a2650e4e4..2c52ada30775 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -169,6 +169,9 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) vcpu->arch.shared->sprg3 = spr_val; break; + /* PIR can legally be written, but we ignore it */ + case SPRN_PIR: break; + default: emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, spr_val); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e15c521846ca..99c7fc16dc0d 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -580,7 +580,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) if (instr & 1) regs->link = regs->nip; if (branch_taken(instr, regs)) - regs->nip = imm; + regs->nip = truncate_if_32bit(regs->msr, imm); return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile index 7d1dba0d57f9..8d035d2d42a6 100644 --- a/arch/powerpc/math-emu/Makefile +++ b/arch/powerpc/math-emu/Makefile @@ -4,7 +4,8 @@ obj-$(CONFIG_MATH_EMULATION) += fabs.o fadd.o fadds.o fcmpo.o fcmpu.o \ fmadd.o fmadds.o fmsub.o fmsubs.o \ fmul.o fmuls.o fnabs.o fneg.o \ fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \ - fres.o frsp.o frsqrte.o fsel.o lfs.o \ + fres.o fre.o frsp.o fsel.o lfs.o \ + frsqrte.o frsqrtes.o \ fsqrt.o fsqrts.o fsub.o fsubs.o \ mcrfs.o mffs.o mtfsb0.o mtfsb1.o \ mtfsf.o mtfsfi.o stfiwx.o stfs.o \ diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c new file mode 100644 index 000000000000..49ccf2cc6a5a --- /dev/null +++ b/arch/powerpc/math-emu/fre.c @@ -0,0 +1,11 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int fre(void *frD, void *frB) +{ +#ifdef DEBUG + printk("%s: %p %p\n", __func__, frD, frB); +#endif + return -ENOSYS; +} diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c new file mode 100644 index 000000000000..7e838e380314 --- /dev/null +++ b/arch/powerpc/math-emu/frsqrtes.c @@ -0,0 +1,11 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int frsqrtes(void *frD, void *frB) +{ +#ifdef DEBUG + printk("%s: %p %p\n", __func__, frD, frB); +#endif + return 0; +} diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index 164d55935bd8..0328e66e0799 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -58,8 +58,10 @@ FLOATFUNC(fnabs); FLOATFUNC(fneg); /* Optional */ +FLOATFUNC(fre); FLOATFUNC(fres); FLOATFUNC(frsqrte); +FLOATFUNC(frsqrtes); FLOATFUNC(fsel); FLOATFUNC(fsqrt); FLOATFUNC(fsqrts); @@ -97,6 +99,7 @@ FLOATFUNC(fsqrts); #define FSQRTS 0x016 /* 22 */ #define FRES 0x018 /* 24 */ #define FMULS 0x019 /* 25 */ +#define FRSQRTES 0x01a /* 26 */ #define FMSUBS 0x01c /* 28 */ #define FMADDS 0x01d /* 29 */ #define FNMSUBS 0x01e /* 30 */ @@ -109,6 +112,7 @@ FLOATFUNC(fsqrts); #define FADD 0x015 /* 21 */ #define FSQRT 0x016 /* 22 */ #define FSEL 0x017 /* 23 */ +#define FRE 0x018 /* 24 */ #define FMUL 0x019 /* 25 */ #define FRSQRTE 0x01a /* 26 */ #define FMSUB 0x01c /* 28 */ @@ -299,9 +303,10 @@ do_mathemu(struct pt_regs *regs) case FDIVS: func = fdivs; type = AB; break; case FSUBS: func = fsubs; type = AB; break; case FADDS: func = fadds; type = AB; break; - case FSQRTS: func = fsqrts; type = AB; break; - case FRES: func = fres; type = AB; break; + case FSQRTS: func = fsqrts; type = XB; break; + case FRES: func = fres; type = XB; break; case FMULS: func = fmuls; type = AC; break; + case FRSQRTES: func = frsqrtes;type = XB; break; case FMSUBS: func = fmsubs; type = ABC; break; case FMADDS: func = fmadds; type = ABC; break; case FNMSUBS: func = fnmsubs; type = ABC; break; @@ -317,10 +322,11 @@ do_mathemu(struct pt_regs *regs) case FDIV: func = fdiv; type = AB; break; case FSUB: func = fsub; type = AB; break; case FADD: func = fadd; type = AB; break; - case FSQRT: func = fsqrt; type = AB; break; + case FSQRT: func = fsqrt; type = XB; break; + case FRE: func = fre; type = XB; break; case FSEL: func = fsel; type = ABC; break; case FMUL: func = fmul; type = AC; break; - case FRSQRTE: func = frsqrte; type = AB; break; + case FRSQRTE: func = frsqrte; type = XB; break; case FMSUB: func = fmsub; type = ABC; break; case FMADD: func = fmadd; type = ABC; break; case FNMSUB: func = fnmsub; type = ABC; break; diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 2c9441ee6bb8..82b1ff759e26 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -41,7 +41,7 @@ int icache_44x_need_flush; unsigned long tlb_47x_boltmap[1024/8]; -static void __cpuinit ppc44x_update_tlb_hwater(void) +static void ppc44x_update_tlb_hwater(void) { extern unsigned int tlb_44x_patch_hwater_D[]; extern unsigned int tlb_44x_patch_hwater_I[]; @@ -134,7 +134,7 @@ static void __init ppc47x_update_boltmap(void) /* * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU */ -static void __cpuinit ppc47x_pin_tlb(unsigned int virt, unsigned int phys) +static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys) { unsigned int rA; int bolted; @@ -229,7 +229,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, } #ifdef CONFIG_SMP -void __cpuinit mmu_init_secondary(int cpu) +void mmu_init_secondary(int cpu) { unsigned long addr; unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index cf16b5733eaa..51230ee6a407 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,17 +6,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o \ +obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o -obj-$(CONFIG_PPC64) += mmap_64.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ slb_low.o slb.o stab.o \ - mmap_64.o $(hash64-y) + $(hash64-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ @@ -28,11 +27,12 @@ obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o -ifeq ($(CONFIG_HUGETLB_PAGE),y) obj-y += hugetlbpage.o +ifeq ($(CONFIG_HUGETLB_PAGE),y) obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o endif +obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index 4b921affa495..49822d90ea96 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -34,7 +34,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, ptep = pte_offset_kernel(&pmd, addr); do { - pte_t pte = *ptep; + pte_t pte = ACCESS_ONCE(*ptep); struct page *page; if ((pte_val(pte) & mask) != result) @@ -63,12 +63,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmdp = pmd_offset(&pud, addr); do { - pmd_t pmd = *pmdp; + pmd_t pmd = ACCESS_ONCE(*pmdp); next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) + /* + * If we find a splitting transparent hugepage we + * return zero. That will result in taking the slow + * path which will call wait_split_huge_page() + * if the pmd is still in splitting state + */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; - if (pmd_huge(pmd)) { + if (pmd_huge(pmd) || pmd_large(pmd)) { if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, write, pages, nr)) return 0; @@ -91,7 +97,7 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, pudp = pud_offset(&pgd, addr); do { - pud_t pud = *pudp; + pud_t pud = ACCESS_ONCE(*pudp); next = pud_addr_end(addr, end); if (pud_none(pud)) @@ -154,7 +160,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, pgdp = pgd_offset(mm, addr); do { - pgd_t pgd = *pgdp; + pgd_t pgd = ACCESS_ONCE(*pgdp); pr_devel(" %016lx: normal pgd %p\n", addr, (void *)pgd_val(pgd)); diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 0e980acae67c..d3cbda62857b 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -289,9 +289,10 @@ htab_modify_pte: /* Call ppc_md.hpte_updatepp */ mr r5,r29 /* vpn */ - li r6,MMU_PAGE_4K /* page size */ - ld r7,STK_PARAM(R9)(r1) /* segment size */ - ld r8,STK_PARAM(R8)(r1) /* get "local" param */ + li r6,MMU_PAGE_4K /* base page size */ + li r7,MMU_PAGE_4K /* actual page size */ + ld r8,STK_PARAM(R9)(r1) /* segment size */ + ld r9,STK_PARAM(R8)(r1) /* get "local" param */ _GLOBAL(htab_call_hpte_updatepp) bl . /* Patched by htab_finish_init() */ @@ -649,9 +650,10 @@ htab_modify_pte: /* Call ppc_md.hpte_updatepp */ mr r5,r29 /* vpn */ - li r6,MMU_PAGE_4K /* page size */ - ld r7,STK_PARAM(R9)(r1) /* segment size */ - ld r8,STK_PARAM(R8)(r1) /* get "local" param */ + li r6,MMU_PAGE_4K /* base page size */ + li r7,MMU_PAGE_4K /* actual page size */ + ld r8,STK_PARAM(R9)(r1) /* segment size */ + ld r9,STK_PARAM(R8)(r1) /* get "local" param */ _GLOBAL(htab_call_hpte_updatepp) bl . /* patched by htab_finish_init() */ @@ -937,9 +939,10 @@ ht64_modify_pte: /* Call ppc_md.hpte_updatepp */ mr r5,r29 /* vpn */ - li r6,MMU_PAGE_64K - ld r7,STK_PARAM(R9)(r1) /* segment size */ - ld r8,STK_PARAM(R8)(r1) /* get "local" param */ + li r6,MMU_PAGE_64K /* base page size */ + li r7,MMU_PAGE_64K /* actual page size */ + ld r8,STK_PARAM(R9)(r1) /* segment size */ + ld r9,STK_PARAM(R8)(r1) /* get "local" param */ _GLOBAL(ht64_call_hpte_updatepp) bl . /* patched by htab_finish_init() */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 4c122c3f1623..3f0c30ae4791 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -273,61 +273,15 @@ static long native_hpte_remove(unsigned long hpte_group) return i; } -static inline int __hpte_actual_psize(unsigned int lp, int psize) -{ - int i, shift; - unsigned int mask; - - /* start from 1 ignoring MMU_PAGE_4K */ - for (i = 1; i < MMU_PAGE_COUNT; i++) { - - /* invalid penc */ - if (mmu_psize_defs[psize].penc[i] == -1) - continue; - /* - * encoding bits per actual page size - * PTE LP actual page size - * rrrr rrrz >=8KB - * rrrr rrzz >=16KB - * rrrr rzzz >=32KB - * rrrr zzzz >=64KB - * ....... - */ - shift = mmu_psize_defs[i].shift - LP_SHIFT; - if (shift > LP_BITS) - shift = LP_BITS; - mask = (1 << shift) - 1; - if ((lp & mask) == mmu_psize_defs[psize].penc[i]) - return i; - } - return -1; -} - -static inline int hpte_actual_psize(struct hash_pte *hptep, int psize) -{ - /* Look at the 8 bit LP value */ - unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1); - - if (!(hptep->v & HPTE_V_VALID)) - return -1; - - /* First check if it is large page */ - if (!(hptep->v & HPTE_V_LARGE)) - return MMU_PAGE_4K; - - return __hpte_actual_psize(lp, psize); -} - static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long vpn, int psize, int ssize, - int local) + unsigned long vpn, int bpsize, + int apsize, int ssize, int local) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; int ret = 0; - int actual_psize; - want_v = hpte_encode_avpn(vpn, psize, ssize); + want_v = hpte_encode_avpn(vpn, bpsize, ssize); DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); @@ -335,7 +289,6 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_lock_hpte(hptep); hpte_v = hptep->v; - actual_psize = hpte_actual_psize(hptep, psize); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -343,12 +296,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, * (hpte_remove) because we assume the old translation is still * technically "valid". */ - if (actual_psize < 0) { - actual_psize = psize; - ret = -1; - goto err_out; - } - if (!HPTE_V_COMPARE(hpte_v, want_v)) { + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { DBG_LOW(" -> miss\n"); ret = -1; } else { @@ -357,11 +305,10 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); } -err_out: native_unlock_hpte(hptep); /* Ensure it is out of the tlb too. */ - tlbie(vpn, psize, actual_psize, ssize, local); + tlbie(vpn, bpsize, apsize, ssize, local); return ret; } @@ -402,7 +349,6 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, int psize, int ssize) { - int actual_psize; unsigned long vpn; unsigned long vsid; long slot; @@ -415,36 +361,33 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, if (slot == -1) panic("could not find page to bolt\n"); hptep = htab_address + slot; - actual_psize = hpte_actual_psize(hptep, psize); - if (actual_psize < 0) - actual_psize = psize; /* Update the HPTE */ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | (newpp & (HPTE_R_PP | HPTE_R_N)); - - /* Ensure it is out of the tlb too. */ - tlbie(vpn, psize, actual_psize, ssize, 0); + /* + * Ensure it is out of the tlb too. Bolted entries base and + * actual page size will be same. + */ + tlbie(vpn, psize, psize, ssize, 0); } static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int bpsize, int apsize, int ssize, int local) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v; unsigned long want_v; unsigned long flags; - int actual_psize; local_irq_save(flags); DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); - want_v = hpte_encode_avpn(vpn, psize, ssize); + want_v = hpte_encode_avpn(vpn, bpsize, ssize); native_lock_hpte(hptep); hpte_v = hptep->v; - actual_psize = hpte_actual_psize(hptep, psize); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -452,23 +395,120 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, * (hpte_remove) because we assume the old translation is still * technically "valid". */ - if (actual_psize < 0) { - actual_psize = psize; - native_unlock_hpte(hptep); - goto err_out; - } - if (!HPTE_V_COMPARE(hpte_v, want_v)) + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; -err_out: /* Invalidate the TLB */ - tlbie(vpn, psize, actual_psize, ssize, local); + tlbie(vpn, bpsize, apsize, ssize, local); + + local_irq_restore(flags); +} + +static void native_hugepage_invalidate(struct mm_struct *mm, + unsigned char *hpte_slot_array, + unsigned long addr, int psize) +{ + int ssize = 0, i; + int lock_tlbie; + struct hash_pte *hptep; + int actual_psize = MMU_PAGE_16M; + unsigned int max_hpte_count, valid; + unsigned long flags, s_addr = addr; + unsigned long hpte_v, want_v, shift; + unsigned long hidx, vpn = 0, vsid, hash, slot; + + shift = mmu_psize_defs[psize].shift; + max_hpte_count = 1U << (PMD_SHIFT - shift); + + local_irq_save(flags); + for (i = 0; i < max_hpte_count; i++) { + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } + + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + + hptep = htab_address + slot; + want_v = hpte_encode_avpn(vpn, psize, ssize); + native_lock_hpte(hptep); + hpte_v = hptep->v; + + /* Even if we miss, we need to invalidate the TLB */ + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) + native_unlock_hpte(hptep); + else + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + } + /* + * Since this is a hugepage, we just need a single tlbie. + * use the last vpn. + */ + lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + + asm volatile("ptesync":::"memory"); + __tlbie(vpn, psize, actual_psize, ssize); + asm volatile("eieio; tlbsync; ptesync":::"memory"); + + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + local_irq_restore(flags); } +static inline int __hpte_actual_psize(unsigned int lp, int psize) +{ + int i, shift; + unsigned int mask; + + /* start from 1 ignoring MMU_PAGE_4K */ + for (i = 1; i < MMU_PAGE_COUNT; i++) { + + /* invalid penc */ + if (mmu_psize_defs[psize].penc[i] == -1) + continue; + /* + * encoding bits per actual page size + * PTE LP actual page size + * rrrr rrrz >=8KB + * rrrr rrzz >=16KB + * rrrr rzzz >=32KB + * rrrr zzzz >=64KB + * ....... + */ + shift = mmu_psize_defs[i].shift - LP_SHIFT; + if (shift > LP_BITS) + shift = LP_BITS; + mask = (1 << shift) - 1; + if ((lp & mask) == mmu_psize_defs[psize].penc[i]) + return i; + } + return -1; +} + static void hpte_decode(struct hash_pte *hpte, unsigned long slot, int *psize, int *apsize, int *ssize, unsigned long *vpn) { @@ -672,4 +712,5 @@ void __init hpte_init_native(void) ppc_md.hpte_remove = native_hpte_remove; ppc_md.hpte_clear_all = native_hpte_clear; ppc_md.flush_hash_range = native_flush_hash_range; + ppc_md.hugepage_invalidate = native_hugepage_invalidate; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index e303a6d74e3a..6ecc38bd5b24 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -807,7 +807,7 @@ void __init early_init_mmu(void) } #ifdef CONFIG_SMP -void __cpuinit early_init_mmu_secondary(void) +void early_init_mmu_secondary(void) { /* Initialize hash table for that CPU */ if (!firmware_has_feature(FW_FEATURE_LPAR)) @@ -1050,13 +1050,26 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) goto bail; } -#ifdef CONFIG_HUGETLB_PAGE if (hugeshift) { - rc = __hash_page_huge(ea, access, vsid, ptep, trap, local, - ssize, hugeshift, psize); + if (pmd_trans_huge(*(pmd_t *)ptep)) + rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, + trap, local, ssize, psize); +#ifdef CONFIG_HUGETLB_PAGE + else + rc = __hash_page_huge(ea, access, vsid, ptep, trap, + local, ssize, hugeshift, psize); +#else + else { + /* + * if we have hugeshift, and is not transhuge with + * hugetlb disabled, something is really wrong. + */ + rc = 1; + WARN_ON(1); + } +#endif goto bail; } -#endif /* CONFIG_HUGETLB_PAGE */ #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); @@ -1145,6 +1158,7 @@ EXPORT_SYMBOL_GPL(hash_page); void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { + int hugepage_shift; unsigned long vsid; pgd_t *pgdir; pte_t *ptep; @@ -1166,10 +1180,27 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, pgdir = mm->pgd; if (pgdir == NULL) return; - ptep = find_linux_pte(pgdir, ea); - if (!ptep) + + /* Get VSID */ + ssize = user_segment_size(ea); + vsid = get_vsid(mm->context.id, ea, ssize); + if (!vsid) return; + /* + * Hash doesn't like irqs. Walking linux page table with irq disabled + * saves us from holding multiple locks. + */ + local_irq_save(flags); + + /* + * THP pages use update_mmu_cache_pmd. We don't do + * hash preload there. Hence can ignore THP here + */ + ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugepage_shift); + if (!ptep) + goto out_exit; + WARN_ON(hugepage_shift); #ifdef CONFIG_PPC_64K_PAGES /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on * a 64K kernel), then we don't preload, hash_page() will take @@ -1178,18 +1209,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, * page size demotion here */ if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) - return; + goto out_exit; #endif /* CONFIG_PPC_64K_PAGES */ - /* Get VSID */ - ssize = user_segment_size(ea); - vsid = get_vsid(mm->context.id, ea, ssize); - if (!vsid) - return; - - /* Hash doesn't like irqs */ - local_irq_save(flags); - /* Is that local to this CPU ? */ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) local = 1; @@ -1211,7 +1233,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, mm->context.user_psize, mm->context.user_psize, pte_val(*ptep)); - +out_exit: local_irq_restore(flags); } @@ -1232,7 +1254,11 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); - ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local); + /* + * We use same base page size and actual psize, because we don't + * use these functions for hugepage + */ + ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local); } pte_iterate_hashed_end(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1365,7 +1391,8 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0); + ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize, + mmu_kernel_ssize, 0); } void kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c new file mode 100644 index 000000000000..34de9e0cdc34 --- /dev/null +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -0,0 +1,175 @@ +/* + * Copyright IBM Corporation, 2013 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +/* + * PPC64 THP Support for hash based MMUs + */ +#include <linux/mm.h> +#include <asm/machdep.h> + +int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, + pmd_t *pmdp, unsigned long trap, int local, int ssize, + unsigned int psize) +{ + unsigned int index, valid; + unsigned char *hpte_slot_array; + unsigned long rflags, pa, hidx; + unsigned long old_pmd, new_pmd; + int ret, lpsize = MMU_PAGE_16M; + unsigned long vpn, hash, shift, slot; + + /* + * atomically mark the linux large page PMD busy and dirty + */ + do { + old_pmd = pmd_val(*pmdp); + /* If PMD busy, retry the access */ + if (unlikely(old_pmd & _PAGE_BUSY)) + return 0; + /* If PMD is trans splitting retry the access */ + if (unlikely(old_pmd & _PAGE_SPLITTING)) + return 0; + /* If PMD permissions don't match, take page fault */ + if (unlikely(access & ~old_pmd)) + return 1; + /* + * Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access + */ + new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED; + if (access & _PAGE_RW) + new_pmd |= _PAGE_DIRTY; + } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp, + old_pmd, new_pmd)); + /* + * PP bits. _PAGE_USER is already PP bit 0x2, so we only + * need to add in 0x1 if it's a read-only user page + */ + rflags = new_pmd & _PAGE_USER; + if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) && + (new_pmd & _PAGE_DIRTY))) + rflags |= 0x1; + /* + * _PAGE_EXEC -> HW_NO_EXEC since it's inverted + */ + rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N); + +#if 0 + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + + /* + * No CPU has hugepages but lacks no execute, so we + * don't need to worry about that case + */ + rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); + } +#endif + /* + * Find the slot index details for this ea, using base page size. + */ + shift = mmu_psize_defs[psize].shift; + index = (ea & ~HPAGE_PMD_MASK) >> shift; + BUG_ON(index >= 4096); + + vpn = hpt_vpn(ea, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + hpte_slot_array = get_hpte_slot_array(pmdp); + + valid = hpte_valid(hpte_slot_array, index); + if (valid) { + /* update the hpte bits */ + hidx = hpte_hash_index(hpte_slot_array, index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + + ret = ppc_md.hpte_updatepp(slot, rflags, vpn, + psize, lpsize, ssize, local); + /* + * We failed to update, try to insert a new entry. + */ + if (ret == -1) { + /* + * large pte is marked busy, so we can be sure + * nobody is looking at hpte_slot_array. hence we can + * safely update this here. + */ + valid = 0; + new_pmd &= ~_PAGE_HPTEFLAGS; + hpte_slot_array[index] = 0; + } else + /* clear the busy bits and set the hash pte bits */ + new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + } + + if (!valid) { + unsigned long hpte_group; + + /* insert new entry */ + pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + + /* clear the busy bits and set the hash pte bits */ + new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + + /* Add in WIMG bits */ + rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | + _PAGE_COHERENT | _PAGE_GUARDED)); + + /* Insert into the hash table, primary slot */ + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, + psize, lpsize, ssize); + /* + * Primary is full, try the secondary + */ + if (unlikely(slot == -1)) { + hpte_group = ((~hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + psize, lpsize, ssize); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + + ppc_md.hpte_remove(hpte_group); + goto repeat; + } + } + /* + * Hypervisor failure. Restore old pmd and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *pmdp = __pmd(old_pmd); + hash_failure_debug(ea, access, vsid, trap, ssize, + psize, lpsize, old_pmd); + return -1; + } + /* + * large pte is marked busy, so we can be sure + * nobody is looking at hpte_slot_array. hence we can + * safely update this here. + */ + mark_hpte_slot_valid(hpte_slot_array, index, slot); + } + /* + * No need to use ldarx/stdcx here + */ + *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); + return 0; +} diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 0f1d94a1fb82..0b7fb6761015 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - ssize, local) == -1) + mmu_psize, ssize, local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 77fdd2cef33b..834ca8eb38f2 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -21,6 +21,9 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/setup.h> +#include <asm/hugetlb.h> + +#ifdef CONFIG_HUGETLB_PAGE #define PAGE_SHIFT_64K 16 #define PAGE_SHIFT_16M 24 @@ -100,68 +103,9 @@ int pgd_huge(pgd_t pgd) } #endif -/* - * We have 4 cases for pgds and pmds: - * (1) invalid (all zeroes) - * (2) pointer to next table, as normal; bottom 6 bits == 0 - * (3) leaf pte for huge page, bottom two bits != 00 - * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table - */ -pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) -{ - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - pte_t *ret_pte; - hugepd_t *hpdp = NULL; - unsigned pdshift = PGDIR_SHIFT; - - if (shift) - *shift = 0; - - pg = pgdir + pgd_index(ea); - - if (pgd_huge(*pg)) { - ret_pte = (pte_t *) pg; - goto out; - } else if (is_hugepd(pg)) - hpdp = (hugepd_t *)pg; - else if (!pgd_none(*pg)) { - pdshift = PUD_SHIFT; - pu = pud_offset(pg, ea); - - if (pud_huge(*pu)) { - ret_pte = (pte_t *) pu; - goto out; - } else if (is_hugepd(pu)) - hpdp = (hugepd_t *)pu; - else if (!pud_none(*pu)) { - pdshift = PMD_SHIFT; - pm = pmd_offset(pu, ea); - - if (pmd_huge(*pm)) { - ret_pte = (pte_t *) pm; - goto out; - } else if (is_hugepd(pm)) - hpdp = (hugepd_t *)pm; - else if (!pmd_none(*pm)) - return pte_offset_kernel(pm, ea); - } - } - if (!hpdp) - return NULL; - - ret_pte = hugepte_offset(hpdp, ea, pdshift); - pdshift = hugepd_shift(*hpdp); -out: - if (shift) - *shift = pdshift; - return ret_pte; -} -EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); - pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { + /* Only called for hugetlbfs pages, hence can ignore THP */ return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); } @@ -357,7 +301,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) int alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; - int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); + int idx = shift_to_mmu_psize(huge_page_shift(hstate)); int nr_gpages = gpage_freearray[idx].nr_gpages; if (nr_gpages == 0) @@ -736,11 +680,14 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) struct page *page; unsigned shift; unsigned long mask; - + /* + * Transparent hugepages are handled by generic code. We can skip them + * here. + */ ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); /* Verify it is a huge page else bail. */ - if (!ptep || !shift) + if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) return ERR_PTR(-EINVAL); mask = (1UL << shift) - 1; @@ -759,69 +706,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask; - unsigned long pte_end; - struct page *head, *page, *tail; - pte_t pte; - int refs; - - pte_end = (addr + sz) & ~(sz-1); - if (pte_end < end) - end = pte_end; - - pte = *ptep; - mask = _PAGE_PRESENT | _PAGE_USER; - if (write) - mask |= _PAGE_RW; - - if ((pte_val(pte) & mask) != mask) - return 0; - - /* hugepages are never "special" */ - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - - page = head + ((addr & (sz-1)) >> PAGE_SHIFT); - tail = page; - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - /* Could be optimized better */ - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - /* - * Any tail page need their mapcount reference taken before we - * return. - */ - while (refs--) { - if (PageTail(tail)) - get_huge_page_tail(tail); - tail++; - } - - return 1; -} - static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz) { @@ -1038,3 +922,168 @@ void flush_dcache_icache_hugepage(struct page *page) } } } + +#endif /* CONFIG_HUGETLB_PAGE */ + +/* + * We have 4 cases for pgds and pmds: + * (1) invalid (all zeroes) + * (2) pointer to next table, as normal; bottom 6 bits == 0 + * (3) leaf pte for huge page, bottom two bits != 00 + * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table + * + * So long as we atomically load page table pointers we are safe against teardown, + * we can follow the address down to the the page and take a ref on it. + */ + +pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) +{ + pgd_t pgd, *pgdp; + pud_t pud, *pudp; + pmd_t pmd, *pmdp; + pte_t *ret_pte; + hugepd_t *hpdp = NULL; + unsigned pdshift = PGDIR_SHIFT; + + if (shift) + *shift = 0; + + pgdp = pgdir + pgd_index(ea); + pgd = ACCESS_ONCE(*pgdp); + /* + * Always operate on the local stack value. This make sure the + * value don't get updated by a parallel THP split/collapse, + * page fault or a page unmap. The return pte_t * is still not + * stable. So should be checked there for above conditions. + */ + if (pgd_none(pgd)) + return NULL; + else if (pgd_huge(pgd)) { + ret_pte = (pte_t *) pgdp; + goto out; + } else if (is_hugepd(&pgd)) + hpdp = (hugepd_t *)&pgd; + else { + /* + * Even if we end up with an unmap, the pgtable will not + * be freed, because we do an rcu free and here we are + * irq disabled + */ + pdshift = PUD_SHIFT; + pudp = pud_offset(&pgd, ea); + pud = ACCESS_ONCE(*pudp); + + if (pud_none(pud)) + return NULL; + else if (pud_huge(pud)) { + ret_pte = (pte_t *) pudp; + goto out; + } else if (is_hugepd(&pud)) + hpdp = (hugepd_t *)&pud; + else { + pdshift = PMD_SHIFT; + pmdp = pmd_offset(&pud, ea); + pmd = ACCESS_ONCE(*pmdp); + /* + * A hugepage collapse is captured by pmd_none, because + * it mark the pmd none and do a hpte invalidate. + * + * A hugepage split is captured by pmd_trans_splitting + * because we mark the pmd trans splitting and do a + * hpte invalidate + * + */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + return NULL; + + if (pmd_huge(pmd) || pmd_large(pmd)) { + ret_pte = (pte_t *) pmdp; + goto out; + } else if (is_hugepd(&pmd)) + hpdp = (hugepd_t *)&pmd; + else + return pte_offset_kernel(&pmd, ea); + } + } + if (!hpdp) + return NULL; + + ret_pte = hugepte_offset(hpdp, ea, pdshift); + pdshift = hugepd_shift(*hpdp); +out: + if (shift) + *shift = pdshift; + return ret_pte; +} +EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); + +int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask; + unsigned long pte_end; + struct page *head, *page, *tail; + pte_t pte; + int refs; + + pte_end = (addr + sz) & ~(sz-1); + if (pte_end < end) + end = pte_end; + + pte = ACCESS_ONCE(*ptep); + mask = _PAGE_PRESENT | _PAGE_USER; + if (write) + mask |= _PAGE_RW; + + if ((pte_val(pte) & mask) != mask) + return 0; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + /* + * check for splitting here + */ + if (pmd_trans_splitting(pte_pmd(pte))) + return 0; +#endif + + /* hugepages are never "special" */ + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + + page = head + ((addr & (sz-1)) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + /* Could be optimized better */ + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a90b9c458990..d0cd9e4c6837 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -88,7 +88,11 @@ static void pgd_ctor(void *addr) static void pmd_ctor(void *addr) { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + memset(addr, 0, PMD_TABLE_SIZE * 2); +#else memset(addr, 0, PMD_TABLE_SIZE); +#endif } struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; @@ -137,10 +141,9 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) void pgtable_cache_init(void) { pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); - pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor); - if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE)) + pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); + if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) panic("Couldn't allocate pgtable caches"); - /* In all current configs, when the PUD index exists it's the * same size as either the pgd or pmd index. Verify that the * initialization above has also created a PUD cache. This diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0988a26e0413..7f4bea162026 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -299,47 +299,13 @@ void __init paging_init(void) void __init mem_init(void) { -#ifdef CONFIG_NEED_MULTIPLE_NODES - int nid; -#endif - pg_data_t *pgdat; - unsigned long i; - struct page *page; - unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - #ifdef CONFIG_SWIOTLB swiotlb_init(0); #endif - num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - -#ifdef CONFIG_NEED_MULTIPLE_NODES - for_each_online_node(nid) { - if (NODE_DATA(nid)->node_spanned_pages != 0) { - printk("freeing bootmem node %d\n", nid); - totalram_pages += - free_all_bootmem_node(NODE_DATA(nid)); - } - } -#else - max_mapnr = max_pfn; - totalram_pages += free_all_bootmem(); -#endif - for_each_online_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; i++) { - if (!pfn_valid(pgdat->node_start_pfn + i)) - continue; - page = pgdat_page_nr(pgdat, i); - if (PageReserved(page)) - reservedpages++; - } - } - - codesize = (unsigned long)&_sdata - (unsigned long)&_stext; - datasize = (unsigned long)&_edata - (unsigned long)&_sdata; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; - bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; + set_max_mapnr(max_pfn); + free_all_bootmem(); #ifdef CONFIG_HIGHMEM { @@ -349,13 +315,9 @@ void __init mem_init(void) for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; struct page *page = pfn_to_page(pfn); - if (memblock_is_reserved(paddr)) - continue; - free_highmem_page(page); - reservedpages--; + if (!memblock_is_reserved(paddr)) + free_highmem_page(page); } - printk(KERN_DEBUG "High memory: %luk\n", - totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* CONFIG_HIGHMEM */ @@ -368,16 +330,7 @@ void __init mem_init(void) (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; #endif - printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " - "%luk reserved, %luk data, %luk bss, %luk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - bsssize >> 10, - initsize >> 10); - + mem_init_print_info(NULL); #ifdef CONFIG_PPC32 pr_info("Kernel virtual memory layout:\n"); pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); @@ -407,7 +360,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif @@ -508,6 +461,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { #ifdef CONFIG_PPC_STD_MMU + /* + * We don't need to worry about _PAGE_PRESENT here because we are + * called with either mm->page_table_lock held or ptl lock held + */ unsigned long access = 0, trap; /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap.c index 67a42ed0d2fc..67a42ed0d2fc 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap.c diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index e779642c25e5..af3d78e19302 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -112,8 +112,10 @@ static unsigned int steal_context_smp(unsigned int id) */ for_each_cpu(cpu, mm_cpumask(mm)) { for (i = cpu_first_thread_sibling(cpu); - i <= cpu_last_thread_sibling(cpu); i++) - __set_bit(id, stale_map[i]); + i <= cpu_last_thread_sibling(cpu); i++) { + if (stale_map[i]) + __set_bit(id, stale_map[i]); + } cpu = i - 1; } return id; @@ -272,7 +274,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) /* XXX This clear should ultimately be part of local_flush_tlb_mm */ for (i = cpu_first_thread_sibling(cpu); i <= cpu_last_thread_sibling(cpu); i++) { - __clear_bit(id, stale_map[i]); + if (stale_map[i]) + __clear_bit(id, stale_map[i]); } } @@ -329,8 +332,8 @@ void destroy_context(struct mm_struct *mm) #ifdef CONFIG_SMP -static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int mmu_context_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; @@ -363,7 +366,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata mmu_context_cpu_nb = { +static struct notifier_block mmu_context_cpu_nb = { .notifier_call = mmu_context_cpu_notify, }; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 88c0425dc0a8..08397217e8ac 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -516,7 +516,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, * Figure out to which domain a cpu belongs and stick it there. * Return the id of the domain used. */ -static int __cpuinit numa_setup_cpu(unsigned long lcpu) +static int numa_setup_cpu(unsigned long lcpu) { int nid = 0; struct device_node *cpu = of_get_cpu_node(lcpu, NULL); @@ -538,8 +538,7 @@ out: return nid; } -static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, - unsigned long action, +static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned long lcpu = (unsigned long)hcpu; @@ -919,7 +918,7 @@ static void __init *careful_zallocation(int nid, unsigned long size, return ret; } -static struct notifier_block __cpuinitdata ppc64_numa_nb = { +static struct notifier_block ppc64_numa_nb = { .notifier_call = cpu_numa_callback, .priority = 1 /* Must run before sched domains notifier. */ }; @@ -1433,11 +1432,9 @@ static int update_cpu_topology(void *data) if (cpu != update->cpu) continue; - unregister_cpu_under_node(update->cpu, update->old_nid); unmap_cpu_from_node(update->cpu); map_cpu_to_node(update->cpu, update->new_nid); vdso_getcpu_init(); - register_cpu_under_node(update->cpu, update->new_nid); } return 0; @@ -1485,6 +1482,9 @@ int arch_update_cpu_topology(void) stop_machine(update_cpu_topology, &updates[0], &updated_cpus); for (ud = &updates[0]; ud; ud = ud->next) { + unregister_cpu_under_node(ud->cpu, ud->old_nid); + register_cpu_under_node(ud->cpu, ud->new_nid); + dev = get_cpu_device(ud->cpu); if (dev) kobject_uevent(&dev->kobj, KOBJ_CHANGE); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 214130a4edc6..edda589795c3 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -235,6 +235,14 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) pud = pud_offset(pgd, addr); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, addr); + /* + * khugepaged to collapse normal pages to hugepage, first set + * pmd to none to force page fault/gup to take mmap_sem. After + * pmd is set to none, we do a pte_clear which does this assertion + * so if we find pmd none, return. + */ + if (pmd_none(*pmd)) + return; BUG_ON(!pmd_present(*pmd)); assert_spin_locked(pte_lockptr(mm, pmd)); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index a854096e1023..536eec72c0f7 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -338,6 +338,19 @@ EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); +/* + * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags + * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. + */ +struct page *pmd_page(pmd_t pmd) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge(pmd)) + return pfn_to_page(pmd_pfn(pmd)); +#endif + return virt_to_page(pmd_page_vaddr(pmd)); +} + #ifdef CONFIG_PPC_64K_PAGES static pte_t *get_from_cache(struct mm_struct *mm) { @@ -455,3 +468,404 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) } #endif #endif /* CONFIG_PPC_64K_PAGES */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* + * This is called when relaxing access to a hugepage. It's also called in the page + * fault path when we don't hit any of the major fault cases, ie, a minor + * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have + * handled those two for us, we additionally deal with missing execute + * permission here on some processors + */ +int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp, pmd_t entry, int dirty) +{ + int changed; +#ifdef CONFIG_DEBUG_VM + WARN_ON(!pmd_trans_huge(*pmdp)); + assert_spin_locked(&vma->vm_mm->page_table_lock); +#endif + changed = !pmd_same(*(pmdp), entry); + if (changed) { + __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry)); + /* + * Since we are not supporting SW TLB systems, we don't + * have any thing similar to flush_tlb_page_nohash() + */ + } + return changed; +} + +unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, unsigned long clr) +{ + + unsigned long old, tmp; + +#ifdef CONFIG_DEBUG_VM + WARN_ON(!pmd_trans_huge(*pmdp)); + assert_spin_locked(&mm->page_table_lock); +#endif + +#ifdef PTE_ATOMIC_UPDATES + __asm__ __volatile__( + "1: ldarx %0,0,%3\n\ + andi. %1,%0,%6\n\ + bne- 1b \n\ + andc %1,%0,%4 \n\ + stdcx. %1,0,%3 \n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) + : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY) + : "cc" ); +#else + old = pmd_val(*pmdp); + *pmdp = __pmd(old & ~clr); +#endif + if (old & _PAGE_HASHPTE) + hpte_do_hugepage_flush(mm, addr, pmdp); + return old; +} + +pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_t pmd; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + if (pmd_trans_huge(*pmdp)) { + pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); + } else { + /* + * khugepaged calls this for normal pmd + */ + pmd = *pmdp; + pmd_clear(pmdp); + /* + * Wait for all pending hash_page to finish. This is needed + * in case of subpage collapse. When we collapse normal pages + * to hugepage, we first clear the pmd, then invalidate all + * the PTE entries. The assumption here is that any low level + * page fault will see a none pmd and take the slow path that + * will wait on mmap_sem. But we could very well be in a + * hash_page with local ptep pointer value. Such a hash page + * can result in adding new HPTE entries for normal subpages. + * That means we could be modifying the page content as we + * copy them to a huge page. So wait for parallel hash_page + * to finish before invalidating HPTE entries. We can do this + * by sending an IPI to all the cpus and executing a dummy + * function there. + */ + kick_all_cpus_sync(); + /* + * Now invalidate the hpte entries in the range + * covered by pmd. This make sure we take a + * fault and will find the pmd as none, which will + * result in a major fault which takes mmap_sem and + * hence wait for collapse to complete. Without this + * the __collapse_huge_page_copy can result in copying + * the old content. + */ + flush_tlb_pmd_range(vma->vm_mm, &pmd, address); + } + return pmd; +} + +int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp); +} + +/* + * We currently remove entries from the hashtable regardless of whether + * the entry was young or dirty. The generic routines only flush if the + * entry was young or dirty which is not good enough. + * + * We should be more intelligent about this but for the moment we override + * these functions and force a tlb flush unconditionally + */ +int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp); +} + +/* + * We mark the pmd splitting and invalidate all the hpte + * entries for this hugepage. + */ +void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + unsigned long old, tmp; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + +#ifdef CONFIG_DEBUG_VM + WARN_ON(!pmd_trans_huge(*pmdp)); + assert_spin_locked(&vma->vm_mm->page_table_lock); +#endif + +#ifdef PTE_ATOMIC_UPDATES + + __asm__ __volatile__( + "1: ldarx %0,0,%3\n\ + andi. %1,%0,%6\n\ + bne- 1b \n\ + ori %1,%0,%4 \n\ + stdcx. %1,0,%3 \n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) + : "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY) + : "cc" ); +#else + old = pmd_val(*pmdp); + *pmdp = __pmd(old | _PAGE_SPLITTING); +#endif + /* + * If we didn't had the splitting flag set, go and flush the + * HPTE entries. + */ + if (!(old & _PAGE_SPLITTING)) { + /* We need to flush the hpte */ + if (old & _PAGE_HASHPTE) + hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); + } +} + +/* + * We want to put the pgtable in pmd and use pgtable for tracking + * the base page size hptes + */ +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + pgtable_t *pgtable_slot; + assert_spin_locked(&mm->page_table_lock); + /* + * we store the pgtable in the second half of PMD + */ + pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; + *pgtable_slot = pgtable; + /* + * expose the deposited pgtable to other cpus. + * before we set the hugepage PTE at pmd level + * hash fault code looks at the deposted pgtable + * to store hash index values. + */ + smp_wmb(); +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + pgtable_t pgtable; + pgtable_t *pgtable_slot; + + assert_spin_locked(&mm->page_table_lock); + pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; + pgtable = *pgtable_slot; + /* + * Once we withdraw, mark the entry NULL. + */ + *pgtable_slot = NULL; + /* + * We store HPTE information in the deposited PTE fragment. + * zero out the content on withdraw. + */ + memset(pgtable, 0, PTE_FRAG_SIZE); + return pgtable; +} + +/* + * set a new huge pmd. We should not be called for updating + * an existing pmd entry. That should go via pmd_hugepage_update. + */ +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ +#ifdef CONFIG_DEBUG_VM + WARN_ON(!pmd_none(*pmdp)); + assert_spin_locked(&mm->page_table_lock); + WARN_ON(!pmd_trans_huge(pmd)); +#endif + return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); +} + +void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT); +} + +/* + * A linux hugepage PMD was changed and the corresponding hash table entries + * neesd to be flushed. + */ +void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + int ssize, i; + unsigned long s_addr; + int max_hpte_count; + unsigned int psize, valid; + unsigned char *hpte_slot_array; + unsigned long hidx, vpn, vsid, hash, shift, slot; + + /* + * Flush all the hptes mapping this hugepage + */ + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + /* get the base page size */ + psize = get_slice_psize(mm, s_addr); + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(mm, hpte_slot_array, + s_addr, psize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } + + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} + +static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) +{ + pmd_val(pmd) |= pgprot_val(pgprot); + return pmd; +} + +pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) +{ + pmd_t pmd; + /* + * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always + * set. We use this to check THP page at pmd level. + * leaf pte for huge page, bottom two bits != 00 + */ + pmd_val(pmd) = pfn << PTE_RPN_SHIFT; + pmd_val(pmd) |= _PAGE_THP_HUGE; + pmd = pmd_set_protbits(pmd, pgprot); + return pmd; +} + +pmd_t mk_pmd(struct page *page, pgprot_t pgprot) +{ + return pfn_pmd(page_to_pfn(page), pgprot); +} + +pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + + pmd_val(pmd) &= _HPAGE_CHG_MASK; + pmd = pmd_set_protbits(pmd, newprot); + return pmd; +} + +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a HUGE PMD entry in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux HUGE PMD entry. + */ +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + return; +} + +pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + pmd_t old_pmd; + pgtable_t pgtable; + unsigned long old; + pgtable_t *pgtable_slot; + + old = pmd_hugepage_update(mm, addr, pmdp, ~0UL); + old_pmd = __pmd(old); + /* + * We have pmd == none and we are holding page_table_lock. + * So we can safely go and clear the pgtable hash + * index info. + */ + pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; + pgtable = *pgtable_slot; + /* + * Let's zero out old valid and hash index details + * hash fault look at them. + */ + memset(pgtable, 0, PTE_FRAG_SIZE); + return old_pmd; +} + +int has_transparent_hugepage(void) +{ + if (!mmu_has_feature(MMU_FTR_16M_PAGE)) + return 0; + /* + * We support THP only if PMD_SIZE is 16MB. + */ + if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT) + return 0; + /* + * We need to make sure that we support 16MB hugepage in a segement + * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE + * of 64K. + */ + /* + * If we have 64K HPTE, we will be using that by default + */ + if (mmu_psize_defs[MMU_PAGE_64K].shift && + (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1)) + return 0; + /* + * Ok we only have 4K HPTE + */ + if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1) + return 0; + + return 1; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 7c415ddde948..aa74acb0fdfc 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -130,6 +130,53 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) up_write(&mm->mmap_sem); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->private; + split_huge_page_pmd(vma, addr, pmd); + return 0; +} + +static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + struct vm_area_struct *vma; + struct mm_walk subpage_proto_walk = { + .mm = mm, + .pmd_entry = subpage_walk_pmd_entry, + }; + + /* + * We don't try too hard, we just mark all the vma in that range + * VM_NOHUGEPAGE and split them. + */ + vma = find_vma(mm, addr); + /* + * If the range is in unmapped range, just return + */ + if (vma && ((addr + len) <= vma->vm_start)) + return; + + while (vma) { + if (vma->vm_start >= (addr + len)) + break; + vma->vm_flags |= VM_NOHUGEPAGE; + subpage_proto_walk.private = vma; + walk_page_range(vma->vm_start, vma->vm_end, + &subpage_proto_walk); + vma = vma->vm_next; + } +} +#else +static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + return; +} +#endif + /* * Copy in a subpage protection map for an address range. * The map has 2 bits per 4k subpage, so 32 bits per 64k page. @@ -168,6 +215,7 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) return -EFAULT; down_write(&mm->mmap_sem); + subpage_mark_vma_nohuge(mm, addr, len); for (limit = addr + len; addr < limit; addr = next) { next = pmd_addr_end(addr, limit); err = -ENOMEM; diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 7df1c5edda87..36e44b4260eb 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -189,6 +189,7 @@ void tlb_flush(struct mmu_gather *tlb) void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, unsigned long end) { + int hugepage_shift; unsigned long flags; start = _ALIGN_DOWN(start, PAGE_SIZE); @@ -206,7 +207,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, local_irq_save(flags); arch_enter_lazy_mmu_mode(); for (; start < end; start += PAGE_SIZE) { - pte_t *ptep = find_linux_pte(mm->pgd, start); + pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, + &hugepage_shift); unsigned long pte; if (ptep == NULL) @@ -214,7 +216,37 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, pte = pte_val(*ptep); if (!(pte & _PAGE_HASHPTE)) continue; - hpte_need_flush(mm, start, ptep, pte, 0); + if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) + hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); + else + hpte_need_flush(mm, start, ptep, pte, 0); + } + arch_leave_lazy_mmu_mode(); + local_irq_restore(flags); +} + +void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) +{ + pte_t *pte; + pte_t *start_pte; + unsigned long flags; + + addr = _ALIGN_DOWN(addr, PMD_SIZE); + /* Note: Normally, we should only ever use a batch within a + * PTE locked section. This violates the rule, but will work + * since we don't actually modify the PTEs, we just flush the + * hash while leaving the PTEs intact (including their reference + * to being hashed). This is not the most performance oriented + * way to do things but is fine for our needs here. + */ + local_irq_save(flags); + arch_enter_lazy_mmu_mode(); + start_pte = pte_offset_map(pmd, addr); + for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) { + unsigned long pteval = pte_val(*pte); + if (pteval & _PAGE_HASHPTE) + hpte_need_flush(mm, addr, pte, pteval, 0); + addr += PAGE_SIZE; } arch_leave_lazy_mmu_mode(); local_irq_restore(flags); diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 6888cad5103d..41cd68dee681 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -648,7 +648,7 @@ void __init early_init_mmu(void) __early_init_mmu(1); } -void __cpuinit early_init_mmu_secondary(void) +void early_init_mmu_secondary(void) { __early_init_mmu(0); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index c427ae36374a..bf56e33f8257 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -650,8 +650,7 @@ void bpf_jit_compile(struct sk_filter *fp) proglen = cgctx.idx * 4; alloclen = proglen + FUNCTION_DESCR_SIZE; - image = module_alloc(max_t(unsigned int, alloclen, - sizeof(struct work_struct))); + image = module_alloc(alloclen); if (!image) goto out; @@ -688,20 +687,8 @@ out: return; } -static void jit_free_defer(struct work_struct *arg) -{ - module_free(NULL, arg); -} - -/* run from softirq, we must use a work_struct to call - * module_free() from process context - */ void bpf_jit_free(struct sk_filter *fp) { - if (fp->bpf_func != sk_run_filter) { - struct work_struct *work = (struct work_struct *)fp->bpf_func; - - INIT_WORK(work, jit_free_defer); - schedule_work(work); - } + if (fp->bpf_func != sk_run_filter) + module_free(NULL, fp->bpf_func); } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 29c6482890c8..a3985aee77fe 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -75,6 +75,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_FCHV 0 #define MMCR0_PMCjCE MMCR0_PMCnCE +#define MMCR0_FC56 0 +#define MMCR0_PMAO 0 +#define MMCR0_EBE 0 +#define MMCR0_PMCC 0 +#define MMCR0_PMCC_U6 0 #define SPRN_MMCRA SPRN_MMCR2 #define MMCRA_SAMPLE_ENABLE 0 @@ -102,6 +107,15 @@ static inline int siar_valid(struct pt_regs *regs) return 1; } +static bool is_ebb_event(struct perf_event *event) { return false; } +static int ebb_event_check(struct perf_event *event) { return 0; } +static void ebb_event_add(struct perf_event *event) { } +static void ebb_switch_out(unsigned long mmcr0) { } +static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) +{ + return mmcr0; +} + static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} void power_pmu_flush_branch_stack(void) {} @@ -462,6 +476,89 @@ void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) return; } +static bool is_ebb_event(struct perf_event *event) +{ + /* + * This could be a per-PMU callback, but we'd rather avoid the cost. We + * check that the PMU supports EBB, meaning those that don't can still + * use bit 63 of the event code for something else if they wish. + */ + return (ppmu->flags & PPMU_EBB) && + ((event->attr.config >> EVENT_CONFIG_EBB_SHIFT) & 1); +} + +static int ebb_event_check(struct perf_event *event) +{ + struct perf_event *leader = event->group_leader; + + /* Event and group leader must agree on EBB */ + if (is_ebb_event(leader) != is_ebb_event(event)) + return -EINVAL; + + if (is_ebb_event(event)) { + if (!(event->attach_state & PERF_ATTACH_TASK)) + return -EINVAL; + + if (!leader->attr.pinned || !leader->attr.exclusive) + return -EINVAL; + + if (event->attr.inherit || event->attr.sample_period || + event->attr.enable_on_exec || event->attr.freq) + return -EINVAL; + } + + return 0; +} + +static void ebb_event_add(struct perf_event *event) +{ + if (!is_ebb_event(event) || current->thread.used_ebb) + return; + + /* + * IFF this is the first time we've added an EBB event, set + * PMXE in the user MMCR0 so we can detect when it's cleared by + * userspace. We need this so that we can context switch while + * userspace is in the EBB handler (where PMXE is 0). + */ + current->thread.used_ebb = 1; + current->thread.mmcr0 |= MMCR0_PMXE; +} + +static void ebb_switch_out(unsigned long mmcr0) +{ + if (!(mmcr0 & MMCR0_EBE)) + return; + + current->thread.siar = mfspr(SPRN_SIAR); + current->thread.sier = mfspr(SPRN_SIER); + current->thread.sdar = mfspr(SPRN_SDAR); + current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK; + current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK; +} + +static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) +{ + if (!ebb) + goto out; + + /* Enable EBB and read/write to all 6 PMCs for userspace */ + mmcr0 |= MMCR0_EBE | MMCR0_PMCC_U6; + + /* Add any bits from the user reg, FC or PMAO */ + mmcr0 |= current->thread.mmcr0; + + /* Be careful not to set PMXE if userspace had it cleared */ + if (!(current->thread.mmcr0 & MMCR0_PMXE)) + mmcr0 &= ~MMCR0_PMXE; + + mtspr(SPRN_SIAR, current->thread.siar); + mtspr(SPRN_SIER, current->thread.sier); + mtspr(SPRN_SDAR, current->thread.sdar); + mtspr(SPRN_MMCR2, current->thread.mmcr2); +out: + return mmcr0; +} #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -732,6 +829,13 @@ static void power_pmu_read(struct perf_event *event) if (!event->hw.idx) return; + + if (is_ebb_event(event)) { + val = read_pmc(event->hw.idx); + local64_set(&event->hw.prev_count, val); + return; + } + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -852,7 +956,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; - unsigned long flags; + unsigned long flags, mmcr0, val; if (!ppmu) return; @@ -860,9 +964,6 @@ static void power_pmu_disable(struct pmu *pmu) cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) { - cpuhw->disabled = 1; - cpuhw->n_added = 0; - /* * Check if we ever enabled the PMU on this cpu. */ @@ -872,6 +973,21 @@ static void power_pmu_disable(struct pmu *pmu) } /* + * Set the 'freeze counters' bit, clear EBE/PMCC/PMAO/FC56. + */ + val = mmcr0 = mfspr(SPRN_MMCR0); + val |= MMCR0_FC; + val &= ~(MMCR0_EBE | MMCR0_PMCC | MMCR0_PMAO | MMCR0_FC56); + + /* + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the events etc. + * before we return. + */ + write_mmcr0(cpuhw, val); + mb(); + + /* * Disable instruction sampling if it was enabled */ if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { @@ -880,15 +996,12 @@ static void power_pmu_disable(struct pmu *pmu) mb(); } - /* - * Set the 'freeze counters' bit. - * The barrier is to make sure the mtspr has been - * executed and the PMU has frozen the events - * before we return. - */ - write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); - mb(); + cpuhw->disabled = 1; + cpuhw->n_added = 0; + + ebb_switch_out(mmcr0); } + local_irq_restore(flags); } @@ -903,23 +1016,36 @@ static void power_pmu_enable(struct pmu *pmu) struct cpu_hw_events *cpuhw; unsigned long flags; long i; - unsigned long val; + unsigned long val, mmcr0; s64 left; unsigned int hwc_index[MAX_HWEVENTS]; int n_lim; int idx; + bool ebb; if (!ppmu) return; local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); - if (!cpuhw->disabled) { - local_irq_restore(flags); - return; + if (!cpuhw->disabled) + goto out; + + if (cpuhw->n_events == 0) { + ppc_set_pmu_inuse(0); + goto out; } + cpuhw->disabled = 0; /* + * EBB requires an exclusive group and all events must have the EBB + * flag set, or not set, so we can just check a single event. Also we + * know we have at least one event. + */ + ebb = is_ebb_event(cpuhw->event[0]); + + /* * If we didn't change anything, or only removed events, * no need to recalculate MMCR* settings and reset the PMCs. * Just reenable the PMU with the current MMCR* settings @@ -928,8 +1054,6 @@ static void power_pmu_enable(struct pmu *pmu) if (!cpuhw->n_added) { mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - if (cpuhw->n_events == 0) - ppc_set_pmu_inuse(0); goto out_enable; } @@ -996,25 +1120,34 @@ static void power_pmu_enable(struct pmu *pmu) ++n_lim; continue; } - val = 0; - if (event->hw.sample_period) { - left = local64_read(&event->hw.period_left); - if (left < 0x80000000L) - val = 0x80000000L - left; + + if (ebb) + val = local64_read(&event->hw.prev_count); + else { + val = 0; + if (event->hw.sample_period) { + left = local64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + local64_set(&event->hw.prev_count, val); } - local64_set(&event->hw.prev_count, val); + event->hw.idx = idx; if (event->hw.state & PERF_HES_STOPPED) val = 0; write_pmc(idx, val); + perf_event_update_userpage(event); } cpuhw->n_limited = n_lim; cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; out_enable: + mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]); + mb(); - write_mmcr0(cpuhw, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, mmcr0); /* * Enable instruction sampling if necessary @@ -1112,6 +1245,8 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) event->hw.config = cpuhw->events[n0]; nocheck: + ebb_event_add(event); + ++cpuhw->n_events; ++cpuhw->n_added; @@ -1472,6 +1607,11 @@ static int power_pmu_event_init(struct perf_event *event) } } + /* Extra checks for EBB */ + err = ebb_event_check(event); + if (err) + return err; + /* * If this is in a group, check if it can go on with all the * other hardware events in the group. We assume the event @@ -1511,6 +1651,13 @@ static int power_pmu_event_init(struct perf_event *event) local64_set(&event->hw.period_left, event->hw.last_period); /* + * For EBB events we just context switch the PMC value, we don't do any + * of the sample_period logic. We use hw.prev_count for this. + */ + if (is_ebb_event(event)) + local64_set(&event->hw.prev_count, 0); + + /* * See if we need to reserve the PMU. * If no events are currently in use, then we have to take a * mutex to ensure that we don't race with another task doing @@ -1786,7 +1933,7 @@ static void power_pmu_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } -static int __cpuinit +static int power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; @@ -1803,7 +1950,7 @@ power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu return NOTIFY_OK; } -int __cpuinit register_power_pmu(struct power_pmu *pmu) +int register_power_pmu(struct power_pmu *pmu) { if (ppmu) return -EBUSY; /* something's already registered */ diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f7d1c4fff303..96a64d6a8bdf 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -31,9 +31,9 @@ * * 60 56 52 48 44 40 36 32 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ thresh_cmp ] [ thresh_ctl ] - * | - * thresh start/stop OR FAB match -* + * | [ thresh_cmp ] [ thresh_ctl ] + * | | + * *- EBB (Linux) thresh start/stop OR FAB match -* * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | @@ -85,6 +85,7 @@ * */ +#define EVENT_EBB_MASK 1ull #define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */ #define EVENT_THR_CMP_MASK 0x3ff #define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */ @@ -109,6 +110,17 @@ #define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) #define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ +#define EVENT_VALID_MASK \ + ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + (EVENT_EBB_MASK << EVENT_CONFIG_EBB_SHIFT) | \ + EVENT_PSEL_MASK) + /* MMCRA IFM bits - POWER8 */ #define POWER8_MMCRA_IFM1 0x0000000040000000UL #define POWER8_MMCRA_IFM2 0x0000000080000000UL @@ -130,10 +142,10 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] - * | | - * L1 I/D qualifier -* | Count of events for each PMC. - * | p1, p2, p3, p4, p5, p6. + * | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] + * EBB -* | | + * | | Count of events for each PMC. + * L1 I/D qualifier -* | p1, p2, p3, p4, p5, p6. * nc - number of counters -* * * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints @@ -149,6 +161,9 @@ #define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) #define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) +#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) +#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) + #define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22) #define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3) @@ -207,14 +222,21 @@ static inline bool event_is_fab_match(u64 event) static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) { - unsigned int unit, pmc, cache; + unsigned int unit, pmc, cache, ebb; unsigned long mask, value; mask = value = 0; - pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; - unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; + if (event & ~EVENT_VALID_MASK) + return -1; + + pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; + unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; + cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; + ebb = (event >> EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK; + + /* Clear the EBB bit in the event, so event checks work below */ + event &= ~(EVENT_EBB_MASK << EVENT_CONFIG_EBB_SHIFT); if (pmc) { if (pmc > 6) @@ -284,6 +306,18 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); } + if (!pmc && ebb) + /* EBB events must specify the PMC */ + return -1; + + /* + * All events must agree on EBB, either all request it or none. + * EBB events are pinned & exclusive, so this should never actually + * hit, but we leave it as a fallback in case. + */ + mask |= CNST_EBB_VAL(ebb); + value |= CNST_EBB_MASK; + *maskp = mask; *valp = value; @@ -378,6 +412,10 @@ static int power8_compute_mmcr(u64 event[], int n_ev, if (pmc_inuse & 0x7c) mmcr[0] |= MMCR0_PMCjCE; + /* If we're not using PMC 5 or 6, freeze them */ + if (!(pmc_inuse & 0x60)) + mmcr[0] |= MMCR0_FC56; + mmcr[1] = mmcr1; mmcr[2] = mmcra; @@ -574,7 +612,7 @@ static struct power_pmu power8_pmu = { .get_constraint = power8_get_constraint, .get_alternatives = power8_get_alternatives, .disable_pmc = power8_disable_pmc, - .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB, + .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, .attr_groups = power8_pmu_attr_groups, diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c index ecd3890c40d7..7f1b71a01c6a 100644 --- a/arch/powerpc/platforms/44x/currituck.c +++ b/arch/powerpc/platforms/44x/currituck.c @@ -91,12 +91,12 @@ static void __init ppc47x_init_irq(void) } #ifdef CONFIG_SMP -static void __cpuinit smp_ppc47x_setup_cpu(int cpu) +static void smp_ppc47x_setup_cpu(int cpu) { mpic_setup_this_cpu(); } -static int __cpuinit smp_ppc47x_kick_cpu(int cpu) +static int smp_ppc47x_kick_cpu(int cpu) { struct device_node *cpunode = of_get_cpu_node(cpu, NULL); const u64 *spin_table_addr_prop; @@ -176,13 +176,48 @@ static int __init ppc47x_probe(void) return 1; } +static int board_rev = -1; +static int __init ppc47x_get_board_rev(void) +{ + u8 fpga_reg0; + void *fpga; + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga"); + if (!np) + goto fail; + + fpga = of_iomap(np, 0); + of_node_put(np); + if (!fpga) + goto fail; + + fpga_reg0 = ioread8(fpga); + board_rev = fpga_reg0 & 0x03; + pr_info("%s: Found board revision %d\n", __func__, board_rev); + iounmap(fpga); + return 0; + +fail: + pr_info("%s: Unable to find board revision\n", __func__); + return 0; +} +machine_arch_initcall(ppc47x, ppc47x_get_board_rev); + /* Use USB controller should have been hardware swizzled but it wasn't :( */ static void ppc47x_pci_irq_fixup(struct pci_dev *dev) { if (dev->vendor == 0x1033 && (dev->device == 0x0035 || dev->device == 0x00e0)) { - dev->irq = irq_create_mapping(NULL, 47); - pr_info("%s: Mapping irq 47 %d\n", __func__, dev->irq); + if (board_rev == 0) { + dev->irq = irq_create_mapping(NULL, 47); + pr_info("%s: Mapping irq %d\n", __func__, dev->irq); + } else if (board_rev == 2) { + dev->irq = irq_create_mapping(NULL, 49); + pr_info("%s: Mapping irq %d\n", __func__, dev->irq); + } else { + pr_alert("%s: Unknown board revision\n", __func__); + } } } diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index a28a8629727e..4241bc825800 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -81,12 +81,12 @@ static void __init iss4xx_init_irq(void) } #ifdef CONFIG_SMP -static void __cpuinit smp_iss4xx_setup_cpu(int cpu) +static void smp_iss4xx_setup_cpu(int cpu) { mpic_setup_this_cpu(); } -static int __cpuinit smp_iss4xx_kick_cpu(int cpu) +static int smp_iss4xx_kick_cpu(int cpu) { struct device_node *cpunode = of_get_cpu_node(cpu, NULL); const u64 *spin_table_addr_prop; diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 0a134e0469ef..3e90ece10ae9 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -43,9 +43,7 @@ static void __init mpc5121_ads_setup_arch(void) mpc83xx_add_bridge(np); #endif -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) - mpc512x_setup_diu(); -#endif + mpc512x_setup_arch(); } static void __init mpc5121_ads_init_IRQ(void) @@ -69,7 +67,7 @@ define_machine(mpc5121_ads) { .probe = mpc5121_ads_probe, .setup_arch = mpc5121_ads_setup_arch, .init = mpc512x_init, - .init_early = mpc512x_init_diu, + .init_early = mpc512x_init_early, .init_IRQ = mpc5121_ads_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h index 0a8e60023944..cc97f022d028 100644 --- a/arch/powerpc/platforms/512x/mpc512x.h +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -12,18 +12,12 @@ #ifndef __MPC512X_H__ #define __MPC512X_H__ extern void __init mpc512x_init_IRQ(void); +extern void __init mpc512x_init_early(void); extern void __init mpc512x_init(void); +extern void __init mpc512x_setup_arch(void); extern int __init mpc5121_clk_init(void); -void __init mpc512x_declare_of_platform_devices(void); extern const char *mpc512x_select_psc_compat(void); +extern const char *mpc512x_select_reset_compat(void); extern void mpc512x_restart(char *cmd); -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) -void mpc512x_init_diu(void); -void mpc512x_setup_diu(void); -#else -#define mpc512x_init_diu NULL -#define mpc512x_setup_diu NULL -#endif - #endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c index 5fb919b30924..ce71408781a0 100644 --- a/arch/powerpc/platforms/512x/mpc512x_generic.c +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c @@ -45,8 +45,8 @@ define_machine(mpc512x_generic) { .name = "MPC512x generic", .probe = mpc512x_generic_probe, .init = mpc512x_init, - .init_early = mpc512x_init_diu, - .setup_arch = mpc512x_setup_diu, + .init_early = mpc512x_init_early, + .setup_arch = mpc512x_setup_arch, .init_IRQ = mpc512x_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 6eb94ab99d39..a82a41b4fd91 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -35,8 +35,10 @@ static struct mpc512x_reset_module __iomem *reset_module_base; static void __init mpc512x_restart_init(void) { struct device_node *np; + const char *reset_compat; - np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-reset"); + reset_compat = mpc512x_select_reset_compat(); + np = of_find_compatible_node(NULL, NULL, reset_compat); if (!np) return; @@ -58,7 +60,7 @@ void mpc512x_restart(char *cmd) ; } -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) +#if IS_ENABLED(CONFIG_FB_FSL_DIU) struct fsl_diu_shared_fb { u8 gamma[0x300]; /* 32-bit aligned! */ @@ -355,6 +357,17 @@ const char *mpc512x_select_psc_compat(void) return NULL; } +const char *mpc512x_select_reset_compat(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) + return "fsl,mpc5121-reset"; + + if (of_machine_is_compatible("fsl,mpc5125")) + return "fsl,mpc5125-reset"; + + return NULL; +} + static unsigned int __init get_fifo_size(struct device_node *np, char *prop_name) { @@ -436,14 +449,26 @@ void __init mpc512x_psc_fifo_init(void) } } +void __init mpc512x_init_early(void) +{ + mpc512x_restart_init(); + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_init_diu(); +} + void __init mpc512x_init(void) { mpc5121_clk_init(); mpc512x_declare_of_platform_devices(); - mpc512x_restart_init(); mpc512x_psc_fifo_init(); } +void __init mpc512x_setup_arch(void) +{ + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_setup_diu(); +} + /** * mpc512x_cs_config - Setup chip select configuration * @cs: chip select number diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c index 0575e858291c..24b314d7bd5f 100644 --- a/arch/powerpc/platforms/512x/pdm360ng.c +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -119,9 +119,9 @@ static int __init pdm360ng_probe(void) define_machine(pdm360ng) { .name = "PDM360NG", .probe = pdm360ng_probe, - .setup_arch = mpc512x_setup_diu, + .setup_arch = mpc512x_setup_arch, .init = pdm360ng_init, - .init_early = mpc512x_init_diu, + .init_early = mpc512x_init_early, .init_IRQ = mpc512x_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 624cb51d19c9..7bc315822935 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -231,17 +231,7 @@ static struct i2c_driver mcu_driver = { .id_table = mcu_ids, }; -static int __init mcu_init(void) -{ - return i2c_add_driver(&mcu_driver); -} -module_init(mcu_init); - -static void __exit mcu_exit(void) -{ - i2c_del_driver(&mcu_driver); -} -module_exit(mcu_exit); +module_i2c_driver(mcu_driver); MODULE_DESCRIPTION("Power Management and GPIO expander driver for " "MPC8349E-mITX-compatible MCU"); diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c index 753a42c29d4d..39cfa4044e6c 100644 --- a/arch/powerpc/platforms/85xx/p5020_ds.c +++ b/arch/powerpc/platforms/85xx/p5020_ds.c @@ -75,12 +75,7 @@ define_machine(p5020_ds) { #ifdef CONFIG_PCI .pcibios_fixup_bus = fsl_pcibios_fixup_bus, #endif -/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ -#ifdef CONFIG_PPC64 - .get_irq = mpic_get_irq, -#else .get_irq = mpic_get_coreint_irq, -#endif .restart = fsl_rstcr_restart, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c index 11381851828e..f70e74cddf97 100644 --- a/arch/powerpc/platforms/85xx/p5040_ds.c +++ b/arch/powerpc/platforms/85xx/p5040_ds.c @@ -66,12 +66,7 @@ define_machine(p5040_ds) { #ifdef CONFIG_PCI .pcibios_fixup_bus = fsl_pcibios_fixup_bus, #endif -/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ -#ifdef CONFIG_PPC64 - .get_irq = mpic_get_irq, -#else .get_irq = mpic_get_coreint_irq, -#endif .restart = fsl_rstcr_restart, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 6a1759939c6b..5ced4f5bb2b2 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -99,7 +99,7 @@ static void mpc85xx_take_timebase(void) } #ifdef CONFIG_HOTPLUG_CPU -static void __cpuinit smp_85xx_mach_cpu_die(void) +static void smp_85xx_mach_cpu_die(void) { unsigned int cpu = smp_processor_id(); u32 tmp; @@ -141,7 +141,7 @@ static inline u32 read_spin_table_addr_l(void *spin_table) return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); } -static int __cpuinit smp_85xx_kick_cpu(int nr) +static int smp_85xx_kick_cpu(int nr) { unsigned long flags; const u64 *cpu_rel_addr; @@ -362,7 +362,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) } #endif /* CONFIG_KEXEC */ -static void __cpuinit smp_85xx_setup_cpu(int cpu_nr) +static void smp_85xx_setup_cpu(int cpu_nr) { if (smp_85xx_ops.probe == smp_mpic_probe) mpic_setup_this_cpu(); diff --git a/arch/powerpc/platforms/85xx/t4240_qds.c b/arch/powerpc/platforms/85xx/t4240_qds.c index 5998e9f33304..91ead6b1b8af 100644 --- a/arch/powerpc/platforms/85xx/t4240_qds.c +++ b/arch/powerpc/platforms/85xx/t4240_qds.c @@ -75,12 +75,7 @@ define_machine(t4240_qds) { #ifdef CONFIG_PCI .pcibios_fixup_bus = fsl_pcibios_fixup_bus, #endif -/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ -#ifdef CONFIG_PPC64 - .get_irq = mpic_get_irq, -#else .get_irq = mpic_get_coreint_irq, -#endif .restart = fsl_rstcr_restart, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 1e121088826f..587a2828b06c 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) static struct irqaction tbint_irqaction = { .handler = timebase_interrupt, + .flags = IRQF_NO_THREAD, .name = "tbint", }; @@ -218,19 +219,12 @@ void mpc8xx_restart(char *cmd) static void cpm_cascade(unsigned int irq, struct irq_desc *desc) { - struct irq_chip *chip; - int cascade_irq; - - if ((cascade_irq = cpm_get_irq()) >= 0) { - struct irq_desc *cdesc = irq_to_desc(cascade_irq); + struct irq_chip *chip = irq_desc_get_chip(desc); + int cascade_irq = cpm_get_irq(); + if (cascade_irq >= 0) generic_handle_irq(cascade_irq); - chip = irq_desc_get_chip(cdesc); - chip->irq_eoi(&cdesc->irq_data); - } - - chip = irq_desc_get_chip(desc); chip->irq_eoi(&desc->irq_data); } diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index b62aab3e22ec..d703775bda30 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -86,6 +86,27 @@ config MPIC bool default n +config MPIC_TIMER + bool "MPIC Global Timer" + depends on MPIC && FSL_SOC + default n + help + The MPIC global timer is a hardware timer inside the + Freescale PIC complying with OpenPIC standard. When the + specified interval times out, the hardware timer generates + an interrupt. The driver currently is only tested on fsl + chip, but it can potentially support other global timers + complying with the OpenPIC standard. + +config FSL_MPIC_TIMER_WAKEUP + tristate "Freescale MPIC global timer wakeup driver" + depends on FSL_SOC && MPIC_TIMER && PM + default n + help + The driver provides a way to wake up the system by MPIC + timer. + e.g. "echo 5 > /sys/devices/system/mpic/timer_wakeup" + config PPC_EPAPR_HV_PIC bool default n @@ -164,6 +185,11 @@ config IBMEBUS help Bus device driver for GX bus based adapters. +config EEH + bool + depends on (PPC_POWERNV || PPC_PSERIES) && PCI + default y + config PPC_MPC106 bool default n @@ -193,37 +219,6 @@ config PPC_IO_WORKAROUNDS source "drivers/cpufreq/Kconfig" -menu "CPU Frequency drivers" - depends on CPU_FREQ - -config CPU_FREQ_PMAC - bool "Support for Apple PowerBooks" - depends on ADB_PMU && PPC32 - select CPU_FREQ_TABLE - help - This adds support for frequency switching on Apple PowerBooks, - this currently includes some models of iBook & Titanium - PowerBook. - -config CPU_FREQ_PMAC64 - bool "Support for some Apple G5s" - depends on PPC_PMAC && PPC64 - select CPU_FREQ_TABLE - help - This adds support for frequency switching on Apple iMac G5, - and some of the more recent desktop G5 machines as well. - -config PPC_PASEMI_CPUFREQ - bool "Support for PA Semi PWRficient" - depends on PPC_PASEMI - default y - select CPU_FREQ_TABLE - help - This adds the support for frequency switching on PA Semi - PWRficient processors. - -endmenu - menu "CPUIdle driver" source "drivers/cpuidle/Kconfig" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 54f3936001aa..47d9a03dd415 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -71,6 +71,7 @@ config PPC_BOOK3S_64 select PPC_FPU select PPC_HAVE_PMU_SUPPORT select SYS_SUPPORTS_HUGETLBFS + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES config PPC_BOOK3E_64 bool "Embedded processors" @@ -158,6 +159,7 @@ config E500 config PPC_E500MC bool "e500mc Support" select PPC_FPU + select COMMON_CLK depends on E500 help This must be enabled for running on e500mc (and derivatives diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index 246e1d8b3af3..c34ee4e60873 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -185,7 +185,8 @@ static void beat_lpar_hptab_clear(void) static long beat_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long lpar_rc; u64 dummy0, dummy1; @@ -274,7 +275,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, } static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; @@ -364,9 +366,10 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group, * already zero. For now I am paranoid. */ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, - unsigned long newpp, - unsigned long vpn, - int psize, int ssize, int local) + unsigned long newpp, + unsigned long vpn, + int psize, int apsize, + int ssize, int local) { unsigned long lpar_rc; unsigned long want_v; @@ -394,7 +397,8 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, } static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 8c6dc42ecf65..9e5dfbcc00af 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void) ppc_md.get_irq = beatic_get_irq; /* Allocate an irq host */ - beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL); + beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL); BUG_ON(beatic_host == NULL); irq_set_default_host(beatic_host); } diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index d35dbbc8ec79..f75f6fcac729 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -142,7 +142,7 @@ static int smp_cell_cpu_bootable(unsigned int nr) * during boot if the user requests it. Odd-numbered * cpus are assumed to be secondary threads. */ - if (system_state < SYSTEM_RUNNING && + if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT) && !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) return 0; diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile index ce6d789e0741..8e8d4cae5ebe 100644 --- a/arch/powerpc/platforms/pasemi/Makefile +++ b/arch/powerpc/platforms/pasemi/Makefile @@ -1,3 +1,2 @@ obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o -obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += cpufreq.o diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c deleted file mode 100644 index be1e7958909e..000000000000 --- a/arch/powerpc/platforms/pasemi/cpufreq.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (C) 2007 PA Semi, Inc - * - * Authors: Egor Martovetsky <egor@pasemi.com> - * Olof Johansson <olof@lixom.net> - * - * Maintained by: Olof Johansson <olof@lixom.net> - * - * Based on arch/powerpc/platforms/cell/cbe_cpufreq.c: - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/cpufreq.h> -#include <linux/timer.h> -#include <linux/module.h> - -#include <asm/hw_irq.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/smp.h> - -#define SDCASR_REG 0x0100 -#define SDCASR_REG_STRIDE 0x1000 -#define SDCPWR_CFGA0_REG 0x0100 -#define SDCPWR_PWST0_REG 0x0000 -#define SDCPWR_GIZTIME_REG 0x0440 - -/* SDCPWR_GIZTIME_REG fields */ -#define SDCPWR_GIZTIME_GR 0x80000000 -#define SDCPWR_GIZTIME_LONGLOCK 0x000000ff - -/* Offset of ASR registers from SDC base */ -#define SDCASR_OFFSET 0x120000 - -static void __iomem *sdcpwr_mapbase; -static void __iomem *sdcasr_mapbase; - -static DEFINE_MUTEX(pas_switch_mutex); - -/* Current astate, is used when waking up from power savings on - * one core, in case the other core has switched states during - * the idle time. - */ -static int current_astate; - -/* We support 5(A0-A4) power states excluding turbo(A5-A6) modes */ -static struct cpufreq_frequency_table pas_freqs[] = { - {0, 0}, - {1, 0}, - {2, 0}, - {3, 0}, - {4, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr *pas_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -/* - * hardware specific functions - */ - -static int get_astate_freq(int astate) -{ - u32 ret; - ret = in_le32(sdcpwr_mapbase + SDCPWR_CFGA0_REG + (astate * 0x10)); - - return ret & 0x3f; -} - -static int get_cur_astate(int cpu) -{ - u32 ret; - - ret = in_le32(sdcpwr_mapbase + SDCPWR_PWST0_REG); - ret = (ret >> (cpu * 4)) & 0x7; - - return ret; -} - -static int get_gizmo_latency(void) -{ - u32 giztime, ret; - - giztime = in_le32(sdcpwr_mapbase + SDCPWR_GIZTIME_REG); - - /* just provide the upper bound */ - if (giztime & SDCPWR_GIZTIME_GR) - ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 128000; - else - ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 1000; - - return ret; -} - -static void set_astate(int cpu, unsigned int astate) -{ - unsigned long flags; - - /* Return if called before init has run */ - if (unlikely(!sdcasr_mapbase)) - return; - - local_irq_save(flags); - - out_le32(sdcasr_mapbase + SDCASR_REG + SDCASR_REG_STRIDE*cpu, astate); - - local_irq_restore(flags); -} - -int check_astate(void) -{ - return get_cur_astate(hard_smp_processor_id()); -} - -void restore_astate(int cpu) -{ - set_astate(cpu, current_astate); -} - -/* - * cpufreq functions - */ - -static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - const u32 *max_freqp; - u32 max_freq; - int i, cur_astate; - struct resource res; - struct device_node *cpu, *dn; - int err = -ENODEV; - - cpu = of_get_cpu_node(policy->cpu, NULL); - - if (!cpu) - goto out; - - dn = of_find_compatible_node(NULL, NULL, "1682m-sdc"); - if (!dn) - dn = of_find_compatible_node(NULL, NULL, - "pasemi,pwrficient-sdc"); - if (!dn) - goto out; - err = of_address_to_resource(dn, 0, &res); - of_node_put(dn); - if (err) - goto out; - sdcasr_mapbase = ioremap(res.start + SDCASR_OFFSET, 0x2000); - if (!sdcasr_mapbase) { - err = -EINVAL; - goto out; - } - - dn = of_find_compatible_node(NULL, NULL, "1682m-gizmo"); - if (!dn) - dn = of_find_compatible_node(NULL, NULL, - "pasemi,pwrficient-gizmo"); - if (!dn) { - err = -ENODEV; - goto out_unmap_sdcasr; - } - err = of_address_to_resource(dn, 0, &res); - of_node_put(dn); - if (err) - goto out_unmap_sdcasr; - sdcpwr_mapbase = ioremap(res.start, 0x1000); - if (!sdcpwr_mapbase) { - err = -EINVAL; - goto out_unmap_sdcasr; - } - - pr_debug("init cpufreq on CPU %d\n", policy->cpu); - - max_freqp = of_get_property(cpu, "clock-frequency", NULL); - if (!max_freqp) { - err = -EINVAL; - goto out_unmap_sdcpwr; - } - - /* we need the freq in kHz */ - max_freq = *max_freqp / 1000; - - pr_debug("max clock-frequency is at %u kHz\n", max_freq); - pr_debug("initializing frequency table\n"); - - /* initialize frequency table */ - for (i=0; pas_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) { - pas_freqs[i].frequency = get_astate_freq(pas_freqs[i].index) * 100000; - pr_debug("%d: %d\n", i, pas_freqs[i].frequency); - } - - policy->cpuinfo.transition_latency = get_gizmo_latency(); - - cur_astate = get_cur_astate(policy->cpu); - pr_debug("current astate is at %d\n",cur_astate); - - policy->cur = pas_freqs[cur_astate].frequency; - cpumask_copy(policy->cpus, cpu_online_mask); - - ppc_proc_freq = policy->cur * 1000ul; - - cpufreq_frequency_table_get_attr(pas_freqs, policy->cpu); - - /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max - * are set correctly - */ - return cpufreq_frequency_table_cpuinfo(policy, pas_freqs); - -out_unmap_sdcpwr: - iounmap(sdcpwr_mapbase); - -out_unmap_sdcasr: - iounmap(sdcasr_mapbase); -out: - return err; -} - -static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy) -{ - /* - * We don't support CPU hotplug. Don't unmap after the system - * has already made it to a running state. - */ - if (system_state != SYSTEM_BOOTING) - return 0; - - if (sdcasr_mapbase) - iounmap(sdcasr_mapbase); - if (sdcpwr_mapbase) - iounmap(sdcpwr_mapbase); - - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static int pas_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, pas_freqs); -} - -static int pas_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - struct cpufreq_freqs freqs; - int pas_astate_new; - int i; - - cpufreq_frequency_table_target(policy, - pas_freqs, - target_freq, - relation, - &pas_astate_new); - - freqs.old = policy->cur; - freqs.new = pas_freqs[pas_astate_new].frequency; - - mutex_lock(&pas_switch_mutex); - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - - pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", - policy->cpu, - pas_freqs[pas_astate_new].frequency, - pas_freqs[pas_astate_new].index); - - current_astate = pas_astate_new; - - for_each_online_cpu(i) - set_astate(i, pas_astate_new); - - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - mutex_unlock(&pas_switch_mutex); - - ppc_proc_freq = freqs.new * 1000ul; - return 0; -} - -static struct cpufreq_driver pas_cpufreq_driver = { - .name = "pas-cpufreq", - .owner = THIS_MODULE, - .flags = CPUFREQ_CONST_LOOPS, - .init = pas_cpufreq_cpu_init, - .exit = pas_cpufreq_cpu_exit, - .verify = pas_cpufreq_verify, - .target = pas_cpufreq_target, - .attr = pas_cpu_freqs_attr, -}; - -/* - * module init and destoy - */ - -static int __init pas_cpufreq_init(void) -{ - if (!of_machine_is_compatible("PA6T-1682M") && - !of_machine_is_compatible("pasemi,pwrficient")) - return -ENODEV; - - return cpufreq_register_driver(&pas_cpufreq_driver); -} - -static void __exit pas_cpufreq_exit(void) -{ - cpufreq_unregister_driver(&pas_cpufreq_driver); -} - -module_init(pas_cpufreq_init); -module_exit(pas_cpufreq_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>, Olof Johansson <olof@lixom.net>"); diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index ea47df66fee5..52c6ce1cc985 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -9,8 +9,6 @@ obj-y += pic.o setup.o time.o feature.o pci.o \ sleep.o low_i2c.o cache.o pfunc_core.o \ pfunc_base.o udbg_scc.o udbg_adb.o obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o -obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o -obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o # CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really # need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really # CONFIG_NVRAM=y diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c deleted file mode 100644 index 3104fad82480..000000000000 --- a/arch/powerpc/platforms/powermac/cpufreq_32.c +++ /dev/null @@ -1,721 +0,0 @@ -/* - * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * Copyright (C) 2004 John Steele Scott <toojays@toojays.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * TODO: Need a big cleanup here. Basically, we need to have different - * cpufreq_driver structures for the different type of HW instead of the - * current mess. We also need to better deal with the detection of the - * type of machine. - * - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/adb.h> -#include <linux/pmu.h> -#include <linux/cpufreq.h> -#include <linux/init.h> -#include <linux/device.h> -#include <linux/hardirq.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/irq.h> -#include <asm/pmac_feature.h> -#include <asm/mmu_context.h> -#include <asm/sections.h> -#include <asm/cputable.h> -#include <asm/time.h> -#include <asm/mpic.h> -#include <asm/keylargo.h> -#include <asm/switch_to.h> - -/* WARNING !!! This will cause calibrate_delay() to be called, - * but this is an __init function ! So you MUST go edit - * init/main.c to make it non-init before enabling DEBUG_FREQ - */ -#undef DEBUG_FREQ - -extern void low_choose_7447a_dfs(int dfs); -extern void low_choose_750fx_pll(int pll); -extern void low_sleep_handler(void); - -/* - * Currently, PowerMac cpufreq supports only high & low frequencies - * that are set by the firmware - */ -static unsigned int low_freq; -static unsigned int hi_freq; -static unsigned int cur_freq; -static unsigned int sleep_freq; -static unsigned long transition_latency; - -/* - * Different models uses different mechanisms to switch the frequency - */ -static int (*set_speed_proc)(int low_speed); -static unsigned int (*get_speed_proc)(void); - -/* - * Some definitions used by the various speedprocs - */ -static u32 voltage_gpio; -static u32 frequency_gpio; -static u32 slew_done_gpio; -static int no_schedule; -static int has_cpu_l2lve; -static int is_pmu_based; - -/* There are only two frequency states for each processor. Values - * are in kHz for the time being. - */ -#define CPUFREQ_HIGH 0 -#define CPUFREQ_LOW 1 - -static struct cpufreq_frequency_table pmac_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr* pmac_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static inline void local_delay(unsigned long ms) -{ - if (no_schedule) - mdelay(ms); - else - msleep(ms); -} - -#ifdef DEBUG_FREQ -static inline void debug_calc_bogomips(void) -{ - /* This will cause a recalc of bogomips and display the - * result. We backup/restore the value to avoid affecting the - * core cpufreq framework's own calculation. - */ - unsigned long save_lpj = loops_per_jiffy; - calibrate_delay(); - loops_per_jiffy = save_lpj; -} -#endif /* DEBUG_FREQ */ - -/* Switch CPU speed under 750FX CPU control - */ -static int cpu_750fx_cpu_speed(int low_speed) -{ - u32 hid2; - - if (low_speed == 0) { - /* ramping up, set voltage first */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Make sure we sleep for at least 1ms */ - local_delay(10); - - /* tweak L2 for high voltage */ - if (has_cpu_l2lve) { - hid2 = mfspr(SPRN_HID2); - hid2 &= ~0x2000; - mtspr(SPRN_HID2, hid2); - } - } -#ifdef CONFIG_6xx - low_choose_750fx_pll(low_speed); -#endif - if (low_speed == 1) { - /* tweak L2 for low voltage */ - if (has_cpu_l2lve) { - hid2 = mfspr(SPRN_HID2); - hid2 |= 0x2000; - mtspr(SPRN_HID2, hid2); - } - - /* ramping down, set voltage last */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - local_delay(10); - } - - return 0; -} - -static unsigned int cpu_750fx_get_cpu_speed(void) -{ - if (mfspr(SPRN_HID1) & HID1_PS) - return low_freq; - else - return hi_freq; -} - -/* Switch CPU speed using DFS */ -static int dfs_set_cpu_speed(int low_speed) -{ - if (low_speed == 0) { - /* ramping up, set voltage first */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Make sure we sleep for at least 1ms */ - local_delay(1); - } - - /* set frequency */ -#ifdef CONFIG_6xx - low_choose_7447a_dfs(low_speed); -#endif - udelay(100); - - if (low_speed == 1) { - /* ramping down, set voltage last */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - local_delay(1); - } - - return 0; -} - -static unsigned int dfs_get_cpu_speed(void) -{ - if (mfspr(SPRN_HID1) & HID1_DFS) - return low_freq; - else - return hi_freq; -} - - -/* Switch CPU speed using slewing GPIOs - */ -static int gpios_set_cpu_speed(int low_speed) -{ - int gpio, timeout = 0; - - /* If ramping up, set voltage first */ - if (low_speed == 0) { - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Delay is way too big but it's ok, we schedule */ - local_delay(10); - } - - /* Set frequency */ - gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); - if (low_speed == ((gpio & 0x01) == 0)) - goto skip; - - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio, - low_speed ? 0x04 : 0x05); - udelay(200); - do { - if (++timeout > 100) - break; - local_delay(1); - gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0); - } while((gpio & 0x02) == 0); - skip: - /* If ramping down, set voltage last */ - if (low_speed == 1) { - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - /* Delay is way too big but it's ok, we schedule */ - local_delay(10); - } - -#ifdef DEBUG_FREQ - debug_calc_bogomips(); -#endif - - return 0; -} - -/* Switch CPU speed under PMU control - */ -static int pmu_set_cpu_speed(int low_speed) -{ - struct adb_request req; - unsigned long save_l2cr; - unsigned long save_l3cr; - unsigned int pic_prio; - unsigned long flags; - - preempt_disable(); - -#ifdef DEBUG_FREQ - printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1)); -#endif - pmu_suspend(); - - /* Disable all interrupt sources on openpic */ - pic_prio = mpic_cpu_get_priority(); - mpic_cpu_set_priority(0xf); - - /* Make sure the decrementer won't interrupt us */ - asm volatile("mtdec %0" : : "r" (0x7fffffff)); - /* Make sure any pending DEC interrupt occurring while we did - * the above didn't re-enable the DEC */ - mb(); - asm volatile("mtdec %0" : : "r" (0x7fffffff)); - - /* We can now disable MSR_EE */ - local_irq_save(flags); - - /* Giveup the FPU & vec */ - enable_kernel_fp(); - -#ifdef CONFIG_ALTIVEC - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - enable_kernel_altivec(); -#endif /* CONFIG_ALTIVEC */ - - /* Save & disable L2 and L3 caches */ - save_l3cr = _get_L3CR(); /* (returns -1 if not available) */ - save_l2cr = _get_L2CR(); /* (returns -1 if not available) */ - - /* Send the new speed command. My assumption is that this command - * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep - */ - pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed); - while (!req.complete) - pmu_poll(); - - /* Prepare the northbridge for the speed transition */ - pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1); - - /* Call low level code to backup CPU state and recover from - * hardware reset - */ - low_sleep_handler(); - - /* Restore the northbridge */ - pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0); - - /* Restore L2 cache */ - if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0) - _set_L2CR(save_l2cr); - /* Restore L3 cache */ - if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0) - _set_L3CR(save_l3cr); - - /* Restore userland MMU context */ - switch_mmu_context(NULL, current->active_mm); - -#ifdef DEBUG_FREQ - printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1)); -#endif - - /* Restore low level PMU operations */ - pmu_unlock(); - - /* - * Restore decrementer; we'll take a decrementer interrupt - * as soon as interrupts are re-enabled and the generic - * clockevents code will reprogram it with the right value. - */ - set_dec(1); - - /* Restore interrupts */ - mpic_cpu_set_priority(pic_prio); - - /* Let interrupts flow again ... */ - local_irq_restore(flags); - -#ifdef DEBUG_FREQ - debug_calc_bogomips(); -#endif - - pmu_resume(); - - preempt_enable(); - - return 0; -} - -static int do_set_cpu_speed(struct cpufreq_policy *policy, int speed_mode, - int notify) -{ - struct cpufreq_freqs freqs; - unsigned long l3cr; - static unsigned long prev_l3cr; - - freqs.old = cur_freq; - freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; - - if (freqs.old == freqs.new) - return 0; - - if (notify) - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - if (speed_mode == CPUFREQ_LOW && - cpu_has_feature(CPU_FTR_L3CR)) { - l3cr = _get_L3CR(); - if (l3cr & L3CR_L3E) { - prev_l3cr = l3cr; - _set_L3CR(0); - } - } - set_speed_proc(speed_mode == CPUFREQ_LOW); - if (speed_mode == CPUFREQ_HIGH && - cpu_has_feature(CPU_FTR_L3CR)) { - l3cr = _get_L3CR(); - if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr) - _set_L3CR(prev_l3cr); - } - if (notify) - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; - - return 0; -} - -static unsigned int pmac_cpufreq_get_speed(unsigned int cpu) -{ - return cur_freq; -} - -static int pmac_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs); -} - -static int pmac_cpufreq_target( struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - int rc; - - if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs, - target_freq, relation, &newstate)) - return -EINVAL; - - rc = do_set_cpu_speed(policy, newstate, 1); - - ppc_proc_freq = cur_freq * 1000ul; - return rc; -} - -static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - if (policy->cpu != 0) - return -ENODEV; - - policy->cpuinfo.transition_latency = transition_latency; - policy->cur = cur_freq; - - cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu); - return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs); -} - -static u32 read_gpio(struct device_node *np) -{ - const u32 *reg = of_get_property(np, "reg", NULL); - u32 offset; - - if (reg == NULL) - return 0; - /* That works for all keylargos but shall be fixed properly - * some day... The problem is that it seems we can't rely - * on the "reg" property of the GPIO nodes, they are either - * relative to the base of KeyLargo or to the base of the - * GPIO space, and the device-tree doesn't help. - */ - offset = *reg; - if (offset < KEYLARGO_GPIO_LEVELS0) - offset += KEYLARGO_GPIO_LEVELS0; - return offset; -} - -static int pmac_cpufreq_suspend(struct cpufreq_policy *policy) -{ - /* Ok, this could be made a bit smarter, but let's be robust for now. We - * always force a speed change to high speed before sleep, to make sure - * we have appropriate voltage and/or bus speed for the wakeup process, - * and to make sure our loops_per_jiffies are "good enough", that is will - * not cause too short delays if we sleep in low speed and wake in high - * speed.. - */ - no_schedule = 1; - sleep_freq = cur_freq; - if (cur_freq == low_freq && !is_pmu_based) - do_set_cpu_speed(policy, CPUFREQ_HIGH, 0); - return 0; -} - -static int pmac_cpufreq_resume(struct cpufreq_policy *policy) -{ - /* If we resume, first check if we have a get() function */ - if (get_speed_proc) - cur_freq = get_speed_proc(); - else - cur_freq = 0; - - /* We don't, hrm... we don't really know our speed here, best - * is that we force a switch to whatever it was, which is - * probably high speed due to our suspend() routine - */ - do_set_cpu_speed(policy, sleep_freq == low_freq ? - CPUFREQ_LOW : CPUFREQ_HIGH, 0); - - ppc_proc_freq = cur_freq * 1000ul; - - no_schedule = 0; - return 0; -} - -static struct cpufreq_driver pmac_cpufreq_driver = { - .verify = pmac_cpufreq_verify, - .target = pmac_cpufreq_target, - .get = pmac_cpufreq_get_speed, - .init = pmac_cpufreq_cpu_init, - .suspend = pmac_cpufreq_suspend, - .resume = pmac_cpufreq_resume, - .flags = CPUFREQ_PM_NO_WARN, - .attr = pmac_cpu_freqs_attr, - .name = "powermac", - .owner = THIS_MODULE, -}; - - -static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np = of_find_node_by_name(NULL, - "voltage-gpio"); - struct device_node *freq_gpio_np = of_find_node_by_name(NULL, - "frequency-gpio"); - struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL, - "slewing-done"); - const u32 *value; - - /* - * Check to see if it's GPIO driven or PMU only - * - * The way we extract the GPIO address is slightly hackish, but it - * works well enough for now. We need to abstract the whole GPIO - * stuff sooner or later anyway - */ - - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - if (freq_gpio_np) - frequency_gpio = read_gpio(freq_gpio_np); - if (slew_done_gpio_np) - slew_done_gpio = read_gpio(slew_done_gpio_np); - - /* If we use the frequency GPIOs, calculate the min/max speeds based - * on the bus frequencies - */ - if (frequency_gpio && slew_done_gpio) { - int lenp, rc; - const u32 *freqs, *ratio; - - freqs = of_get_property(cpunode, "bus-frequencies", &lenp); - lenp /= sizeof(u32); - if (freqs == NULL || lenp != 2) { - printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n"); - return 1; - } - ratio = of_get_property(cpunode, "processor-to-bus-ratio*2", - NULL); - if (ratio == NULL) { - printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n"); - return 1; - } - - /* Get the min/max bus frequencies */ - low_freq = min(freqs[0], freqs[1]); - hi_freq = max(freqs[0], freqs[1]); - - /* Grrrr.. It _seems_ that the device-tree is lying on the low bus - * frequency, it claims it to be around 84Mhz on some models while - * it appears to be approx. 101Mhz on all. Let's hack around here... - * fortunately, we don't need to be too precise - */ - if (low_freq < 98000000) - low_freq = 101000000; - - /* Convert those to CPU core clocks */ - low_freq = (low_freq * (*ratio)) / 2000; - hi_freq = (hi_freq * (*ratio)) / 2000; - - /* Now we get the frequencies, we read the GPIO to see what is out current - * speed - */ - rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); - cur_freq = (rc & 0x01) ? hi_freq : low_freq; - - set_speed_proc = gpios_set_cpu_speed; - return 1; - } - - /* If we use the PMU, look for the min & max frequencies in the - * device-tree - */ - value = of_get_property(cpunode, "min-clock-frequency", NULL); - if (!value) - return 1; - low_freq = (*value) / 1000; - /* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree - * here */ - if (low_freq < 100000) - low_freq *= 10; - - value = of_get_property(cpunode, "max-clock-frequency", NULL); - if (!value) - return 1; - hi_freq = (*value) / 1000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - - return 0; -} - -static int pmac_cpufreq_init_7447A(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np; - - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) - return 1; - - volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - if (!voltage_gpio){ - printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n"); - return 1; - } - - /* OF only reports the high frequency */ - hi_freq = cur_freq; - low_freq = cur_freq/2; - - /* Read actual frequency from CPU */ - cur_freq = dfs_get_cpu_speed(); - set_speed_proc = dfs_set_cpu_speed; - get_speed_proc = dfs_get_cpu_speed; - - return 0; -} - -static int pmac_cpufreq_init_750FX(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np; - u32 pvr; - const u32 *value; - - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) - return 1; - - hi_freq = cur_freq; - value = of_get_property(cpunode, "reduced-clock-frequency", NULL); - if (!value) - return 1; - low_freq = (*value) / 1000; - - volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - - pvr = mfspr(SPRN_PVR); - has_cpu_l2lve = !((pvr & 0xf00) == 0x100); - - set_speed_proc = cpu_750fx_cpu_speed; - get_speed_proc = cpu_750fx_get_cpu_speed; - cur_freq = cpu_750fx_get_cpu_speed(); - - return 0; -} - -/* Currently, we support the following machines: - * - * - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz) - * - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz) - * - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz) - * - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz) - * - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz) - * - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage) - * - Recent MacRISC3 laptops - * - All new machines with 7447A CPUs - */ -static int __init pmac_cpufreq_setup(void) -{ - struct device_node *cpunode; - const u32 *value; - - if (strstr(cmd_line, "nocpufreq")) - return 0; - - /* Assume only one CPU */ - cpunode = of_find_node_by_type(NULL, "cpu"); - if (!cpunode) - goto out; - - /* Get current cpu clock freq */ - value = of_get_property(cpunode, "clock-frequency", NULL); - if (!value) - goto out; - cur_freq = (*value) / 1000; - transition_latency = CPUFREQ_ETERNAL; - - /* Check for 7447A based MacRISC3 */ - if (of_machine_is_compatible("MacRISC3") && - of_get_property(cpunode, "dynamic-power-step", NULL) && - PVR_VER(mfspr(SPRN_PVR)) == 0x8003) { - pmac_cpufreq_init_7447A(cpunode); - transition_latency = 8000000; - /* Check for other MacRISC3 machines */ - } else if (of_machine_is_compatible("PowerBook3,4") || - of_machine_is_compatible("PowerBook3,5") || - of_machine_is_compatible("MacRISC3")) { - pmac_cpufreq_init_MacRISC3(cpunode); - /* Else check for iBook2 500/600 */ - } else if (of_machine_is_compatible("PowerBook4,1")) { - hi_freq = cur_freq; - low_freq = 400000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for TiPb 550 */ - else if (of_machine_is_compatible("PowerBook3,3") && cur_freq == 550000) { - hi_freq = cur_freq; - low_freq = 500000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for TiPb 400 & 500 */ - else if (of_machine_is_compatible("PowerBook3,2")) { - /* We only know about the 400 MHz and the 500Mhz model - * they both have 300 MHz as low frequency - */ - if (cur_freq < 350000 || cur_freq > 550000) - goto out; - hi_freq = cur_freq; - low_freq = 300000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for 750FX */ - else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000) - pmac_cpufreq_init_750FX(cpunode); -out: - of_node_put(cpunode); - if (set_speed_proc == NULL) - return -ENODEV; - - pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq; - pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq; - ppc_proc_freq = cur_freq * 1000ul; - - printk(KERN_INFO "Registering PowerMac CPU frequency driver\n"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n", - low_freq/1000, hi_freq/1000, cur_freq/1000); - - return cpufreq_register_driver(&pmac_cpufreq_driver); -} - -module_init(pmac_cpufreq_setup); - diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c deleted file mode 100644 index 7ba423431cfe..000000000000 --- a/arch/powerpc/platforms/powermac/cpufreq_64.c +++ /dev/null @@ -1,746 +0,0 @@ -/* - * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * and Markus Demleitner <msdemlei@cl.uni-heidelberg.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs, - * that is iMac G5 and latest single CPU desktop. - */ - -#undef DEBUG - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/cpufreq.h> -#include <linux/init.h> -#include <linux/completion.h> -#include <linux/mutex.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/irq.h> -#include <asm/sections.h> -#include <asm/cputable.h> -#include <asm/time.h> -#include <asm/smu.h> -#include <asm/pmac_pfunc.h> - -#define DBG(fmt...) pr_debug(fmt) - -/* see 970FX user manual */ - -#define SCOM_PCR 0x0aa001 /* PCR scom addr */ - -#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */ -#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */ -#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */ -#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */ -#define PCR_SPEED_MASK 0x000e0000U /* speed mask */ -#define PCR_SPEED_SHIFT 17 -#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */ -#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */ -#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */ -#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */ -#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */ -#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */ - -#define SCOM_PSR 0x408001 /* PSR scom addr */ -/* warning: PSR is a 64 bits register */ -#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */ -#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */ -#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */ -#define PSR_CUR_SPEED_SHIFT (56) - -/* - * The G5 only supports two frequencies (Quarter speed is not supported) - */ -#define CPUFREQ_HIGH 0 -#define CPUFREQ_LOW 1 - -static struct cpufreq_frequency_table g5_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr* g5_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -/* Power mode data is an array of the 32 bits PCR values to use for - * the various frequencies, retrieved from the device-tree - */ -static int g5_pmode_cur; - -static void (*g5_switch_volt)(int speed_mode); -static int (*g5_switch_freq)(int speed_mode); -static int (*g5_query_freq)(void); - -static DEFINE_MUTEX(g5_switch_mutex); - -static unsigned long transition_latency; - -#ifdef CONFIG_PMAC_SMU - -static const u32 *g5_pmode_data; -static int g5_pmode_max; - -static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ -static int g5_fvt_count; /* number of op. points */ -static int g5_fvt_cur; /* current op. point */ - -/* - * SMU based voltage switching for Neo2 platforms - */ - -static void g5_smu_switch_volt(int speed_mode) -{ - struct smu_simple_cmd cmd; - - DECLARE_COMPLETION_ONSTACK(comp); - smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete, - &comp, 'V', 'S', 'L', 'E', 'W', - 0xff, g5_fvt_cur+1, speed_mode); - wait_for_completion(&comp); -} - -/* - * Platform function based voltage/vdnap switching for Neo2 - */ - -static struct pmf_function *pfunc_set_vdnap0; -static struct pmf_function *pfunc_vdnap0_complete; - -static void g5_vdnap_switch_volt(int speed_mode) -{ - struct pmf_args args; - u32 slew, done = 0; - unsigned long timeout; - - slew = (speed_mode == CPUFREQ_LOW) ? 1 : 0; - args.count = 1; - args.u[0].p = &slew; - - pmf_call_one(pfunc_set_vdnap0, &args); - - /* It's an irq GPIO so we should be able to just block here, - * I'll do that later after I've properly tested the IRQ code for - * platform functions - */ - timeout = jiffies + HZ/10; - while(!time_after(jiffies, timeout)) { - args.count = 1; - args.u[0].p = &done; - pmf_call_one(pfunc_vdnap0_complete, &args); - if (done) - break; - msleep(1); - } - if (done == 0) - printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n"); -} - - -/* - * SCOM based frequency switching for 970FX rev3 - */ -static int g5_scom_switch_freq(int speed_mode) -{ - unsigned long flags; - int to; - - /* If frequency is going up, first ramp up the voltage */ - if (speed_mode < g5_pmode_cur) - g5_switch_volt(speed_mode); - - local_irq_save(flags); - - /* Clear PCR high */ - scom970_write(SCOM_PCR, 0); - /* Clear PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0); - /* Set PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | - g5_pmode_data[speed_mode]); - - /* Wait for completion */ - for (to = 0; to < 10; to++) { - unsigned long psr = scom970_read(SCOM_PSR); - - if ((psr & PSR_CMD_RECEIVED) == 0 && - (((psr >> PSR_CUR_SPEED_SHIFT) ^ - (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3) - == 0) - break; - if (psr & PSR_CMD_COMPLETED) - break; - udelay(100); - } - - local_irq_restore(flags); - - /* If frequency is going down, last ramp the voltage */ - if (speed_mode > g5_pmode_cur) - g5_switch_volt(speed_mode); - - g5_pmode_cur = speed_mode; - ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; - - return 0; -} - -static int g5_scom_query_freq(void) -{ - unsigned long psr = scom970_read(SCOM_PSR); - int i; - - for (i = 0; i <= g5_pmode_max; i++) - if ((((psr >> PSR_CUR_SPEED_SHIFT) ^ - (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0) - break; - return i; -} - -/* - * Fake voltage switching for platforms with missing support - */ - -static void g5_dummy_switch_volt(int speed_mode) -{ -} - -#endif /* CONFIG_PMAC_SMU */ - -/* - * Platform function based voltage switching for PowerMac7,2 & 7,3 - */ - -static struct pmf_function *pfunc_cpu0_volt_high; -static struct pmf_function *pfunc_cpu0_volt_low; -static struct pmf_function *pfunc_cpu1_volt_high; -static struct pmf_function *pfunc_cpu1_volt_low; - -static void g5_pfunc_switch_volt(int speed_mode) -{ - if (speed_mode == CPUFREQ_HIGH) { - if (pfunc_cpu0_volt_high) - pmf_call_one(pfunc_cpu0_volt_high, NULL); - if (pfunc_cpu1_volt_high) - pmf_call_one(pfunc_cpu1_volt_high, NULL); - } else { - if (pfunc_cpu0_volt_low) - pmf_call_one(pfunc_cpu0_volt_low, NULL); - if (pfunc_cpu1_volt_low) - pmf_call_one(pfunc_cpu1_volt_low, NULL); - } - msleep(10); /* should be faster , to fix */ -} - -/* - * Platform function based frequency switching for PowerMac7,2 & 7,3 - */ - -static struct pmf_function *pfunc_cpu_setfreq_high; -static struct pmf_function *pfunc_cpu_setfreq_low; -static struct pmf_function *pfunc_cpu_getfreq; -static struct pmf_function *pfunc_slewing_done; - -static int g5_pfunc_switch_freq(int speed_mode) -{ - struct pmf_args args; - u32 done = 0; - unsigned long timeout; - int rc; - - DBG("g5_pfunc_switch_freq(%d)\n", speed_mode); - - /* If frequency is going up, first ramp up the voltage */ - if (speed_mode < g5_pmode_cur) - g5_switch_volt(speed_mode); - - /* Do it */ - if (speed_mode == CPUFREQ_HIGH) - rc = pmf_call_one(pfunc_cpu_setfreq_high, NULL); - else - rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL); - - if (rc) - printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc); - - /* It's an irq GPIO so we should be able to just block here, - * I'll do that later after I've properly tested the IRQ code for - * platform functions - */ - timeout = jiffies + HZ/10; - while(!time_after(jiffies, timeout)) { - args.count = 1; - args.u[0].p = &done; - pmf_call_one(pfunc_slewing_done, &args); - if (done) - break; - msleep(1); - } - if (done == 0) - printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n"); - - /* If frequency is going down, last ramp the voltage */ - if (speed_mode > g5_pmode_cur) - g5_switch_volt(speed_mode); - - g5_pmode_cur = speed_mode; - ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; - - return 0; -} - -static int g5_pfunc_query_freq(void) -{ - struct pmf_args args; - u32 val = 0; - - args.count = 1; - args.u[0].p = &val; - pmf_call_one(pfunc_cpu_getfreq, &args); - return val ? CPUFREQ_HIGH : CPUFREQ_LOW; -} - - -/* - * Common interface to the cpufreq core - */ - -static int g5_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); -} - -static int g5_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int newstate = 0; - struct cpufreq_freqs freqs; - int rc; - - if (cpufreq_frequency_table_target(policy, g5_cpu_freqs, - target_freq, relation, &newstate)) - return -EINVAL; - - if (g5_pmode_cur == newstate) - return 0; - - mutex_lock(&g5_switch_mutex); - - freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency; - freqs.new = g5_cpu_freqs[newstate].frequency; - - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - rc = g5_switch_freq(newstate); - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - - mutex_unlock(&g5_switch_mutex); - - return rc; -} - -static unsigned int g5_cpufreq_get_speed(unsigned int cpu) -{ - return g5_cpu_freqs[g5_pmode_cur].frequency; -} - -static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - policy->cpuinfo.transition_latency = transition_latency; - policy->cur = g5_cpu_freqs[g5_query_freq()].frequency; - /* secondary CPUs are tied to the primary one by the - * cpufreq core if in the secondary policy we tell it that - * it actually must be one policy together with all others. */ - cpumask_copy(policy->cpus, cpu_online_mask); - cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu); - - return cpufreq_frequency_table_cpuinfo(policy, - g5_cpu_freqs); -} - - -static struct cpufreq_driver g5_cpufreq_driver = { - .name = "powermac", - .owner = THIS_MODULE, - .flags = CPUFREQ_CONST_LOOPS, - .init = g5_cpufreq_cpu_init, - .verify = g5_cpufreq_verify, - .target = g5_cpufreq_target, - .get = g5_cpufreq_get_speed, - .attr = g5_cpu_freqs_attr, -}; - - -#ifdef CONFIG_PMAC_SMU - -static int __init g5_neo2_cpufreq_init(struct device_node *cpus) -{ - struct device_node *cpunode; - unsigned int psize, ssize; - unsigned long max_freq; - char *freq_method, *volt_method; - const u32 *valp; - u32 pvr_hi; - int use_volts_vdnap = 0; - int use_volts_smu = 0; - int rc = -ENODEV; - - /* Check supported platforms */ - if (of_machine_is_compatible("PowerMac8,1") || - of_machine_is_compatible("PowerMac8,2") || - of_machine_is_compatible("PowerMac9,1")) - use_volts_smu = 1; - else if (of_machine_is_compatible("PowerMac11,2")) - use_volts_vdnap = 1; - else - return -ENODEV; - - /* Get first CPU node */ - for (cpunode = NULL; - (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) { - const u32 *reg = of_get_property(cpunode, "reg", NULL); - if (reg == NULL || (*reg) != 0) - continue; - if (!strcmp(cpunode->type, "cpu")) - break; - } - if (cpunode == NULL) { - printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n"); - return -ENODEV; - } - - /* Check 970FX for now */ - valp = of_get_property(cpunode, "cpu-version", NULL); - if (!valp) { - DBG("No cpu-version property !\n"); - goto bail_noprops; - } - pvr_hi = (*valp) >> 16; - if (pvr_hi != 0x3c && pvr_hi != 0x44) { - printk(KERN_ERR "cpufreq: Unsupported CPU version\n"); - goto bail_noprops; - } - - /* Look for the powertune data in the device-tree */ - g5_pmode_data = of_get_property(cpunode, "power-mode-data",&psize); - if (!g5_pmode_data) { - DBG("No power-mode-data !\n"); - goto bail_noprops; - } - g5_pmode_max = psize / sizeof(u32) - 1; - - if (use_volts_smu) { - const struct smu_sdbp_header *shdr; - - /* Look for the FVT table */ - shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); - if (!shdr) - goto bail_noprops; - g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1]; - ssize = (shdr->len * sizeof(u32)) - - sizeof(struct smu_sdbp_header); - g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt); - g5_fvt_cur = 0; - - /* Sanity checking */ - if (g5_fvt_count < 1 || g5_pmode_max < 1) - goto bail_noprops; - - g5_switch_volt = g5_smu_switch_volt; - volt_method = "SMU"; - } else if (use_volts_vdnap) { - struct device_node *root; - - root = of_find_node_by_path("/"); - if (root == NULL) { - printk(KERN_ERR "cpufreq: Can't find root of " - "device tree\n"); - goto bail_noprops; - } - pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0"); - pfunc_vdnap0_complete = - pmf_find_function(root, "slewing-done"); - if (pfunc_set_vdnap0 == NULL || - pfunc_vdnap0_complete == NULL) { - printk(KERN_ERR "cpufreq: Can't find required " - "platform function\n"); - goto bail_noprops; - } - - g5_switch_volt = g5_vdnap_switch_volt; - volt_method = "GPIO"; - } else { - g5_switch_volt = g5_dummy_switch_volt; - volt_method = "none"; - } - - /* - * From what I see, clock-frequency is always the maximal frequency. - * The current driver can not slew sysclk yet, so we really only deal - * with powertune steps for now. We also only implement full freq and - * half freq in this version. So far, I haven't yet seen a machine - * supporting anything else. - */ - valp = of_get_property(cpunode, "clock-frequency", NULL); - if (!valp) - return -ENODEV; - max_freq = (*valp)/1000; - g5_cpu_freqs[0].frequency = max_freq; - g5_cpu_freqs[1].frequency = max_freq/2; - - /* Set callbacks */ - transition_latency = 12000; - g5_switch_freq = g5_scom_switch_freq; - g5_query_freq = g5_scom_query_freq; - freq_method = "SCOM"; - - /* Force apply current frequency to make sure everything is in - * sync (voltage is right for example). Firmware may leave us with - * a strange setting ... - */ - g5_switch_volt(CPUFREQ_HIGH); - msleep(10); - g5_pmode_cur = -1; - g5_switch_freq(g5_query_freq()); - - printk(KERN_INFO "Registering G5 CPU frequency driver\n"); - printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n", - freq_method, volt_method); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", - g5_cpu_freqs[1].frequency/1000, - g5_cpu_freqs[0].frequency/1000, - g5_cpu_freqs[g5_pmode_cur].frequency/1000); - - rc = cpufreq_register_driver(&g5_cpufreq_driver); - - /* We keep the CPU node on hold... hopefully, Apple G5 don't have - * hotplug CPU with a dynamic device-tree ... - */ - return rc; - - bail_noprops: - of_node_put(cpunode); - - return rc; -} - -#endif /* CONFIG_PMAC_SMU */ - - -static int __init g5_pm72_cpufreq_init(struct device_node *cpus) -{ - struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL; - const u8 *eeprom = NULL; - const u32 *valp; - u64 max_freq, min_freq, ih, il; - int has_volt = 1, rc = 0; - - DBG("cpufreq: Initializing for PowerMac7,2, PowerMac7,3 and" - " RackMac3,1...\n"); - - /* Get first CPU node */ - for (cpunode = NULL; - (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) { - if (!strcmp(cpunode->type, "cpu")) - break; - } - if (cpunode == NULL) { - printk(KERN_ERR "cpufreq: Can't find any CPU node\n"); - return -ENODEV; - } - - /* Lookup the cpuid eeprom node */ - cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0"); - if (cpuid != NULL) - eeprom = of_get_property(cpuid, "cpuid", NULL); - if (eeprom == NULL) { - printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n"); - rc = -ENODEV; - goto bail; - } - - /* Lookup the i2c hwclock */ - for (hwclock = NULL; - (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){ - const char *loc = of_get_property(hwclock, - "hwctrl-location", NULL); - if (loc == NULL) - continue; - if (strcmp(loc, "CPU CLOCK")) - continue; - if (!of_get_property(hwclock, "platform-get-frequency", NULL)) - continue; - break; - } - if (hwclock == NULL) { - printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n"); - rc = -ENODEV; - goto bail; - } - - DBG("cpufreq: i2c clock chip found: %s\n", hwclock->full_name); - - /* Now get all the platform functions */ - pfunc_cpu_getfreq = - pmf_find_function(hwclock, "get-frequency"); - pfunc_cpu_setfreq_high = - pmf_find_function(hwclock, "set-frequency-high"); - pfunc_cpu_setfreq_low = - pmf_find_function(hwclock, "set-frequency-low"); - pfunc_slewing_done = - pmf_find_function(hwclock, "slewing-done"); - pfunc_cpu0_volt_high = - pmf_find_function(hwclock, "set-voltage-high-0"); - pfunc_cpu0_volt_low = - pmf_find_function(hwclock, "set-voltage-low-0"); - pfunc_cpu1_volt_high = - pmf_find_function(hwclock, "set-voltage-high-1"); - pfunc_cpu1_volt_low = - pmf_find_function(hwclock, "set-voltage-low-1"); - - /* Check we have minimum requirements */ - if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL || - pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) { - printk(KERN_ERR "cpufreq: Can't find platform functions !\n"); - rc = -ENODEV; - goto bail; - } - - /* Check that we have complete sets */ - if (pfunc_cpu0_volt_high == NULL || pfunc_cpu0_volt_low == NULL) { - pmf_put_function(pfunc_cpu0_volt_high); - pmf_put_function(pfunc_cpu0_volt_low); - pfunc_cpu0_volt_high = pfunc_cpu0_volt_low = NULL; - has_volt = 0; - } - if (!has_volt || - pfunc_cpu1_volt_high == NULL || pfunc_cpu1_volt_low == NULL) { - pmf_put_function(pfunc_cpu1_volt_high); - pmf_put_function(pfunc_cpu1_volt_low); - pfunc_cpu1_volt_high = pfunc_cpu1_volt_low = NULL; - } - - /* Note: The device tree also contains a "platform-set-values" - * function for which I haven't quite figured out the usage. It - * might have to be called on init and/or wakeup, I'm not too sure - * but things seem to work fine without it so far ... - */ - - /* Get max frequency from device-tree */ - valp = of_get_property(cpunode, "clock-frequency", NULL); - if (!valp) { - printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n"); - rc = -ENODEV; - goto bail; - } - - max_freq = (*valp)/1000; - - /* Now calculate reduced frequency by using the cpuid input freq - * ratio. This requires 64 bits math unless we are willing to lose - * some precision - */ - ih = *((u32 *)(eeprom + 0x10)); - il = *((u32 *)(eeprom + 0x20)); - - /* Check for machines with no useful settings */ - if (il == ih) { - printk(KERN_WARNING "cpufreq: No low frequency mode available" - " on this model !\n"); - rc = -ENODEV; - goto bail; - } - - min_freq = 0; - if (ih != 0 && il != 0) - min_freq = (max_freq * il) / ih; - - /* Sanity check */ - if (min_freq >= max_freq || min_freq < 1000) { - printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n"); - rc = -ENXIO; - goto bail; - } - g5_cpu_freqs[0].frequency = max_freq; - g5_cpu_freqs[1].frequency = min_freq; - - /* Set callbacks */ - transition_latency = CPUFREQ_ETERNAL; - g5_switch_volt = g5_pfunc_switch_volt; - g5_switch_freq = g5_pfunc_switch_freq; - g5_query_freq = g5_pfunc_query_freq; - - /* Force apply current frequency to make sure everything is in - * sync (voltage is right for example). Firmware may leave us with - * a strange setting ... - */ - g5_switch_volt(CPUFREQ_HIGH); - msleep(10); - g5_pmode_cur = -1; - g5_switch_freq(g5_query_freq()); - - printk(KERN_INFO "Registering G5 CPU frequency driver\n"); - printk(KERN_INFO "Frequency method: i2c/pfunc, " - "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", - g5_cpu_freqs[1].frequency/1000, - g5_cpu_freqs[0].frequency/1000, - g5_cpu_freqs[g5_pmode_cur].frequency/1000); - - rc = cpufreq_register_driver(&g5_cpufreq_driver); - bail: - if (rc != 0) { - pmf_put_function(pfunc_cpu_getfreq); - pmf_put_function(pfunc_cpu_setfreq_high); - pmf_put_function(pfunc_cpu_setfreq_low); - pmf_put_function(pfunc_slewing_done); - pmf_put_function(pfunc_cpu0_volt_high); - pmf_put_function(pfunc_cpu0_volt_low); - pmf_put_function(pfunc_cpu1_volt_high); - pmf_put_function(pfunc_cpu1_volt_low); - } - of_node_put(hwclock); - of_node_put(cpuid); - of_node_put(cpunode); - - return rc; -} - -static int __init g5_cpufreq_init(void) -{ - struct device_node *cpus; - int rc = 0; - - cpus = of_find_node_by_path("/cpus"); - if (cpus == NULL) { - DBG("No /cpus node !\n"); - return -ENODEV; - } - - if (of_machine_is_compatible("PowerMac7,2") || - of_machine_is_compatible("PowerMac7,3") || - of_machine_is_compatible("RackMac3,1")) - rc = g5_pm72_cpufreq_init(cpus); -#ifdef CONFIG_PMAC_SMU - else - rc = g5_neo2_cpufreq_init(cpus); -#endif /* CONFIG_PMAC_SMU */ - - of_node_put(cpus); - return rc; -} - -module_init(g5_cpufreq_init); - - -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index bdb738a69e41..5cbd4d67d5c4 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void) { int rc = -ENOMEM; - psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL); + psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL); if (psurge_host) psurge_secondary_virq = irq_create_direct_mapping(psurge_host); @@ -885,7 +885,7 @@ static int smp_core99_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata smp_core99_cpu_nb = { +static struct notifier_block smp_core99_cpu_nb = { .notifier_call = smp_core99_cpu_notify, }; #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index bcc3cb48a44e..7fe595152478 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -3,3 +3,4 @@ obj-y += opal-rtc.o opal-nvram.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o +obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c new file mode 100644 index 000000000000..0cd1c4a71755 --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -0,0 +1,916 @@ +/* + * The file intends to implement the functions needed by EEH, which is + * built on IODA compliant chip. Actually, lots of functions related + * to EEH would be built based on the OPAL APIs. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/bootmem.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/io.h> +#include <asm/iommu.h> +#include <asm/msi_bitmap.h> +#include <asm/opal.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> +#include <asm/tce.h> + +#include "powernv.h" +#include "pci.h" + +/* Debugging option */ +#ifdef IODA_EEH_DBG_ON +#define IODA_EEH_DBG(args...) pr_info(args) +#else +#define IODA_EEH_DBG(args...) +#endif + +static char *hub_diag = NULL; +static int ioda_eeh_nb_init = 0; + +static int ioda_eeh_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + uint64_t changed_evts = (uint64_t)change; + + /* We simply send special EEH event */ + if ((changed_evts & OPAL_EVENT_PCI_ERROR) && + (events & OPAL_EVENT_PCI_ERROR)) + eeh_send_failure_event(NULL); + + return 0; +} + +static struct notifier_block ioda_eeh_nb = { + .notifier_call = ioda_eeh_event, + .next = NULL, + .priority = 0 +}; + +#ifdef CONFIG_DEBUG_FS +static int ioda_eeh_dbgfs_set(void *data, u64 val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + out_be64(phb->regs + 0xD10, val); + return 0; +} + +static int ioda_eeh_dbgfs_get(void *data, u64 *val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + *val = in_be64(phb->regs + 0xD10); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_dbgfs_ops, ioda_eeh_dbgfs_get, + ioda_eeh_dbgfs_set, "0x%llx\n"); +#endif /* CONFIG_DEBUG_FS */ + +/** + * ioda_eeh_post_init - Chip dependent post initialization + * @hose: PCI controller + * + * The function will be called after eeh PEs and devices + * have been built. That means the EEH is ready to supply + * service with I/O cache. + */ +static int ioda_eeh_post_init(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + int ret; + + /* Register OPAL event notifier */ + if (!ioda_eeh_nb_init) { + ret = opal_notifier_register(&ioda_eeh_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + + ioda_eeh_nb_init = 1; + } + + /* FIXME: Enable it for PHB3 later */ + if (phb->type == PNV_PHB_IODA1) { + if (!hub_diag) { + hub_diag = (char *)__get_free_page(GFP_KERNEL | + __GFP_ZERO); + if (!hub_diag) { + pr_err("%s: Out of memory !\n", + __func__); + return -ENOMEM; + } + } + +#ifdef CONFIG_DEBUG_FS + if (phb->dbgfs) + debugfs_create_file("err_injct", 0600, + phb->dbgfs, hose, + &ioda_eeh_dbgfs_ops); +#endif + + phb->eeh_state |= PNV_EEH_STATE_ENABLED; + } + + return 0; +} + +/** + * ioda_eeh_set_option - Set EEH operation or I/O setting + * @pe: EEH PE + * @option: options + * + * Enable or disable EEH option for the indicated PE. The + * function also can be used to enable I/O or DMA for the + * PE. + */ +static int ioda_eeh_set_option(struct eeh_pe *pe, int option) +{ + s64 ret; + u32 pe_no; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + /* Check on PE number */ + if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { + pr_err("%s: PE address %x out of range [0, %x] " + "on PHB#%x\n", + __func__, pe->addr, phb->ioda.total_pe, + hose->global_number); + return -EINVAL; + } + + pe_no = pe->addr; + switch (option) { + case EEH_OPT_DISABLE: + ret = -EEXIST; + break; + case EEH_OPT_ENABLE: + ret = 0; + break; + case EEH_OPT_THAW_MMIO: + ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); + if (ret) { + pr_warning("%s: Failed to enable MMIO for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + break; + case EEH_OPT_THAW_DMA: + ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); + if (ret) { + pr_warning("%s: Failed to enable DMA for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + break; + default: + pr_warning("%s: Invalid option %d\n", __func__, option); + return -EINVAL; + } + + return ret; +} + +/** + * ioda_eeh_get_state - Retrieve the state of PE + * @pe: EEH PE + * + * The PE's state should be retrieved from the PEEV, PEST + * IODA tables. Since the OPAL has exported the function + * to do it, it'd better to use that. + */ +static int ioda_eeh_get_state(struct eeh_pe *pe) +{ + s64 ret = 0; + u8 fstate; + u16 pcierr; + u32 pe_no; + int result; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + /* + * Sanity check on PE address. The PHB PE address should + * be zero. + */ + if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { + pr_err("%s: PE address %x out of range [0, %x] " + "on PHB#%x\n", + __func__, pe->addr, phb->ioda.total_pe, + hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } + + /* Retrieve PE status through OPAL */ + pe_no = pe->addr; + ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &fstate, &pcierr, NULL); + if (ret) { + pr_err("%s: Failed to get EEH status on " + "PHB#%x-PE#%x\n, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return EEH_STATE_NOT_SUPPORT; + } + + /* Check PHB status */ + if (pe->type & EEH_PE_PHB) { + result = 0; + result &= ~EEH_STATE_RESET_ACTIVE; + + if (pcierr != OPAL_EEH_PHB_ERROR) { + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + result |= EEH_STATE_DMA_ENABLED; + } + + return result; + } + + /* Parse result out */ + result = 0; + switch (fstate) { + case OPAL_EEH_STOPPED_NOT_FROZEN: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + result |= EEH_STATE_DMA_ENABLED; + break; + case OPAL_EEH_STOPPED_MMIO_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_DMA_ENABLED; + break; + case OPAL_EEH_STOPPED_DMA_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + break; + case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_RESET: + result |= EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_TEMP_UNAVAIL: + result |= EEH_STATE_UNAVAILABLE; + break; + case OPAL_EEH_STOPPED_PERM_UNAVAIL: + result |= EEH_STATE_NOT_SUPPORT; + break; + default: + pr_warning("%s: Unexpected EEH status 0x%x " + "on PHB#%x-PE#%x\n", + __func__, fstate, hose->global_number, pe_no); + } + + return result; +} + +static int ioda_eeh_pe_clear(struct eeh_pe *pe) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + u32 pe_no; + u8 fstate; + u16 pcierr; + s64 ret; + + pe_no = pe->addr; + hose = pe->phb; + phb = pe->phb->private_data; + + /* Clear the EEH error on the PE */ + ret = opal_pci_eeh_freeze_clear(phb->opal_id, + pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (ret) { + pr_err("%s: Failed to clear EEH error for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + /* + * Read the PE state back and verify that the frozen + * state has been removed. + */ + ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &fstate, &pcierr, NULL); + if (ret) { + pr_err("%s: Failed to get EEH status on " + "PHB#%x-PE#%x\n, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) { + pr_err("%s: Frozen state not cleared on " + "PHB#%x-PE#%x, sts=%x\n", + __func__, hose->global_number, pe_no, fstate); + return -EIO; + } + + return 0; +} + +static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) +{ + s64 rc = OPAL_HARDWARE; + + while (1) { + rc = opal_pci_poll(phb->opal_id); + if (rc <= 0) + break; + + msleep(rc); + } + + return rc; +} + +static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* Issue PHB complete reset request */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_PHB_COMPLETE, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_PHB_COMPLETE, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* + * Poll state of the PHB until the request is done + * successfully. + */ + rc = ioda_eeh_phb_poll(phb); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int ioda_eeh_root_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_SUCCESS; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* + * During the reset deassert time, we needn't care + * the reset scope because the firmware does nothing + * for fundamental or hot reset during deassert phase. + */ + if (option == EEH_RESET_FUNDAMENTAL) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_FUNDAMENTAL_RESET, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_HOT_RESET, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_HOT_RESET, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* Poll state of the PHB until the request is done */ + rc = ioda_eeh_phb_poll(phb); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int ioda_eeh_bridge_reset(struct pci_controller *hose, + struct pci_dev *dev, int option) +{ + u16 ctrl; + + pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n", + __func__, hose->global_number, dev->bus->number, + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + case EEH_RESET_HOT: + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + break; + case EEH_RESET_DEACTIVATE: + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + break; + } + + return 0; +} + +/** + * ioda_eeh_reset - Reset the indicated PE + * @pe: EEH PE + * @option: reset option + * + * Do reset on the indicated PE. For PCI bus sensitive PE, + * we need to reset the parent p2p bridge. The PHB has to + * be reinitialized if the p2p bridge is root bridge. For + * PCI device sensitive PE, we will try to reset the device + * through FLR. For now, we don't have OPAL APIs to do HARD + * reset yet, so all reset would be SOFT (HOT) reset. + */ +static int ioda_eeh_reset(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct eeh_dev *edev; + struct pci_dev *dev; + int ret; + + /* + * Anyway, we have to clear the problematic state for the + * corresponding PE. However, we needn't do it if the PE + * is PHB associated. That means the PHB is having fatal + * errors and it needs reset. Further more, the AIB interface + * isn't reliable any more. + */ + if (!(pe->type & EEH_PE_PHB) && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + ret = ioda_eeh_pe_clear(pe); + if (ret) + return -EIO; + } + + /* + * The rules applied to reset, either fundamental or hot reset: + * + * We always reset the direct upstream bridge of the PE. If the + * direct upstream bridge isn't root bridge, we always take hot + * reset no matter what option (fundamental or hot) is. Otherwise, + * we should do the reset according to the required option. + */ + if (pe->type & EEH_PE_PHB) { + ret = ioda_eeh_phb_reset(hose, option); + } else { + if (pe->type & EEH_PE_DEVICE) { + /* + * If it's device PE, we didn't refer to the parent + * PCI bus yet. So we have to figure it out indirectly. + */ + edev = list_first_entry(&pe->edevs, + struct eeh_dev, list); + dev = eeh_dev_to_pci_dev(edev); + dev = dev->bus->self; + } else { + /* + * If it's bus PE, the parent PCI bus is already there + * and just pick it up. + */ + dev = pe->bus->self; + } + + /* + * Do reset based on the fact that the direct upstream bridge + * is root bridge (port) or not. + */ + if (dev->bus->number == 0) + ret = ioda_eeh_root_reset(hose, option); + else + ret = ioda_eeh_bridge_reset(hose, dev, option); + } + + return ret; +} + +/** + * ioda_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: Severity level of the log + * @drv_log: buffer to store the log + * @len: space of the log buffer + * + * The function is used to retrieve error log from P7IOC. + */ +static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + s64 ret; + unsigned long flags; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + spin_lock_irqsave(&phb->lock, flags); + + ret = opal_pci_get_phb_diag_data2(phb->opal_id, + phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); + if (ret) { + spin_unlock_irqrestore(&phb->lock, flags); + pr_warning("%s: Failed to get log for PHB#%x-PE#%x\n", + __func__, hose->global_number, pe->addr); + return -EIO; + } + + /* + * FIXME: We probably need log the error in somewhere. + * Lets make it up in future. + */ + /* pr_info("%s", phb->diag.blob); */ + + spin_unlock_irqrestore(&phb->lock, flags); + + return 0; +} + +/** + * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE + * @pe: EEH PE + * + * For particular PE, it might have included PCI bridges. In order + * to make the PE work properly, those PCI bridges should be configured + * correctly. However, we need do nothing on P7IOC since the reset + * function will do everything that should be covered by the function. + */ +static int ioda_eeh_configure_bridge(struct eeh_pe *pe) +{ + return 0; +} + +static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) +{ + /* GEM */ + pr_info(" GEM XFIR: %016llx\n", data->gemXfir); + pr_info(" GEM RFIR: %016llx\n", data->gemRfir); + pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); + pr_info(" GEM Mask: %016llx\n", data->gemMask); + pr_info(" GEM RWOF: %016llx\n", data->gemRwof); + + /* LEM */ + pr_info(" LEM FIR: %016llx\n", data->lemFir); + pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); + pr_info(" LEM Action 0: %016llx\n", data->lemAction0); + pr_info(" LEM Action 1: %016llx\n", data->lemAction1); + pr_info(" LEM WOF: %016llx\n", data->lemWof); +} + +static void ioda_eeh_hub_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoP7IOCErrorData *data; + long rc; + + data = (struct OpalIoP7IOCErrorData *)ioda_eeh_hub_diag; + rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE); + if (rc != OPAL_SUCCESS) { + pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); + return; + } + + switch (data->type) { + case OPAL_P7IOC_DIAG_TYPE_RGC: + pr_info("P7IOC diag-data for RGC\n\n"); + ioda_eeh_hub_diag_common(data); + pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); + pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); + break; + case OPAL_P7IOC_DIAG_TYPE_BI: + pr_info("P7IOC diag-data for BI %s\n\n", + data->bi.biDownbound ? "Downbound" : "Upbound"); + ioda_eeh_hub_diag_common(data); + pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); + pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); + pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); + pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); + break; + case OPAL_P7IOC_DIAG_TYPE_CI: + pr_info("P7IOC diag-data for CI Port %d\\nn", + data->ci.ciPort); + ioda_eeh_hub_diag_common(data); + pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); + pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); + break; + case OPAL_P7IOC_DIAG_TYPE_MISC: + pr_info("P7IOC diag-data for MISC\n\n"); + ioda_eeh_hub_diag_common(data); + break; + case OPAL_P7IOC_DIAG_TYPE_I2C: + pr_info("P7IOC diag-data for I2C\n\n"); + ioda_eeh_hub_diag_common(data); + break; + default: + pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); + } +} + +static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoP7IOCPhbErrorData *data; + int i; + + data = (struct OpalIoP7IOCPhbErrorData *)common; + + pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n", + hose->global_number, common->version); + + pr_info(" brdgCtl: %08x\n", data->brdgCtl); + + pr_info(" portStatusReg: %08x\n", data->portStatusReg); + pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); + pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); + + pr_info(" deviceStatus: %08x\n", data->deviceStatus); + pr_info(" slotStatus: %08x\n", data->slotStatus); + pr_info(" linkStatus: %08x\n", data->linkStatus); + pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); + pr_info(" devSecStatus: %08x\n", data->devSecStatus); + + pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); + pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); + pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); + pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); + pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); + pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); + pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); + pr_info(" sourceId: %08x\n", data->sourceId); + + pr_info(" errorClass: %016llx\n", data->errorClass); + pr_info(" correlator: %016llx\n", data->correlator); + pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr); + pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr); + pr_info(" lemFir: %016llx\n", data->lemFir); + pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); + pr_info(" lemWOF: %016llx\n", data->lemWOF); + pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); + pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); + pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); + pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); + pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); + pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus); + pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); + pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); + pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); + pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus); + pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); + pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); + pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); + pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus); + pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); + pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); + + for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { + if ((data->pestA[i] >> 63) == 0 && + (data->pestB[i] >> 63) == 0) + continue; + + pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); + pr_info(" PESTB: %016llx\n", data->pestB[i]); + } +} + +static void ioda_eeh_phb_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoPhbErrorCommon *common; + long rc; + + common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; + rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE); + if (rc != OPAL_SUCCESS) { + pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", + __func__, hose->global_number, rc); + return; + } + + switch (common->ioType) { + case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: + ioda_eeh_p7ioc_phb_diag(hose, common); + break; + default: + pr_warning("%s: Unrecognized I/O chip %d\n", + __func__, common->ioType); + } +} + +static int ioda_eeh_get_phb_pe(struct pci_controller *hose, + struct eeh_pe **pe) +{ + struct eeh_pe *phb_pe; + + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) { + pr_warning("%s Can't find PE for PHB#%d\n", + __func__, hose->global_number); + return -EEXIST; + } + + *pe = phb_pe; + return 0; +} + +static int ioda_eeh_get_pe(struct pci_controller *hose, + u16 pe_no, struct eeh_pe **pe) +{ + struct eeh_pe *phb_pe, *dev_pe; + struct eeh_dev dev; + + /* Find the PHB PE */ + if (ioda_eeh_get_phb_pe(hose, &phb_pe)) + return -EEXIST; + + /* Find the PE according to PE# */ + memset(&dev, 0, sizeof(struct eeh_dev)); + dev.phb = hose; + dev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&dev); + if (!dev_pe) { + pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n", + __func__, hose->global_number, pe_no); + return -EEXIST; + } + + *pe = dev_pe; + return 0; +} + +/** + * ioda_eeh_next_error - Retrieve next error for EEH core to handle + * @pe: The affected PE + * + * The function is expected to be called by EEH core while it gets + * special EEH event (without binding PE). The function calls to + * OPAL APIs for next error to handle. The informational error is + * handled internally by platform. However, the dead IOC, dead PHB, + * fenced PHB and frozen PE should be handled by EEH core eventually. + */ +static int ioda_eeh_next_error(struct eeh_pe **pe) +{ + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + u64 frozen_pe_no; + u16 err_type, severity; + long rc; + int ret = 1; + + /* + * While running here, it's safe to purge the event queue. + * And we should keep the cached OPAL notifier event sychronized + * between the kernel and firmware. + */ + eeh_remove_event(NULL); + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + /* + * If the subordinate PCI buses of the PHB has been + * removed, we needn't take care of it any more. + */ + phb = hose->private_data; + if (phb->eeh_state & PNV_EEH_STATE_REMOVED) + continue; + + rc = opal_pci_next_error(phb->opal_id, + &frozen_pe_no, &err_type, &severity); + + /* If OPAL API returns error, we needn't proceed */ + if (rc != OPAL_SUCCESS) { + IODA_EEH_DBG("%s: Invalid return value on " + "PHB#%x (0x%lx) from opal_pci_next_error", + __func__, hose->global_number, rc); + continue; + } + + /* If the PHB doesn't have error, stop processing */ + if (err_type == OPAL_EEH_NO_ERROR || + severity == OPAL_EEH_SEV_NO_ERROR) { + IODA_EEH_DBG("%s: No error found on PHB#%x\n", + __func__, hose->global_number); + continue; + } + + /* + * Processing the error. We're expecting the error with + * highest priority reported upon multiple errors on the + * specific PHB. + */ + IODA_EEH_DBG("%s: Error (%d, %d, %d) on PHB#%x\n", + err_type, severity, pe_no, hose->global_number); + switch (err_type) { + case OPAL_EEH_IOC_ERROR: + if (severity == OPAL_EEH_SEV_IOC_DEAD) { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb = hose->private_data; + phb->eeh_state |= PNV_EEH_STATE_REMOVED; + } + + pr_err("EEH: dead IOC detected\n"); + ret = 4; + goto out; + } else if (severity == OPAL_EEH_SEV_INF) { + pr_info("EEH: IOC informative error " + "detected\n"); + ioda_eeh_hub_diag(hose); + } + + break; + case OPAL_EEH_PHB_ERROR: + if (severity == OPAL_EEH_SEV_PHB_DEAD) { + if (ioda_eeh_get_phb_pe(hose, pe)) + break; + + pr_err("EEH: dead PHB#%x detected\n", + hose->global_number); + phb->eeh_state |= PNV_EEH_STATE_REMOVED; + ret = 3; + goto out; + } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { + if (ioda_eeh_get_phb_pe(hose, pe)) + break; + + pr_err("EEH: fenced PHB#%x detected\n", + hose->global_number); + ret = 2; + goto out; + } else if (severity == OPAL_EEH_SEV_INF) { + pr_info("EEH: PHB#%x informative error " + "detected\n", + hose->global_number); + ioda_eeh_phb_diag(hose); + } + + break; + case OPAL_EEH_PE_ERROR: + if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) + break; + + pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", + (*pe)->addr, (*pe)->phb->global_number); + ret = 1; + goto out; + } + } + + ret = 0; +out: + return ret; +} + +struct pnv_eeh_ops ioda_eeh_ops = { + .post_init = ioda_eeh_post_init, + .set_option = ioda_eeh_set_option, + .get_state = ioda_eeh_get_state, + .reset = ioda_eeh_reset, + .get_log = ioda_eeh_get_log, + .configure_bridge = ioda_eeh_configure_bridge, + .next_error = ioda_eeh_next_error +}; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c new file mode 100644 index 000000000000..969cce73055a --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -0,0 +1,379 @@ +/* + * The file intends to implement the platform dependent EEH operations on + * powernv platform. Actually, the powernv was created in order to fully + * hypervisor support. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/atomic.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/msi.h> +#include <linux/of.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/rbtree.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> + +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/firmware.h> +#include <asm/io.h> +#include <asm/iommu.h> +#include <asm/machdep.h> +#include <asm/msi_bitmap.h> +#include <asm/opal.h> +#include <asm/ppc-pci.h> + +#include "powernv.h" +#include "pci.h" + +/** + * powernv_eeh_init - EEH platform dependent initialization + * + * EEH platform dependent initialization on powernv + */ +static int powernv_eeh_init(void) +{ + /* We require OPALv3 */ + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { + pr_warning("%s: OPALv3 is required !\n", __func__); + return -EINVAL; + } + + /* Set EEH probe mode */ + eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + + return 0; +} + +/** + * powernv_eeh_post_init - EEH platform dependent post initialization + * + * EEH platform dependent post initialization on powernv. When + * the function is called, the EEH PEs and devices should have + * been built. If the I/O cache staff has been built, EEH is + * ready to supply service. + */ +static int powernv_eeh_post_init(void) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + int ret = 0; + + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->eeh_ops && phb->eeh_ops->post_init) { + ret = phb->eeh_ops->post_init(hose); + if (ret) + break; + } + } + + return ret; +} + +/** + * powernv_eeh_dev_probe - Do probe on PCI device + * @dev: PCI device + * @flag: unused + * + * When EEH module is installed during system boot, all PCI devices + * are checked one by one to see if it supports EEH. The function + * is introduced for the purpose. By default, EEH has been enabled + * on all PCI devices. That's to say, we only need do necessary + * initialization on the corresponding eeh device and create PE + * accordingly. + * + * It's notable that's unsafe to retrieve the EEH device through + * the corresponding PCI device. During the PCI device hotplug, which + * was possiblly triggered by EEH core, the binding between EEH device + * and the PCI device isn't built yet. + */ +static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + + /* + * When probing the root bridge, which doesn't have any + * subordinate PCI devices. We don't have OF node for + * the root bridge. So it's not reasonable to continue + * the probing. + */ + if (!dn || !edev) + return 0; + + /* Skip for PCI-ISA bridge */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA) + return 0; + + /* Initialize eeh device */ + edev->class_code = dev->class; + edev->mode = 0; + edev->config_addr = ((dev->bus->number << 8) | dev->devfn); + edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); + + /* Create PE */ + eeh_add_to_parent_pe(edev); + + /* + * Enable EEH explicitly so that we will do EEH check + * while accessing I/O stuff + * + * FIXME: Enable that for PHB3 later + */ + if (phb->type == PNV_PHB_IODA1) + eeh_subsystem_enabled = 1; + + /* Save memory bars */ + eeh_save_bars(edev); + + return 0; +} + +/** + * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * @pe: EEH PE + * @option: operation to be issued + * + * The function is used to control the EEH functionality globally. + * Currently, following options are support according to PAPR: + * Enable EEH, Disable EEH, Enable MMIO and Enable DMA + */ +static int powernv_eeh_set_option(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + /* + * What we need do is pass it down for hardware + * implementation to handle it. + */ + if (phb->eeh_ops && phb->eeh_ops->set_option) + ret = phb->eeh_ops->set_option(pe, option); + + return ret; +} + +/** + * powernv_eeh_get_pe_addr - Retrieve PE address + * @pe: EEH PE + * + * Retrieve the PE address according to the given tranditional + * PCI BDF (Bus/Device/Function) address. + */ +static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) +{ + return pe->addr; +} + +/** + * powernv_eeh_get_state - Retrieve PE state + * @pe: EEH PE + * @delay: delay while PE state is temporarily unavailable + * + * Retrieve the state of the specified PE. For IODA-compitable + * platform, it should be retrieved from IODA table. Therefore, + * we prefer passing down to hardware implementation to handle + * it. + */ +static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = EEH_STATE_NOT_SUPPORT; + + if (phb->eeh_ops && phb->eeh_ops->get_state) { + ret = phb->eeh_ops->get_state(pe); + + /* + * If the PE state is temporarily unavailable, + * to inform the EEH core delay for default + * period (1 second) + */ + if (delay) { + *delay = 0; + if (ret & EEH_STATE_UNAVAILABLE) + *delay = 1000; + } + } + + return ret; +} + +/** + * powernv_eeh_reset - Reset the specified PE + * @pe: EEH PE + * @option: reset option + * + * Reset the specified PE + */ +static int powernv_eeh_reset(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->reset) + ret = phb->eeh_ops->reset(pe, option); + + return ret; +} + +/** + * powernv_eeh_wait_state - Wait for PE state + * @pe: EEH PE + * @max_wait: maximal period in microsecond + * + * Wait for the state of associated PE. It might take some time + * to retrieve the PE's state. + */ +static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) +{ + int ret; + int mwait; + + while (1) { + ret = powernv_eeh_get_state(pe, &mwait); + + /* + * If the PE's state is temporarily unavailable, + * we have to wait for the specified time. Otherwise, + * the PE's state will be returned immediately. + */ + if (ret != EEH_STATE_UNAVAILABLE) + return ret; + + max_wait -= mwait; + if (max_wait <= 0) { + pr_warning("%s: Timeout getting PE#%x's state (%d)\n", + __func__, pe->addr, max_wait); + return EEH_STATE_NOT_SUPPORT; + } + + msleep(mwait); + } + + return EEH_STATE_NOT_SUPPORT; +} + +/** + * powernv_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: temporary or permanent error log + * @drv_log: driver log to be combined with retrieved error log + * @len: length of driver log + * + * Retrieve the temporary or permanent error from the PE. + */ +static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->get_log) + ret = phb->eeh_ops->get_log(pe, severity, drv_log, len); + + return ret; +} + +/** + * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * @pe: EEH PE + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int powernv_eeh_configure_bridge(struct eeh_pe *pe) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = 0; + + if (phb->eeh_ops && phb->eeh_ops->configure_bridge) + ret = phb->eeh_ops->configure_bridge(pe); + + return ret; +} + +/** + * powernv_eeh_next_error - Retrieve next EEH error to handle + * @pe: Affected PE + * + * Using OPAL API, to retrieve next EEH error for EEH core to handle + */ +static int powernv_eeh_next_error(struct eeh_pe **pe) +{ + struct pci_controller *hose; + struct pnv_phb *phb = NULL; + + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + break; + } + + if (phb && phb->eeh_ops->next_error) + return phb->eeh_ops->next_error(pe); + + return -EEXIST; +} + +static struct eeh_ops powernv_eeh_ops = { + .name = "powernv", + .init = powernv_eeh_init, + .post_init = powernv_eeh_post_init, + .of_probe = NULL, + .dev_probe = powernv_eeh_dev_probe, + .set_option = powernv_eeh_set_option, + .get_pe_addr = powernv_eeh_get_pe_addr, + .get_state = powernv_eeh_get_state, + .reset = powernv_eeh_reset, + .wait_state = powernv_eeh_wait_state, + .get_log = powernv_eeh_get_log, + .configure_bridge = powernv_eeh_configure_bridge, + .read_config = pnv_pci_cfg_read, + .write_config = pnv_pci_cfg_write, + .next_error = powernv_eeh_next_error +}; + +/** + * eeh_powernv_init - Register platform dependent EEH operations + * + * EEH initialization on powernv platform. This function should be + * called before any EEH related functions. + */ +static int __init eeh_powernv_init(void) +{ + int ret = -EINVAL; + + if (!machine_is(powernv)) + return ret; + + ret = eeh_ops_register(&powernv_eeh_ops); + if (!ret) + pr_info("EEH: PowerNV platform initialized\n"); + else + pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret); + + return ret; +} + +early_initcall(eeh_powernv_init); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6fabe92eafb6..e88863ffb135 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -107,4 +107,7 @@ OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR); OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); +OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR); +OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL); OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); +OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 628c564ceadb..106301fd2fa5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/interrupt.h> +#include <linux/notifier.h> #include <linux/slab.h> #include <asm/opal.h> #include <asm/firmware.h> @@ -31,6 +32,10 @@ static DEFINE_SPINLOCK(opal_write_lock); extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; static unsigned int opal_irq_count; +static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); +static DEFINE_SPINLOCK(opal_notifier_lock); +static uint64_t last_notified_mask = 0x0ul; +static atomic_t opal_notifier_hold = ATOMIC_INIT(0); int __init early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data) @@ -95,6 +100,68 @@ static int __init opal_register_exception_handlers(void) early_initcall(opal_register_exception_handlers); +int opal_notifier_register(struct notifier_block *nb) +{ + if (!nb) { + pr_warning("%s: Invalid argument (%p)\n", + __func__, nb); + return -EINVAL; + } + + atomic_notifier_chain_register(&opal_notifier_head, nb); + return 0; +} + +static void opal_do_notifier(uint64_t events) +{ + unsigned long flags; + uint64_t changed_mask; + + if (atomic_read(&opal_notifier_hold)) + return; + + spin_lock_irqsave(&opal_notifier_lock, flags); + changed_mask = last_notified_mask ^ events; + last_notified_mask = events; + spin_unlock_irqrestore(&opal_notifier_lock, flags); + + /* + * We feed with the event bits and changed bits for + * enough information to the callback. + */ + atomic_notifier_call_chain(&opal_notifier_head, + events, (void *)changed_mask); +} + +void opal_notifier_update_evt(uint64_t evt_mask, + uint64_t evt_val) +{ + unsigned long flags; + + spin_lock_irqsave(&opal_notifier_lock, flags); + last_notified_mask &= ~evt_mask; + last_notified_mask |= evt_val; + spin_unlock_irqrestore(&opal_notifier_lock, flags); +} + +void opal_notifier_enable(void) +{ + int64_t rc; + uint64_t evt = 0; + + atomic_set(&opal_notifier_hold, 0); + + /* Process pending events */ + rc = opal_poll_events(&evt); + if (rc == OPAL_SUCCESS && evt) + opal_do_notifier(evt); +} + +void opal_notifier_disable(void) +{ + atomic_set(&opal_notifier_hold, 1); +} + int opal_get_chars(uint32_t vtermno, char *buf, int count) { s64 len, rc; @@ -297,7 +364,7 @@ static irqreturn_t opal_interrupt(int irq, void *data) opal_handle_interrupt(virq_to_hw(irq), &events); - /* XXX TODO: Do something with the events */ + opal_do_notifier(events); return IRQ_HANDLED; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9c9d15e4cdf2..49b57b9f835d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/debugfs.h> #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> @@ -32,6 +33,7 @@ #include <asm/iommu.h> #include <asm/tce.h> #include <asm/xics.h> +#include <asm/debug.h> #include "powernv.h" #include "pci.h" @@ -441,6 +443,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev set_iommu_table_base(&pdev->dev, &pe->tce32_table); } +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, &pe->tce32_table); + if (dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, u64 *startp, u64 *endp) { @@ -595,6 +608,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_PAIR; } iommu_init_table(tbl, phb->hose->node); + iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); + + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); return; fail: @@ -667,6 +686,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } iommu_init_table(tbl, phb->hose->node); + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); + return; fail: if (pe->tce32_seg >= 0) @@ -968,11 +992,38 @@ static void pnv_pci_ioda_setup_DMA(void) } } +static void pnv_pci_ioda_create_dbgfs(void) +{ +#ifdef CONFIG_DEBUG_FS + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + char name[16]; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + sprintf(name, "PCI%04x", hose->global_number); + phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); + if (!phb->dbgfs) + pr_warning("%s: Error on creating debugfs on PHB#%x\n", + __func__, hose->global_number); + } +#endif /* CONFIG_DEBUG_FS */ +} + static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); pnv_pci_ioda_setup_seg(); pnv_pci_ioda_setup_DMA(); + + pnv_pci_ioda_create_dbgfs(); + +#ifdef CONFIG_EEH + eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + eeh_addr_cache_build(); + eeh_init(); +#endif } /* @@ -1049,7 +1100,8 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) OPAL_ASSERT_RESET); } -void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) +void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) { struct pci_controller *hose; static int primary = 1; @@ -1087,6 +1139,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) hose->first_busno = 0; hose->last_busno = 0xff; hose->private_data = phb; + phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = ioda_type; @@ -1172,6 +1225,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) phb->ioda.io_size, phb->ioda.io_segsize); phb->hose->ops = &pnv_pci_ops; +#ifdef CONFIG_EEH + phb->eeh_ops = &ioda_eeh_ops; +#endif /* Setup RID -> PE mapping function */ phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; @@ -1212,7 +1268,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) void pnv_pci_init_ioda2_phb(struct device_node *np) { - pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2); + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); } void __init pnv_pci_init_ioda_hub(struct device_node *np) @@ -1235,6 +1291,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np) for_each_child_of_node(np, phbn) { /* Look for IODA1 PHBs */ if (of_device_is_compatible(phbn, "ibm,ioda-phb")) - pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1); + pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); } } diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 92b37a0186c9..b68db6325c1b 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -86,13 +86,16 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { - if (phb->p5ioc2.iommu_table.it_map == NULL) + if (phb->p5ioc2.iommu_table.it_map == NULL) { iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); + iommu_register_group(&phb->p5ioc2.iommu_table, + pci_domain_nr(phb->hose->bus), phb->opal_id); + } set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); } -static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, +static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, void *tce_mem, u64 tce_size) { struct pnv_phb *phb; @@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, phb->hose->first_busno = 0; phb->hose->last_busno = 0xff; phb->hose->private_data = phb; + phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = PNV_PHB_P5IOC2; phb->model = PNV_PHB_MODEL_P5IOC2; @@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) for_each_child_of_node(np, phbn) { if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { - pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb); + pnv_pci_init_p5ioc2_phb(phbn, hub_id, + tce_mem, tce_per_phb); tce_mem += tce_per_phb; } } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 277343cc6a3d..a28d3b5e6393 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -20,6 +20,7 @@ #include <linux/irq.h> #include <linux/io.h> #include <linux/msi.h> +#include <linux/iommu.h> #include <asm/sections.h> #include <asm/io.h> @@ -32,6 +33,8 @@ #include <asm/iommu.h> #include <asm/tce.h> #include <asm/firmware.h> +#include <asm/eeh_event.h> +#include <asm/eeh.h> #include "powernv.h" #include "pci.h" @@ -202,7 +205,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) spin_lock_irqsave(&phb->lock, flags); - rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + PNV_PCI_DIAG_BUF_SIZE); has_diag = (rc == OPAL_SUCCESS); rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, @@ -227,43 +231,50 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) spin_unlock_irqrestore(&phb->lock, flags); } -static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, - u32 bdfn) +static void pnv_pci_config_check_eeh(struct pnv_phb *phb, + struct device_node *dn) { s64 rc; u8 fstate; u16 pcierr; u32 pe_no; - /* Get PE# if we support IODA */ - pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0; + /* + * Get the PE#. During the PCI probe stage, we might not + * setup that yet. So all ER errors should be mapped to + * PE#0 + */ + pe_no = PCI_DN(dn)->pe_number; + if (pe_no == IODA_INVALID_PE) + pe_no = 0; /* Read freeze status */ rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, NULL); if (rc) { - pr_warning("PCI %d: Failed to read EEH status for PE#%d," - " err %lld\n", phb->hose->global_number, pe_no, rc); + pr_warning("%s: Can't read EEH status (PE#%d) for " + "%s, err %lld\n", + __func__, pe_no, dn->full_name, rc); return; } - cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", - bdfn, pe_no, fstate); + cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", + (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), + pe_no, fstate); if (fstate != 0) pnv_pci_handle_eeh_config(phb, pe_no); } -static int pnv_pci_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 *val) +int pnv_pci_cfg_read(struct device_node *dn, + int where, int size, u32 *val) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; - u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; + struct pci_dn *pdn = PCI_DN(dn); + struct pnv_phb *phb = pdn->phb->private_data; + u32 bdfn = (pdn->busno << 8) | pdn->devfn; +#ifdef CONFIG_EEH + struct eeh_pe *phb_pe = NULL; +#endif s64 rc; - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - switch (size) { case 1: { u8 v8; @@ -287,28 +298,43 @@ static int pnv_pci_read_config(struct pci_bus *bus, default: return PCIBIOS_FUNC_NOT_SUPPORTED; } - cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n", - bus->number, devfn, where, size, *val); - - /* Check if the PHB got frozen due to an error (no response) */ - pnv_pci_config_check_eeh(phb, bus, bdfn); + cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + __func__, pdn->busno, pdn->devfn, where, size, *val); + + /* + * Check if the specified PE has been put into frozen + * state. On the other hand, we needn't do that while + * the PHB has been put into frozen state because of + * PHB-fatal errors. + */ +#ifdef CONFIG_EEH + phb_pe = eeh_phb_pe_get(pdn->phb); + if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED)) + return PCIBIOS_SUCCESSFUL; + + if (phb->eeh_state & PNV_EEH_STATE_ENABLED) { + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + return PCIBIOS_DEVICE_NOT_FOUND; + } else { + pnv_pci_config_check_eeh(phb, dn); + } +#else + pnv_pci_config_check_eeh(phb, dn); +#endif return PCIBIOS_SUCCESSFUL; } -static int pnv_pci_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 val) +int pnv_pci_cfg_write(struct device_node *dn, + int where, int size, u32 val) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; - u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; - - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; + struct pci_dn *pdn = PCI_DN(dn); + struct pnv_phb *phb = pdn->phb->private_data; + u32 bdfn = (pdn->busno << 8) | pdn->devfn; - cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n", - bus->number, devfn, where, size, val); + cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + pdn->busno, pdn->devfn, where, size, val); switch (size) { case 1: opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); @@ -322,14 +348,54 @@ static int pnv_pci_write_config(struct pci_bus *bus, default: return PCIBIOS_FUNC_NOT_SUPPORTED; } + /* Check if the PHB got frozen due to an error (no response) */ - pnv_pci_config_check_eeh(phb, bus, bdfn); +#ifdef CONFIG_EEH + if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED)) + pnv_pci_config_check_eeh(phb, dn); +#else + pnv_pci_config_check_eeh(phb, dn); +#endif return PCIBIOS_SUCCESSFUL; } +static int pnv_pci_read_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 *val) +{ + struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + + for (dn = busdn->child; dn; dn = dn->sibling) { + pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn) + return pnv_pci_cfg_read(dn, where, size, val); + } + + *val = 0xFFFFFFFF; + return PCIBIOS_DEVICE_NOT_FOUND; + +} + +static int pnv_pci_write_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 val) +{ + struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + + for (dn = busdn->child; dn; dn = dn->sibling) { + pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn) + return pnv_pci_cfg_write(dn, where, size, val); + } + + return PCIBIOS_DEVICE_NOT_FOUND; +} + struct pci_ops pnv_pci_ops = { - .read = pnv_pci_read_config, + .read = pnv_pci_read_config, .write = pnv_pci_write_config, }; @@ -412,6 +478,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose) pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), be32_to_cpup(sizep), 0); iommu_init_table(tbl, hose->node); + iommu_register_group(tbl, pci_domain_nr(hose->bus), 0); /* Deal with SW invalidated TCEs when needed (BML way) */ swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 25d76c4df50b..d633c64e05a1 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -66,15 +66,43 @@ struct pnv_ioda_pe { struct list_head list; }; +/* IOC dependent EEH operations */ +#ifdef CONFIG_EEH +struct pnv_eeh_ops { + int (*post_init)(struct pci_controller *hose); + int (*set_option)(struct eeh_pe *pe, int option); + int (*get_state)(struct eeh_pe *pe); + int (*reset)(struct eeh_pe *pe, int option); + int (*get_log)(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len); + int (*configure_bridge)(struct eeh_pe *pe); + int (*next_error)(struct eeh_pe **pe); +}; + +#define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */ +#define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */ + +#endif /* CONFIG_EEH */ + struct pnv_phb { struct pci_controller *hose; enum pnv_phb_type type; enum pnv_phb_model model; + u64 hub_id; u64 opal_id; void __iomem *regs; int initialized; spinlock_t lock; +#ifdef CONFIG_EEH + struct pnv_eeh_ops *eeh_ops; + int eeh_state; +#endif + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs; +#endif + #ifdef CONFIG_PCI_MSI unsigned int msi_base; unsigned int msi32_support; @@ -150,7 +178,14 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; +#ifdef CONFIG_EEH +extern struct pnv_eeh_ops ioda_eeh_ops; +#endif +int pnv_pci_cfg_read(struct device_node *dn, + int where, int size, u32 *val); +int pnv_pci_cfg_write(struct device_node *dn, + int where, int size, u32 val); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d4459bfc92f7..84438af96c05 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -93,6 +93,8 @@ static void __noreturn pnv_restart(char *cmd) { long rc = OPAL_BUSY; + opal_notifier_disable(); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_reboot(); if (rc == OPAL_BUSY_EVENT) @@ -108,6 +110,8 @@ static void __noreturn pnv_power_off(void) { long rc = OPAL_BUSY; + opal_notifier_disable(); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_power_down(0); if (rc == OPAL_BUSY_EVENT) diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 88c9459c3e07..89e3857af4e0 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -40,7 +40,7 @@ #define DBG(fmt...) #endif -static void __cpuinit pnv_smp_setup_cpu(int cpu) +static void pnv_smp_setup_cpu(int cpu) { if (cpu != boot_cpuid) xics_setup_cpu(); @@ -51,7 +51,7 @@ static int pnv_smp_cpu_bootable(unsigned int nr) /* Special case - we inhibit secondary thread startup * during boot if the user requests it. */ - if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { + if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) return 0; if (smt_enabled_at_boot diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 177a2f70700c..3e270e3412ae 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -109,7 +109,8 @@ static long ps3_hpte_remove(unsigned long hpte_group) } static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long vpn, int psize, int ssize, int local) + unsigned long vpn, int psize, int apsize, + int ssize, int local) { int result; u64 hpte_v, want_v, hpte_rs; @@ -162,7 +163,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, } static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, int ssize, int local) { unsigned long flags; int result; diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 4459eff7a75a..1bd3399146ed 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -33,11 +33,6 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. -config EEH - bool - depends on PPC_PSERIES && PCI - default y - config PSERIES_MSI bool depends on PCI_MSI && EEH diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 53866e537a92..8ae010381316 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,9 +6,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ - eeh_driver.o eeh_event.o eeh_sysfs.o \ - eeh_pseries.o +obj-$(CONFIG_EEH) += eeh_pseries.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_PCI) += pci.o pci_dlpar.o obj-$(CONFIG_PSERIES_MSI) += msi.o diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c index ef9d9d84c7d5..5ea88d1541f7 100644 --- a/arch/powerpc/platforms/pseries/io_event_irq.c +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -115,7 +115,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) * by scope or event type alone. For example, Torrent ISR route change * event is reported with scope 0x00 (Not Applicatable) rather than * 0x3B (Torrent-hub). It is better to let the clients to identify - * who owns the the event. + * who owns the event. */ static irqreturn_t ioei_interrupt(int irq, void *dev_id) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 86ae364900d6..23fc1dcf4434 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -614,6 +614,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) iommu_table_setparms(pci->phb, dn, tbl); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + iommu_register_group(tbl, pci_domain_nr(bus), 0); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -658,6 +659,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) ppci->phb->node); iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); + iommu_register_group(tbl, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->iommu_table); } } @@ -684,6 +686,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) phb->node); iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); + iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); return; } @@ -1184,6 +1187,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pci->phb->node); iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); pr_debug(" created table: %p\n", pci->iommu_table); } else { pr_debug(" found DMA window, table: %p\n", pci->iommu_table); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 6d62072a7d5a..02d6e21619bb 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -45,6 +45,13 @@ #include "plpar_wrappers.h" #include "pseries.h" +/* Flag bits for H_BULK_REMOVE */ +#define HBR_REQUEST 0x4000000000000000UL +#define HBR_RESPONSE 0x8000000000000000UL +#define HBR_END 0xc000000000000000UL +#define HBR_AVPN 0x0200000000000000UL +#define HBR_ANDCOND 0x0100000000000000UL + /* in hvCall.S */ EXPORT_SYMBOL(plpar_hcall); @@ -64,6 +71,9 @@ void vpa_init(int cpu) if (cpu_has_feature(CPU_FTR_ALTIVEC)) lppaca_of(cpu).vmxregs_in_use = 1; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lppaca_of(cpu).ebb_regs_in_use = 1; + addr = __pa(&lppaca_of(cpu)); ret = register_vpa(hwcpu, addr); @@ -240,7 +250,8 @@ static void pSeries_lpar_hptab_clear(void) static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; @@ -328,7 +339,8 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, } static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; @@ -345,6 +357,113 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, BUG_ON(lpar_rc != H_SUCCESS); } +/* + * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need + * to make sure that we avoid bouncing the hypervisor tlbie lock. + */ +#define PPC64_HUGE_HPTE_BATCH 12 + +static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, + unsigned long *vpn, int count, + int psize, int ssize) +{ + unsigned long param[8]; + int i = 0, pix = 0, rc; + unsigned long flags = 0; + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + for (i = 0; i < count; i++) { + + if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0, + ssize, 0); + } else { + param[pix] = HBR_REQUEST | HBR_AVPN | slot[i]; + param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize); + pix += 2; + if (pix == 8) { + rc = plpar_hcall9(H_BULK_REMOVE, param, + param[0], param[1], param[2], + param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + pix = 0; + } + } + } + if (pix) { + param[pix] = HBR_END; + rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], + param[2], param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + } + + if (lock_tlbie) + spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); +} + +static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, + unsigned char *hpte_slot_array, + unsigned long addr, int psize) +{ + int ssize = 0, i, index = 0; + unsigned long s_addr = addr; + unsigned int max_hpte_count, valid; + unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; + unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; + unsigned long shift, hidx, vpn = 0, vsid, hash, slot; + + shift = mmu_psize_defs[psize].shift; + max_hpte_count = 1U << (PMD_SHIFT - shift); + + for (i = 0; i < max_hpte_count; i++) { + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } + + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + + slot_array[index] = slot; + vpn_array[index] = vpn; + if (index == PPC64_HUGE_HPTE_BATCH - 1) { + /* + * Now do a bluk invalidate + */ + __pSeries_lpar_hugepage_invalidate(slot_array, + vpn_array, + PPC64_HUGE_HPTE_BATCH, + psize, ssize); + index = 0; + } else + index++; + } + if (index) + __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array, + index, psize, ssize); +} + static void pSeries_lpar_hpte_removebolted(unsigned long ea, int psize, int ssize) { @@ -356,17 +475,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea, slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - - pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); + /* + * lpar doesn't use the passed actual page size + */ + pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0); } -/* Flag bits for H_BULK_REMOVE */ -#define HBR_REQUEST 0x4000000000000000UL -#define HBR_RESPONSE 0x8000000000000000UL -#define HBR_END 0xc000000000000000UL -#define HBR_AVPN 0x0200000000000000UL -#define HBR_ANDCOND 0x0100000000000000UL - /* * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie * lock. @@ -400,8 +514,11 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + /* + * lpar doesn't use the passed actual page size + */ pSeries_lpar_hpte_invalidate(slot, vpn, psize, - ssize, local); + 0, ssize, local); } else { param[pix] = HBR_REQUEST | HBR_AVPN | slot; param[pix+1] = hpte_encode_avpn(vpn, psize, @@ -452,6 +569,7 @@ void __init hpte_init_lpar(void) ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; + ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; } #ifdef CONFIG_PPC_SMLPAR diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 8733a86ad52e..9f8671a44551 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -18,6 +18,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/kmsg_dump.h> +#include <linux/pstore.h> #include <linux/ctype.h> #include <linux/zlib.h> #include <asm/uaccess.h> @@ -29,6 +30,13 @@ /* Max bytes to read/write in one go */ #define NVRW_CNT 0x20 +/* + * Set oops header version to distingush between old and new format header. + * lnx,oops-log partition max size is 4000, header version > 4000 will + * help in identifying new header. + */ +#define OOPS_HDR_VERSION 5000 + static unsigned int nvram_size; static int nvram_fetch, nvram_store; static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ @@ -45,20 +53,23 @@ struct nvram_os_partition { int min_size; /* minimum acceptable size (0 means req_size) */ long size; /* size of data portion (excluding err_log_info) */ long index; /* offset of data portion of partition */ + bool os_partition; /* partition initialized by OS, not FW */ }; static struct nvram_os_partition rtas_log_partition = { .name = "ibm,rtas-log", .req_size = 2079, .min_size = 1055, - .index = -1 + .index = -1, + .os_partition = true }; static struct nvram_os_partition oops_log_partition = { .name = "lnx,oops-log", .req_size = 4000, .min_size = 2000, - .index = -1 + .index = -1, + .os_partition = true }; static const char *pseries_nvram_os_partitions[] = { @@ -67,6 +78,12 @@ static const char *pseries_nvram_os_partitions[] = { NULL }; +struct oops_log_info { + u16 version; + u16 report_length; + u64 timestamp; +} __attribute__((packed)); + static void oops_to_nvram(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); @@ -83,28 +100,28 @@ static unsigned long last_unread_rtas_event; /* timestamp */ * big_oops_buf[] holds the uncompressed text we're capturing. * - * oops_buf[] holds the compressed text, preceded by a prefix. - * The prefix is just a u16 holding the length of the compressed* text. - * (*Or uncompressed, if compression fails.) oops_buf[] gets written - * to NVRAM. + * oops_buf[] holds the compressed text, preceded by a oops header. + * oops header has u16 holding the version of oops header (to differentiate + * between old and new format header) followed by u16 holding the length of + * the compressed* text (*Or uncompressed, if compression fails.) and u64 + * holding the timestamp. oops_buf[] gets written to NVRAM. * - * oops_len points to the prefix. oops_data points to the compressed text. + * oops_log_info points to the header. oops_data points to the compressed text. * * +- oops_buf - * | +- oops_data - * v v - * +------------+-----------------------------------------------+ - * | length | text | - * | (2 bytes) | (oops_data_sz bytes) | - * +------------+-----------------------------------------------+ + * | +- oops_data + * v v + * +-----------+-----------+-----------+------------------------+ + * | version | length | timestamp | text | + * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) | + * +-----------+-----------+-----------+------------------------+ * ^ - * +- oops_len + * +- oops_log_info * * We preallocate these buffers during init to avoid kmalloc during oops/panic. */ static size_t big_oops_buf_sz; static char *big_oops_buf, *oops_buf; -static u16 *oops_len; static char *oops_data; static size_t oops_data_sz; @@ -114,6 +131,30 @@ static size_t oops_data_sz; #define MEM_LEVEL 4 static struct z_stream_s stream; +#ifdef CONFIG_PSTORE +static struct nvram_os_partition of_config_partition = { + .name = "of-config", + .index = -1, + .os_partition = false +}; + +static struct nvram_os_partition common_partition = { + .name = "common", + .index = -1, + .os_partition = false +}; + +static enum pstore_type_id nvram_type_ids[] = { + PSTORE_TYPE_DMESG, + PSTORE_TYPE_PPC_RTAS, + PSTORE_TYPE_PPC_OF, + PSTORE_TYPE_PPC_COMMON, + -1 +}; +static int read_type; +static unsigned long last_rtas_event; +#endif + static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) { unsigned int i; @@ -275,48 +316,72 @@ int nvram_write_error_log(char * buff, int length, { int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, err_type, error_log_cnt); - if (!rc) + if (!rc) { last_unread_rtas_event = get_seconds(); +#ifdef CONFIG_PSTORE + last_rtas_event = get_seconds(); +#endif + } + return rc; } -/* nvram_read_error_log +/* nvram_read_partition * - * Reads nvram for error log for at most 'length' + * Reads nvram partition for at most 'length' */ -int nvram_read_error_log(char * buff, int length, - unsigned int * err_type, unsigned int * error_log_cnt) +int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt) { int rc; loff_t tmp_index; struct err_log_info info; - if (rtas_log_partition.index == -1) + if (part->index == -1) return -1; - if (length > rtas_log_partition.size) - length = rtas_log_partition.size; + if (length > part->size) + length = part->size; - tmp_index = rtas_log_partition.index; + tmp_index = part->index; - rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); - return rc; + if (part->os_partition) { + rc = ppc_md.nvram_read((char *)&info, + sizeof(struct err_log_info), + &tmp_index); + if (rc <= 0) { + pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, + rc); + return rc; + } } rc = ppc_md.nvram_read(buff, length, &tmp_index); if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); + pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc); return rc; } - *error_log_cnt = info.seq_num; - *err_type = info.error_type; + if (part->os_partition) { + *error_log_cnt = info.seq_num; + *err_type = info.error_type; + } return 0; } +/* nvram_read_error_log + * + * Reads nvram for error log for at most 'length' + */ +int nvram_read_error_log(char *buff, int length, + unsigned int *err_type, unsigned int *error_log_cnt) +{ + return nvram_read_partition(&rtas_log_partition, buff, length, + err_type, error_log_cnt); +} + /* This doesn't actually zero anything, but it sets the event_logged * word to tell that this event is safely in syslog. */ @@ -405,6 +470,349 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition return 0; } +/* + * Are we using the ibm,rtas-log for oops/panic reports? And if so, + * would logging this oops/panic overwrite an RTAS event that rtas_errd + * hasn't had a chance to read and process? Return 1 if so, else 0. + * + * We assume that if rtas_errd hasn't read the RTAS event in + * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. + */ +static int clobbering_unread_rtas_event(void) +{ + return (oops_log_partition.index == rtas_log_partition.index + && last_unread_rtas_event + && get_seconds() - last_unread_rtas_event <= + NVRAM_RTAS_READ_TIMEOUT); +} + +/* Derived from logfs_compress() */ +static int nvram_compress(const void *in, void *out, size_t inlen, + size_t outlen) +{ + int err, ret; + + ret = -EIO; + err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, + MEM_LEVEL, Z_DEFAULT_STRATEGY); + if (err != Z_OK) + goto error; + + stream.next_in = in; + stream.avail_in = inlen; + stream.total_in = 0; + stream.next_out = out; + stream.avail_out = outlen; + stream.total_out = 0; + + err = zlib_deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) + goto error; + + err = zlib_deflateEnd(&stream); + if (err != Z_OK) + goto error; + + if (stream.total_out >= stream.total_in) + goto error; + + ret = stream.total_out; +error: + return ret; +} + +/* Compress the text from big_oops_buf into oops_buf. */ +static int zip_oops(size_t text_len) +{ + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, + oops_data_sz); + if (zipped_len < 0) { + pr_err("nvram: compression failed; returned %d\n", zipped_len); + pr_err("nvram: logging uncompressed oops/panic report\n"); + return -1; + } + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) zipped_len; + oops_hdr->timestamp = get_seconds(); + return 0; +} + +#ifdef CONFIG_PSTORE +/* Derived from logfs_uncompress */ +int nvram_decompress(void *in, void *out, size_t inlen, size_t outlen) +{ + int err, ret; + + ret = -EIO; + err = zlib_inflateInit(&stream); + if (err != Z_OK) + goto error; + + stream.next_in = in; + stream.avail_in = inlen; + stream.total_in = 0; + stream.next_out = out; + stream.avail_out = outlen; + stream.total_out = 0; + + err = zlib_inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) + goto error; + + err = zlib_inflateEnd(&stream); + if (err != Z_OK) + goto error; + + ret = stream.total_out; +error: + return ret; +} + +static int unzip_oops(char *oops_buf, char *big_buf) +{ + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + u64 timestamp = oops_hdr->timestamp; + char *big_oops_data = NULL; + char *oops_data_buf = NULL; + size_t big_oops_data_sz; + int unzipped_len; + + big_oops_data = big_buf + sizeof(struct oops_log_info); + big_oops_data_sz = big_oops_buf_sz - sizeof(struct oops_log_info); + oops_data_buf = oops_buf + sizeof(struct oops_log_info); + + unzipped_len = nvram_decompress(oops_data_buf, big_oops_data, + oops_hdr->report_length, + big_oops_data_sz); + + if (unzipped_len < 0) { + pr_err("nvram: decompression failed; returned %d\n", + unzipped_len); + return -1; + } + oops_hdr = (struct oops_log_info *)big_buf; + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) unzipped_len; + oops_hdr->timestamp = timestamp; + return 0; +} + +static int nvram_pstore_open(struct pstore_info *psi) +{ + /* Reset the iterator to start reading partitions again */ + read_type = -1; + return 0; +} + +/** + * nvram_pstore_write - pstore write callback for nvram + * @type: Type of message logged + * @reason: reason behind dump (oops/panic) + * @id: identifier to indicate the write performed + * @part: pstore writes data to registered buffer in parts, + * part number will indicate the same. + * @count: Indicates oops count + * @hsize: Size of header added by pstore + * @size: number of bytes written to the registered buffer + * @psi: registered pstore_info structure + * + * Called by pstore_dump() when an oops or panic report is logged in the + * printk buffer. + * Returns 0 on successful write. + */ +static int nvram_pstore_write(enum pstore_type_id type, + enum kmsg_dump_reason reason, + u64 *id, unsigned int part, int count, + size_t hsize, size_t size, + struct pstore_info *psi) +{ + int rc; + unsigned int err_type = ERR_TYPE_KERNEL_PANIC; + struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; + + /* part 1 has the recent messages from printk buffer */ + if (part > 1 || type != PSTORE_TYPE_DMESG || + clobbering_unread_rtas_event()) + return -1; + + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) size; + oops_hdr->timestamp = get_seconds(); + + if (big_oops_buf) { + rc = zip_oops(size); + /* + * If compression fails copy recent log messages from + * big_oops_buf to oops_data. + */ + if (rc != 0) { + size_t diff = size - oops_data_sz + hsize; + + if (size > oops_data_sz) { + memcpy(oops_data, big_oops_buf, hsize); + memcpy(oops_data + hsize, big_oops_buf + diff, + oops_data_sz - hsize); + + oops_hdr->report_length = (u16) oops_data_sz; + } else + memcpy(oops_data, big_oops_buf, size); + } else + err_type = ERR_TYPE_KERNEL_PANIC_GZ; + } + + rc = nvram_write_os_partition(&oops_log_partition, oops_buf, + (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type, + count); + + if (rc != 0) + return rc; + + *id = part; + return 0; +} + +/* + * Reads the oops/panic report, rtas, of-config and common partition. + * Returns the length of the data we read from each partition. + * Returns 0 if we've been called before. + */ +static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, + int *count, struct timespec *time, char **buf, + struct pstore_info *psi) +{ + struct oops_log_info *oops_hdr; + unsigned int err_type, id_no, size = 0; + struct nvram_os_partition *part = NULL; + char *buff = NULL, *big_buff = NULL; + int rc, sig = 0; + loff_t p; + +read_partition: + read_type++; + + switch (nvram_type_ids[read_type]) { + case PSTORE_TYPE_DMESG: + part = &oops_log_partition; + *type = PSTORE_TYPE_DMESG; + break; + case PSTORE_TYPE_PPC_RTAS: + part = &rtas_log_partition; + *type = PSTORE_TYPE_PPC_RTAS; + time->tv_sec = last_rtas_event; + time->tv_nsec = 0; + break; + case PSTORE_TYPE_PPC_OF: + sig = NVRAM_SIG_OF; + part = &of_config_partition; + *type = PSTORE_TYPE_PPC_OF; + *id = PSTORE_TYPE_PPC_OF; + time->tv_sec = 0; + time->tv_nsec = 0; + break; + case PSTORE_TYPE_PPC_COMMON: + sig = NVRAM_SIG_SYS; + part = &common_partition; + *type = PSTORE_TYPE_PPC_COMMON; + *id = PSTORE_TYPE_PPC_COMMON; + time->tv_sec = 0; + time->tv_nsec = 0; + break; + default: + return 0; + } + + if (!part->os_partition) { + p = nvram_find_partition(part->name, sig, &size); + if (p <= 0) { + pr_err("nvram: Failed to find partition %s, " + "err %d\n", part->name, (int)p); + return 0; + } + part->index = p; + part->size = size; + } + + buff = kmalloc(part->size, GFP_KERNEL); + + if (!buff) + return -ENOMEM; + + if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) { + kfree(buff); + return 0; + } + + *count = 0; + + if (part->os_partition) + *id = id_no; + + if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { + oops_hdr = (struct oops_log_info *)buff; + *buf = buff + sizeof(*oops_hdr); + + if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) { + big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL); + if (!big_buff) + return -ENOMEM; + + rc = unzip_oops(buff, big_buff); + + if (rc != 0) { + kfree(buff); + kfree(big_buff); + goto read_partition; + } + + oops_hdr = (struct oops_log_info *)big_buff; + *buf = big_buff + sizeof(*oops_hdr); + kfree(buff); + } + + time->tv_sec = oops_hdr->timestamp; + time->tv_nsec = 0; + return oops_hdr->report_length; + } + + *buf = buff; + return part->size; +} + +static struct pstore_info nvram_pstore_info = { + .owner = THIS_MODULE, + .name = "nvram", + .open = nvram_pstore_open, + .read = nvram_pstore_read, + .write = nvram_pstore_write, +}; + +static int nvram_pstore_init(void) +{ + int rc = 0; + + if (big_oops_buf) { + nvram_pstore_info.buf = big_oops_buf; + nvram_pstore_info.bufsize = big_oops_buf_sz; + } else { + nvram_pstore_info.buf = oops_data; + nvram_pstore_info.bufsize = oops_data_sz; + } + + rc = pstore_register(&nvram_pstore_info); + if (rc != 0) + pr_err("nvram: pstore_register() failed, defaults to " + "kmsg_dump; returned %d\n", rc); + + return rc; +} +#else +static int nvram_pstore_init(void) +{ + return -1; +} +#endif + static void __init nvram_init_oops_partition(int rtas_partition_exists) { int rc; @@ -425,9 +833,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) oops_log_partition.name); return; } - oops_len = (u16*) oops_buf; - oops_data = oops_buf + sizeof(u16); - oops_data_sz = oops_log_partition.size - sizeof(u16); + oops_data = oops_buf + sizeof(struct oops_log_info); + oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info); /* * Figure compression (preceded by elimination of each line's <n> @@ -452,6 +859,11 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) stream.workspace = NULL; } + rc = nvram_pstore_init(); + + if (!rc) + return; + rc = kmsg_dump_register(&nvram_kmsg_dumper); if (rc != 0) { pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); @@ -501,70 +913,6 @@ int __init pSeries_nvram_init(void) return 0; } -/* - * Are we using the ibm,rtas-log for oops/panic reports? And if so, - * would logging this oops/panic overwrite an RTAS event that rtas_errd - * hasn't had a chance to read and process? Return 1 if so, else 0. - * - * We assume that if rtas_errd hasn't read the RTAS event in - * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. - */ -static int clobbering_unread_rtas_event(void) -{ - return (oops_log_partition.index == rtas_log_partition.index - && last_unread_rtas_event - && get_seconds() - last_unread_rtas_event <= - NVRAM_RTAS_READ_TIMEOUT); -} - -/* Derived from logfs_compress() */ -static int nvram_compress(const void *in, void *out, size_t inlen, - size_t outlen) -{ - int err, ret; - - ret = -EIO; - err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, - MEM_LEVEL, Z_DEFAULT_STRATEGY); - if (err != Z_OK) - goto error; - - stream.next_in = in; - stream.avail_in = inlen; - stream.total_in = 0; - stream.next_out = out; - stream.avail_out = outlen; - stream.total_out = 0; - - err = zlib_deflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) - goto error; - - err = zlib_deflateEnd(&stream); - if (err != Z_OK) - goto error; - - if (stream.total_out >= stream.total_in) - goto error; - - ret = stream.total_out; -error: - return ret; -} - -/* Compress the text from big_oops_buf into oops_buf. */ -static int zip_oops(size_t text_len) -{ - int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, - oops_data_sz); - if (zipped_len < 0) { - pr_err("nvram: compression failed; returned %d\n", zipped_len); - pr_err("nvram: logging uncompressed oops/panic report\n"); - return -1; - } - *oops_len = (u16) zipped_len; - return 0; -} /* * This is our kmsg_dump callback, called after an oops or panic report @@ -576,6 +924,7 @@ static int zip_oops(size_t text_len) static void oops_to_nvram(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; static unsigned int oops_count = 0; static bool panicking = false; static DEFINE_SPINLOCK(lock); @@ -619,14 +968,17 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, } if (rc != 0) { kmsg_dump_rewind(dumper); - kmsg_dump_get_buffer(dumper, true, + kmsg_dump_get_buffer(dumper, false, oops_data, oops_data_sz, &text_len); err_type = ERR_TYPE_KERNEL_PANIC; - *oops_len = (u16) text_len; + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) text_len; + oops_hdr->timestamp = get_seconds(); } (void) nvram_write_os_partition(&oops_log_partition, oops_buf, - (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); + (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type, + ++oops_count); spin_unlock_irqrestore(&lock, flags); } diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index c91b22be9288..efe61374f6ea 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -64,91 +64,6 @@ pcibios_find_pci_bus(struct device_node *dn) } EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); -/** - * __pcibios_remove_pci_devices - remove all devices under this bus - * @bus: the indicated PCI bus - * @purge_pe: destroy the PE on removal of PCI devices - * - * Remove all of the PCI devices under this bus both from the - * linux pci device tree, and from the powerpc EEH address cache. - * By default, the corresponding PE will be destroied during the - * normal PCI hotplug path. For PCI hotplug during EEH recovery, - * the corresponding PE won't be destroied and deallocated. - */ -void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) -{ - struct pci_dev *dev, *tmp; - struct pci_bus *child_bus; - - /* First go down child busses */ - list_for_each_entry(child_bus, &bus->children, node) - __pcibios_remove_pci_devices(child_bus, purge_pe); - - pr_debug("PCI: Removing devices on bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { - pr_debug(" * Removing %s...\n", pci_name(dev)); - eeh_remove_bus_device(dev, purge_pe); - pci_stop_and_remove_bus_device(dev); - } -} - -/** - * pcibios_remove_pci_devices - remove all devices under this bus - * - * Remove all of the PCI devices under this bus both from the - * linux pci device tree, and from the powerpc EEH address cache. - */ -void pcibios_remove_pci_devices(struct pci_bus *bus) -{ - __pcibios_remove_pci_devices(bus, 1); -} -EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); - -/** - * pcibios_add_pci_devices - adds new pci devices to bus - * - * This routine will find and fixup new pci devices under - * the indicated bus. This routine presumes that there - * might already be some devices under this bridge, so - * it carefully tries to add only new devices. (And that - * is how this routine differs from other, similar pcibios - * routines.) - */ -void pcibios_add_pci_devices(struct pci_bus * bus) -{ - int slotno, num, mode, pass, max; - struct pci_dev *dev; - struct device_node *dn = pci_bus_to_OF_node(bus); - - eeh_add_device_tree_early(dn); - - mode = PCI_PROBE_NORMAL; - if (ppc_md.pci_probe_mode) - mode = ppc_md.pci_probe_mode(bus); - - if (mode == PCI_PROBE_DEVTREE) { - /* use ofdt-based probe */ - of_rescan_bus(dn, bus); - } else if (mode == PCI_PROBE_NORMAL) { - /* use legacy probe */ - slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); - if (!num) - return; - pcibios_setup_bus_devices(bus); - max = bus->busn_res.start; - for (pass=0; pass < 2; pass++) - list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) - max = pci_scan_bridge(bus, dev, max, pass); - } - } - pcibios_finish_adding_to_bus(bus); -} -EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); - struct pci_controller *init_phb_dynamic(struct device_node *dn) { struct pci_controller *phb; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index c4dfccd3a3d9..7b3cbde8c783 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier) switch (event_modifier) { case EPOW_SHUTDOWN_NORMAL: pr_emerg("Firmware initiated power off"); - orderly_poweroff(1); + orderly_poweroff(true); break; case EPOW_SHUTDOWN_ON_UPS: @@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier) pr_emerg("Loss of system critical functions reported by " "firmware"); pr_emerg("Check RTAS error log for details"); - orderly_poweroff(1); + orderly_poweroff(true); break; case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: pr_emerg("Ambient temperature too high reported by firmware"); pr_emerg("Check RTAS error log for details"); - orderly_poweroff(1); + orderly_poweroff(true); break; default: @@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log) case EPOW_SYSTEM_HALT: pr_emerg("Firmware initiated power off"); - orderly_poweroff(1); + orderly_poweroff(true); break; case EPOW_MAIN_ENCLOSURE: diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 12bc8c3663ad..306643cc9dbc 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -192,7 +192,7 @@ static int smp_pSeries_cpu_bootable(unsigned int nr) /* Special case - we inhibit secondary thread startup * during boot if the user requests it. */ - if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { + if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) return 0; if (smt_enabled_at_boot diff --git a/arch/powerpc/relocs_check.pl b/arch/powerpc/relocs_check.pl index 7f5b83808862..3f46e8b9c56d 100755 --- a/arch/powerpc/relocs_check.pl +++ b/arch/powerpc/relocs_check.pl @@ -7,7 +7,7 @@ # as published by the Free Software Foundation; either version # 2 of the License, or (at your option) any later version. -# This script checks the relcoations of a vmlinux for "suspicious" +# This script checks the relocations of a vmlinux for "suspicious" # relocations. use strict; @@ -28,7 +28,7 @@ open(FD, "$objdump -R $vmlinux|") or die; while (<FD>) { study $_; - # Only look at relcoation lines. + # Only look at relocation lines. next if (!/\s+R_/); # These relocations are okay @@ -45,7 +45,7 @@ while (<FD>) { /\bR_PPC_ADDR16_HA\b/ or /\bR_PPC_RELATIVE\b/ or /\bR_PPC_NONE\b/); - # If we see this type of relcoation it's an idication that + # If we see this type of relocation it's an idication that # we /may/ be using an old version of binutils. if (/R_PPC64_UADDR64/) { $old_binutils++; @@ -61,6 +61,6 @@ if ($bad_relocs_count) { } if ($old_binutils) { - print "WARNING: You need at binutils >= 2.19 to build a ". - "CONFIG_RELCOATABLE kernel\n"; + print "WARNING: You need at least binutils >= 2.19 to build a ". + "CONFIG_RELOCATABLE kernel\n"; } diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 99464a7bdb3b..f67ac900d870 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -4,6 +4,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) +obj-$(CONFIG_MPIC_TIMER) += mpic_timer.o +obj-$(CONFIG_FSL_MPIC_TIMER_WAKEUP) += fsl_mpic_timer_wakeup.o mpic-msgr-obj-$(CONFIG_MPIC_MSGR) += mpic_msgr.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) $(mpic-msgr-obj-y) obj-$(CONFIG_PPC_EPAPR_HV_PIC) += ehv_pic.o diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index d4fa03f2b6ac..5e6ff38ea69f 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -120,6 +120,7 @@ static irqreturn_t cpm_error_interrupt(int irq, void *dev) static struct irqaction cpm_error_irqaction = { .handler = cpm_error_interrupt, + .flags = IRQF_NO_THREAD, .name = "error", }; diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c new file mode 100644 index 000000000000..1707bf04dec6 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c @@ -0,0 +1,161 @@ +/* + * MPIC timer wakeup driver + * + * Copyright 2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/device.h> + +#include <asm/mpic_timer.h> +#include <asm/mpic.h> + +struct fsl_mpic_timer_wakeup { + struct mpic_timer *timer; + struct work_struct free_work; +}; + +static struct fsl_mpic_timer_wakeup *fsl_wakeup; +static DEFINE_MUTEX(sysfs_lock); + +static void fsl_free_resource(struct work_struct *ws) +{ + struct fsl_mpic_timer_wakeup *wakeup = + container_of(ws, struct fsl_mpic_timer_wakeup, free_work); + + mutex_lock(&sysfs_lock); + + if (wakeup->timer) { + disable_irq_wake(wakeup->timer->irq); + mpic_free_timer(wakeup->timer); + } + + wakeup->timer = NULL; + mutex_unlock(&sysfs_lock); +} + +static irqreturn_t fsl_mpic_timer_irq(int irq, void *dev_id) +{ + struct fsl_mpic_timer_wakeup *wakeup = dev_id; + + schedule_work(&wakeup->free_work); + + return wakeup->timer ? IRQ_HANDLED : IRQ_NONE; +} + +static ssize_t fsl_timer_wakeup_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct timeval interval; + int val = 0; + + mutex_lock(&sysfs_lock); + if (fsl_wakeup->timer) { + mpic_get_remain_time(fsl_wakeup->timer, &interval); + val = interval.tv_sec + 1; + } + mutex_unlock(&sysfs_lock); + + return sprintf(buf, "%d\n", val); +} + +static ssize_t fsl_timer_wakeup_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct timeval interval; + int ret; + + interval.tv_usec = 0; + if (kstrtol(buf, 0, &interval.tv_sec)) + return -EINVAL; + + mutex_lock(&sysfs_lock); + + if (fsl_wakeup->timer) { + disable_irq_wake(fsl_wakeup->timer->irq); + mpic_free_timer(fsl_wakeup->timer); + fsl_wakeup->timer = NULL; + } + + if (!interval.tv_sec) { + mutex_unlock(&sysfs_lock); + return count; + } + + fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq, + fsl_wakeup, &interval); + if (!fsl_wakeup->timer) { + mutex_unlock(&sysfs_lock); + return -EINVAL; + } + + ret = enable_irq_wake(fsl_wakeup->timer->irq); + if (ret) { + mpic_free_timer(fsl_wakeup->timer); + fsl_wakeup->timer = NULL; + mutex_unlock(&sysfs_lock); + + return ret; + } + + mpic_start_timer(fsl_wakeup->timer); + + mutex_unlock(&sysfs_lock); + + return count; +} + +static struct device_attribute mpic_attributes = __ATTR(timer_wakeup, 0644, + fsl_timer_wakeup_show, fsl_timer_wakeup_store); + +static int __init fsl_wakeup_sys_init(void) +{ + int ret; + + fsl_wakeup = kzalloc(sizeof(struct fsl_mpic_timer_wakeup), GFP_KERNEL); + if (!fsl_wakeup) + return -ENOMEM; + + INIT_WORK(&fsl_wakeup->free_work, fsl_free_resource); + + ret = device_create_file(mpic_subsys.dev_root, &mpic_attributes); + if (ret) + kfree(fsl_wakeup); + + return ret; +} + +static void __exit fsl_wakeup_sys_exit(void) +{ + device_remove_file(mpic_subsys.dev_root, &mpic_attributes); + + mutex_lock(&sysfs_lock); + + if (fsl_wakeup->timer) { + disable_irq_wake(fsl_wakeup->timer->irq); + mpic_free_timer(fsl_wakeup->timer); + } + + kfree(fsl_wakeup); + + mutex_unlock(&sysfs_lock); +} + +module_init(fsl_wakeup_sys_init); +module_exit(fsl_wakeup_sys_exit); + +MODULE_DESCRIPTION("Freescale MPIC global timer wakeup driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Wang Dongsheng <dongsheng.wang@freescale.com>"); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 3cc2f9159ab1..1be54faf60dd 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -48,6 +48,12 @@ #define DBG(fmt...) #endif +struct bus_type mpic_subsys = { + .name = "mpic", + .dev_name = "mpic", +}; +EXPORT_SYMBOL_GPL(mpic_subsys); + static struct mpic *mpics; static struct mpic *mpic_primary; static DEFINE_RAW_SPINLOCK(mpic_lock); @@ -920,6 +926,22 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type) return IRQ_SET_MASK_OK_NOCOPY; } +static int mpic_irq_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_desc *desc = container_of(d, struct irq_desc, irq_data); + struct mpic *mpic = mpic_from_irq_data(d); + + if (!(mpic->flags & MPIC_FSL)) + return -ENXIO; + + if (on) + desc->action->flags |= IRQF_NO_SUSPEND; + else + desc->action->flags &= ~IRQF_NO_SUSPEND; + + return 0; +} + void mpic_set_vector(unsigned int virq, unsigned int vector) { struct mpic *mpic = mpic_from_irq(virq); @@ -957,6 +979,7 @@ static struct irq_chip mpic_irq_chip = { .irq_unmask = mpic_unmask_irq, .irq_eoi = mpic_end_irq, .irq_set_type = mpic_set_irq_type, + .irq_set_wake = mpic_irq_set_wake, }; #ifdef CONFIG_SMP @@ -971,6 +994,7 @@ static struct irq_chip mpic_tm_chip = { .irq_mask = mpic_mask_tm, .irq_unmask = mpic_unmask_tm, .irq_eoi = mpic_end_irq, + .irq_set_wake = mpic_irq_set_wake, }; #ifdef CONFIG_MPIC_U3_HT_IRQS @@ -1173,10 +1197,33 @@ static struct irq_domain_ops mpic_host_ops = { .xlate = mpic_host_xlate, }; +static u32 fsl_mpic_get_version(struct mpic *mpic) +{ + u32 brr1; + + if (!(mpic->flags & MPIC_FSL)) + return 0; + + brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, + MPIC_FSL_BRR1); + + return brr1 & MPIC_FSL_BRR1_VER; +} + /* * Exported functions */ +u32 fsl_mpic_primary_get_version(void) +{ + struct mpic *mpic = mpic_primary; + + if (mpic) + return fsl_mpic_get_version(mpic); + + return 0; +} + struct mpic * __init mpic_alloc(struct device_node *node, phys_addr_t phys_addr, unsigned int flags, @@ -1323,7 +1370,6 @@ struct mpic * __init mpic_alloc(struct device_node *node, mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000); if (mpic->flags & MPIC_FSL) { - u32 brr1; int ret; /* @@ -1334,9 +1380,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs, MPIC_CPU_THISBASE, 0x1000); - brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, - MPIC_FSL_BRR1); - fsl_version = brr1 & MPIC_FSL_BRR1_VER; + fsl_version = fsl_mpic_get_version(mpic); /* Error interrupt mask register (EIMR) is required for * handling individual device error interrupts. EIMR @@ -1526,9 +1570,7 @@ void __init mpic_init(struct mpic *mpic) mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf); if (mpic->flags & MPIC_FSL) { - u32 brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, - MPIC_FSL_BRR1); - u32 version = brr1 & MPIC_FSL_BRR1_VER; + u32 version = fsl_mpic_get_version(mpic); /* * Timer group B is present at the latest in MPIC 3.1 (e.g. @@ -1999,6 +2041,8 @@ static struct syscore_ops mpic_syscore_ops = { static int mpic_init_sys(void) { register_syscore_ops(&mpic_syscore_ops); + subsys_system_register(&mpic_subsys, NULL); + return 0; } diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c new file mode 100644 index 000000000000..c06db92a4fb1 --- /dev/null +++ b/arch/powerpc/sysdev/mpic_timer.c @@ -0,0 +1,593 @@ +/* + * MPIC timer driver + * + * Copyright 2013 Freescale Semiconductor, Inc. + * Author: Dongsheng Wang <Dongsheng.Wang@freescale.com> + * Li Yang <leoli@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/syscore_ops.h> +#include <sysdev/fsl_soc.h> +#include <asm/io.h> + +#include <asm/mpic_timer.h> + +#define FSL_GLOBAL_TIMER 0x1 + +/* Clock Ratio + * Divide by 64 0x00000300 + * Divide by 32 0x00000200 + * Divide by 16 0x00000100 + * Divide by 8 0x00000000 (Hardware default div) + */ +#define MPIC_TIMER_TCR_CLKDIV 0x00000300 + +#define MPIC_TIMER_TCR_ROVR_OFFSET 24 + +#define TIMER_STOP 0x80000000 +#define TIMERS_PER_GROUP 4 +#define MAX_TICKS (~0U >> 1) +#define MAX_TICKS_CASCADE (~0U) +#define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num)) + +/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */ +#define ONE_SECOND 1000000 + +struct timer_regs { + u32 gtccr; + u32 res0[3]; + u32 gtbcr; + u32 res1[3]; + u32 gtvpr; + u32 res2[3]; + u32 gtdr; + u32 res3[3]; +}; + +struct cascade_priv { + u32 tcr_value; /* TCR register: CASC & ROVR value */ + unsigned int cascade_map; /* cascade map */ + unsigned int timer_num; /* cascade control timer */ +}; + +struct timer_group_priv { + struct timer_regs __iomem *regs; + struct mpic_timer timer[TIMERS_PER_GROUP]; + struct list_head node; + unsigned int timerfreq; + unsigned int idle; + unsigned int flags; + spinlock_t lock; + void __iomem *group_tcr; +}; + +static struct cascade_priv cascade_timer[] = { + /* cascade timer 0 and 1 */ + {0x1, 0xc, 0x1}, + /* cascade timer 1 and 2 */ + {0x2, 0x6, 0x2}, + /* cascade timer 2 and 3 */ + {0x4, 0x3, 0x3} +}; + +static LIST_HEAD(timer_group_list); + +static void convert_ticks_to_time(struct timer_group_priv *priv, + const u64 ticks, struct timeval *time) +{ + u64 tmp_sec; + + time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq); + tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; + + time->tv_usec = (__kernel_suseconds_t) + div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq); + + return; +} + +/* the time set by the user is converted to "ticks" */ +static int convert_time_to_ticks(struct timer_group_priv *priv, + const struct timeval *time, u64 *ticks) +{ + u64 max_value; /* prevent u64 overflow */ + u64 tmp = 0; + + u64 tmp_sec; + u64 tmp_ms; + u64 tmp_us; + + max_value = div_u64(ULLONG_MAX, priv->timerfreq); + + if (time->tv_sec > max_value || + (time->tv_sec == max_value && time->tv_usec > 0)) + return -EINVAL; + + tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; + tmp += tmp_sec; + + tmp_ms = time->tv_usec / 1000; + tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000); + tmp += tmp_ms; + + tmp_us = time->tv_usec % 1000; + tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000); + tmp += tmp_us; + + *ticks = tmp; + + return 0; +} + +/* detect whether there is a cascade timer available */ +static struct mpic_timer *detect_idle_cascade_timer( + struct timer_group_priv *priv) +{ + struct cascade_priv *casc_priv; + unsigned int map; + unsigned int array_size = ARRAY_SIZE(cascade_timer); + unsigned int num; + unsigned int i; + unsigned long flags; + + casc_priv = cascade_timer; + for (i = 0; i < array_size; i++) { + spin_lock_irqsave(&priv->lock, flags); + map = casc_priv->cascade_map & priv->idle; + if (map == casc_priv->cascade_map) { + num = casc_priv->timer_num; + priv->timer[num].cascade_handle = casc_priv; + + /* set timer busy */ + priv->idle &= ~casc_priv->cascade_map; + spin_unlock_irqrestore(&priv->lock, flags); + return &priv->timer[num]; + } + spin_unlock_irqrestore(&priv->lock, flags); + casc_priv++; + } + + return NULL; +} + +static int set_cascade_timer(struct timer_group_priv *priv, u64 ticks, + unsigned int num) +{ + struct cascade_priv *casc_priv; + u32 tcr; + u32 tmp_ticks; + u32 rem_ticks; + + /* set group tcr reg for cascade */ + casc_priv = priv->timer[num].cascade_handle; + if (!casc_priv) + return -EINVAL; + + tcr = casc_priv->tcr_value | + (casc_priv->tcr_value << MPIC_TIMER_TCR_ROVR_OFFSET); + setbits32(priv->group_tcr, tcr); + + tmp_ticks = div_u64_rem(ticks, MAX_TICKS_CASCADE, &rem_ticks); + + out_be32(&priv->regs[num].gtccr, 0); + out_be32(&priv->regs[num].gtbcr, tmp_ticks | TIMER_STOP); + + out_be32(&priv->regs[num - 1].gtccr, 0); + out_be32(&priv->regs[num - 1].gtbcr, rem_ticks); + + return 0; +} + +static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv, + u64 ticks) +{ + struct mpic_timer *allocated_timer; + + /* Two cascade timers: Support the maximum time */ + const u64 max_ticks = (u64)MAX_TICKS * (u64)MAX_TICKS_CASCADE; + int ret; + + if (ticks > max_ticks) + return NULL; + + /* detect idle timer */ + allocated_timer = detect_idle_cascade_timer(priv); + if (!allocated_timer) + return NULL; + + /* set ticks to timer */ + ret = set_cascade_timer(priv, ticks, allocated_timer->num); + if (ret < 0) + return NULL; + + return allocated_timer; +} + +static struct mpic_timer *get_timer(const struct timeval *time) +{ + struct timer_group_priv *priv; + struct mpic_timer *timer; + + u64 ticks; + unsigned int num; + unsigned int i; + unsigned long flags; + int ret; + + list_for_each_entry(priv, &timer_group_list, node) { + ret = convert_time_to_ticks(priv, time, &ticks); + if (ret < 0) + return NULL; + + if (ticks > MAX_TICKS) { + if (!(priv->flags & FSL_GLOBAL_TIMER)) + return NULL; + + timer = get_cascade_timer(priv, ticks); + if (!timer) + continue; + + return timer; + } + + for (i = 0; i < TIMERS_PER_GROUP; i++) { + /* one timer: Reverse allocation */ + num = TIMERS_PER_GROUP - 1 - i; + spin_lock_irqsave(&priv->lock, flags); + if (priv->idle & (1 << i)) { + /* set timer busy */ + priv->idle &= ~(1 << i); + /* set ticks & stop timer */ + out_be32(&priv->regs[num].gtbcr, + ticks | TIMER_STOP); + out_be32(&priv->regs[num].gtccr, 0); + priv->timer[num].cascade_handle = NULL; + spin_unlock_irqrestore(&priv->lock, flags); + return &priv->timer[num]; + } + spin_unlock_irqrestore(&priv->lock, flags); + } + } + + return NULL; +} + +/** + * mpic_start_timer - start hardware timer + * @handle: the timer to be started. + * + * It will do ->fn(->dev) callback from the hardware interrupt at + * the ->timeval point in the future. + */ +void mpic_start_timer(struct mpic_timer *handle) +{ + struct timer_group_priv *priv = container_of(handle, + struct timer_group_priv, timer[handle->num]); + + clrbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP); +} +EXPORT_SYMBOL(mpic_start_timer); + +/** + * mpic_stop_timer - stop hardware timer + * @handle: the timer to be stoped + * + * The timer periodically generates an interrupt. Unless user stops the timer. + */ +void mpic_stop_timer(struct mpic_timer *handle) +{ + struct timer_group_priv *priv = container_of(handle, + struct timer_group_priv, timer[handle->num]); + struct cascade_priv *casc_priv; + + setbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP); + + casc_priv = priv->timer[handle->num].cascade_handle; + if (casc_priv) { + out_be32(&priv->regs[handle->num].gtccr, 0); + out_be32(&priv->regs[handle->num - 1].gtccr, 0); + } else { + out_be32(&priv->regs[handle->num].gtccr, 0); + } +} +EXPORT_SYMBOL(mpic_stop_timer); + +/** + * mpic_get_remain_time - get timer time + * @handle: the timer to be selected. + * @time: time for timer + * + * Query timer remaining time. + */ +void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) +{ + struct timer_group_priv *priv = container_of(handle, + struct timer_group_priv, timer[handle->num]); + struct cascade_priv *casc_priv; + + u64 ticks; + u32 tmp_ticks; + + casc_priv = priv->timer[handle->num].cascade_handle; + if (casc_priv) { + tmp_ticks = in_be32(&priv->regs[handle->num].gtccr); + ticks = ((u64)tmp_ticks & UINT_MAX) * (u64)MAX_TICKS_CASCADE; + tmp_ticks = in_be32(&priv->regs[handle->num - 1].gtccr); + ticks += tmp_ticks; + } else { + ticks = in_be32(&priv->regs[handle->num].gtccr); + } + + convert_ticks_to_time(priv, ticks, time); +} +EXPORT_SYMBOL(mpic_get_remain_time); + +/** + * mpic_free_timer - free hardware timer + * @handle: the timer to be removed. + * + * Free the timer. + * + * Note: can not be used in interrupt context. + */ +void mpic_free_timer(struct mpic_timer *handle) +{ + struct timer_group_priv *priv = container_of(handle, + struct timer_group_priv, timer[handle->num]); + + struct cascade_priv *casc_priv; + unsigned long flags; + + mpic_stop_timer(handle); + + casc_priv = priv->timer[handle->num].cascade_handle; + + free_irq(priv->timer[handle->num].irq, priv->timer[handle->num].dev); + + spin_lock_irqsave(&priv->lock, flags); + if (casc_priv) { + u32 tcr; + tcr = casc_priv->tcr_value | (casc_priv->tcr_value << + MPIC_TIMER_TCR_ROVR_OFFSET); + clrbits32(priv->group_tcr, tcr); + priv->idle |= casc_priv->cascade_map; + priv->timer[handle->num].cascade_handle = NULL; + } else { + priv->idle |= TIMER_OFFSET(handle->num); + } + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL(mpic_free_timer); + +/** + * mpic_request_timer - get a hardware timer + * @fn: interrupt handler function + * @dev: callback function of the data + * @time: time for timer + * + * This executes the "request_irq", returning NULL + * else "handle" on success. + */ +struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, + const struct timeval *time) +{ + struct mpic_timer *allocated_timer; + int ret; + + if (list_empty(&timer_group_list)) + return NULL; + + if (!(time->tv_sec + time->tv_usec) || + time->tv_sec < 0 || time->tv_usec < 0) + return NULL; + + if (time->tv_usec > ONE_SECOND) + return NULL; + + allocated_timer = get_timer(time); + if (!allocated_timer) + return NULL; + + ret = request_irq(allocated_timer->irq, fn, + IRQF_TRIGGER_LOW, "global-timer", dev); + if (ret) { + mpic_free_timer(allocated_timer); + return NULL; + } + + allocated_timer->dev = dev; + + return allocated_timer; +} +EXPORT_SYMBOL(mpic_request_timer); + +static int timer_group_get_freq(struct device_node *np, + struct timer_group_priv *priv) +{ + u32 div; + + if (priv->flags & FSL_GLOBAL_TIMER) { + struct device_node *dn; + + dn = of_find_compatible_node(NULL, NULL, "fsl,mpic"); + if (dn) { + of_property_read_u32(dn, "clock-frequency", + &priv->timerfreq); + of_node_put(dn); + } + } + + if (priv->timerfreq <= 0) + return -EINVAL; + + if (priv->flags & FSL_GLOBAL_TIMER) { + div = (1 << (MPIC_TIMER_TCR_CLKDIV >> 8)) * 8; + priv->timerfreq /= div; + } + + return 0; +} + +static int timer_group_get_irq(struct device_node *np, + struct timer_group_priv *priv) +{ + const u32 all_timer[] = { 0, TIMERS_PER_GROUP }; + const u32 *p; + u32 offset; + u32 count; + + unsigned int i; + unsigned int j; + unsigned int irq_index = 0; + unsigned int irq; + int len; + + p = of_get_property(np, "fsl,available-ranges", &len); + if (p && len % (2 * sizeof(u32)) != 0) { + pr_err("%s: malformed available-ranges property.\n", + np->full_name); + return -EINVAL; + } + + if (!p) { + p = all_timer; + len = sizeof(all_timer); + } + + len /= 2 * sizeof(u32); + + for (i = 0; i < len; i++) { + offset = p[i * 2]; + count = p[i * 2 + 1]; + for (j = 0; j < count; j++) { + irq = irq_of_parse_and_map(np, irq_index); + if (!irq) { + pr_err("%s: irq parse and map failed.\n", + np->full_name); + return -EINVAL; + } + + /* Set timer idle */ + priv->idle |= TIMER_OFFSET((offset + j)); + priv->timer[offset + j].irq = irq; + priv->timer[offset + j].num = offset + j; + irq_index++; + } + } + + return 0; +} + +static void timer_group_init(struct device_node *np) +{ + struct timer_group_priv *priv; + unsigned int i = 0; + int ret; + + priv = kzalloc(sizeof(struct timer_group_priv), GFP_KERNEL); + if (!priv) { + pr_err("%s: cannot allocate memory for group.\n", + np->full_name); + return; + } + + if (of_device_is_compatible(np, "fsl,mpic-global-timer")) + priv->flags |= FSL_GLOBAL_TIMER; + + priv->regs = of_iomap(np, i++); + if (!priv->regs) { + pr_err("%s: cannot ioremap timer register address.\n", + np->full_name); + goto out; + } + + if (priv->flags & FSL_GLOBAL_TIMER) { + priv->group_tcr = of_iomap(np, i++); + if (!priv->group_tcr) { + pr_err("%s: cannot ioremap tcr address.\n", + np->full_name); + goto out; + } + } + + ret = timer_group_get_freq(np, priv); + if (ret < 0) { + pr_err("%s: cannot get timer frequency.\n", np->full_name); + goto out; + } + + ret = timer_group_get_irq(np, priv); + if (ret < 0) { + pr_err("%s: cannot get timer irqs.\n", np->full_name); + goto out; + } + + spin_lock_init(&priv->lock); + + /* Init FSL timer hardware */ + if (priv->flags & FSL_GLOBAL_TIMER) + setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV); + + list_add_tail(&priv->node, &timer_group_list); + + return; + +out: + if (priv->regs) + iounmap(priv->regs); + + if (priv->group_tcr) + iounmap(priv->group_tcr); + + kfree(priv); +} + +static void mpic_timer_resume(void) +{ + struct timer_group_priv *priv; + + list_for_each_entry(priv, &timer_group_list, node) { + /* Init FSL timer hardware */ + if (priv->flags & FSL_GLOBAL_TIMER) + setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV); + } +} + +static const struct of_device_id mpic_timer_ids[] = { + { .compatible = "fsl,mpic-global-timer", }, + {}, +}; + +static struct syscore_ops mpic_timer_syscore_ops = { + .resume = mpic_timer_resume, +}; + +static int __init mpic_timer_init(void) +{ + struct device_node *np = NULL; + + for_each_matching_node(np, mpic_timer_ids) + timer_group_init(np); + + register_syscore_ops(&mpic_timer_syscore_ops); + + if (list_empty(&timer_group_list)) + return -ENODEV; + + return 0; +} +subsys_initcall(mpic_timer_init); diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 7ef60b52d6e0..42be53743133 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -32,7 +32,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_mem_data { +struct appldata_mem_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -63,7 +63,7 @@ static struct appldata_mem_data { u64 pgmajfault; /* page faults (major only) */ // <-- New in 2.6 -} __attribute__((packed)) appldata_mem_data; +} __packed; /* @@ -118,7 +118,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_MEM_ID, .size = sizeof(struct appldata_mem_data), .callback = &appldata_get_mem_data, - .data = &appldata_mem_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -131,7 +130,17 @@ static struct appldata_ops ops = { */ static int __init appldata_mem_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -142,6 +151,7 @@ static int __init appldata_mem_init(void) static void __exit appldata_mem_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 2d224b945355..66037d2622b4 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -29,7 +29,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_net_sum_data { +struct appldata_net_sum_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -51,7 +51,7 @@ static struct appldata_net_sum_data { u64 rx_dropped; /* no space in linux buffers */ u64 tx_dropped; /* no space available in linux */ u64 collisions; /* collisions while transmitting */ -} __attribute__((packed)) appldata_net_sum_data; +} __packed; /* @@ -121,7 +121,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_NET_SUM_ID, .size = sizeof(struct appldata_net_sum_data), .callback = &appldata_get_net_sum_data, - .data = &appldata_net_sum_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -134,7 +133,17 @@ static struct appldata_ops ops = { */ static int __init appldata_net_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -145,6 +154,7 @@ static int __init appldata_net_init(void) static void __exit appldata_net_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 7fd3690b6760..138893e5f736 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -651,9 +651,7 @@ static int hypfs_create_cpu_files(struct super_block *sb, } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + return PTR_RET(rc); } static void *hypfs_create_lpar_files(struct super_block *sb, @@ -702,9 +700,7 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb, return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + return PTR_RET(rc); } static void *hypfs_create_phys_files(struct super_block *sb, diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 9819891ed7a2..4066cee0c2d2 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -9,9 +9,18 @@ #ifndef _ASM_S390_AIRQ_H #define _ASM_S390_AIRQ_H -typedef void (*adapter_int_handler_t)(void *, void *); +struct airq_struct { + struct hlist_node list; /* Handler queueing. */ + void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ + u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ + u8 lsi_mask; /* Local-Summary-Indicator mask */ + u8 isc; /* Interrupt-subclass */ + u8 flags; +}; -void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8); -void s390_unregister_adapter_interrupt(void *, u8); +#define AIRQ_PTR_ALLOCATED 0x01 + +int register_adapter_interrupt(struct airq_struct *airq); +void unregister_adapter_interrupt(struct airq_struct *airq); #endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 2f8c1abeb086..3fbc67d9e197 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -53,7 +53,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) debug_dma_mapping_error(dev, dma_addr); if (dma_ops->mapping_error) return dma_ops->mapping_error(dev, dma_addr); - return (dma_addr == DMA_ERROR_CODE); + return dma_addr == DMA_ERROR_CODE; } static inline void *dma_alloc_coherent(struct device *dev, size_t size, diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 2ee66a65f2d4..0aa6a7ed95a3 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -13,6 +13,16 @@ #define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ +static inline int __test_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) facilities + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + /* * The test_facility function uses the bit odering where the MSB is bit 0. * That makes it easier to query facility bits with the bit number as @@ -20,12 +30,7 @@ */ static inline int test_facility(unsigned long nr) { - unsigned char *ptr; - - if (nr >= MAX_FACILITY_BIT) - return 0; - ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); - return (*ptr & (0x80 >> (nr & 7))) != 0; + return __test_facility(nr, &S390_lowcore.stfle_fac_list); } /** diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index fd9be010f9b2..cd6b9ee7b69c 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -13,28 +13,6 @@ #include <asm/page.h> #include <asm/pci_io.h> -/* - * Change virtual addresses to physical addresses and vv. - * These are pretty trivial - */ -static inline unsigned long virt_to_phys(volatile void * address) -{ - unsigned long real_address; - asm volatile( - " lra %0,0(%1)\n" - " jz 0f\n" - " la %0,0\n" - "0:" - : "=a" (real_address) : "a" (address) : "cc"); - return real_address; -} -#define virt_to_phys virt_to_phys - -static inline void * phys_to_virt(unsigned long address) -{ - return (void *) address; -} - void *xlate_dev_mem_ptr(unsigned long phys); #define xlate_dev_mem_ptr xlate_dev_mem_ptr void unxlate_dev_mem_ptr(unsigned long phys, void *addr); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 16bd5d169cdb..3238d4004e84 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -62,13 +62,20 @@ struct sca_block { #define CPUSTAT_MCDS 0x00000100 #define CPUSTAT_SM 0x00000080 #define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 #define CPUSTAT_J 0x00000002 #define CPUSTAT_P 0x00000001 struct kvm_s390_sie_block { atomic_t cpuflags; /* 0x0000 */ __u32 prefix; /* 0x0004 */ - __u8 reserved8[32]; /* 0x0008 */ + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + __u8 reserved10[16]; /* 0x0010 */ +#define PROG_BLOCK_SIE 0x00000001 + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ __u64 cputm; /* 0x0028 */ __u64 ckc; /* 0x0030 */ __u64 epoch; /* 0x0038 */ @@ -90,7 +97,8 @@ struct kvm_s390_sie_block { __u32 scaoh; /* 0x005c */ __u8 reserved60; /* 0x0060 */ __u8 ecb; /* 0x0061 */ - __u8 reserved62[2]; /* 0x0062 */ + __u8 ecb2; /* 0x0062 */ + __u8 reserved63[1]; /* 0x0063 */ __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ __u32 todpr; /* 0x006c */ @@ -130,6 +138,7 @@ struct kvm_vcpu_stat { u32 deliver_program_int; u32 deliver_io_int; u32 exit_wait_state; + u32 instruction_pfmf; u32 instruction_stidp; u32 instruction_spx; u32 instruction_stpx; @@ -166,7 +175,7 @@ struct kvm_s390_ext_info { }; #define PGM_OPERATION 0x01 -#define PGM_PRIVILEGED_OPERATION 0x02 +#define PGM_PRIVILEGED_OP 0x02 #define PGM_EXECUTE 0x03 #define PGM_PROTECTION 0x04 #define PGM_ADDRESSING 0x05 @@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt { atomic_t active; struct kvm_s390_float_interrupt *float_int; int timer_due; /* event indicator for waitqueue below */ - wait_queue_head_t wq; + wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; }; @@ -266,4 +275,5 @@ struct kvm_arch{ }; extern int sie64a(struct kvm_s390_sie_block *, u64 *); +extern char sie_exit; #endif diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6c1801235db9..6e577ba0e5da 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -120,7 +120,6 @@ struct zpci_dev { struct dentry *debugfs_dev; struct dentry *debugfs_perf; - struct dentry *debugfs_debug; }; struct pci_hp_callback_ops { @@ -143,7 +142,6 @@ int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); void zpci_stop_device(struct zpci_dev *); void zpci_free_device(struct zpci_dev *); -int zpci_scan_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 5f0173a31693..1141fb3e7b21 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -14,3 +14,13 @@ /* Per-CPU flags for PMU states */ #define PMU_F_RESERVED 0x1000 #define PMU_F_ENABLED 0x2000 + +#ifdef CONFIG_64BIT + +/* Perf callbacks */ +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 590c3219c634..e1408ddb94f8 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -22,6 +22,9 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq); + static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { typedef struct { char _[n]; } addrtype; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9aefa3c64eb2..75fb726de91f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -296,18 +296,16 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) /* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf0000000UL -#define RCP_FP_BIT 0x08000000UL -#define RCP_PCL_BIT 0x00800000UL -#define RCP_HR_BIT 0x00400000UL -#define RCP_HC_BIT 0x00200000UL -#define RCP_GR_BIT 0x00040000UL -#define RCP_GC_BIT 0x00020000UL -#define RCP_IN_BIT 0x00002000UL /* IPTE notify bit */ - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x00008000UL -#define KVM_UC_BIT 0x00004000UL +#define PGSTE_ACC_BITS 0xf0000000UL +#define PGSTE_FP_BIT 0x08000000UL +#define PGSTE_PCL_BIT 0x00800000UL +#define PGSTE_HR_BIT 0x00400000UL +#define PGSTE_HC_BIT 0x00200000UL +#define PGSTE_GR_BIT 0x00040000UL +#define PGSTE_GC_BIT 0x00020000UL +#define PGSTE_UR_BIT 0x00008000UL +#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */ #else /* CONFIG_64BIT */ @@ -364,18 +362,16 @@ extern unsigned long MODULES_END; | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) /* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf000000000000000UL -#define RCP_FP_BIT 0x0800000000000000UL -#define RCP_PCL_BIT 0x0080000000000000UL -#define RCP_HR_BIT 0x0040000000000000UL -#define RCP_HC_BIT 0x0020000000000000UL -#define RCP_GR_BIT 0x0004000000000000UL -#define RCP_GC_BIT 0x0002000000000000UL -#define RCP_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x0000800000000000UL -#define KVM_UC_BIT 0x0000400000000000UL +#define PGSTE_ACC_BITS 0xf000000000000000UL +#define PGSTE_FP_BIT 0x0800000000000000UL +#define PGSTE_PCL_BIT 0x0080000000000000UL +#define PGSTE_HR_BIT 0x0040000000000000UL +#define PGSTE_HC_BIT 0x0020000000000000UL +#define PGSTE_GR_BIT 0x0004000000000000UL +#define PGSTE_GC_BIT 0x0002000000000000UL +#define PGSTE_UR_BIT 0x0000800000000000UL +#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ #endif /* CONFIG_64BIT */ @@ -615,8 +611,8 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) asm( " lg %0,%2\n" "0: lgr %1,%0\n" - " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ - " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ " csg %0,%1,%2\n" " jl 0b\n" : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) @@ -629,7 +625,7 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE asm( - " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " nihh %1,0xff7f\n" /* clear PCL bit */ " stg %1,%0\n" : "=Q" (ptep[PTRS_PER_PTE]) : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) @@ -662,14 +658,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) else if (bits) page_reset_referenced(address); /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ /* Get host changed & referenced bits from pgste */ - bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; + bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52; /* Transfer page changed & referenced bit to kvm user bits */ - pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ /* Clear relevant host bits in pgste. */ - pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); - pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); /* Copy page access key and fetch protection bit to pgste */ pgste_val(pgste) |= (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; @@ -690,15 +686,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) /* Get referenced bit from storage key */ young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); if (young) - pgste_val(pgste) |= RCP_GR_BIT; + pgste_val(pgste) |= PGSTE_GR_BIT; /* Get host referenced bit from pgste */ - if (pgste_val(pgste) & RCP_HR_BIT) { - pgste_val(pgste) &= ~RCP_HR_BIT; + if (pgste_val(pgste) & PGSTE_HR_BIT) { + pgste_val(pgste) &= ~PGSTE_HR_BIT; young = 1; } /* Transfer referenced bit to kvm user bits and pte */ if (young) { - pgste_val(pgste) |= KVM_UR_BIT; + pgste_val(pgste) |= PGSTE_UR_BIT; pte_val(*ptep) |= _PAGE_SWR; } #endif @@ -720,7 +716,7 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) * The guest C/R information is still in the PGSTE, set real * key C/R to 0. */ - nkey = (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; page_set_storage_key(address, nkey, 0); #endif } @@ -750,6 +746,7 @@ struct gmap { struct mm_struct *mm; unsigned long *table; unsigned long asce; + void *private; struct list_head crst_list; }; @@ -808,8 +805,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - if (pgste_val(pgste) & RCP_IN_BIT) { - pgste_val(pgste) &= ~RCP_IN_BIT; + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; gmap_do_ipte_notify(mm, addr, ptep); } #endif @@ -977,8 +974,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_all(ptep, pgste); - dirty = !!(pgste_val(pgste) & KVM_UC_BIT); - pgste_val(pgste) &= ~KVM_UC_BIT; + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; pgste_set_unlock(ptep, pgste); return dirty; } @@ -997,8 +994,8 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_young(ptep, pgste); - young = !!(pgste_val(pgste) & KVM_UR_BIT); - pgste_val(pgste) &= ~KVM_UR_BIT; + young = !!(pgste_val(pgste) & PGSTE_UR_BIT); + pgste_val(pgste) &= ~PGSTE_UR_BIT; pgste_set_unlock(ptep, pgste); } return young; @@ -1367,10 +1364,11 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) { diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 559512a455da..52b56533c57c 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -24,6 +24,7 @@ struct pt_regs unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; unsigned int int_code; + unsigned int int_parm; unsigned long int_parm_long; }; diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 9ccd1905bdad..6a9a9eb645f5 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -35,6 +35,7 @@ header-y += siginfo.h header-y += signal.h header-y += socket.h header-y += sockios.h +header-y += sclp_ctl.h header-y += stat.h header-y += statfs.h header-y += swab.h diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index 1c6a7f85a581..65dc694725a8 100644 --- a/arch/s390/include/uapi/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -29,6 +29,16 @@ struct chsc_async_area { __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; } __attribute__ ((packed)); +struct chsc_header { + __u16 length; + __u16 code; +} __attribute__ ((packed)); + +struct chsc_sync_area { + struct chsc_header header; + __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; +} __attribute__ ((packed)); + struct chsc_response_struct { __u16 length; __u16 code; @@ -126,5 +136,8 @@ struct chsc_cpd_info { #define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list) #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) +#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area) +#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area) +#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b) #endif diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 38eca3ba40e2..5812a3b2df9e 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -261,6 +261,10 @@ struct dasd_snid_ioctl_data { #define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) /* Resume IO on device */ #define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) +/* Abort all I/O on a device */ +#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240) +/* Allow I/O on a device */ +#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241) /* retrieve API version number */ diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h new file mode 100644 index 000000000000..f2818613ee41 --- /dev/null +++ b/arch/s390/include/uapi/asm/sclp_ctl.h @@ -0,0 +1,24 @@ +/* + * IOCTL interface for SCLP + * + * Copyright IBM Corp. 2012 + * + * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#ifndef _ASM_SCLP_CTL_H +#define _ASM_SCLP_CTL_H + +#include <linux/types.h> + +struct sclp_ctl_sccb { + __u32 cmdw; + __u64 sccb; +} __attribute__((packed)); + +#define SCLP_CTL_IOCTL_MAGIC 0x10 + +#define SCLP_CTL_SCCB \ + _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb) + +#endif diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 2dacb306835c..0c5105fbaaf3 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 7a82f9f70100..2416138ebd3e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #define ASM_OFFSETS_C #include <linux/kbuild.h> +#include <linux/kvm_host.h> #include <linux/sched.h> #include <asm/cputime.h> #include <asm/vdso.h> @@ -47,6 +48,7 @@ int main(void) DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -161,6 +163,8 @@ int main(void) DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); + DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); + DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); #endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4d5e6f8a7978..be7a408be7a1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -429,11 +429,19 @@ io_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID TRACE_IRQS_OFF xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) +io_loop: l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # call do_IRQ + tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -573,10 +581,10 @@ ext_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS TRACE_IRQS_OFF lr %r2,%r11 # pass pointer to pt_regs - l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code - l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) basr %r14,%r1 # call do_extint j io_return diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index aa0ab02e9595..3ddbc26d246e 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -54,7 +54,7 @@ void handle_signal32(unsigned long sig, struct k_sigaction *ka, void do_notify_resume(struct pt_regs *regs); struct ext_code; -void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long); +void do_extint(struct pt_regs *regs); void do_restart(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 4c17eece707e..1c039d0c24c7 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -47,7 +47,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -81,23 +80,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #endif .endm - .macro HANDLE_SIE_INTERCEPT scratch,pgmcheck + .macro HANDLE_SIE_INTERCEPT scratch,reason #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) tmhh %r8,0x0001 # interrupting from user ? - jnz .+42 + jnz .+62 lgr \scratch,%r9 - slg \scratch,BASED(.Lsie_loop) - clg \scratch,BASED(.Lsie_length) - .if \pgmcheck + slg \scratch,BASED(.Lsie_critical) + clg \scratch,BASED(.Lsie_critical_length) + .if \reason==1 # Some program interrupts are suppressing (e.g. protection). # We must also check the instruction after SIE in that case. # do_protection_exception will rewind to rewind_pad - jh .+22 + jh .+42 .else - jhe .+22 + jhe .+42 .endif - lg %r9,BASED(.Lsie_loop) - LPP BASED(.Lhost_id) # set host id + lg %r14,__SF_EMPTY(%r15) # get control block pointer + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + mvi __SF_EMPTY+31(%r15),\reason # set exit reason #endif .endm @@ -450,7 +453,7 @@ ENTRY(io_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_IO_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,2 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT tmhh %r8,0x0001 # interrupting from user? jz io_skip @@ -460,10 +463,18 @@ io_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +io_loop: lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_IRQ + tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -595,7 +606,7 @@ ENTRY(ext_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_EXT_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,3 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT tmhh %r8,0x0001 # interrupting from user ? jz ext_skip @@ -605,13 +616,13 @@ ext_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + lghi %r1,__LC_EXT_PARAMS2 + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r1,4096 lgr %r2,%r11 # pass pointer to pt_regs - llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code - llgf %r4,__LC_EXT_PARAMS # get external parameter - lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter brasl %r14,do_extint j io_return @@ -643,7 +654,7 @@ ENTRY(mcck_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_MCK_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,4 tm __LC_MCCK_CODE,0x80 # system damage? jo mcck_panic # yes -> rest of mcck code invalid lghi %r14,__LC_CPU_TIMER_SAVE_AREA @@ -937,56 +948,50 @@ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area - xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 + xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason lmg %r0,%r13,0(%r3) # load guest gprs 0-13 -# some program checks are suppressing. C code (e.g. do_protection_exception) -# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other -# instructions in the sie_loop should not cause program interrupts. So -# lets use a nop (47 00 00 00) as a landing pad. -# See also HANDLE_SIE_INTERCEPT -rewind_pad: - nop 0 -sie_loop: - lg %r14,__LC_THREAD_INFO # pointer thread_info struct - tm __TI_flags+7(%r14),_TIF_EXIT_SIE - jnz sie_exit lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 jz sie_gmap lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce sie_gmap: lg %r14,__SF_EMPTY(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),1 # last exit... + jnz sie_done LPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) sie_done: LPP __SF_EMPTY+16(%r15) # set host id - lg %r14,__LC_THREAD_INFO # pointer thread_info struct -sie_exit: + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce +# some program checks are suppressing. C code (e.g. do_protection_exception) +# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other +# instructions beween sie64a and sie_done should not cause program +# interrupts. So lets use a nop (47 00 00 00) as a landing pad. +# See also HANDLE_SIE_INTERCEPT +rewind_pad: + nop 0 + .globl sie_exit +sie_exit: lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lghi %r2,0 + lg %r2,__SF_EMPTY+24(%r15) # return exit reason code br %r14 sie_fault: - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - lg %r14,__LC_THREAD_INFO # pointer thread_info struct - lg %r14,__SF_EMPTY+8(%r15) # load guest register save area - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 - lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lghi %r2,-EFAULT - br %r14 + lghi %r14,-EFAULT + stg %r14,__SF_EMPTY+24(%r15) # set exit reason code + j sie_exit .align 8 -.Lsie_loop: - .quad sie_loop -.Lsie_length: - .quad sie_done - sie_loop -.Lhost_id: - .quad 0 +.Lsie_critical: + .quad sie_gmap +.Lsie_critical_length: + .quad sie_done - sie_gmap EX_TABLE(rewind_pad,sie_fault) - EX_TABLE(sie_loop,sie_fault) + EX_TABLE(sie_exit,sie_fault) #endif .section .rodata, "a" diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index dd3c1994b8bd..54b0995514e8 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -234,9 +234,9 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, - unsigned int param32, unsigned long param64) +void __irq_entry do_extint(struct pt_regs *regs) { + struct ext_code ext_code; struct pt_regs *old_regs; struct ext_int_info *p; int index; @@ -248,6 +248,7 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, clock_comparator_work(); } kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL); + ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; @@ -255,7 +256,8 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, rcu_read_lock(); list_for_each_entry_rcu(p, &ext_int_hash[index], entry) if (likely(p->code == ext_code.code)) - p->handler(ext_code, param32, param64); + p->handler(ext_code, regs->int_parm, + regs->int_parm_long); rcu_read_unlock(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index f58f37f66824..a6fc037671b1 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/perf_event.h> +#include <linux/kvm_host.h> #include <linux/percpu.h> #include <linux/export.h> #include <asm/irq.h> @@ -39,6 +40,57 @@ int perf_num_counters(void) } EXPORT_SYMBOL(perf_num_counters); +static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) +{ + struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; + + if (!stack) + return NULL; + + return (struct kvm_s390_sie_block *) stack->empty1[0]; +} + +static bool is_in_guest(struct pt_regs *regs) +{ + unsigned long ip = instruction_pointer(regs); + + if (user_mode(regs)) + return false; + + return ip == (unsigned long) &sie_exit; +} + +static unsigned long guest_is_user_mode(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; +} + +static unsigned long instruction_pointer_guest(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + return is_in_guest(regs) ? instruction_pointer_guest(regs) + : instruction_pointer(regs); +} + +static unsigned long perf_misc_guest_flags(struct pt_regs *regs) +{ + return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + if (is_in_guest(regs)) + return perf_misc_guest_flags(regs); + + return user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; +} + void perf_event_print_debug(void) { struct cpumf_ctr_info cf_info; diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 9bdbcef1da9e..3bac589844a7 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount); #endif #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) EXPORT_SYMBOL(sie64a); +EXPORT_SYMBOL(sie_exit); #endif EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0a49095104c9..497451ec5e26 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -719,10 +719,6 @@ static void reserve_oldmem(void) } create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); - if (OLDMEM_BASE + OLDMEM_SIZE == real_size) - saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; - else - saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 4f977d0d25c2..15a016c10563 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -49,7 +49,6 @@ enum { ec_schedule = 0, - ec_call_function, ec_call_function_single, ec_stop_cpu, }; @@ -438,8 +437,6 @@ static void smp_handle_ext_call(void) smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) scheduler_ipi(); - if (test_bit(ec_call_function, &bits)) - generic_smp_call_function_interrupt(); if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); } @@ -456,7 +453,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 8fe9d65a4585..40b4c6470f88 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1c01a9912989..3074475c8ae0 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -132,6 +132,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + trace_kvm_s390_handle_diag(vcpu, code); switch (code) { case 0x10: diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b7d1b2edeeb3..5ee56e5acc23 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -22,87 +22,6 @@ #include "trace.h" #include "trace-s390.h" -static int handle_lctlg(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 useraddr; - int reg, rc; - - vcpu->stat.instruction_lctlg++; - - useraddr = kvm_s390_get_base_disp_rsy(vcpu); - - if (useraddr & 7) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - reg = reg1; - - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, - useraddr); - trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); - - do { - rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], - (u64 __user *) useraddr); - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - useraddr += 8; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static int handle_lctl(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 useraddr; - u32 val = 0; - int reg, rc; - - vcpu->stat.instruction_lctl++; - - useraddr = kvm_s390_get_base_disp_rs(vcpu); - - if (useraddr & 3) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, - useraddr); - trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); - - reg = reg1; - do { - rc = get_guest(vcpu, val, (u32 __user *) useraddr); - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; - vcpu->arch.sie_block->gcr[reg] |= val; - useraddr += 4; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static const intercept_handler_t eb_handlers[256] = { - [0x2f] = handle_lctlg, - [0x8a] = kvm_s390_handle_priv_eb, -}; - -static int handle_eb(struct kvm_vcpu *vcpu) -{ - intercept_handler_t handler; - - handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; -} static const intercept_handler_t instruction_handlers[256] = { [0x01] = kvm_s390_handle_01, @@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = { [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, - [0xb7] = handle_lctl, + [0xb7] = kvm_s390_handle_lctl, [0xb9] = kvm_s390_handle_b9, [0xe5] = kvm_s390_handle_e5, - [0xeb] = handle_eb, + [0xeb] = kvm_s390_handle_eb, }; static int handle_noop(struct kvm_vcpu *vcpu) @@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu) static int handle_validity(struct kvm_vcpu *vcpu) { - unsigned long vmaddr; int viwhy = vcpu->arch.sie_block->ipb >> 16; - int rc; vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - if (viwhy == 0x37) { - vmaddr = gmap_fault(vcpu->arch.sie_block->prefix, - vcpu->arch.gmap); - if (IS_ERR_VALUE(vmaddr)) { - rc = -EOPNOTSUPP; - goto out; - } - rc = fault_in_pages_writeable((char __user *) vmaddr, - PAGE_SIZE); - if (rc) { - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - goto out; - } - vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE, - vcpu->arch.gmap); - if (IS_ERR_VALUE(vmaddr)) { - rc = -EOPNOTSUPP; - goto out; - } - rc = fault_in_pages_writeable((char __user *) vmaddr, - PAGE_SIZE); - if (rc) { - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - goto out; - } - } else - rc = -EOPNOTSUPP; - -out: - if (rc) - VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", - viwhy); - return rc; + WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); + return -EOPNOTSUPP; } static int handle_instruction(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5c948177529e..7f35cb33e510 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) no_timer: spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); - add_wait_queue(&vcpu->arch.local_int.wq, &wait); + add_wait_queue(&vcpu->wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && list_empty(&vcpu->arch.local_int.float_int->list) && (!vcpu->arch.local_int.timer_due) && @@ -452,7 +452,7 @@ no_timer: } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->arch.local_int.wq, &wait); + remove_wait_queue(&vcpu->wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); @@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm) spin_lock(&vcpu->arch.local_int.lock); vcpu->arch.local_int.timer_due = 1; - if (waitqueue_active(&vcpu->arch.local_int.wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock(&vcpu->arch.local_int.lock); } @@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) spin_lock_bh(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); - BUG_ON(waitqueue_active(&li->wq)); + BUG_ON(waitqueue_active(li->wq)); spin_unlock_bh(&li->lock); return 0; } @@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm, li = fi->local_int[sigcpu]; spin_lock_bh(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (inti->type == KVM_S390_SIGP_STOP) li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock_bh(&li->lock); mutex_unlock(&vcpu->kvm->lock); return 0; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c1c7c683fa26..ba694d2ba51e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -59,6 +59,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, { "instruction_stidp", VCPU_STAT(instruction_stidp) }, { "instruction_spx", VCPU_STAT(instruction_spx) }, { "instruction_stpx", VCPU_STAT(instruction_stpx) }, @@ -84,6 +85,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { }; static unsigned long long *facilities; +static struct gmap_notifier gmap_notifier; /* Section: not file related */ int kvm_arch_hardware_enable(void *garbage) @@ -96,13 +98,18 @@ void kvm_arch_hardware_disable(void *garbage) { } +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); + int kvm_arch_hardware_setup(void) { + gmap_notifier.notifier_call = kvm_gmap_notifier; + gmap_register_ipte_notifier(&gmap_notifier); return 0; } void kvm_arch_hardware_unsetup(void) { + gmap_unregister_ipte_notifier(&gmap_notifier); } void kvm_arch_check_processor_compat(void *rtn) @@ -239,6 +246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.gmap = gmap_alloc(current->mm); if (!kvm->arch.gmap) goto out_nogmap; + kvm->arch.gmap->private = kvm; } kvm->arch.css_support = 0; @@ -270,7 +278,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -309,6 +317,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.gmap = gmap_alloc(current->mm); if (!vcpu->arch.gmap) return -ENOMEM; + vcpu->arch.gmap->private = vcpu->kvm; return 0; } @@ -373,8 +382,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM | - CPUSTAT_STOPPED); + CPUSTAT_STOPPED | + CPUSTAT_GED); vcpu->arch.sie_block->ecb = 6; + vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) facilities; hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); @@ -397,7 +408,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = -ENOMEM; - vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) goto out; @@ -427,7 +438,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.float_int = &kvm->arch.float_int; spin_lock(&kvm->arch.float_int.lock); kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; - init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; spin_unlock(&kvm->arch.float_int.lock); @@ -442,7 +453,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); out: return ERR_PTR(rc); } @@ -454,6 +465,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return 0; } +void s390_vcpu_block(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +void s390_vcpu_unblock(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +/* + * Kick a guest cpu out of SIE and wait until SIE is not running. + * If the CPU is not running (e.g. waiting as idle) the function will + * return immediately. */ +void exit_sie(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); + while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) + cpu_relax(); +} + +/* Kick a guest cpu out of SIE and prevent SIE-reentry */ +void exit_sie_sync(struct kvm_vcpu *vcpu) +{ + s390_vcpu_block(vcpu); + exit_sie(vcpu); +} + +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) +{ + int i; + struct kvm *kvm = gmap->private; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + /* match against both prefix pages */ + if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) { + VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); + exit_sie_sync(vcpu); + } + } +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ @@ -606,6 +661,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } +static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) +{ + /* + * We use MMU_RELOAD just to re-arm the ipte notifier for the + * guest prefix page. gmap_ipte_notify will wait on the ptl lock. + * This ensures that the ipte instruction for this request has + * already finished. We might race against a second unmapper that + * wants to set the blocking bit. Lets just retry the request loop. + */ + while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { + int rc; + rc = gmap_ipte_notify(vcpu->arch.gmap, + vcpu->arch.sie_block->prefix, + PAGE_SIZE * 2); + if (rc) + return rc; + s390_vcpu_unblock(vcpu); + } + return 0; +} + static int __vcpu_run(struct kvm_vcpu *vcpu) { int rc; @@ -621,6 +697,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) if (!kvm_is_ucontrol(vcpu->kvm)) kvm_s390_deliver_pending_interrupts(vcpu); + rc = kvm_s390_handle_requests(vcpu); + if (rc) + return rc; + vcpu->arch.sie_block->icptcode = 0; preempt_disable(); kvm_guest_enter(); @@ -630,7 +710,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) trace_kvm_s390_sie_enter(vcpu, atomic_read(&vcpu->arch.sie_block->cpuflags)); rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); - if (rc) { + if (rc > 0) + rc = 0; + if (rc < 0) { if (kvm_is_ucontrol(vcpu->kvm)) { rc = SIE_INTERCEPT_UCONTROL; } else { @@ -1046,7 +1128,7 @@ static int __init kvm_s390_init(void) return -ENOMEM; } memcpy(facilities, S390_lowcore.stfle_fac_list, 16); - facilities[0] &= 0xff00fff3f47c0000ULL; + facilities[0] &= 0xff82fff3f47c0000ULL; facilities[1] &= 0x001c000000000000ULL; return 0; } @@ -1059,3 +1141,12 @@ static void __exit kvm_s390_exit(void) module_init(kvm_s390_init); module_exit(kvm_s390_exit); + +/* + * Enable autoloading of the kvm module. + * Note that we add the module alias here instead of virt/kvm/kvm_main.c + * since x86 takes a different approach. + */ +#include <linux/miscdevice.h> +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index efc14f687265..028ca9fd2158 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -63,6 +63,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) { vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) @@ -85,6 +86,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) +{ + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; +} + static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; @@ -125,7 +132,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); -int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); @@ -133,6 +141,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); +void s390_vcpu_block(struct kvm_vcpu *vcpu); +void s390_vcpu_unblock(struct kvm_vcpu *vcpu); +void exit_sie(struct kvm_vcpu *vcpu); +void exit_sie_sync(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6bbd7b5a0bbe..0da3e6eb6be6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1,7 +1,7 @@ /* * handling privileged instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -20,6 +20,9 @@ #include <asm/debug.h> #include <asm/ebcdic.h> #include <asm/sysinfo.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> #include "gaccess.h" @@ -34,6 +37,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_spx++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ @@ -65,6 +71,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stpx++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ @@ -89,6 +98,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stap++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + useraddr = kvm_s390_get_base_disp_s(vcpu); if (useraddr & 1) @@ -105,7 +117,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) static int handle_skey(struct kvm_vcpu *vcpu) { vcpu->stat.instruction_storage_key++; - vcpu->arch.sie_block->gpsw.addr -= 4; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } @@ -129,9 +146,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu) * Store the two-word I/O interruption code into the * provided area. */ - put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); - put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); - put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); + if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr) + || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2)) + || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } else { /* * Store the three-word I/O interruption code into @@ -182,6 +200,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + if (vcpu->kvm->arch.css_support) { /* * Most I/O instructions will be handled by userspace. @@ -210,8 +231,12 @@ static int handle_stfl(struct kvm_vcpu *vcpu) int rc; vcpu->stat.instruction_stfl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + /* only pass the facility bits, which we can handle */ - facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; + facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3; rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); @@ -255,8 +280,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) u64 addr; if (gpsw->mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -278,6 +303,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) psw_t new_psw; u64 addr; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -296,6 +324,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stidp++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); if (operand2 & 7) @@ -351,16 +382,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); - operand2 = kvm_s390_get_base_disp_s(vcpu); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (fc > 3) { + vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ + return 0; + } - if (operand2 & 0xfff && fc > 0) + if (vcpu->run->s.regs.gprs[0] & 0x0fffff00 + || vcpu->run->s.regs.gprs[1] & 0xffff0000) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - switch (fc) { - case 0: + if (fc == 0) { vcpu->run->s.regs.gprs[0] = 3 << 28; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ return 0; + } + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + if (operand2 & 0xfff) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { case 1: /* same handling for 1 and 2 */ case 2: mem = get_zeroed_page(GFP_KERNEL); @@ -377,8 +422,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu) goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; - default: - goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { @@ -432,20 +475,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) intercept_handler_t handler; /* - * a lot of B2 instructions are priviledged. We first check for - * the privileged ones, that we can handle in the kernel. If the - * kernel can handle this instruction, we check for the problem - * state bit and (a) handle the instruction or (b) send a code 2 - * program check. - * Anything else goes to userspace.*/ + * A lot of B2 instructions are priviledged. Here we check for + * the privileged ones, that we can handle in the kernel. + * Anything else goes to userspace. + */ handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); - else - return handler(vcpu); - } + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; } @@ -453,8 +490,7 @@ static int handle_epsw(struct kvm_vcpu *vcpu) { int reg1, reg2; - reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24; - reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; + kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* This basically extracts the mask half of the psw. */ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; @@ -467,9 +503,88 @@ static int handle_epsw(struct kvm_vcpu *vcpu) return 0; } +#define PFMF_RESERVED 0xfffc0101UL +#define PFMF_SK 0x00020000UL +#define PFMF_CF 0x00010000UL +#define PFMF_UI 0x00008000UL +#define PFMF_FSC 0x00007000UL +#define PFMF_NQ 0x00000800UL +#define PFMF_MR 0x00000400UL +#define PFMF_MC 0x00000200UL +#define PFMF_KEY 0x000000feUL + +static int handle_pfmf(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + unsigned long start, end; + + vcpu->stat.instruction_pfmf++; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + if (!MACHINE_HAS_PFMF) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Only provide non-quiescing support if the host supports it */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && + S390_lowcore.stfl_fac_list & 0x00020000) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* No support for conditional-SSKE */ + if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + case 0x00000000: + end = (start + (1UL << 12)) & ~((1UL << 12) - 1); + break; + case 0x00001000: + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + break; + /* We dont support EDAT2 + case 0x00002000: + end = (start + (1UL << 31)) & ~((1UL << 31) - 1); + break;*/ + default: + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + } + while (start < end) { + unsigned long useraddr; + + useraddr = gmap_translate(start, vcpu->arch.gmap); + if (IS_ERR((void *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (clear_user((void __user *)useraddr, PAGE_SIZE)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { + if (set_guest_storage_key(current->mm, useraddr, + vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, + vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + start += PAGE_SIZE; + } + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) + vcpu->run->s.regs.gprs[reg2] = end; + return 0; +} + static const intercept_handler_t b9_handlers[256] = { [0x8d] = handle_epsw, [0x9c] = handle_io_inst, + [0xaf] = handle_pfmf, }; int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) @@ -478,29 +593,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) /* This is handled just as for the B2 instructions. */ handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if ((handler != handle_epsw) && - (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); - else - return handler(vcpu); - } + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; } +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 useraddr; + u32 val = 0; + int reg, rc; + + vcpu->stat.instruction_lctl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + useraddr = kvm_s390_get_base_disp_rs(vcpu); + + if (useraddr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, + useraddr); + trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); + + reg = reg1; + do { + rc = get_guest(vcpu, val, (u32 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + useraddr += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static int handle_lctlg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 useraddr; + int reg, rc; + + vcpu->stat.instruction_lctlg++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + useraddr = kvm_s390_get_base_disp_rsy(vcpu); + + if (useraddr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, + useraddr); + trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); + + do { + rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], + (u64 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + useraddr += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + static const intercept_handler_t eb_handlers[256] = { + [0x2f] = handle_lctlg, [0x8a] = handle_io_inst, }; -int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu) +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) { intercept_handler_t handler; - /* All eb instructions that end up here are privileged. */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; if (handler) return handler(vcpu); @@ -515,6 +697,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu) vcpu->stat.instruction_tprot++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); /* we only handle the Linux memory detection case: @@ -560,8 +745,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) u32 value; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) return kvm_s390_inject_program_int(vcpu, diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 1c48ab2845e0..bec398c57acf 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); @@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); @@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); out: spin_unlock_bh(&li->lock); @@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); @@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) /* sigp in userspace can exit */ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); order_code = kvm_s390_get_base_disp_rs(vcpu); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 89ebae4008f2..ce36ea80e4f9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -135,30 +135,17 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ - reservedpages = 0; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >>10, - initsize >> 10); + mem_init_print_info(NULL); printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&_stext, PFN_ALIGN((unsigned long)&_eshared) - 1); @@ -166,13 +153,14 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index a938b548f07e..a8154a1a2c94 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -689,7 +689,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) entry = *ptep; if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= RCP_IN_BIT; + pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); start += PAGE_SIZE; len -= PAGE_SIZE; @@ -771,6 +771,54 @@ static inline void page_table_free_pgste(unsigned long *table) __free_page(page); } +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq) +{ + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(current->mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + unsigned long address, bits; + unsigned char skey; + + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Set storage key ACC and FP */ + page_set_storage_key(address, + (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)), + !nq); + + /* Merge host changed & referenced into pgste */ + pgste_val(new) |= bits << 52; + /* Transfer skey changed & referenced bit to kvm user bits */ + pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ + } + /* changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & + (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(*ptep, ptl); + up_read(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL(set_guest_storage_key); + #else /* CONFIG_PGSTE */ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, @@ -1117,7 +1165,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, } } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { struct list_head *lh = (struct list_head *) pgtable; @@ -1131,7 +1180,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) mm->pmd_huge_pte = pgtable; } -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { struct list_head *lh; pgtable_t pgtable; diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h index 1912f3bb190c..0022e1ebfbde 100644 --- a/arch/s390/oprofile/hwsampler.h +++ b/arch/s390/oprofile/hwsampler.h @@ -81,8 +81,8 @@ struct hws_data_entry { unsigned int:16; unsigned int prim_asn:16; /* primary ASN */ unsigned long long ia; /* Instruction Address */ - unsigned long long lpp; /* Logical-Partition Program Param. */ - unsigned long long vpp; /* Virtual-Machine Program Param. */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ }; struct hws_trailer_entry { diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index f1e5be85d592..e2956ad39a4f 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -82,10 +82,13 @@ struct intr_bucket { static struct intr_bucket *bucket; -/* Adapter local summary indicator */ -static u8 *zpci_irq_si; +/* Adapter interrupt definitions */ +static void zpci_irq_handler(struct airq_struct *airq); -static atomic_t irq_retries = ATOMIC_INIT(0); +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, +}; /* I/O Map */ static DEFINE_SPINLOCK(zpci_iomap_lock); @@ -404,7 +407,7 @@ static struct pci_ops pci_root_ops = { /* store the last handled bit to implement fair scheduling of devices */ static DEFINE_PER_CPU(unsigned long, next_sbit); -static void zpci_irq_handler(void *dont, void *need) +static void zpci_irq_handler(struct airq_struct *airq) { unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); int rescan = 0, max = aisb_max; @@ -452,7 +455,6 @@ scan: max = aisb_max; sbit = find_first_bit_left(bucket->aisb, max); if (sbit != max) { - atomic_inc(&irq_retries); rescan++; goto scan; } @@ -565,7 +567,21 @@ static void zpci_map_resources(struct zpci_dev *zdev) pr_debug("BAR%i: -> start: %Lx end: %Lx\n", i, pdev->resource[i].start, pdev->resource[i].end); } -}; +} + +static void zpci_unmap_resources(struct zpci_dev *zdev) +{ + struct pci_dev *pdev = zdev->pdev; + resource_size_t len; + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pci_iounmap(pdev, (void *) pdev->resource[i].start); + } +} struct zpci_dev *zpci_alloc_device(void) { @@ -701,25 +717,20 @@ static int __init zpci_irq_init(void) goto out_alloc; } - isc_register(PCI_ISC); - zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC); - if (IS_ERR(zpci_irq_si)) { - rc = PTR_ERR(zpci_irq_si); - zpci_irq_si = NULL; + rc = register_adapter_interrupt(&zpci_airq); + if (rc) goto out_ai; - } + /* Set summary to 1 to be called every time for the ISC. */ + *zpci_airq.lsi_ptr = 1; for_each_online_cpu(cpu) per_cpu(next_sbit, cpu) = 0; spin_lock_init(&bucket->lock); - /* set summary to 1 to be called every time for the ISC */ - *zpci_irq_si = 1; set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; out_ai: - isc_unregister(PCI_ISC); free_page((unsigned long) bucket->alloc); out_alloc: free_page((unsigned long) bucket->aisb); @@ -732,21 +743,10 @@ static void zpci_irq_exit(void) { free_page((unsigned long) bucket->alloc); free_page((unsigned long) bucket->aisb); - s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC); - isc_unregister(PCI_ISC); + unregister_adapter_interrupt(&zpci_airq); kfree(bucket); } -void zpci_debug_info(struct zpci_dev *zdev, struct seq_file *m) -{ - if (!zdev) - return; - - seq_printf(m, "global irq retries: %u\n", atomic_read(&irq_retries)); - seq_printf(m, "aibv[0]:%016lx aibv[1]:%016lx aisb:%016lx\n", - get_imap(0)->aibv, get_imap(1)->aibv, *bucket->aisb); -} - static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, unsigned long flags, int domain) { @@ -810,6 +810,16 @@ int pcibios_add_device(struct pci_dev *pdev) return 0; } +void pcibios_release_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zpci_unmap_resources(zdev); + zpci_fmb_disable_device(zdev); + zpci_debug_exit_device(zdev); + zdev->pdev = NULL; +} + static int zpci_scan_bus(struct zpci_dev *zdev) { struct resource *res; @@ -950,25 +960,6 @@ void zpci_stop_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_stop_device); -int zpci_scan_device(struct zpci_dev *zdev) -{ - zdev->pdev = pci_scan_single_device(zdev->bus, ZPCI_DEVFN); - if (!zdev->pdev) { - pr_err("pci_scan_single_device failed for fid: 0x%x\n", - zdev->fid); - goto out; - } - - pci_bus_add_devices(zdev->bus); - - return 0; -out: - zpci_dma_exit_device(zdev); - clp_disable_fh(zdev); - return -EIO; -} -EXPORT_SYMBOL_GPL(zpci_scan_device); - static inline int barsize(u8 size) { return (size) ? (1 << size) >> 10 : 0; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index bd34359d1546..2e9539625d93 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -236,7 +236,6 @@ int clp_disable_fh(struct zpci_dev *zdev) if (!zdev_enabled(zdev)) return 0; - dev_info(&zdev->pdev->dev, "disabling fn handle: 0x%x\n", fh); rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); if (!rc) /* Success -> store disabled handle in zdev */ diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 771b82359af4..75c69b402e05 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -115,27 +115,6 @@ static const struct file_operations debugfs_pci_perf_fops = { .release = single_release, }; -static int pci_debug_show(struct seq_file *m, void *v) -{ - struct zpci_dev *zdev = m->private; - - zpci_debug_info(zdev, m); - return 0; -} - -static int pci_debug_seq_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, pci_debug_show, - file_inode(filp)->i_private); -} - -static const struct file_operations debugfs_pci_debug_fops = { - .open = pci_debug_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - void zpci_debug_init_device(struct zpci_dev *zdev) { zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), @@ -149,19 +128,11 @@ void zpci_debug_init_device(struct zpci_dev *zdev) &debugfs_pci_perf_fops); if (IS_ERR(zdev->debugfs_perf)) zdev->debugfs_perf = NULL; - - zdev->debugfs_debug = debugfs_create_file("debug", - S_IFREG | S_IRUGO | S_IWUSR, - zdev->debugfs_dev, zdev, - &debugfs_pci_debug_fops); - if (IS_ERR(zdev->debugfs_debug)) - zdev->debugfs_debug = NULL; } void zpci_debug_exit_device(struct zpci_dev *zdev) { debugfs_remove(zdev->debugfs_perf); - debugfs_remove(zdev->debugfs_debug); debugfs_remove(zdev->debugfs_dev); } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f8e69d5bc0a9..a2343c1f6e04 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -263,7 +263,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); unsigned long nr_pages, iommu_page_index; unsigned long pa = page_to_phys(page) + offset; int flags = ZPCI_PTE_VALID; @@ -304,7 +304,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); unsigned long iommu_page_index; int npages; @@ -323,7 +323,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); struct page *page; unsigned long pa; dma_addr_t map; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index a42cce69d0a0..e99a2557f186 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -15,40 +15,36 @@ static ssize_t show_fid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%08x\n", zdev->fid); - return strlen(buf); + return sprintf(buf, "0x%08x\n", zdev->fid); } static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL); static ssize_t show_fh(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%08x\n", zdev->fh); - return strlen(buf); + return sprintf(buf, "0x%08x\n", zdev->fh); } static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL); static ssize_t show_pchid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%04x\n", zdev->pchid); - return strlen(buf); + return sprintf(buf, "0x%04x\n", zdev->pchid); } static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL); static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%02x\n", zdev->pfgid); - return strlen(buf); + return sprintf(buf, "0x%02x\n", zdev->pfgid); } static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index cebaff8069a1..e1c7bb999b06 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -3,3 +3,4 @@ header-y += generic-y += clkdev.h generic-y += trace_clock.h +generic-y += xor.h diff --git a/arch/score/include/asm/dma-mapping.h b/arch/score/include/asm/dma-mapping.h deleted file mode 100644 index f9c0193c7a53..000000000000 --- a/arch/score/include/asm/dma-mapping.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SCORE_DMA_MAPPING_H -#define _ASM_SCORE_DMA_MAPPING_H - -#include <asm-generic/dma-mapping-broken.h> - -#endif /* _ASM_SCORE_DMA_MAPPING_H */ diff --git a/arch/score/kernel/vmlinux.lds.S b/arch/score/kernel/vmlinux.lds.S index eebcbaa4e978..7274b5c4287e 100644 --- a/arch/score/kernel/vmlinux.lds.S +++ b/arch/score/kernel/vmlinux.lds.S @@ -49,6 +49,7 @@ SECTIONS } . = ALIGN(16); + _sdata = .; /* Start of data section */ RODATA EXCEPTION_TABLE(16) diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 47b600e4b2c5..6b18fb0189ae 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -172,10 +172,10 @@ out_of_memory: down_read(&mm->mmap_sem); goto survive; } - printk("VM: killing process %s\n", tsk->comm); - if (user_mode(regs)) - do_group_exit(SIGKILL); - goto no_context; + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; do_sigbus: up_read(&mm->mmap_sem); diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 0940682ab38b..9fbce49ad3bd 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -75,40 +75,19 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - unsigned long tmp, ram = 0; - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_page(); /* Setup zeroed pages. */ - reservedpages = 0; - - for (tmp = 0; tmp < max_low_pfn; tmp++) - if (page_is_ram(tmp)) { - ram++; - if (PageReserved(pfn_to_page(tmp))) - reservedpages++; - } - - num_physpages = ram; - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - ram << (PAGE_SHIFT-10), codesize >> 10, - reservedpages << (PAGE_SHIFT-10), datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); + + mem_init_print_info(NULL); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/sh/boards/board-espt.c b/arch/sh/boards/board-espt.c index d71a0bcf8145..4d94dff9015c 100644 --- a/arch/sh/boards/board-espt.c +++ b/arch/sh/boards/board-espt.c @@ -85,7 +85,7 @@ static struct sh_eth_plat_data sh7763_eth_pdata = { }; static struct platform_device espt_eth_device = { - .name = "sh-eth", + .name = "sh7763-gether", .resource = sh_eth_resources, .num_resources = ARRAY_SIZE(sh_eth_resources), .dev = { diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c index 41f86702eb9f..4f114d1cd019 100644 --- a/arch/sh/boards/board-sh7757lcr.c +++ b/arch/sh/boards/board-sh7757lcr.c @@ -82,7 +82,7 @@ static struct sh_eth_plat_data sh7757_eth0_pdata = { }; static struct platform_device sh7757_eth0_device = { - .name = "sh-eth", + .name = "sh7757-ether", .resource = sh_eth0_resources, .id = 0, .num_resources = ARRAY_SIZE(sh_eth0_resources), @@ -111,7 +111,7 @@ static struct sh_eth_plat_data sh7757_eth1_pdata = { }; static struct platform_device sh7757_eth1_device = { - .name = "sh-eth", + .name = "sh7757-ether", .resource = sh_eth1_resources, .id = 1, .num_resources = ARRAY_SIZE(sh_eth1_resources), @@ -157,7 +157,7 @@ static struct sh_eth_plat_data sh7757_eth_giga0_pdata = { }; static struct platform_device sh7757_eth_giga0_device = { - .name = "sh-eth", + .name = "sh7757-gether", .resource = sh_eth_giga0_resources, .id = 2, .num_resources = ARRAY_SIZE(sh_eth_giga0_resources), @@ -192,7 +192,7 @@ static struct sh_eth_plat_data sh7757_eth_giga1_pdata = { }; static struct platform_device sh7757_eth_giga1_device = { - .name = "sh-eth", + .name = "sh7757-gether", .resource = sh_eth_giga1_resources, .id = 3, .num_resources = ARRAY_SIZE(sh_eth_giga1_resources), diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 764530c85aa9..61fade0ffa96 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -165,8 +165,8 @@ static struct sh_eth_plat_data sh_eth_plat = { }; static struct platform_device sh_eth_device = { - .name = "sh-eth", - .id = 0, + .name = "sh7724-ether", + .id = 0, .dev = { .platform_data = &sh_eth_plat, }, diff --git a/arch/sh/boards/mach-se/770x/setup.c b/arch/sh/boards/mach-se/770x/setup.c index 9759d6ba7ffb..658326f44df8 100644 --- a/arch/sh/boards/mach-se/770x/setup.c +++ b/arch/sh/boards/mach-se/770x/setup.c @@ -128,8 +128,8 @@ static struct resource sh_eth0_resources[] = { }; static struct platform_device sh_eth0_device = { - .name = "sh-eth", - .id = 0, + .name = "sh771x-ether", + .id = 0, .dev = { .platform_data = PHY_ID, }, @@ -151,8 +151,8 @@ static struct resource sh_eth1_resources[] = { }; static struct platform_device sh_eth1_device = { - .name = "sh-eth", - .id = 1, + .name = "sh771x-ether", + .id = 1, .dev = { .platform_data = PHY_ID, }, diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 4010e63e82d8..b70180ef3e29 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -380,8 +380,8 @@ static struct sh_eth_plat_data sh_eth_plat = { }; static struct platform_device sh_eth_device = { - .name = "sh-eth", - .id = 0, + .name = "sh7724-ether", + .id = 0, .dev = { .platform_data = &sh_eth_plat, }, diff --git a/arch/sh/boards/mach-sh7763rdp/setup.c b/arch/sh/boards/mach-sh7763rdp/setup.c index b7c75298dfb5..50ba481fa240 100644 --- a/arch/sh/boards/mach-sh7763rdp/setup.c +++ b/arch/sh/boards/mach-sh7763rdp/setup.c @@ -93,7 +93,7 @@ static struct sh_eth_plat_data sh7763_eth_pdata = { }; static struct platform_device sh7763rdp_eth_device = { - .name = "sh-eth", + .name = "sh7763-gether", .resource = sh_eth_resources, .num_resources = ARRAY_SIZE(sh_eth_resources), .dev = { diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c index e0b740c831c7..bb11e1925178 100644 --- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c +++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c @@ -124,8 +124,8 @@ static struct resource eth_resources[] = { }; static struct platform_device eth_device = { - .name = "sh-eth", - .id = -1, + .name = "sh7619-ether", + .id = -1, .dev = { .platform_data = (void *)1, }, diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c index 5f30f805d2f2..0128af3399b7 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c @@ -329,7 +329,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[HWBLK_IIC0]), CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[HWBLK_IIC1]), CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[HWBLK_MMC]), - CLKDEV_DEV_ID("sh-eth.0", &mstp_clks[HWBLK_ETHER]), + CLKDEV_DEV_ID("sh7724-ether.0", &mstp_clks[HWBLK_ETHER]), CLKDEV_CON_ID("atapi0", &mstp_clks[HWBLK_ATAPI]), CLKDEV_CON_ID("tpu0", &mstp_clks[HWBLK_TPU]), CLKDEV_CON_ID("irda0", &mstp_clks[HWBLK_IRDA]), diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7734.c b/arch/sh/kernel/cpu/sh4a/clock-sh7734.c index deb683abacf0..ed9501519ab3 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7734.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7734.c @@ -238,7 +238,7 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("adc0", &mstp_clks[MSTP313]), CLKDEV_CON_ID("mtu0", &mstp_clks[MSTP312]), CLKDEV_CON_ID("iebus0", &mstp_clks[MSTP304]), - CLKDEV_DEV_ID("sh-eth.0", &mstp_clks[MSTP114]), + CLKDEV_DEV_ID("sh7734-gether.0", &mstp_clks[MSTP114]), CLKDEV_CON_ID("rtc0", &mstp_clks[MSTP303]), CLKDEV_CON_ID("hif0", &mstp_clks[MSTP302]), CLKDEV_CON_ID("stif0", &mstp_clks[MSTP301]), diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 81f999a672f6..668c81631c08 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -117,11 +117,7 @@ void user_enable_single_step(struct task_struct *child) set_tsk_thread_flag(child, TIF_SINGLESTEP); - if (ptrace_get_breakpoints(child) < 0) - return; - set_single_step(child, pc); - ptrace_put_breakpoints(child); } void user_disable_single_step(struct task_struct *child) diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 20f9ead650d3..33890fd267cb 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -407,30 +407,16 @@ unsigned int mem_init_done = 0; void __init mem_init(void) { - int codesize, datasize, initsize; - int nid; + pg_data_t *pgdat; iommu_init(); - num_physpages = 0; high_memory = NULL; + for_each_online_pgdat(pgdat) + high_memory = max_t(void *, high_memory, + __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); - void *node_high_memory; - - num_physpages += pgdat->node_present_pages; - - if (pgdat->node_spanned_pages) - totalram_pages += free_all_bootmem_node(pgdat); - - - node_high_memory = (void *)__va((pgdat->node_start_pfn + - pgdat->node_spanned_pages) << - PAGE_SHIFT); - if (node_high_memory > high_memory) - high_memory = node_high_memory; - } + free_all_bootmem(); /* Set this up early, so we can take care of the zero page */ cpu_cache_init(); @@ -441,19 +427,8 @@ void __init mem_init(void) vsyscall_init(); - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " - "%dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - datasize >> 10, - initsize >> 10); - - printk(KERN_INFO "virtual kernel memory layout:\n" + mem_init_print_info(NULL); + pr_info("virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" @@ -499,13 +474,13 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 79c214efa3fe..36760317814f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -853,10 +853,11 @@ extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #endif /* Encode and de-code a swap entry */ diff --git a/arch/sparc/include/uapi/asm/fcntl.h b/arch/sparc/include/uapi/asm/fcntl.h index d0b83f66f356..d73e5e008b0d 100644 --- a/arch/sparc/include/uapi/asm/fcntl.h +++ b/arch/sparc/include/uapi/asm/fcntl.h @@ -35,6 +35,7 @@ #define O_SYNC (__O_SYNC|O_DSYNC) #define O_PATH 0x1000000 +#define O_TMPFILE 0x2000000 #define F_GETOWN 5 /* for sockets. */ #define F_SETOWN 6 /* for sockets. */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 89f49b68a21c..b46c3fa0b265 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -70,6 +70,8 @@ #define SO_SELECT_ERR_QUEUE 0x0029 +#define SO_LL 0x0030 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 6cfc1b09ec25..d7aa524b7283 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -254,15 +254,12 @@ void __init leon_smp_done(void) /* Free unneeded trap tables */ if (!cpu_present(1)) { free_reserved_page(virt_to_page(&trapbase_cpu1)); - num_physpages++; } if (!cpu_present(2)) { free_reserved_page(virt_to_page(&trapbase_cpu2)); - num_physpages++; } if (!cpu_present(3)) { free_reserved_page(virt_to_page(&trapbase_cpu3)); - num_physpages++; } /* Ok, they are spinning and ready to go. */ smp_processors_ready = 1; diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 2031c65fd4ea..bc4d3f5d2e5d 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -254,7 +254,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, const char *type; u32 class; - dev = alloc_pci_dev(); + dev = pci_alloc_dev(bus); if (!dev) return NULL; @@ -281,7 +281,6 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, printk(" create device, devfn: %x, type: %s\n", devfn, type); - dev->bus = bus; dev->sysdata = node; dev->dev.parent = bus->bridge; dev->dev.bus = &pci_bus_type; @@ -327,7 +326,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE) pci_set_master(dev); - dev->current_state = 4; /* unknown power state */ + dev->current_state = PCI_UNKNOWN; /* unknown power state */ dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index af472cf7c69a..db6987082805 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -288,10 +288,6 @@ static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) void __init mem_init(void) { - int codepages = 0; - int datapages = 0; - int initpages = 0; - int reservedpages = 0; int i; if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { @@ -323,15 +319,12 @@ void __init mem_init(void) max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(max_low_pfn << PAGE_SHIFT); - - totalram_pages = free_all_bootmem(); + free_all_bootmem(); for (i = 0; sp_banks[i].num_bytes != 0; i++) { unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT; - num_physpages += sp_banks[i].num_bytes >> PAGE_SHIFT; - if (end_pfn <= highstart_pfn) continue; @@ -341,39 +334,19 @@ void __init mem_init(void) map_high_region(start_pfn, end_pfn); } - codepages = (((unsigned long) &_etext) - ((unsigned long)&_start)); - codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; - datapages = (((unsigned long) &_edata) - ((unsigned long)&_etext)); - datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT; - initpages = (((unsigned long) &__init_end) - ((unsigned long) &__init_begin)); - initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT; - - /* Ignore memory holes for the purpose of counting reserved pages */ - for (i=0; i < max_low_pfn; i++) - if (test_bit(i >> (20 - PAGE_SHIFT), sparc_valid_addr_bitmap) - && PageReserved(pfn_to_page(i))) - reservedpages++; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT - 10), - codepages << (PAGE_SHIFT-10), - reservedpages << (PAGE_SHIFT - 10), - datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); } void free_initmem (void) { - num_physpages += free_initmem_default(POISON_FREE_INITMEM); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM, - "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 04fd55a6e461..a9c42a7ffb6a 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2045,7 +2045,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - unsigned long codepages, datapages, initpages; unsigned long addr, last; addr = PAGE_OFFSET + kern_base; @@ -2061,12 +2060,7 @@ void __init mem_init(void) high_memory = __va(last_valid_pfn << PAGE_SHIFT); register_page_bootmem_info(); - totalram_pages = free_all_bootmem(); - - /* We subtract one to account for the mem_map_zero page - * allocated below. - */ - num_physpages = totalram_pages - 1; + free_all_bootmem(); /* * Set up the zero page, mark it reserved, so that page count @@ -2079,19 +2073,7 @@ void __init mem_init(void) } mark_page_reserved(mem_map_zero); - codepages = (((unsigned long) _etext) - ((unsigned long) _start)); - codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; - datapages = (((unsigned long) _edata) - ((unsigned long) _etext)); - datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT; - initpages = (((unsigned long) __init_end) - ((unsigned long) __init_begin)); - initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT; - - printk("Memory: %luk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n", - nr_free_pages() << (PAGE_SHIFT-10), - codepages << (PAGE_SHIFT-10), - datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10), - PAGE_OFFSET, (last_valid_pfn << PAGE_SHIFT)); + mem_init_print_info(NULL); if (tlb_type == cheetah || tlb_type == cheetah_plus) cheetah_ecache_flush_init(); @@ -2131,8 +2113,8 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM, - "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 37e7bc4c95b3..7a91f288c708 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -188,7 +188,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { struct list_head *lh = (struct list_head *) pgtable; @@ -202,7 +203,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) mm->pmd_huge_pte = pgtable; } -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { struct list_head *lh; pgtable_t pgtable; diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index d36a85ebb5e0..9c7be59e6f5a 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -785,9 +785,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; break; } if (proglen == oldproglen) { - image = module_alloc(max_t(unsigned int, - proglen, - sizeof(struct work_struct))); + image = module_alloc(proglen); if (!image) goto out; } @@ -806,20 +804,8 @@ out: return; } -static void jit_free_defer(struct work_struct *arg) -{ - module_free(NULL, arg); -} - -/* run from softirq, we must use a work_struct to call - * module_free() from process context - */ void bpf_jit_free(struct sk_filter *fp) { - if (fp->bpf_func != sk_run_filter) { - struct work_struct *work = (struct work_struct *)fp->bpf_func; - - INIT_WORK(work, jit_free_defer); - schedule_work(work); - } + if (fp->bpf_func != sk_run_filter) + module_free(NULL, fp->bpf_func); } diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 3aa37669ff8c..24565a7ffe6d 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -25,6 +25,7 @@ config TILE select HAVE_ARCH_TRACEHOOK select HAVE_SYSCALL_TRACEPOINTS select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select HAVE_DEBUG_STACKOVERFLOW # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug index ddbfc3322d7f..9165ea979e85 100644 --- a/arch/tile/Kconfig.debug +++ b/arch/tile/Kconfig.debug @@ -14,13 +14,6 @@ config EARLY_PRINTK with klogd/syslogd. You should normally N here, unless you want to debug such a crash. -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - help - This option will cause messages to be printed if free stack space - drops below a certain limit. - config DEBUG_EXTRA_FLAGS string "Additional compiler arguments when building with '-g'" depends on DEBUG_INFO diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h index d062d463fca9..7d8a935a9238 100644 --- a/arch/tile/include/asm/sections.h +++ b/arch/tile/include/asm/sections.h @@ -34,7 +34,7 @@ extern char __sys_cmpxchg_grab_lock[]; extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; #endif -/* Handle the discontiguity between _sdata and _stext. */ +/* Handle the discontiguity between _sdata and _text. */ static inline int arch_is_kernel_data(unsigned long addr) { return addr >= (unsigned long)_sdata && diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 7a5aa1a7864e..68b542677f6a 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -307,8 +307,8 @@ static void __cpuinit store_permanent_mappings(void) hv_store_mapping(addr, pages << PAGE_SHIFT, pa); } - hv_store_mapping((HV_VirtAddr)_stext, - (uint32_t)(_einittext - _stext), 0); + hv_store_mapping((HV_VirtAddr)_text, + (uint32_t)(_einittext - _text), 0); } /* @@ -329,6 +329,7 @@ static void __init setup_memory(void) #if defined(CONFIG_HIGHMEM) || defined(__tilegx__) long lowmem_pages; #endif + unsigned long physpages = 0; /* We are using a char to hold the cpu_2_node[] mapping */ BUILD_BUG_ON(MAX_NUMNODES > 127); @@ -388,8 +389,8 @@ static void __init setup_memory(void) continue; } } - if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) { - int max_size = maxmem_pfn - num_physpages; + if (physpages + PFN_DOWN(range.size) > maxmem_pfn) { + int max_size = maxmem_pfn - physpages; if (max_size > 0) { pr_err("Maxmem reduced node %d to %d pages\n", i, max_size); @@ -446,7 +447,7 @@ static void __init setup_memory(void) node_start_pfn[i] = start; node_end_pfn[i] = end; node_controller[i] = range.controller; - num_physpages += size; + physpages += size; max_pfn = end; /* Mark node as online */ @@ -465,7 +466,7 @@ static void __init setup_memory(void) * we're willing to use at 8 million pages (32GB of 4KB pages). */ cap = 8 * 1024 * 1024; /* 8 million pages */ - if (num_physpages > cap) { + if (physpages > cap) { int num_nodes = num_online_nodes(); int cap_each = cap / num_nodes; unsigned long dropped_pages = 0; @@ -476,10 +477,10 @@ static void __init setup_memory(void) node_end_pfn[i] = node_start_pfn[i] + cap_each; } } - num_physpages -= dropped_pages; + physpages -= dropped_pages; pr_warning("Only using %ldMB memory;" " ignoring %ldMB.\n", - num_physpages >> (20 - PAGE_SHIFT), + physpages >> (20 - PAGE_SHIFT), dropped_pages >> (20 - PAGE_SHIFT)); pr_warning("Consider using a larger page size.\n"); } @@ -497,7 +498,7 @@ static void __init setup_memory(void) lowmem_pages = (mappable_physpages > MAXMEM_PFN) ? MAXMEM_PFN : mappable_physpages; - highmem_pages = (long) (num_physpages - lowmem_pages); + highmem_pages = (long) (physpages - lowmem_pages); pr_notice("%ldMB HIGHMEM available.\n", pages_to_mb(highmem_pages > 0 ? highmem_pages : 0)); @@ -514,7 +515,6 @@ static void __init setup_memory(void) pr_warning("Use a HIGHMEM enabled kernel.\n"); max_low_pfn = MAXMEM_PFN; max_pfn = MAXMEM_PFN; - num_physpages = MAXMEM_PFN; node_end_pfn[0] = MAXMEM_PFN; } else { pr_notice("%ldMB memory available.\n", diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 631f10de12fe..a13ed902afbb 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -27,7 +27,6 @@ SECTIONS .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ { _text = .; - _stext = .; *(.intrpt1) } :intrpt1 =0 @@ -36,6 +35,7 @@ SECTIONS /* Now the real code */ . = ALIGN(0x20000); + _stext = .; .text : AT (ADDR(.text) - LOAD_OFFSET) { HEAD_TEXT SCHED_TEXT @@ -58,11 +58,13 @@ SECTIONS #define LOAD_OFFSET PAGE_OFFSET . = ALIGN(PAGE_SIZE); + __init_begin = .; VMLINUX_SYMBOL(_sinitdata) = .; INIT_DATA_SECTION(16) :data =0 PERCPU_SECTION(L2_CACHE_BYTES) . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_einitdata) = .; + __init_end = .; _sdata = .; /* Start of data section */ diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 3d2b81c163a6..f7f99f90cbe0 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -573,10 +573,10 @@ out_of_memory: down_read(&mm->mmap_sem); goto survive; } - pr_alert("VM: killing process %s\n", tsk->comm); - if (!is_kernel_mode) - do_group_exit(SIGKILL); - goto no_context; + if (is_kernel_mode) + goto no_context; + pagefault_out_of_memory(); + return 0; do_sigbus: up_read(&mm->mmap_sem); diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 2749515a0547..e182958c707d 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -562,7 +562,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) prot = ktext_set_nocache(prot); } - BUG_ON(address != (unsigned long)_stext); + BUG_ON(address != (unsigned long)_text); pte = NULL; for (; address < (unsigned long)_einittext; pfn++, address += PAGE_SIZE) { @@ -720,7 +720,7 @@ static void __init init_free_pfn_range(unsigned long start, unsigned long end) } init_page_count(page); __free_pages(page, order); - totalram_pages += count; + adjust_managed_page_count(page, count); page += count; pfn += count; @@ -821,7 +821,6 @@ static void __init set_max_mapnr_init(void) void __init mem_init(void) { - int codesize, datasize, initsize; int i; #ifndef __tilegx__ void *last; @@ -846,26 +845,14 @@ void __init mem_init(void) set_max_mapnr_init(); /* this will put all bootmem onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); #ifndef CONFIG_64BIT /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ set_non_bootmem_pages_init(); #endif - codesize = (unsigned long)&_etext - (unsigned long)&_text; - datasize = (unsigned long)&_end - (unsigned long)&_sdata; - initsize = (unsigned long)&_einittext - (unsigned long)&_sinittext; - initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata; - - pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + mem_init_print_info(NULL); /* * In debug mode, dump some interesting memory mappings. @@ -1024,16 +1011,13 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) pte_clear(&init_mm, addr, ptep); continue; } - __ClearPageReserved(page); - init_page_count(page); if (pte_huge(*ptep)) BUG_ON(!kdata_huge); else set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL)); memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; + free_reserved_page(page); } pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10); } diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 4938de5512d2..1dd5bd8a8c59 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -57,7 +57,6 @@ *(.uml.initcall.init) __uml_initcall_end = .; } - __init_end = .; SECURITY_INIT diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index fb8fd6fb6563..adde088aeeff 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -14,8 +14,6 @@ SECTIONS __binary_start = .; . = ALIGN(4096); /* Init code and data */ _text = .; - _stext = .; - __init_begin = .; INIT_TEXT_SECTION(PAGE_SIZE) . = ALIGN(PAGE_SIZE); @@ -67,6 +65,7 @@ SECTIONS } =0x90909090 .plt : { *(.plt) } .text : { + _stext = .; TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -91,7 +90,9 @@ SECTIONS #include <asm/common.lds.S> + __init_begin = .; init.data : { INIT_DATA } + __init_end = .; /* Ensure the __preinit_array_start label is properly aligned. We could instead move the label definition inside the section, but @@ -155,6 +156,7 @@ SECTIONS . = ALIGN(32 / 8); . = ALIGN(32 / 8); } + __bss_stop = .; _end = .; PROVIDE (end = .); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 9df292b270a8..7ddb64baf327 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -65,15 +65,13 @@ void __init mem_init(void) uml_reserved = brk_end; /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); max_low_pfn = totalram_pages; #ifdef CONFIG_HIGHMEM setup_highmem(end_iomem, highmem); #endif - num_physpages = totalram_pages; max_pfn = totalram_pages; - printk(KERN_INFO "Memory: %luk available\n", - nr_free_pages() << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); kmalloc_ok = 1; } @@ -244,7 +242,7 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index ff65fb4f1a95..6899195602b7 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -20,13 +20,12 @@ SECTIONS . = START + SIZEOF_HEADERS; _text = .; - _stext = .; - __init_begin = .; INIT_TEXT_SECTION(0) . = ALIGN(PAGE_SIZE); .text : { + _stext = .; TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -62,7 +61,10 @@ SECTIONS #include <asm/common.lds.S> + __init_begin = .; init.data : { INIT_DATA } + __init_end = .; + .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) @@ -97,6 +99,7 @@ SECTIONS PROVIDE(_bss_start = .); SBSS(0) BSS(0) + __bss_stop = .; _end = .; PROVIDE (end = .); diff --git a/arch/unicore32/boot/compressed/Makefile b/arch/unicore32/boot/compressed/Makefile index 950a9afa38f8..96494fb646f7 100644 --- a/arch/unicore32/boot/compressed/Makefile +++ b/arch/unicore32/boot/compressed/Makefile @@ -17,7 +17,7 @@ OBJS := misc.o # font.c and font.o CFLAGS_font.o := -Dstatic= -$(obj)/font.c: $(srctree)/drivers/video/console/font_8x8.c +$(obj)/font.c: $(srctree)/lib/fonts/font_8x8.c $(call cmd,shipped) # piggy.S and piggy.o diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h index 5eddb997defe..debafc40200a 100644 --- a/arch/unicore32/include/asm/memory.h +++ b/arch/unicore32/include/asm/memory.h @@ -98,12 +98,6 @@ /* * Conversion between a struct page and a physical address. * - * Note: when converting an unknown physical address to a - * struct page, the resulting pointer must be validated - * using VALID_PAGE(). It must return an invalid struct page - * for any physical address not corresponding to a system - * RAM address. - * * page_to_pfn(page) convert a struct page * to a PFN number * pfn_to_page(pfn) convert a _valid_ PFN number to struct page * * diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c index ef69c0c82825..374a055a8e6b 100644 --- a/arch/unicore32/kernel/pci.c +++ b/arch/unicore32/kernel/pci.c @@ -277,11 +277,6 @@ static int __init pci_common_init(void) pci_bus_assign_resources(puv3_bus); } - /* - * Tell drivers about devices found. - */ - pci_bus_add_devices(puv3_bus); - return 0; } subsys_initcall(pci_common_init); diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index c9447691bdac..778ebba80827 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -51,16 +51,6 @@ void arch_cpu_idle(void) local_irq_enable(); } -static char reboot_mode = 'h'; - -int __init reboot_setup(char *str) -{ - reboot_mode = str[0]; - return 1; -} - -__setup("reboot=", reboot_setup); - void machine_halt(void) { gpio_set_value(GPO_SOFT_OFF, 0); @@ -88,7 +78,7 @@ void machine_restart(char *cmd) * we may need it to insert some 1:1 mappings so that * soft boot works. */ - setup_mm_for_reboot(reboot_mode); + setup_mm_for_reboot(); /* Clean and invalidate caches */ flush_cache_all(); @@ -102,7 +92,7 @@ void machine_restart(char *cmd) /* * Now handle reboot code. */ - if (reboot_mode == 's') { + if (reboot_mode == REBOOT_SOFT) { /* Jump into ROM at address 0xffff0000 */ cpu_reset(VECTORS_BASE); } else { diff --git a/arch/unicore32/kernel/setup.h b/arch/unicore32/kernel/setup.h index 30f749da8f73..f5c51b85ad24 100644 --- a/arch/unicore32/kernel/setup.h +++ b/arch/unicore32/kernel/setup.h @@ -22,7 +22,7 @@ extern void puv3_ps2_init(void); extern void pci_puv3_preinit(void); extern void __init puv3_init_gpio(void); -extern void setup_mm_for_reboot(char mode); +extern void setup_mm_for_reboot(void); extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 63df12d71ce3..ae6bc036db92 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -383,59 +383,14 @@ static void __init free_unused_memmap(struct meminfo *mi) */ void __init mem_init(void) { - unsigned long reserved_pages, free_pages; - struct memblock_region *reg; - int i; - max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; free_unused_memmap(&meminfo); /* this will put all unused low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - - reserved_pages = free_pages = 0; - - for_each_bank(i, &meminfo) { - struct membank *bank = &meminfo.bank[i]; - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = bank_pfn_start(bank); - pfn2 = bank_pfn_end(bank); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - if (PageReserved(page)) - reserved_pages++; - else if (!page_count(page)) - free_pages++; - page++; - } while (page < end); - } - - /* - * Since our memory may not be contiguous, calculate the - * real number of pages we have in this system - */ - printk(KERN_INFO "Memory:"); - num_physpages = 0; - for_each_memblock(memory, reg) { - unsigned long pages = memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - num_physpages += pages; - printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); - } - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - - printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n", - nr_free_pages() << (PAGE_SHIFT-10), - free_pages << (PAGE_SHIFT-10), - reserved_pages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + free_all_bootmem(); + mem_init_print_info(NULL); printk(KERN_NOTICE "Virtual kernel memory layout:\n" " vector : 0x%08lx - 0x%08lx (%4ld kB)\n" " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" @@ -464,7 +419,7 @@ void __init mem_init(void) BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); BUG_ON(TASK_SIZE > MODULES_VADDR); - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { /* * On a machine this small we won't get * anywhere without overcommit, so turn @@ -476,7 +431,7 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -486,7 +441,7 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 43c20b40e444..4f5a532bee13 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -445,7 +445,7 @@ void __init paging_init(void) * the user-mode pages. This will then ensure that we have predictable * results when turning the mmu off */ -void setup_mm_for_reboot(char mode) +void setup_mm_for_reboot(void) { unsigned long base_pmdval; pgd_t *pgd; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d98b665e6536..b32ebf92b0ce 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -65,6 +65,7 @@ config X86 select HAVE_KERNEL_LZMA select HAVE_KERNEL_XZ select HAVE_KERNEL_LZO + select HAVE_KERNEL_LZ4 select HAVE_HW_BREAKPOINT select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS @@ -102,6 +103,7 @@ config X86 select HAVE_ARCH_SECCOMP_FILTER select BUILDTIME_EXTABLE_SORT select GENERIC_CMOS_UPDATE + select HAVE_ARCH_SOFT_DIRTY select CLOCKSOURCE_WATCHDOG select GENERIC_CLOCKEVENTS select ARCH_CLOCKSOURCE_DATA if X86_64 @@ -121,6 +123,7 @@ config X86 select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION select RTC_LIB + select HAVE_DEBUG_STACKOVERFLOW config INSTRUCTION_DECODER def_bool y @@ -207,6 +210,12 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y + +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + config ZONE_DMA32 bool default X86_64 @@ -2252,11 +2261,11 @@ source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" config RAPIDIO - bool "RapidIO support" + tristate "RapidIO support" depends on PCI default n help - If you say Y here, the kernel will include drivers and + If enabled this option will include drivers and the core infrastructure code to support RapidIO interconnect devices. source "drivers/rapidio/Kconfig" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index c963881de0d0..78d91afb8e50 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -59,16 +59,6 @@ config EARLY_PRINTK_DBGP with klogd/syslogd or the X server. You should normally N here, unless you want to debug such a crash. You need usb debug device. -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL - ---help--- - Say Y here if you want to check the overflows of kernel, IRQ - and exception stacks. This option will cause messages of the - stacks in detail when free stack space drops below a certain - limit. - If in doubt, say "N". - config X86_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 5ef205c5f37b..dcd90df10ab4 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,8 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ + vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -63,12 +64,15 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lz4) suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_LZ4) := lz4 quiet_cmd_mkpiggy = MKPIGGY $@ cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 7cb56c6ca351..0319c88290a5 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -145,6 +145,10 @@ static int lines, cols; #include "../../../../lib/decompress_unlzo.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + static void scroll(void) { int i; diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a3a0ed80f17c..7d6ba9db1be9 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -3,8 +3,6 @@ # avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o @@ -29,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) @@ -42,10 +41,8 @@ endif # These modules require assembler to support AVX2. ifeq ($(avx2_supported),yes) - obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o - obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o endif aes-i586-y := aes-i586-asm_32.o aes_glue.o @@ -73,10 +70,8 @@ ifeq ($(avx_supported),yes) endif ifeq ($(avx2_supported),yes) - blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o - twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o endif aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o @@ -87,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o +crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S deleted file mode 100644 index 784452e0d05d..000000000000 --- a/arch/x86/crypto/blowfish-avx2-asm_64.S +++ /dev/null @@ -1,449 +0,0 @@ -/* - * x86_64/AVX2 assembler optimized version of Blowfish - * - * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ - -#include <linux/linkage.h> - -.file "blowfish-avx2-asm_64.S" - -.data -.align 32 - -.Lprefetch_mask: -.long 0*64 -.long 1*64 -.long 2*64 -.long 3*64 -.long 4*64 -.long 5*64 -.long 6*64 -.long 7*64 - -.Lbswap32_mask: -.long 0x00010203 -.long 0x04050607 -.long 0x08090a0b -.long 0x0c0d0e0f - -.Lbswap128_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.Lbswap_iv_mask: - .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 - -.text -/* structure of crypto context */ -#define p 0 -#define s0 ((16 + 2) * 4) -#define s1 ((16 + 2 + (1 * 256)) * 4) -#define s2 ((16 + 2 + (2 * 256)) * 4) -#define s3 ((16 + 2 + (3 * 256)) * 4) - -/* register macros */ -#define CTX %rdi -#define RIO %rdx - -#define RS0 %rax -#define RS1 %r8 -#define RS2 %r9 -#define RS3 %r10 - -#define RLOOP %r11 -#define RLOOPd %r11d - -#define RXr0 %ymm8 -#define RXr1 %ymm9 -#define RXr2 %ymm10 -#define RXr3 %ymm11 -#define RXl0 %ymm12 -#define RXl1 %ymm13 -#define RXl2 %ymm14 -#define RXl3 %ymm15 - -/* temp regs */ -#define RT0 %ymm0 -#define RT0x %xmm0 -#define RT1 %ymm1 -#define RT1x %xmm1 -#define RIDX0 %ymm2 -#define RIDX1 %ymm3 -#define RIDX1x %xmm3 -#define RIDX2 %ymm4 -#define RIDX3 %ymm5 - -/* vpgatherdd mask and '-1' */ -#define RNOT %ymm6 - -/* byte mask, (-1 >> 24) */ -#define RBYTE %ymm7 - -/*********************************************************************** - * 32-way AVX2 blowfish - ***********************************************************************/ -#define F(xl, xr) \ - vpsrld $24, xl, RIDX0; \ - vpsrld $16, xl, RIDX1; \ - vpsrld $8, xl, RIDX2; \ - vpand RBYTE, RIDX1, RIDX1; \ - vpand RBYTE, RIDX2, RIDX2; \ - vpand RBYTE, xl, RIDX3; \ - \ - vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpcmpeqd RIDX0, RIDX0, RIDX0; \ - \ - vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \ - vpcmpeqd RIDX1, RIDX1, RIDX1; \ - vpaddd RT0, RT1, RT0; \ - \ - vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \ - vpxor RT0, RT1, RT0; \ - \ - vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpaddd RT0, RT1, RT0; \ - \ - vpxor RT0, xr, xr; - -#define add_roundkey(xl, nmem) \ - vpbroadcastd nmem, RT0; \ - vpxor RT0, xl ## 0, xl ## 0; \ - vpxor RT0, xl ## 1, xl ## 1; \ - vpxor RT0, xl ## 2, xl ## 2; \ - vpxor RT0, xl ## 3, xl ## 3; - -#define round_enc() \ - add_roundkey(RXr, p(CTX,RLOOP,4)); \ - F(RXl0, RXr0); \ - F(RXl1, RXr1); \ - F(RXl2, RXr2); \ - F(RXl3, RXr3); \ - \ - add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ - F(RXr0, RXl0); \ - F(RXr1, RXl1); \ - F(RXr2, RXl2); \ - F(RXr3, RXl3); - -#define round_dec() \ - add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \ - F(RXl0, RXr0); \ - F(RXl1, RXr1); \ - F(RXl2, RXr2); \ - F(RXl3, RXr3); \ - \ - add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ - F(RXr0, RXl0); \ - F(RXr1, RXl1); \ - F(RXr2, RXl2); \ - F(RXr3, RXl3); - -#define init_round_constants() \ - vpcmpeqd RNOT, RNOT, RNOT; \ - leaq s0(CTX), RS0; \ - leaq s1(CTX), RS1; \ - leaq s2(CTX), RS2; \ - leaq s3(CTX), RS3; \ - vpsrld $24, RNOT, RBYTE; - -#define transpose_2x2(x0, x1, t0) \ - vpunpckldq x0, x1, t0; \ - vpunpckhdq x0, x1, x1; \ - \ - vpunpcklqdq t0, x1, x0; \ - vpunpckhqdq t0, x1, x1; - -#define read_block(xl, xr) \ - vbroadcasti128 .Lbswap32_mask, RT1; \ - \ - vpshufb RT1, xl ## 0, xl ## 0; \ - vpshufb RT1, xr ## 0, xr ## 0; \ - vpshufb RT1, xl ## 1, xl ## 1; \ - vpshufb RT1, xr ## 1, xr ## 1; \ - vpshufb RT1, xl ## 2, xl ## 2; \ - vpshufb RT1, xr ## 2, xr ## 2; \ - vpshufb RT1, xl ## 3, xl ## 3; \ - vpshufb RT1, xr ## 3, xr ## 3; \ - \ - transpose_2x2(xl ## 0, xr ## 0, RT0); \ - transpose_2x2(xl ## 1, xr ## 1, RT0); \ - transpose_2x2(xl ## 2, xr ## 2, RT0); \ - transpose_2x2(xl ## 3, xr ## 3, RT0); - -#define write_block(xl, xr) \ - vbroadcasti128 .Lbswap32_mask, RT1; \ - \ - transpose_2x2(xl ## 0, xr ## 0, RT0); \ - transpose_2x2(xl ## 1, xr ## 1, RT0); \ - transpose_2x2(xl ## 2, xr ## 2, RT0); \ - transpose_2x2(xl ## 3, xr ## 3, RT0); \ - \ - vpshufb RT1, xl ## 0, xl ## 0; \ - vpshufb RT1, xr ## 0, xr ## 0; \ - vpshufb RT1, xl ## 1, xl ## 1; \ - vpshufb RT1, xr ## 1, xr ## 1; \ - vpshufb RT1, xl ## 2, xl ## 2; \ - vpshufb RT1, xr ## 2, xr ## 2; \ - vpshufb RT1, xl ## 3, xl ## 3; \ - vpshufb RT1, xr ## 3, xr ## 3; - -.align 8 -__blowfish_enc_blk32: - /* input: - * %rdi: ctx, CTX - * RXl0..4, RXr0..4: plaintext - * output: - * RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped) - */ - init_round_constants(); - - read_block(RXl, RXr); - - movl $1, RLOOPd; - add_roundkey(RXl, p+4*(0)(CTX)); - -.align 4 -.L__enc_loop: - round_enc(); - - leal 2(RLOOPd), RLOOPd; - cmpl $17, RLOOPd; - jne .L__enc_loop; - - add_roundkey(RXr, p+4*(17)(CTX)); - - write_block(RXl, RXr); - - ret; -ENDPROC(__blowfish_enc_blk32) - -.align 8 -__blowfish_dec_blk32: - /* input: - * %rdi: ctx, CTX - * RXl0..4, RXr0..4: ciphertext - * output: - * RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped) - */ - init_round_constants(); - - read_block(RXl, RXr); - - movl $14, RLOOPd; - add_roundkey(RXl, p+4*(17)(CTX)); - -.align 4 -.L__dec_loop: - round_dec(); - - addl $-2, RLOOPd; - jns .L__dec_loop; - - add_roundkey(RXr, p+4*(0)(CTX)); - - write_block(RXl, RXr); - - ret; -ENDPROC(__blowfish_dec_blk32) - -ENTRY(blowfish_ecb_enc_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - - vmovdqu 0*32(%rdx), RXl0; - vmovdqu 1*32(%rdx), RXr0; - vmovdqu 2*32(%rdx), RXl1; - vmovdqu 3*32(%rdx), RXr1; - vmovdqu 4*32(%rdx), RXl2; - vmovdqu 5*32(%rdx), RXr2; - vmovdqu 6*32(%rdx), RXl3; - vmovdqu 7*32(%rdx), RXr3; - - call __blowfish_enc_blk32; - - vmovdqu RXr0, 0*32(%rsi); - vmovdqu RXl0, 1*32(%rsi); - vmovdqu RXr1, 2*32(%rsi); - vmovdqu RXl1, 3*32(%rsi); - vmovdqu RXr2, 4*32(%rsi); - vmovdqu RXl2, 5*32(%rsi); - vmovdqu RXr3, 6*32(%rsi); - vmovdqu RXl3, 7*32(%rsi); - - vzeroupper; - - ret; -ENDPROC(blowfish_ecb_enc_32way) - -ENTRY(blowfish_ecb_dec_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - - vmovdqu 0*32(%rdx), RXl0; - vmovdqu 1*32(%rdx), RXr0; - vmovdqu 2*32(%rdx), RXl1; - vmovdqu 3*32(%rdx), RXr1; - vmovdqu 4*32(%rdx), RXl2; - vmovdqu 5*32(%rdx), RXr2; - vmovdqu 6*32(%rdx), RXl3; - vmovdqu 7*32(%rdx), RXr3; - - call __blowfish_dec_blk32; - - vmovdqu RXr0, 0*32(%rsi); - vmovdqu RXl0, 1*32(%rsi); - vmovdqu RXr1, 2*32(%rsi); - vmovdqu RXl1, 3*32(%rsi); - vmovdqu RXr2, 4*32(%rsi); - vmovdqu RXl2, 5*32(%rsi); - vmovdqu RXr3, 6*32(%rsi); - vmovdqu RXl3, 7*32(%rsi); - - vzeroupper; - - ret; -ENDPROC(blowfish_ecb_dec_32way) - -ENTRY(blowfish_cbc_dec_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - - vmovdqu 0*32(%rdx), RXl0; - vmovdqu 1*32(%rdx), RXr0; - vmovdqu 2*32(%rdx), RXl1; - vmovdqu 3*32(%rdx), RXr1; - vmovdqu 4*32(%rdx), RXl2; - vmovdqu 5*32(%rdx), RXr2; - vmovdqu 6*32(%rdx), RXl3; - vmovdqu 7*32(%rdx), RXr3; - - call __blowfish_dec_blk32; - - /* xor with src */ - vmovq (%rdx), RT0x; - vpshufd $0x4f, RT0x, RT0x; - vinserti128 $1, 8(%rdx), RT0, RT0; - vpxor RT0, RXr0, RXr0; - vpxor 0*32+24(%rdx), RXl0, RXl0; - vpxor 1*32+24(%rdx), RXr1, RXr1; - vpxor 2*32+24(%rdx), RXl1, RXl1; - vpxor 3*32+24(%rdx), RXr2, RXr2; - vpxor 4*32+24(%rdx), RXl2, RXl2; - vpxor 5*32+24(%rdx), RXr3, RXr3; - vpxor 6*32+24(%rdx), RXl3, RXl3; - - vmovdqu RXr0, (0*32)(%rsi); - vmovdqu RXl0, (1*32)(%rsi); - vmovdqu RXr1, (2*32)(%rsi); - vmovdqu RXl1, (3*32)(%rsi); - vmovdqu RXr2, (4*32)(%rsi); - vmovdqu RXl2, (5*32)(%rsi); - vmovdqu RXr3, (6*32)(%rsi); - vmovdqu RXl3, (7*32)(%rsi); - - vzeroupper; - - ret; -ENDPROC(blowfish_cbc_dec_32way) - -ENTRY(blowfish_ctr_32way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: iv (big endian, 64bit) - */ - - vzeroupper; - - vpcmpeqd RT0, RT0, RT0; - vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */ - - vpcmpeqd RT1x, RT1x, RT1x; - vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */ - vpxor RIDX0, RIDX0, RIDX0; - vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */ - - vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */ - - vpcmpeqd RT1, RT1, RT1; - vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */ - vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */ - - vbroadcasti128 .Lbswap_iv_mask, RIDX0; - vbroadcasti128 .Lbswap128_mask, RIDX1; - - /* load IV and byteswap */ - vmovq (%rcx), RT1x; - vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */ - vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */ - - /* construct IVs */ - vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */ - vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */ - vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */ - vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */ - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXl1; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXr1; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXl2; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXr2; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXl3; - vpsubq RIDX2, RT1, RT1; - vpshufb RIDX1, RT1, RXr3; - - /* store last IV */ - vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */ - vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */ - vmovq RT1x, (%rcx); - - call __blowfish_enc_blk32; - - /* dst = src ^ iv */ - vpxor 0*32(%rdx), RXr0, RXr0; - vpxor 1*32(%rdx), RXl0, RXl0; - vpxor 2*32(%rdx), RXr1, RXr1; - vpxor 3*32(%rdx), RXl1, RXl1; - vpxor 4*32(%rdx), RXr2, RXr2; - vpxor 5*32(%rdx), RXl2, RXl2; - vpxor 6*32(%rdx), RXr3, RXr3; - vpxor 7*32(%rdx), RXl3, RXl3; - vmovdqu RXr0, (0*32)(%rsi); - vmovdqu RXl0, (1*32)(%rsi); - vmovdqu RXr1, (2*32)(%rsi); - vmovdqu RXl1, (3*32)(%rsi); - vmovdqu RXr2, (4*32)(%rsi); - vmovdqu RXl2, (5*32)(%rsi); - vmovdqu RXr3, (6*32)(%rsi); - vmovdqu RXl3, (7*32)(%rsi); - - vzeroupper; - - ret; -ENDPROC(blowfish_ctr_32way) diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c deleted file mode 100644 index 4417e9aea78d..000000000000 --- a/arch/x86/crypto/blowfish_avx2_glue.c +++ /dev/null @@ -1,585 +0,0 @@ -/* - * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish - * - * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/algapi.h> -#include <crypto/blowfish.h> -#include <crypto/cryptd.h> -#include <crypto/ctr.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> -#include <asm/crypto/blowfish.h> -#include <asm/crypto/ablk_helper.h> -#include <crypto/scatterwalk.h> - -#define BF_AVX2_PARALLEL_BLOCKS 32 - -/* 32-way AVX2 parallel cipher functions */ -asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src, - __be64 *iv); - -static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - if (fpu_enabled) - return true; - - /* FPU is only used when chunk to be processed is large enough, so - * do not enable FPU until it is necessary. - */ - if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS) - return false; - - kernel_fpu_begin(); - return true; -} - -static inline void bf_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) -{ - bool fpu_enabled = false; - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes; - int err; - - err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); - - /* Process multi-block AVX2 batch */ - if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { - do { - if (enc) - blowfish_ecb_enc_32way(ctx, wdst, wsrc); - else - blowfish_ecb_dec_32way(ctx, wdst, wsrc); - - wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS; - wdst += bsize * BF_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; - } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process multi-block batch */ - if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { - do { - if (enc) - blowfish_enc_blk_4way(ctx, wdst, wsrc); - else - blowfish_dec_blk_4way(ctx, wdst, wsrc); - - wsrc += bsize * BF_PARALLEL_BLOCKS; - wdst += bsize * BF_PARALLEL_BLOCKS; - nbytes -= bsize * BF_PARALLEL_BLOCKS; - } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (enc) - blowfish_enc_blk(ctx, wdst, wsrc); - else - blowfish_dec_blk(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - bf_fpu_end(fpu_enabled); - return err; -} - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, false); -} - -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 *iv = (u64 *)walk->iv; - - do { - *dst = *src ^ *iv; - blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk->iv = *iv; - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 last_iv; - int i; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process multi-block AVX2 batch */ - if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { - do { - nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1); - src -= BF_AVX2_PARALLEL_BLOCKS - 1; - dst -= BF_AVX2_PARALLEL_BLOCKS - 1; - - blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process multi-block batch */ - if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { - u64 ivs[BF_PARALLEL_BLOCKS - 1]; - - do { - nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1); - src -= BF_PARALLEL_BLOCKS - 1; - dst -= BF_PARALLEL_BLOCKS - 1; - - for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) - ivs[i] = src[i]; - - blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); - - for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) - dst[i + 1] ^= ivs[i]; - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } - -done: - *dst ^= *(u64 *)walk->iv; - *(u64 *)walk->iv = last_iv; - - return nbytes; -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes)) { - fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - bf_fpu_end(fpu_enabled); - return err; -} - -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *ctrblk = walk->iv; - u8 keystream[BF_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - blowfish_enc_blk(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - - crypto_inc(ctrblk, BF_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - int i; - - /* Process multi-block AVX2 batch */ - if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { - do { - blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src, - (__be64 *)walk->iv); - - src += BF_AVX2_PARALLEL_BLOCKS; - dst += BF_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; - } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Process four block batch */ - if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { - __be64 ctrblocks[BF_PARALLEL_BLOCKS]; - u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); - - do { - /* create ctrblks for parallel encrypt */ - for (i = 0; i < BF_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - ctrblocks[i] = cpu_to_be64(ctrblk++); - } - - blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += BF_PARALLEL_BLOCKS; - dst += BF_PARALLEL_BLOCKS; - nbytes -= bsize * BF_PARALLEL_BLOCKS; - } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); - - *(__be64 *)walk->iv = cpu_to_be64(ctrblk); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - u64 ctrblk; - - if (dst != src) - *dst = *src; - - ctrblk = *(u64 *)walk->iv; - be64_add_cpu((__be64 *)walk->iv, 1); - - blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); - - src += 1; - dst += 1; - } while ((nbytes -= bsize) >= bsize); - -done: - return nbytes; -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { - fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - bf_fpu_end(fpu_enabled); - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -static struct crypto_alg bf_algs[6] = { { - .cra_name = "__ecb-blowfish-avx2", - .cra_driver_name = "__driver-ecb-blowfish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-blowfish-avx2", - .cra_driver_name = "__driver-cbc-blowfish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-blowfish-avx2", - .cra_driver_name = "__driver-ctr-blowfish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "ecb(blowfish)", - .cra_driver_name = "ecb-blowfish-avx2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(blowfish)", - .cra_driver_name = "cbc-blowfish-avx2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(blowfish)", - .cra_driver_name = "ctr-blowfish-avx2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -} }; - - -static int __init init(void) -{ - u64 xcr0; - - if (!cpu_has_avx2 || !cpu_has_osxsave) { - pr_info("AVX2 instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - return -ENODEV; - } - - return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); -} - -static void __exit fini(void) -{ - crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized"); -MODULE_ALIAS("blowfish"); -MODULE_ALIAS("blowfish-asm"); diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 3548d76dbaa9..50ec333b70e6 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -1,7 +1,7 @@ /* * Glue Code for assembler optimized version of Blowfish * - * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> @@ -32,24 +32,40 @@ #include <linux/module.h> #include <linux/types.h> #include <crypto/algapi.h> -#include <asm/crypto/blowfish.h> /* regular block cipher functions */ asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, bool xor); -EXPORT_SYMBOL_GPL(__blowfish_enc_blk); - asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(blowfish_dec_blk); /* 4-way parallel cipher functions */ asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src, bool xor); -EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way); - asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way); + +static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk(ctx, dst, src, true); +} + +static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, false); +} + +static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) +{ + __blowfish_enc_blk_4way(ctx, dst, src, true); +} static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 91a1878fcc3e..0e0b8863a34b 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -51,16 +51,6 @@ #define ymm14_x xmm14 #define ymm15_x xmm15 -/* - * AES-NI instructions do not support ymmX registers, so we need splitting and - * merging. - */ -#define vaesenclast256(zero, yreg, tmp) \ - vextracti128 $1, yreg, tmp##_x; \ - vaesenclast zero##_x, yreg##_x, yreg##_x; \ - vaesenclast zero##_x, tmp##_x, tmp##_x; \ - vinserti128 $1, tmp##_x, yreg, yreg; - /********************************************************************** 32-way camellia **********************************************************************/ @@ -79,46 +69,70 @@ * S-function with AES subbytes \ */ \ vbroadcasti128 .Linv_shift_row, t4; \ - vpbroadcastb .L0f0f0f0f, t7; \ - vbroadcasti128 .Lpre_tf_lo_s1, t0; \ - vbroadcasti128 .Lpre_tf_hi_s1, t1; \ + vpbroadcastd .L0f0f0f0f, t7; \ + vbroadcasti128 .Lpre_tf_lo_s1, t5; \ + vbroadcasti128 .Lpre_tf_hi_s1, t6; \ + vbroadcasti128 .Lpre_tf_lo_s4, t2; \ + vbroadcasti128 .Lpre_tf_hi_s4, t3; \ \ /* AES inverse shift rows */ \ vpshufb t4, x0, x0; \ vpshufb t4, x7, x7; \ - vpshufb t4, x1, x1; \ - vpshufb t4, x4, x4; \ - vpshufb t4, x2, x2; \ - vpshufb t4, x5, x5; \ vpshufb t4, x3, x3; \ vpshufb t4, x6, x6; \ + vpshufb t4, x2, x2; \ + vpshufb t4, x5, x5; \ + vpshufb t4, x1, x1; \ + vpshufb t4, x4, x4; \ \ /* prefilter sboxes 1, 2 and 3 */ \ - vbroadcasti128 .Lpre_tf_lo_s4, t2; \ - vbroadcasti128 .Lpre_tf_hi_s4, t3; \ - filter_8bit(x0, t0, t1, t7, t6); \ - filter_8bit(x7, t0, t1, t7, t6); \ - filter_8bit(x1, t0, t1, t7, t6); \ - filter_8bit(x4, t0, t1, t7, t6); \ - filter_8bit(x2, t0, t1, t7, t6); \ - filter_8bit(x5, t0, t1, t7, t6); \ - \ /* prefilter sbox 4 */ \ + filter_8bit(x0, t5, t6, t7, t4); \ + filter_8bit(x7, t5, t6, t7, t4); \ + vextracti128 $1, x0, t0##_x; \ + vextracti128 $1, x7, t1##_x; \ + filter_8bit(x3, t2, t3, t7, t4); \ + filter_8bit(x6, t2, t3, t7, t4); \ + vextracti128 $1, x3, t3##_x; \ + vextracti128 $1, x6, t2##_x; \ + filter_8bit(x2, t5, t6, t7, t4); \ + filter_8bit(x5, t5, t6, t7, t4); \ + filter_8bit(x1, t5, t6, t7, t4); \ + filter_8bit(x4, t5, t6, t7, t4); \ + \ vpxor t4##_x, t4##_x, t4##_x; \ - filter_8bit(x3, t2, t3, t7, t6); \ - filter_8bit(x6, t2, t3, t7, t6); \ \ /* AES subbytes + AES shift rows */ \ + vextracti128 $1, x2, t6##_x; \ + vextracti128 $1, x5, t5##_x; \ + vaesenclast t4##_x, x0##_x, x0##_x; \ + vaesenclast t4##_x, t0##_x, t0##_x; \ + vinserti128 $1, t0##_x, x0, x0; \ + vaesenclast t4##_x, x7##_x, x7##_x; \ + vaesenclast t4##_x, t1##_x, t1##_x; \ + vinserti128 $1, t1##_x, x7, x7; \ + vaesenclast t4##_x, x3##_x, x3##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vinserti128 $1, t3##_x, x3, x3; \ + vaesenclast t4##_x, x6##_x, x6##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t2##_x, x6, x6; \ + vextracti128 $1, x1, t3##_x; \ + vextracti128 $1, x4, t2##_x; \ vbroadcasti128 .Lpost_tf_lo_s1, t0; \ vbroadcasti128 .Lpost_tf_hi_s1, t1; \ - vaesenclast256(t4, x0, t5); \ - vaesenclast256(t4, x7, t5); \ - vaesenclast256(t4, x1, t5); \ - vaesenclast256(t4, x4, t5); \ - vaesenclast256(t4, x2, t5); \ - vaesenclast256(t4, x5, t5); \ - vaesenclast256(t4, x3, t5); \ - vaesenclast256(t4, x6, t5); \ + vaesenclast t4##_x, x2##_x, x2##_x; \ + vaesenclast t4##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x2, x2; \ + vaesenclast t4##_x, x5##_x, x5##_x; \ + vaesenclast t4##_x, t5##_x, t5##_x; \ + vinserti128 $1, t5##_x, x5, x5; \ + vaesenclast t4##_x, x1##_x, x1##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vinserti128 $1, t3##_x, x1, x1; \ + vaesenclast t4##_x, x4##_x, x4##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t2##_x, x4, x4; \ \ /* postfilter sboxes 1 and 4 */ \ vbroadcasti128 .Lpost_tf_lo_s3, t2; \ @@ -139,22 +153,12 @@ /* postfilter sbox 2 */ \ filter_8bit(x1, t4, t5, t7, t2); \ filter_8bit(x4, t4, t5, t7, t2); \ + vpxor t7, t7, t7; \ \ vpsrldq $1, t0, t1; \ vpsrldq $2, t0, t2; \ + vpshufb t7, t1, t1; \ vpsrldq $3, t0, t3; \ - vpsrldq $4, t0, t4; \ - vpsrldq $5, t0, t5; \ - vpsrldq $6, t0, t6; \ - vpsrldq $7, t0, t7; \ - vpbroadcastb t0##_x, t0; \ - vpbroadcastb t1##_x, t1; \ - vpbroadcastb t2##_x, t2; \ - vpbroadcastb t3##_x, t3; \ - vpbroadcastb t4##_x, t4; \ - vpbroadcastb t6##_x, t6; \ - vpbroadcastb t5##_x, t5; \ - vpbroadcastb t7##_x, t7; \ \ /* P-function */ \ vpxor x5, x0, x0; \ @@ -162,11 +166,21 @@ vpxor x7, x2, x2; \ vpxor x4, x3, x3; \ \ + vpshufb t7, t2, t2; \ + vpsrldq $4, t0, t4; \ + vpshufb t7, t3, t3; \ + vpsrldq $5, t0, t5; \ + vpshufb t7, t4, t4; \ + \ vpxor x2, x4, x4; \ vpxor x3, x5, x5; \ vpxor x0, x6, x6; \ vpxor x1, x7, x7; \ \ + vpsrldq $6, t0, t6; \ + vpshufb t7, t5, t5; \ + vpshufb t7, t6, t6; \ + \ vpxor x7, x0, x0; \ vpxor x4, x1, x1; \ vpxor x5, x2, x2; \ @@ -179,12 +193,16 @@ \ /* Add key material and result to CD (x becomes new CD) */ \ \ - vpxor t7, x0, x0; \ - vpxor 4 * 32(mem_cd), x0, x0; \ - \ vpxor t6, x1, x1; \ vpxor 5 * 32(mem_cd), x1, x1; \ \ + vpsrldq $7, t0, t6; \ + vpshufb t7, t0, t0; \ + vpshufb t7, t6, t7; \ + \ + vpxor t7, x0, x0; \ + vpxor 4 * 32(mem_cd), x0, x0; \ + \ vpxor t5, x2, x2; \ vpxor 6 * 32(mem_cd), x2, x2; \ \ @@ -204,7 +222,7 @@ vpxor 3 * 32(mem_cd), x7, x7; /* - * Size optimization... with inlined roundsm16 binary would be over 5 times + * Size optimization... with inlined roundsm32 binary would be over 5 times * larger and would only marginally faster. */ .align 8 @@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) */ \ vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ vpxor tt0, tt0, tt0; \ - vpbroadcastb t0##_x, t3; \ + vpshufb tt0, t0, t3; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t2; \ + vpshufb tt0, t0, t2; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t1; \ + vpshufb tt0, t0, t1; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t0; \ + vpshufb tt0, t0, t0; \ \ vpand l0, t0, t0; \ vpand l1, t1, t1; \ @@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ \ vpxor l4, t0, l4; \ + vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ vmovdqu l4, 4 * 32(l); \ vpxor l5, t1, l5; \ vmovdqu l5, 5 * 32(l); \ @@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) * rl ^= t2; \ */ \ \ - vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ - vpbroadcastb t0##_x, t3; \ + vpshufb tt0, t0, t3; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t2; \ + vpshufb tt0, t0, t2; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t1; \ + vpshufb tt0, t0, t1; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t0; \ + vpshufb tt0, t0, t0; \ \ vpor 4 * 32(r), t0, t0; \ vpor 5 * 32(r), t1, t1; \ @@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vpxor 2 * 32(r), t2, t2; \ vpxor 3 * 32(r), t3, t3; \ vmovdqu t0, 0 * 32(r); \ + vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ vmovdqu t1, 1 * 32(r); \ vmovdqu t2, 2 * 32(r); \ vmovdqu t3, 3 * 32(r); \ @@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) * t2 &= rl; \ * rr ^= rol32(t2, 1); \ */ \ - vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ - vpbroadcastb t0##_x, t3; \ + vpshufb tt0, t0, t3; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t2; \ + vpshufb tt0, t0, t2; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t1; \ + vpshufb tt0, t0, t1; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t0; \ + vpshufb tt0, t0, t0; \ \ vpand 0 * 32(r), t0, t0; \ vpand 1 * 32(r), t1, t1; \ @@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vpxor 6 * 32(r), t2, t2; \ vpxor 7 * 32(r), t3, t3; \ vmovdqu t0, 4 * 32(r); \ + vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ vmovdqu t1, 5 * 32(r); \ vmovdqu t2, 6 * 32(r); \ vmovdqu t3, 7 * 32(r); \ @@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) * ll ^= t0; \ */ \ \ - vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ - vpbroadcastb t0##_x, t3; \ + vpshufb tt0, t0, t3; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t2; \ + vpshufb tt0, t0, t2; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t1; \ + vpshufb tt0, t0, t1; \ vpsrldq $1, t0, t0; \ - vpbroadcastb t0##_x, t0; \ + vpshufb tt0, t0, t0; \ \ vpor l4, t0, t0; \ vpor l5, t1, t1; \ diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S new file mode 100644 index 000000000000..35e97569d05f --- /dev/null +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -0,0 +1,643 @@ +######################################################################## +# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions +# +# Copyright (c) 2013, Intel Corporation +# +# Authors: +# Erdinc Ozturk <erdinc.ozturk@intel.com> +# Vinodh Gopal <vinodh.gopal@intel.com> +# James Guilford <james.guilford@intel.com> +# Tim Chen <tim.c.chen@linux.intel.com> +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# +# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## +# Function API: +# UINT16 crc_t10dif_pcl( +# UINT16 init_crc, //initial CRC value, 16 bits +# const unsigned char *buf, //buffer pointer to calculate CRC on +# UINT64 len //buffer length in bytes (64-bit data) +# ); +# +# Reference paper titled "Fast CRC Computation for Generic +# Polynomials Using PCLMULQDQ Instruction" +# URL: http://www.intel.com/content/dam/www/public/us/en/documents +# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf +# +# + +#include <linux/linkage.h> + +.text + +#define arg1 %rdi +#define arg2 %rsi +#define arg3 %rdx + +#define arg1_low32 %edi + +ENTRY(crc_t10dif_pcl) +.align 16 + + # adjust the 16-bit initial_crc value, scale it to 32 bits + shl $16, arg1_low32 + + # Allocate Stack Space + mov %rsp, %rcx + sub $16*2, %rsp + # align stack to 16 byte boundary + and $~(0x10 - 1), %rsp + + # check if smaller than 256 + cmp $256, arg3 + + # for sizes less than 128, we can't fold 64B at a time... + jl _less_than_128 + + + # load the initial crc value + movd arg1_low32, %xmm10 # initial crc + + # crc value does not need to be byte-reflected, but it needs + # to be moved to the high part of the register. + # because data will be byte-reflected and will align with + # initial crc at correct place. + pslldq $12, %xmm10 + + movdqa SHUF_MASK(%rip), %xmm11 + # receive the initial 64B data, xor the initial crc value + movdqu 16*0(arg2), %xmm0 + movdqu 16*1(arg2), %xmm1 + movdqu 16*2(arg2), %xmm2 + movdqu 16*3(arg2), %xmm3 + movdqu 16*4(arg2), %xmm4 + movdqu 16*5(arg2), %xmm5 + movdqu 16*6(arg2), %xmm6 + movdqu 16*7(arg2), %xmm7 + + pshufb %xmm11, %xmm0 + # XOR the initial_crc value + pxor %xmm10, %xmm0 + pshufb %xmm11, %xmm1 + pshufb %xmm11, %xmm2 + pshufb %xmm11, %xmm3 + pshufb %xmm11, %xmm4 + pshufb %xmm11, %xmm5 + pshufb %xmm11, %xmm6 + pshufb %xmm11, %xmm7 + + movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 + #imm value of pclmulqdq instruction + #will determine which constant to use + + ################################################################# + # we subtract 256 instead of 128 to save one instruction from the loop + sub $256, arg3 + + # at this section of the code, there is 64*x+y (0<=y<64) bytes of + # buffer. The _fold_64_B_loop will fold 64B at a time + # until we have 64+y Bytes of buffer + + + # fold 64B at a time. This section of the code folds 4 xmm + # registers in parallel +_fold_64_B_loop: + + # update the buffer pointer + add $128, arg2 # buf += 64# + + movdqu 16*0(arg2), %xmm9 + movdqu 16*1(arg2), %xmm12 + pshufb %xmm11, %xmm9 + pshufb %xmm11, %xmm12 + movdqa %xmm0, %xmm8 + movdqa %xmm1, %xmm13 + pclmulqdq $0x0 , %xmm10, %xmm0 + pclmulqdq $0x11, %xmm10, %xmm8 + pclmulqdq $0x0 , %xmm10, %xmm1 + pclmulqdq $0x11, %xmm10, %xmm13 + pxor %xmm9 , %xmm0 + xorps %xmm8 , %xmm0 + pxor %xmm12, %xmm1 + xorps %xmm13, %xmm1 + + movdqu 16*2(arg2), %xmm9 + movdqu 16*3(arg2), %xmm12 + pshufb %xmm11, %xmm9 + pshufb %xmm11, %xmm12 + movdqa %xmm2, %xmm8 + movdqa %xmm3, %xmm13 + pclmulqdq $0x0, %xmm10, %xmm2 + pclmulqdq $0x11, %xmm10, %xmm8 + pclmulqdq $0x0, %xmm10, %xmm3 + pclmulqdq $0x11, %xmm10, %xmm13 + pxor %xmm9 , %xmm2 + xorps %xmm8 , %xmm2 + pxor %xmm12, %xmm3 + xorps %xmm13, %xmm3 + + movdqu 16*4(arg2), %xmm9 + movdqu 16*5(arg2), %xmm12 + pshufb %xmm11, %xmm9 + pshufb %xmm11, %xmm12 + movdqa %xmm4, %xmm8 + movdqa %xmm5, %xmm13 + pclmulqdq $0x0, %xmm10, %xmm4 + pclmulqdq $0x11, %xmm10, %xmm8 + pclmulqdq $0x0, %xmm10, %xmm5 + pclmulqdq $0x11, %xmm10, %xmm13 + pxor %xmm9 , %xmm4 + xorps %xmm8 , %xmm4 + pxor %xmm12, %xmm5 + xorps %xmm13, %xmm5 + + movdqu 16*6(arg2), %xmm9 + movdqu 16*7(arg2), %xmm12 + pshufb %xmm11, %xmm9 + pshufb %xmm11, %xmm12 + movdqa %xmm6 , %xmm8 + movdqa %xmm7 , %xmm13 + pclmulqdq $0x0 , %xmm10, %xmm6 + pclmulqdq $0x11, %xmm10, %xmm8 + pclmulqdq $0x0 , %xmm10, %xmm7 + pclmulqdq $0x11, %xmm10, %xmm13 + pxor %xmm9 , %xmm6 + xorps %xmm8 , %xmm6 + pxor %xmm12, %xmm7 + xorps %xmm13, %xmm7 + + sub $128, arg3 + + # check if there is another 64B in the buffer to be able to fold + jge _fold_64_B_loop + ################################################################## + + + add $128, arg2 + # at this point, the buffer pointer is pointing at the last y Bytes + # of the buffer the 64B of folded data is in 4 of the xmm + # registers: xmm0, xmm1, xmm2, xmm3 + + + # fold the 8 xmm registers to 1 xmm register with different constants + + movdqa rk9(%rip), %xmm10 + movdqa %xmm0, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm0 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + xorps %xmm0, %xmm7 + + movdqa rk11(%rip), %xmm10 + movdqa %xmm1, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm1 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + xorps %xmm1, %xmm7 + + movdqa rk13(%rip), %xmm10 + movdqa %xmm2, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm2 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm2, %xmm7 + + movdqa rk15(%rip), %xmm10 + movdqa %xmm3, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm3 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + xorps %xmm3, %xmm7 + + movdqa rk17(%rip), %xmm10 + movdqa %xmm4, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm4 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm4, %xmm7 + + movdqa rk19(%rip), %xmm10 + movdqa %xmm5, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm5 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + xorps %xmm5, %xmm7 + + movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2 + #imm value of pclmulqdq instruction + #will determine which constant to use + movdqa %xmm6, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm6 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm6, %xmm7 + + + # instead of 64, we add 48 to the loop counter to save 1 instruction + # from the loop instead of a cmp instruction, we use the negative + # flag with the jl instruction + add $128-16, arg3 + jl _final_reduction_for_128 + + # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 + # and the rest is in memory. We can fold 16 bytes at a time if y>=16 + # continue folding 16B at a time + +_16B_reduction_loop: + movdqa %xmm7, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm7 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + movdqu (arg2), %xmm0 + pshufb %xmm11, %xmm0 + pxor %xmm0 , %xmm7 + add $16, arg2 + sub $16, arg3 + # instead of a cmp instruction, we utilize the flags with the + # jge instruction equivalent of: cmp arg3, 16-16 + # check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + #now we have 16+z bytes left to reduce, where 0<= z < 16. + #first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + # check if any more data to fold. If not, compute the CRC of + # the final 128 bits + add $16, arg3 + je _128_done + + # here we are getting data that is less than 16 bytes. + # since we know that there was data before the pointer, we can + # offset the input pointer before the actual point, to receive + # exactly 16 bytes. after that the registers need to be adjusted. +_get_last_two_xmms: + movdqa %xmm7, %xmm2 + + movdqu -16(arg2, arg3), %xmm1 + pshufb %xmm11, %xmm1 + + # get rid of the extra data that was loaded before + # load the shift constant + lea pshufb_shf_table+16(%rip), %rax + sub arg3, %rax + movdqu (%rax), %xmm0 + + # shift xmm2 to the left by arg3 bytes + pshufb %xmm0, %xmm2 + + # shift xmm7 to the right by 16-arg3 bytes + pxor mask1(%rip), %xmm0 + pshufb %xmm0, %xmm7 + pblendvb %xmm2, %xmm1 #xmm0 is implicit + + # fold 16 Bytes + movdqa %xmm1, %xmm2 + movdqa %xmm7, %xmm8 + pclmulqdq $0x11, %xmm10, %xmm7 + pclmulqdq $0x0 , %xmm10, %xmm8 + pxor %xmm8, %xmm7 + pxor %xmm2, %xmm7 + +_128_done: + # compute crc of a 128-bit value + movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10 + movdqa %xmm7, %xmm0 + + #64b fold + pclmulqdq $0x1, %xmm10, %xmm7 + pslldq $8 , %xmm0 + pxor %xmm0, %xmm7 + + #32b fold + movdqa %xmm7, %xmm0 + + pand mask2(%rip), %xmm0 + + psrldq $12, %xmm7 + pclmulqdq $0x10, %xmm10, %xmm7 + pxor %xmm0, %xmm7 + + #barrett reduction +_barrett: + movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10 + movdqa %xmm7, %xmm0 + pclmulqdq $0x01, %xmm10, %xmm7 + pslldq $4, %xmm7 + pclmulqdq $0x11, %xmm10, %xmm7 + + pslldq $4, %xmm7 + pxor %xmm0, %xmm7 + pextrd $1, %xmm7, %eax + +_cleanup: + # scale the result back to 16 bits + shr $16, %eax + mov %rcx, %rsp + ret + +######################################################################## + +.align 16 +_less_than_128: + + # check if there is enough buffer to be able to fold 16B at a time + cmp $32, arg3 + jl _less_than_32 + movdqa SHUF_MASK(%rip), %xmm11 + + # now if there is, load the constants + movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 + + movd arg1_low32, %xmm0 # get the initial crc value + pslldq $12, %xmm0 # align it to its correct place + movdqu (arg2), %xmm7 # load the plaintext + pshufb %xmm11, %xmm7 # byte-reflect the plaintext + pxor %xmm0, %xmm7 + + + # update the buffer pointer + add $16, arg2 + + # update the counter. subtract 32 instead of 16 to save one + # instruction from the loop + sub $32, arg3 + + jmp _16B_reduction_loop + + +.align 16 +_less_than_32: + # mov initial crc to the return value. this is necessary for + # zero-length buffers. + mov arg1_low32, %eax + test arg3, arg3 + je _cleanup + + movdqa SHUF_MASK(%rip), %xmm11 + + movd arg1_low32, %xmm0 # get the initial crc value + pslldq $12, %xmm0 # align it to its correct place + + cmp $16, arg3 + je _exact_16_left + jl _less_than_16_left + + movdqu (arg2), %xmm7 # load the plaintext + pshufb %xmm11, %xmm7 # byte-reflect the plaintext + pxor %xmm0 , %xmm7 # xor the initial crc value + add $16, arg2 + sub $16, arg3 + movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +.align 16 +_less_than_16_left: + # use stack space to load data less than 16 bytes, zero-out + # the 16B in memory first. + + pxor %xmm1, %xmm1 + mov %rsp, %r11 + movdqa %xmm1, (%r11) + + cmp $4, arg3 + jl _only_less_than_4 + + # backup the counter value + mov arg3, %r9 + cmp $8, arg3 + jl _less_than_8_left + + # load 8 Bytes + mov (arg2), %rax + mov %rax, (%r11) + add $8, %r11 + sub $8, arg3 + add $8, arg2 +_less_than_8_left: + + cmp $4, arg3 + jl _less_than_4_left + + # load 4 Bytes + mov (arg2), %eax + mov %eax, (%r11) + add $4, %r11 + sub $4, arg3 + add $4, arg2 +_less_than_4_left: + + cmp $2, arg3 + jl _less_than_2_left + + # load 2 Bytes + mov (arg2), %ax + mov %ax, (%r11) + add $2, %r11 + sub $2, arg3 + add $2, arg2 +_less_than_2_left: + cmp $1, arg3 + jl _zero_left + + # load 1 Byte + mov (arg2), %al + mov %al, (%r11) +_zero_left: + movdqa (%rsp), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + # shl r9, 4 + lea pshufb_shf_table+16(%rip), %rax + sub %r9, %rax + movdqu (%rax), %xmm0 + pxor mask1(%rip), %xmm0 + + pshufb %xmm0, %xmm7 + jmp _128_done + +.align 16 +_exact_16_left: + movdqu (arg2), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + jmp _128_done + +_only_less_than_4: + cmp $3, arg3 + jl _only_less_than_3 + + # load 3 Bytes + mov (arg2), %al + mov %al, (%r11) + + mov 1(arg2), %al + mov %al, 1(%r11) + + mov 2(arg2), %al + mov %al, 2(%r11) + + movdqa (%rsp), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + psrldq $5, %xmm7 + + jmp _barrett +_only_less_than_3: + cmp $2, arg3 + jl _only_less_than_2 + + # load 2 Bytes + mov (arg2), %al + mov %al, (%r11) + + mov 1(arg2), %al + mov %al, 1(%r11) + + movdqa (%rsp), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + psrldq $6, %xmm7 + + jmp _barrett +_only_less_than_2: + + # load 1 Byte + mov (arg2), %al + mov %al, (%r11) + + movdqa (%rsp), %xmm7 + pshufb %xmm11, %xmm7 + pxor %xmm0 , %xmm7 # xor the initial crc value + + psrldq $7, %xmm7 + + jmp _barrett + +ENDPROC(crc_t10dif_pcl) + +.data + +# precomputed constants +# these constants are precomputed from the poly: +# 0x8bb70000 (0x8bb7 scaled to 32 bits) +.align 16 +# Q = 0x18BB70000 +# rk1 = 2^(32*3) mod Q << 32 +# rk2 = 2^(32*5) mod Q << 32 +# rk3 = 2^(32*15) mod Q << 32 +# rk4 = 2^(32*17) mod Q << 32 +# rk5 = 2^(32*3) mod Q << 32 +# rk6 = 2^(32*2) mod Q << 32 +# rk7 = floor(2^64/Q) +# rk8 = Q +rk1: +.quad 0x2d56000000000000 +rk2: +.quad 0x06df000000000000 +rk3: +.quad 0x9d9d000000000000 +rk4: +.quad 0x7cf5000000000000 +rk5: +.quad 0x2d56000000000000 +rk6: +.quad 0x1368000000000000 +rk7: +.quad 0x00000001f65a57f8 +rk8: +.quad 0x000000018bb70000 + +rk9: +.quad 0xceae000000000000 +rk10: +.quad 0xbfd6000000000000 +rk11: +.quad 0x1e16000000000000 +rk12: +.quad 0x713c000000000000 +rk13: +.quad 0xf7f9000000000000 +rk14: +.quad 0x80a6000000000000 +rk15: +.quad 0x044c000000000000 +rk16: +.quad 0xe658000000000000 +rk17: +.quad 0xad18000000000000 +rk18: +.quad 0xa497000000000000 +rk19: +.quad 0x6ee3000000000000 +rk20: +.quad 0xe7b5000000000000 + + + +mask1: +.octa 0x80808080808080808080808080808080 +mask2: +.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF + +SHUF_MASK: +.octa 0x000102030405060708090A0B0C0D0E0F + +pshufb_shf_table: +# use these values for shift constants for the pshufb instruction +# different alignments result in values as shown: +# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 +# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 +# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 +# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 +# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 +# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 +# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 +# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 +# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 +# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 +# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 +# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 +# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 +# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 +# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 +.octa 0x8f8e8d8c8b8a89888786858483828100 +.octa 0x000e0d0c0b0a09080706050403020100 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c new file mode 100644 index 000000000000..7845d7fd54c0 --- /dev/null +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -0,0 +1,151 @@ +/* + * Cryptographic API. + * + * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions + * + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc-t10dif.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <asm/i387.h> +#include <asm/cpufeature.h> +#include <asm/cpu_device_id.h> + +asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, + size_t len); + +struct chksum_desc_ctx { + __u16 crc; +}; + +/* + * Steps through buffer one byte at at time, calculates reflected + * crc using table. + */ + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = 0; + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + if (irq_fpu_usable()) { + kernel_fpu_begin(); + ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); + kernel_fpu_end(); + } else + ctx->crc = crc_t10dif_generic(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__u16 *)out = ctx->crc; + return 0; +} + +static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + if (irq_fpu_usable()) { + kernel_fpu_begin(); + *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + kernel_fpu_end(); + } else + *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, length, out); +} + +static struct shash_alg alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-pclmul", + .cra_priority = 200, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static const struct x86_cpu_id crct10dif_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); + +static int __init crct10dif_intel_mod_init(void) +{ + if (!x86_match_cpu(crct10dif_cpu_id)) + return -ENODEV; + + return crypto_register_shash(&alg); +} + +static void __exit crct10dif_intel_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crct10dif_intel_mod_init); +module_exit(crct10dif_intel_mod_fini); + +MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); +MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("crct10dif"); +MODULE_ALIAS("crct10dif-pclmul"); diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 597d4da69656..50226c4b86ed 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in) return 0; } -static struct shash_alg alg = { +static int sha224_ssse3_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_ssse3_final(desc, D); + + memcpy(hash, D, SHA224_DIGEST_SIZE); + memset(D, 0, SHA256_DIGEST_SIZE); + + return 0; +} + +static struct shash_alg algs[] = { { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_ssse3_init, .update = sha256_ssse3_update, @@ -204,7 +233,24 @@ static struct shash_alg alg = { .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, } -}; +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_ssse3_init, + .update = sha256_ssse3_update, + .final = sha224_ssse3_final, + .export = sha256_ssse3_export, + .import = sha256_ssse3_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ssse3", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) @@ -227,7 +273,7 @@ static bool __init avx_usable(void) static int __init sha256_ssse3_mod_init(void) { - /* test for SSE3 first */ + /* test for SSSE3 first */ if (cpu_has_ssse3) sha256_transform_asm = sha256_transform_ssse3; @@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void) else #endif pr_info("Using SSSE3 optimized SHA-256 implementation\n"); - return crypto_register_shash(&alg); + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } pr_info("Neither AVX nor SSSE3 is available/usable.\n"); @@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void) static void __exit sha256_ssse3_mod_fini(void) { - crypto_unregister_shash(&alg); + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } module_init(sha256_ssse3_mod_init); @@ -273,3 +319,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS("sha256"); +MODULE_ALIAS("sha384"); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 6cbd8df348d2..f30cd10293f0 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in) return 0; } -static struct shash_alg alg = { +static int sha384_ssse3_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA384_H0; + sctx->state[1] = SHA384_H1; + sctx->state[2] = SHA384_H2; + sctx->state[3] = SHA384_H3; + sctx->state[4] = SHA384_H4; + sctx->state[5] = SHA384_H5; + sctx->state[6] = SHA384_H6; + sctx->state[7] = SHA384_H7; + + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA512_DIGEST_SIZE]; + + sha512_ssse3_final(desc, D); + + memcpy(hash, D, SHA384_DIGEST_SIZE); + memset(D, 0, SHA512_DIGEST_SIZE); + + return 0; +} + +static struct shash_alg algs[] = { { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_ssse3_init, .update = sha512_ssse3_update, @@ -211,7 +241,24 @@ static struct shash_alg alg = { .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } -}; +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_ssse3_init, + .update = sha512_ssse3_update, + .final = sha384_ssse3_final, + .export = sha512_ssse3_export, + .import = sha512_ssse3_import, + .descsize = sizeof(struct sha512_state), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-ssse3", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) @@ -234,7 +281,7 @@ static bool __init avx_usable(void) static int __init sha512_ssse3_mod_init(void) { - /* test for SSE3 first */ + /* test for SSSE3 first */ if (cpu_has_ssse3) sha512_transform_asm = sha512_transform_ssse3; @@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void) else #endif pr_info("Using SSSE3 optimized SHA-512 implementation\n"); - return crypto_register_shash(&alg); + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } pr_info("Neither AVX nor SSSE3 is available/usable.\n"); @@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void) static void __exit sha512_ssse3_mod_fini(void) { - crypto_unregister_shash(&alg); + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } module_init(sha512_ssse3_mod_init); @@ -280,3 +327,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS("sha512"); +MODULE_ALIAS("sha384"); diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S deleted file mode 100644 index e1a83b9cd389..000000000000 --- a/arch/x86/crypto/twofish-avx2-asm_64.S +++ /dev/null @@ -1,600 +0,0 @@ -/* - * x86_64/AVX2 assembler optimized version of Twofish - * - * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ - -#include <linux/linkage.h> -#include "glue_helper-asm-avx2.S" - -.file "twofish-avx2-asm_64.S" - -.data -.align 16 - -.Lvpshufb_mask0: -.long 0x80808000 -.long 0x80808004 -.long 0x80808008 -.long 0x8080800c - -.Lbswap128_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -.Lxts_gf128mul_and_shl1_mask_0: - .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 -.Lxts_gf128mul_and_shl1_mask_1: - .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 - -.text - -/* structure of crypto context */ -#define s0 0 -#define s1 1024 -#define s2 2048 -#define s3 3072 -#define w 4096 -#define k 4128 - -/* register macros */ -#define CTX %rdi - -#define RS0 CTX -#define RS1 %r8 -#define RS2 %r9 -#define RS3 %r10 -#define RK %r11 -#define RW %rax -#define RROUND %r12 -#define RROUNDd %r12d - -#define RA0 %ymm8 -#define RB0 %ymm9 -#define RC0 %ymm10 -#define RD0 %ymm11 -#define RA1 %ymm12 -#define RB1 %ymm13 -#define RC1 %ymm14 -#define RD1 %ymm15 - -/* temp regs */ -#define RX0 %ymm0 -#define RY0 %ymm1 -#define RX1 %ymm2 -#define RY1 %ymm3 -#define RT0 %ymm4 -#define RIDX %ymm5 - -#define RX0x %xmm0 -#define RY0x %xmm1 -#define RX1x %xmm2 -#define RY1x %xmm3 -#define RT0x %xmm4 - -/* vpgatherdd mask and '-1' */ -#define RNOT %ymm6 - -/* byte mask, (-1 >> 24) */ -#define RBYTE %ymm7 - -/********************************************************************** - 16-way AVX2 twofish - **********************************************************************/ -#define init_round_constants() \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpsrld $24, RNOT, RBYTE; \ - leaq k(CTX), RK; \ - leaq w(CTX), RW; \ - leaq s1(CTX), RS1; \ - leaq s2(CTX), RS2; \ - leaq s3(CTX), RS3; \ - -#define g16(ab, rs0, rs1, rs2, rs3, xy) \ - vpand RBYTE, ab ## 0, RIDX; \ - vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - \ - vpand RBYTE, ab ## 1, RIDX; \ - vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - \ - vpsrld $8, ab ## 0, RIDX; \ - vpand RBYTE, RIDX, RIDX; \ - vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 0, xy ## 0; \ - \ - vpsrld $8, ab ## 1, RIDX; \ - vpand RBYTE, RIDX, RIDX; \ - vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 1, xy ## 1; \ - \ - vpsrld $16, ab ## 0, RIDX; \ - vpand RBYTE, RIDX, RIDX; \ - vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 0, xy ## 0; \ - \ - vpsrld $16, ab ## 1, RIDX; \ - vpand RBYTE, RIDX, RIDX; \ - vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 1, xy ## 1; \ - \ - vpsrld $24, ab ## 0, RIDX; \ - vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 0, xy ## 0; \ - \ - vpsrld $24, ab ## 1, RIDX; \ - vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ - vpcmpeqd RNOT, RNOT, RNOT; \ - vpxor RT0, xy ## 1, xy ## 1; - -#define g1_16(a, x) \ - g16(a, RS0, RS1, RS2, RS3, x); - -#define g2_16(b, y) \ - g16(b, RS1, RS2, RS3, RS0, y); - -#define encrypt_round_end16(a, b, c, d, nk) \ - vpaddd RY0, RX0, RX0; \ - vpaddd RX0, RY0, RY0; \ - vpbroadcastd nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RX0, RX0; \ - vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RY0, RY0; \ - \ - vpxor RY0, d ## 0, d ## 0; \ - \ - vpxor RX0, c ## 0, c ## 0; \ - vpsrld $1, c ## 0, RT0; \ - vpslld $31, c ## 0, c ## 0; \ - vpor RT0, c ## 0, c ## 0; \ - \ - vpaddd RY1, RX1, RX1; \ - vpaddd RX1, RY1, RY1; \ - vpbroadcastd nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RX1, RX1; \ - vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RY1, RY1; \ - \ - vpxor RY1, d ## 1, d ## 1; \ - \ - vpxor RX1, c ## 1, c ## 1; \ - vpsrld $1, c ## 1, RT0; \ - vpslld $31, c ## 1, c ## 1; \ - vpor RT0, c ## 1, c ## 1; \ - -#define encrypt_round16(a, b, c, d, nk) \ - g2_16(b, RY); \ - \ - vpslld $1, b ## 0, RT0; \ - vpsrld $31, b ## 0, b ## 0; \ - vpor RT0, b ## 0, b ## 0; \ - \ - vpslld $1, b ## 1, RT0; \ - vpsrld $31, b ## 1, b ## 1; \ - vpor RT0, b ## 1, b ## 1; \ - \ - g1_16(a, RX); \ - \ - encrypt_round_end16(a, b, c, d, nk); - -#define encrypt_round_first16(a, b, c, d, nk) \ - vpslld $1, d ## 0, RT0; \ - vpsrld $31, d ## 0, d ## 0; \ - vpor RT0, d ## 0, d ## 0; \ - \ - vpslld $1, d ## 1, RT0; \ - vpsrld $31, d ## 1, d ## 1; \ - vpor RT0, d ## 1, d ## 1; \ - \ - encrypt_round16(a, b, c, d, nk); - -#define encrypt_round_last16(a, b, c, d, nk) \ - g2_16(b, RY); \ - \ - g1_16(a, RX); \ - \ - encrypt_round_end16(a, b, c, d, nk); - -#define decrypt_round_end16(a, b, c, d, nk) \ - vpaddd RY0, RX0, RX0; \ - vpaddd RX0, RY0, RY0; \ - vpbroadcastd nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RX0, RX0; \ - vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RY0, RY0; \ - \ - vpxor RX0, c ## 0, c ## 0; \ - \ - vpxor RY0, d ## 0, d ## 0; \ - vpsrld $1, d ## 0, RT0; \ - vpslld $31, d ## 0, d ## 0; \ - vpor RT0, d ## 0, d ## 0; \ - \ - vpaddd RY1, RX1, RX1; \ - vpaddd RX1, RY1, RY1; \ - vpbroadcastd nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RX1, RX1; \ - vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ - vpaddd RT0, RY1, RY1; \ - \ - vpxor RX1, c ## 1, c ## 1; \ - \ - vpxor RY1, d ## 1, d ## 1; \ - vpsrld $1, d ## 1, RT0; \ - vpslld $31, d ## 1, d ## 1; \ - vpor RT0, d ## 1, d ## 1; - -#define decrypt_round16(a, b, c, d, nk) \ - g1_16(a, RX); \ - \ - vpslld $1, a ## 0, RT0; \ - vpsrld $31, a ## 0, a ## 0; \ - vpor RT0, a ## 0, a ## 0; \ - \ - vpslld $1, a ## 1, RT0; \ - vpsrld $31, a ## 1, a ## 1; \ - vpor RT0, a ## 1, a ## 1; \ - \ - g2_16(b, RY); \ - \ - decrypt_round_end16(a, b, c, d, nk); - -#define decrypt_round_first16(a, b, c, d, nk) \ - vpslld $1, c ## 0, RT0; \ - vpsrld $31, c ## 0, c ## 0; \ - vpor RT0, c ## 0, c ## 0; \ - \ - vpslld $1, c ## 1, RT0; \ - vpsrld $31, c ## 1, c ## 1; \ - vpor RT0, c ## 1, c ## 1; \ - \ - decrypt_round16(a, b, c, d, nk) - -#define decrypt_round_last16(a, b, c, d, nk) \ - g1_16(a, RX); \ - \ - g2_16(b, RY); \ - \ - decrypt_round_end16(a, b, c, d, nk); - -#define encrypt_cycle16() \ - encrypt_round16(RA, RB, RC, RD, 0); \ - encrypt_round16(RC, RD, RA, RB, 8); - -#define encrypt_cycle_first16() \ - encrypt_round_first16(RA, RB, RC, RD, 0); \ - encrypt_round16(RC, RD, RA, RB, 8); - -#define encrypt_cycle_last16() \ - encrypt_round16(RA, RB, RC, RD, 0); \ - encrypt_round_last16(RC, RD, RA, RB, 8); - -#define decrypt_cycle16(n) \ - decrypt_round16(RC, RD, RA, RB, 8); \ - decrypt_round16(RA, RB, RC, RD, 0); - -#define decrypt_cycle_first16(n) \ - decrypt_round_first16(RC, RD, RA, RB, 8); \ - decrypt_round16(RA, RB, RC, RD, 0); - -#define decrypt_cycle_last16(n) \ - decrypt_round16(RC, RD, RA, RB, 8); \ - decrypt_round_last16(RA, RB, RC, RD, 0); - -#define transpose_4x4(x0,x1,x2,x3,t1,t2) \ - vpunpckhdq x1, x0, t2; \ - vpunpckldq x1, x0, x0; \ - \ - vpunpckldq x3, x2, t1; \ - vpunpckhdq x3, x2, x2; \ - \ - vpunpckhqdq t1, x0, x1; \ - vpunpcklqdq t1, x0, x0; \ - \ - vpunpckhqdq x2, t2, x3; \ - vpunpcklqdq x2, t2, x2; - -#define read_blocks8(offs,a,b,c,d) \ - transpose_4x4(a, b, c, d, RX0, RY0); - -#define write_blocks8(offs,a,b,c,d) \ - transpose_4x4(a, b, c, d, RX0, RY0); - -#define inpack_enc8(a,b,c,d) \ - vpbroadcastd 4*0(RW), RT0; \ - vpxor RT0, a, a; \ - \ - vpbroadcastd 4*1(RW), RT0; \ - vpxor RT0, b, b; \ - \ - vpbroadcastd 4*2(RW), RT0; \ - vpxor RT0, c, c; \ - \ - vpbroadcastd 4*3(RW), RT0; \ - vpxor RT0, d, d; - -#define outunpack_enc8(a,b,c,d) \ - vpbroadcastd 4*4(RW), RX0; \ - vpbroadcastd 4*5(RW), RY0; \ - vpxor RX0, c, RX0; \ - vpxor RY0, d, RY0; \ - \ - vpbroadcastd 4*6(RW), RT0; \ - vpxor RT0, a, c; \ - vpbroadcastd 4*7(RW), RT0; \ - vpxor RT0, b, d; \ - \ - vmovdqa RX0, a; \ - vmovdqa RY0, b; - -#define inpack_dec8(a,b,c,d) \ - vpbroadcastd 4*4(RW), RX0; \ - vpbroadcastd 4*5(RW), RY0; \ - vpxor RX0, a, RX0; \ - vpxor RY0, b, RY0; \ - \ - vpbroadcastd 4*6(RW), RT0; \ - vpxor RT0, c, a; \ - vpbroadcastd 4*7(RW), RT0; \ - vpxor RT0, d, b; \ - \ - vmovdqa RX0, c; \ - vmovdqa RY0, d; - -#define outunpack_dec8(a,b,c,d) \ - vpbroadcastd 4*0(RW), RT0; \ - vpxor RT0, a, a; \ - \ - vpbroadcastd 4*1(RW), RT0; \ - vpxor RT0, b, b; \ - \ - vpbroadcastd 4*2(RW), RT0; \ - vpxor RT0, c, c; \ - \ - vpbroadcastd 4*3(RW), RT0; \ - vpxor RT0, d, d; - -#define read_blocks16(a,b,c,d) \ - read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ - read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); - -#define write_blocks16(a,b,c,d) \ - write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ - write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); - -#define xor_blocks16(a,b,c,d) \ - xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ - xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); - -#define inpack_enc16(a,b,c,d) \ - inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ - inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); - -#define outunpack_enc16(a,b,c,d) \ - outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ - outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); - -#define inpack_dec16(a,b,c,d) \ - inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ - inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); - -#define outunpack_dec16(a,b,c,d) \ - outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ - outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); - -.align 8 -__twofish_enc_blk16: - /* input: - * %rdi: ctx, CTX - * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext - * output: - * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext - */ - init_round_constants(); - - read_blocks16(RA, RB, RC, RD); - inpack_enc16(RA, RB, RC, RD); - - xorl RROUNDd, RROUNDd; - encrypt_cycle_first16(); - movl $2, RROUNDd; - -.align 4 -.L__enc_loop: - encrypt_cycle16(); - - addl $2, RROUNDd; - cmpl $14, RROUNDd; - jne .L__enc_loop; - - encrypt_cycle_last16(); - - outunpack_enc16(RA, RB, RC, RD); - write_blocks16(RA, RB, RC, RD); - - ret; -ENDPROC(__twofish_enc_blk16) - -.align 8 -__twofish_dec_blk16: - /* input: - * %rdi: ctx, CTX - * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext - * output: - * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext - */ - init_round_constants(); - - read_blocks16(RA, RB, RC, RD); - inpack_dec16(RA, RB, RC, RD); - - movl $14, RROUNDd; - decrypt_cycle_first16(); - movl $12, RROUNDd; - -.align 4 -.L__dec_loop: - decrypt_cycle16(); - - addl $-2, RROUNDd; - jnz .L__dec_loop; - - decrypt_cycle_last16(); - - outunpack_dec16(RA, RB, RC, RD); - write_blocks16(RA, RB, RC, RD); - - ret; -ENDPROC(__twofish_dec_blk16) - -ENTRY(twofish_ecb_enc_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - pushq %r12; - - load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - call __twofish_enc_blk16; - - store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_ecb_enc_16way) - -ENTRY(twofish_ecb_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - pushq %r12; - - load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - call __twofish_dec_blk16; - - store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_ecb_dec_16way) - -ENTRY(twofish_cbc_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - vzeroupper; - pushq %r12; - - load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - call __twofish_dec_blk16; - - store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1, - RX0); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_cbc_dec_16way) - -ENTRY(twofish_ctr_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (little endian, 128bit) - */ - - vzeroupper; - pushq %r12; - - load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1, - RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT, - RBYTE); - - call __twofish_enc_blk16; - - store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_ctr_16way) - -.align 8 -twofish_xts_crypt_16way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - * %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16 - */ - - vzeroupper; - pushq %r12; - - load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, - RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT, - .Lxts_gf128mul_and_shl1_mask_0, - .Lxts_gf128mul_and_shl1_mask_1); - - call *%r8; - - store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); - - popq %r12; - vzeroupper; - - ret; -ENDPROC(twofish_xts_crypt_16way) - -ENTRY(twofish_xts_enc_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - leaq __twofish_enc_blk16, %r8; - jmp twofish_xts_crypt_16way; -ENDPROC(twofish_xts_enc_16way) - -ENTRY(twofish_xts_dec_16way) - /* input: - * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) - * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) - */ - leaq __twofish_dec_blk16, %r8; - jmp twofish_xts_crypt_16way; -ENDPROC(twofish_xts_dec_16way) diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c deleted file mode 100644 index ce33b5be64ee..000000000000 --- a/arch/x86/crypto/twofish_avx2_glue.c +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Glue Code for x86_64/AVX2 assembler optimized version of Twofish - * - * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/algapi.h> -#include <crypto/ctr.h> -#include <crypto/twofish.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> -#include <asm/crypto/twofish.h> -#include <asm/crypto/ablk_helper.h> -#include <asm/crypto/glue_helper.h> -#include <crypto/scatterwalk.h> - -#define TF_AVX2_PARALLEL_BLOCKS 16 - -/* 16-way AVX2 parallel cipher functions */ -asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src); - -asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_3way(ctx, dst, src, false); -} - -static const struct common_glue_ctx twofish_enc = { - .num_funcs = 4, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) } - }, { - .num_blocks = 8, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) } - }, { - .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } - }, { - .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } - } } -}; - -static const struct common_glue_ctx twofish_ctr = { - .num_funcs = 4, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) } - }, { - .num_blocks = 8, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) } - }, { - .num_blocks = 3, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } - }, { - .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } - } } -}; - -static const struct common_glue_ctx twofish_enc_xts = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) } - }, { - .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) } - }, { - .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) } - } } -}; - -static const struct common_glue_ctx twofish_dec = { - .num_funcs = 4, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) } - }, { - .num_blocks = 8, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) } - }, { - .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } - }, { - .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } - } } -}; - -static const struct common_glue_ctx twofish_dec_cbc = { - .num_funcs = 4, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) } - }, { - .num_blocks = 8, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) } - }, { - .num_blocks = 3, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } - }, { - .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } - } } -}; - -static const struct common_glue_ctx twofish_dec_xts = { - .num_funcs = 3, - .fpu_blocks_limit = 8, - - .funcs = { { - .num_blocks = 16, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) } - }, { - .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) } - }, { - .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) } - } } -}; - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); -} - -static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ - return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); -} - -static inline void twofish_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -struct crypt_priv { - struct twofish_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { - twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; - } - - while (nbytes >= 8 * bsize) { - twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 8; - nbytes -= bsize * 8; - } - - while (nbytes >= 3 * bsize) { - twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 3; - nbytes -= bsize * 3; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { - twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; - } - - while (nbytes >= 8 * bsize) { - twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 8; - nbytes -= bsize * 8; - } - - while (nbytes >= 3 * bsize) { - twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * 3; - nbytes -= bsize * 3; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx->ctx, srcdst, srcdst); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TF_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TF_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg tf_algs[10] = { { - .cra_name = "__ecb-twofish-avx2", - .cra_driver_name = "__driver-ecb-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-twofish-avx2", - .cra_driver_name = "__driver-cbc-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-twofish-avx2", - .cra_driver_name = "__driver-ctr-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-twofish-avx2", - .cra_driver_name = "__driver-lrw-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_twofish_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-twofish-avx2", - .cra_driver_name = "__driver-xts-twofish-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -} }; - -static int __init init(void) -{ - u64 xcr0; - - if (!cpu_has_avx2 || !cpu_has_osxsave) { - pr_info("AVX2 instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX2 detected but unusable.\n"); - return -ENODEV; - } - - return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); -} - -static void __exit fini(void) -{ - crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 2047a562f6b3..a62ba541884e 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -50,26 +50,18 @@ /* 8-way parallel cipher functions */ asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way); - asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way); asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way); - asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(twofish_ctr_8way); asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(twofish_xts_enc_8way); asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); -EXPORT_SYMBOL_GPL(twofish_xts_dec_8way); static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) @@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, __twofish_enc_blk_3way(ctx, dst, src, false); } -void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(twofish_enc_blk)); } -EXPORT_SYMBOL_GPL(twofish_xts_enc); -void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(twofish_dec_blk)); } -EXPORT_SYMBOL_GPL(twofish_xts_dec); static const struct common_glue_ctx twofish_enc = { diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index b31bf97775fc..2dfac58f3b11 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -111,7 +111,7 @@ static inline void acpi_disable_pci(void) } /* Low-level suspend routine. */ -extern int acpi_suspend_lowlevel(void); +extern int (*acpi_suspend_lowlevel)(void); /* Physical address to resume after wakeup */ #define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start)) diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h deleted file mode 100644 index f097b2face10..000000000000 --- a/arch/x86/include/asm/crypto/blowfish.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef ASM_X86_BLOWFISH_H -#define ASM_X86_BLOWFISH_H - -#include <linux/crypto.h> -#include <crypto/blowfish.h> - -#define BF_PARALLEL_BLOCKS 4 - -/* regular block cipher functions */ -asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, - bool xor); -asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); - -/* 4-way parallel cipher functions */ -asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) -{ - __blowfish_enc_blk(ctx, dst, src, false); -} - -static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk(ctx, dst, src, true); -} - -static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk_4way(ctx, dst, src, false); -} - -static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk_4way(ctx, dst, src, true); -} - -#endif diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index e655c6029b45..878c51ceebb5 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src); -/* 8-way parallel cipher functions */ -asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - /* helpers from twofish_x86_64-3way module */ extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, @@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); -/* helpers from twofish-avx module */ -extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); - #endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index 75ce3f47d204..77a99ac06d00 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -1,18 +1,6 @@ #ifndef _ASM_X86_EMERGENCY_RESTART_H #define _ASM_X86_EMERGENCY_RESTART_H -enum reboot_type { - BOOT_TRIPLE = 't', - BOOT_KBD = 'k', - BOOT_BIOS = 'b', - BOOT_ACPI = 'a', - BOOT_EFI = 'e', - BOOT_CF9 = 'p', - BOOT_CF9_COND = 'q', -}; - -extern enum reboot_type reboot_type; - extern void machine_emergency_restart(void); #endif /* _ASM_X86_EMERGENCY_RESTART_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d8e8eefbe24c..34f69cb9350a 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -345,4 +345,11 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, #define IO_SPACE_LIMIT 0xffff +#ifdef CONFIG_MTRR +extern int __must_check arch_phys_wc_add(unsigned long base, + unsigned long size); +extern void arch_phys_wc_del(int handle); +#define arch_phys_wc_add arch_phys_wc_add +#endif + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index af9c5525434d..f87f7fcefa0a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -222,14 +222,22 @@ struct kvm_mmu_page { int root_count; /* Currently serving as active root */ unsigned int unsync_children; unsigned long parent_ptes; /* Reverse mapping for parent_pte */ + + /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ + unsigned long mmu_valid_gen; + DECLARE_BITMAP(unsync_child_bitmap, 512); #ifdef CONFIG_X86_32 + /* + * Used out of the mmu-lock to avoid reading spte values while an + * update is in progress; see the comments in __get_spte_lockless(). + */ int clear_spte_count; #endif + /* Number of writes since the last time traversal visited this page. */ int write_flooding_count; - bool mmio_cached; }; struct kvm_pio_request { @@ -529,11 +537,14 @@ struct kvm_arch { unsigned int n_requested_mmu_pages; unsigned int n_max_mmu_pages; unsigned int indirect_shadow_pages; + unsigned long mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. */ struct list_head active_mmu_pages; + struct list_head zapped_obsolete_pages; + struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; int iommu_flags; @@ -769,7 +780,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); -void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index d354fb781c57..a55c7efcc4ed 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(unsigned long nowtime); -extern unsigned long mach_get_cmos_time(void); +extern int mach_set_rtc_mmss(const struct timespec *now); +extern void mach_get_cmos_time(struct timespec *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h index 73668abdbedf..1e69a75412a4 100644 --- a/arch/x86/include/asm/mrst-vrtc.h +++ b/arch/x86/include/asm/mrst-vrtc.h @@ -3,7 +3,7 @@ extern unsigned char vrtc_cmos_read(unsigned char reg); extern void vrtc_cmos_write(unsigned char val, unsigned char reg); -extern unsigned long vrtc_get_time(void); -extern int vrtc_set_mmss(unsigned long nowtime); +extern void vrtc_get_time(struct timespec *now); +extern int vrtc_set_mmss(const struct timespec *now); #endif diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index e235582f9930..f768f6298419 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -26,7 +26,10 @@ #include <uapi/asm/mtrr.h> -/* The following functions are for use by other drivers */ +/* + * The following functions are for use by other drivers that cannot use + * arch_phys_wc_add and arch_phys_wc_del. + */ # ifdef CONFIG_MTRR extern u8 mtrr_type_lookup(u64 addr, u64 end); extern void mtrr_save_fixed_ranges(void *); @@ -45,6 +48,7 @@ extern void mtrr_aps_init(void); extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); +extern int phys_wc_to_mtrr_index(int handle); # else static inline u8 mtrr_type_lookup(u64 addr, u64 end) { @@ -80,6 +84,10 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { } +static inline int phys_wc_to_mtrr_index(int handle) +{ + return -1; +} #define mtrr_ap_init() do {} while (0) #define mtrr_bp_init() do {} while (0) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5b0818bc8963..7dc305a46058 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -207,7 +207,7 @@ static inline pte_t pte_mkexec(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - return pte_set_flags(pte, _PAGE_DIRTY); + return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) @@ -271,7 +271,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_DIRTY); + return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_mkhuge(pmd_t pmd) @@ -294,6 +294,26 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_PRESENT); } +static inline int pte_soft_dirty(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SOFT_DIRTY; +} + +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; +} + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); +} + /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e6423002c10b..c98ac63aae48 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -55,6 +55,18 @@ #define _PAGE_HIDDEN (_AT(pteval_t, 0)) #endif +/* + * The same hidden bit is used by kmemcheck, but since kmemcheck + * works on kernel pages while soft-dirty engine on user space, + * they do not conflict with each other. + */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) +#else +#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #else diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a1df6e84691f..27811190cbd7 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -89,7 +89,6 @@ struct thread_info { #define TIF_FORK 18 /* ret_from_fork */ #define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ -#define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ @@ -113,7 +112,6 @@ struct thread_info { #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) #define _TIF_NOHZ (1 << TIF_NOHZ) -#define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) @@ -154,7 +152,7 @@ struct thread_info { (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define PREEMPT_ACTIVE 0x10000000 diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index d8d99222b36a..828a1565ba57 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,8 @@ struct x86_cpuinit_ops { void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); }; +struct timespec; + /** * struct x86_platform_ops - platform specific runtime functions * @calibrate_tsc: calibrate TSC @@ -156,8 +158,8 @@ struct x86_cpuinit_ops { */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long nowtime); + void (*get_wallclock)(struct timespec *ts); + int (*set_wallclock)(const struct timespec *ts); void (*iommu_shutdown)(void); bool (*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 230c8ea878e5..d81a972dd506 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -44,6 +44,7 @@ #include <asm/mpspec.h> #include <asm/smp.h> +#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */ static int __initdata acpi_force = 0; u32 acpi_rsdt_forced; int acpi_disabled; @@ -559,6 +560,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity) = acpi_register_gsi_pic; +#ifdef CONFIG_ACPI_SLEEP +int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel; +#else +int (*acpi_suspend_lowlevel)(void); +#endif + /* * success: return IRQ number (>=0) * failure: return < 0 diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index b44577bc9744..2a34aaf3c8f1 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -26,12 +26,12 @@ static char temp_stack[4096]; #endif /** - * acpi_suspend_lowlevel - save kernel state + * x86_acpi_suspend_lowlevel - save kernel state * * Create an identity mapped page table and copy the wakeup routine to * low memory. */ -int acpi_suspend_lowlevel(void) +int x86_acpi_suspend_lowlevel(void) { struct wakeup_header *header = (struct wakeup_header *) __va(real_mode_header->wakeup_header); diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 67f59f8c6956..c9c2c982d5e4 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -15,3 +15,5 @@ extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); extern void do_suspend_lowlevel(void); + +extern int x86_acpi_suspend_lowlevel(void); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 39cc7f7acab3..63092afb142e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -25,6 +25,7 @@ #include <linux/kdebug.h> #include <linux/delay.h> #include <linux/crash_dump.h> +#include <linux/reboot.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -36,7 +37,6 @@ #include <asm/ipi.h> #include <asm/smp.h> #include <asm/x86_init.h> -#include <asm/emergency-restart.h> #include <asm/nmi.h> /* BMC sets a bit this MMR non-zero before sending an NMI */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5013a48d1aff..c587a8757227 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -90,7 +90,7 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) { u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); + int mbytes = get_num_physpages() >> (20-PAGE_SHIFT); if (c->x86_model < 6) { /* Based on AMD doc 20734R - June 2000 */ diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 2f3a7995e56a..98f2083832eb 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -182,11 +182,6 @@ static int therm_throt_process(bool new_event, int event, int level) this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); - else - printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package", - state->count); return 1; } if (old_event) { @@ -194,10 +189,6 @@ static int therm_throt_process(bool new_event, int event, int level) printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package"); - else - printk(KERN_INFO "CPU%d: %s power limit normal\n", - this_cpu, - level == CORE_LEVEL ? "Core" : "Package"); return 1; } @@ -220,6 +211,15 @@ static int thresh_event_valid(int event) return 1; } +static bool int_pln_enable; +static int __init int_pln_enable_setup(char *s) +{ + int_pln_enable = true; + + return 1; +} +__setup("int_pln_enable", int_pln_enable_setup); + #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device: */ static __cpuinit int thermal_throttle_add_dev(struct device *dev, @@ -232,7 +232,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev, if (err) return err; - if (cpu_has(c, X86_FEATURE_PLN)) + if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_core_power_limit_count.attr, thermal_attr_group.name); @@ -240,7 +240,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev, err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_package_throttle_count.attr, thermal_attr_group.name); - if (cpu_has(c, X86_FEATURE_PLN)) + if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) err = sysfs_add_file_to_group(&dev->kobj, &dev_attr_package_power_limit_count.attr, thermal_attr_group.name); @@ -353,7 +353,7 @@ static void intel_thermal_interrupt(void) CORE_LEVEL) != 0) mce_log_therm_throt_event(msr_val); - if (this_cpu_has(X86_FEATURE_PLN)) + if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, CORE_LEVEL); @@ -363,7 +363,7 @@ static void intel_thermal_interrupt(void) therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, THERMAL_THROTTLING_EVENT, PACKAGE_LEVEL); - if (this_cpu_has(X86_FEATURE_PLN)) + if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) therm_throt_process(msr_val & PACKAGE_THERM_STATUS_POWER_LIMIT, POWER_LIMIT_EVENT, @@ -482,9 +482,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c) apic_write(APIC_LVTTHMR, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - if (cpu_has(c, X86_FEATURE_PLN)) + if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable) wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE + (l | (THERM_INT_LOW_ENABLE + | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h); + else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) + wrmsr(MSR_IA32_THERM_INTERRUPT, + l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); else wrmsr(MSR_IA32_THERM_INTERRUPT, @@ -492,9 +496,14 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_PTS)) { rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); - if (cpu_has(c, X86_FEATURE_PLN)) + if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable) wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, - l | (PACKAGE_THERM_INT_LOW_ENABLE + (l | (PACKAGE_THERM_INT_LOW_ENABLE + | PACKAGE_THERM_INT_HIGH_ENABLE)) + & ~PACKAGE_THERM_INT_PLN_ENABLE, h); + else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, + l | (PACKAGE_THERM_INT_LOW_ENABLE | PACKAGE_THERM_INT_HIGH_ENABLE | PACKAGE_THERM_INT_PLN_ENABLE), h); else diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index ca22b73aaa25..f961de9964c7 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -51,9 +51,13 @@ #include <asm/e820.h> #include <asm/mtrr.h> #include <asm/msr.h> +#include <asm/pat.h> #include "mtrr.h" +/* arch_phys_wc_add returns an MTRR register index plus this offset. */ +#define MTRR_TO_PHYS_WC_OFFSET 1000 + u32 num_var_ranges; unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; @@ -525,6 +529,73 @@ int mtrr_del(int reg, unsigned long base, unsigned long size) } EXPORT_SYMBOL(mtrr_del); +/** + * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable + * @base: Physical base address + * @size: Size of region + * + * If PAT is available, this does nothing. If PAT is unavailable, it + * attempts to add a WC MTRR covering size bytes starting at base and + * logs an error if this fails. + * + * Drivers must store the return value to pass to mtrr_del_wc_if_needed, + * but drivers should not try to interpret that return value. + */ +int arch_phys_wc_add(unsigned long base, unsigned long size) +{ + int ret; + + if (pat_enabled) + return 0; /* Success! (We don't need to do anything.) */ + + ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true); + if (ret < 0) { + pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.", + (void *)base, (void *)(base + size - 1)); + return ret; + } + return ret + MTRR_TO_PHYS_WC_OFFSET; +} +EXPORT_SYMBOL(arch_phys_wc_add); + +/* + * arch_phys_wc_del - undoes arch_phys_wc_add + * @handle: Return value from arch_phys_wc_add + * + * This cleans up after mtrr_add_wc_if_needed. + * + * The API guarantees that mtrr_del_wc_if_needed(error code) and + * mtrr_del_wc_if_needed(0) do nothing. + */ +void arch_phys_wc_del(int handle) +{ + if (handle >= 1) { + WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET); + mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0); + } +} +EXPORT_SYMBOL(arch_phys_wc_del); + +/* + * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value + * @handle: Return value from arch_phys_wc_add + * + * This will turn the return value from arch_phys_wc_add into an mtrr + * index suitable for debugging. + * + * Note: There is no legitimate use for this function, except possibly + * in printk line. Alas there is an illegitimate use in some ancient + * drm ioctls. + */ +int phys_wc_to_mtrr_index(int handle) +{ + if (handle < MTRR_TO_PHYS_WC_OFFSET) + return -1; + else + return handle - MTRR_TO_PHYS_WC_OFFSET; +} +EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index); + /* * HACK ALERT! * These should be called implicitly, but we can't yet until all the initcall diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 7b3fe56b1c21..31f0f335ed22 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c @@ -11,10 +11,10 @@ const char *const x86_power_flags[32] = { "fid", /* frequency id control */ "vid", /* voltage id control */ "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", + "tm", /* hardware thermal control */ + "stc", /* software thermal control */ + "100mhzsteps", /* 100 MHz multiplier control */ + "hwpstate", /* hardware P-state control */ "", /* tsc invariant mapped to constant_tsc */ "cpb", /* core performance boost */ "eff_freq_ro", /* Readonly aperf/mperf */ diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index b1581527a236..4934890e4db2 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -364,9 +364,7 @@ static void dt_add_ioapic_domain(unsigned int ioapic_num, * and assigned so we can keep the 1:1 mapping which the ioapic * is having. */ - ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); - if (ret) - pr_err("Error mapping legacy IRQs: %d\n", ret); + irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); if (num > NR_IRQS_LEGACY) { ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 02f07634d265..f66ff162dce8 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -393,6 +393,9 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) unregister_hw_breakpoint(t->ptrace_bps[i]); t->ptrace_bps[i] = NULL; } + + t->debugreg6 = 0; + t->ptrace_dr7 = 0; } void hw_breakpoint_restore(void) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 3dd37ebd591b..1f354f4b602b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -48,10 +48,9 @@ static struct pvclock_wall_clock wall_clock; * have elapsed since the hypervisor wrote the data. So we try to account for * that with system time */ -static unsigned long kvm_get_wallclock(void) +static void kvm_get_wallclock(struct timespec *now) { struct pvclock_vcpu_time_info *vcpu_time; - struct timespec ts; int low, high; int cpu; @@ -64,14 +63,12 @@ static unsigned long kvm_get_wallclock(void) cpu = smp_processor_id(); vcpu_time = &hv_clock[cpu].pvti; - pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); + pvclock_read_wallclock(&wall_clock, vcpu_time, now); preempt_enable(); - - return ts.tv_sec; } -static int kvm_set_wallclock(unsigned long now) +static int kvm_set_wallclock(const struct timespec *now) { return -1; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 29a8120e6fe8..7461f50d5bb1 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -601,30 +601,48 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } -static int -ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, - struct task_struct *tsk, int disabled) +static int ptrace_fill_bp_fields(struct perf_event_attr *attr, + int len, int type, bool disabled) +{ + int err, bp_len, bp_type; + + err = arch_bp_generic_fields(len, type, &bp_len, &bp_type); + if (!err) { + attr->bp_len = bp_len; + attr->bp_type = bp_type; + attr->disabled = disabled; + } + + return err; +} + +static struct perf_event * +ptrace_register_breakpoint(struct task_struct *tsk, int len, int type, + unsigned long addr, bool disabled) { - int err; - int gen_len, gen_type; struct perf_event_attr attr; + int err; - /* - * We should have at least an inactive breakpoint at this - * slot. It means the user is writing dr7 without having - * written the address register first - */ - if (!bp) - return -EINVAL; + ptrace_breakpoint_init(&attr); + attr.bp_addr = addr; - err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + err = ptrace_fill_bp_fields(&attr, len, type, disabled); if (err) - return err; + return ERR_PTR(err); + + return register_user_hw_breakpoint(&attr, ptrace_triggered, + NULL, tsk); +} - attr = bp->attr; - attr.bp_len = gen_len; - attr.bp_type = gen_type; - attr.disabled = disabled; +static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + int disabled) +{ + struct perf_event_attr attr = bp->attr; + int err; + + err = ptrace_fill_bp_fields(&attr, len, type, disabled); + if (err) + return err; return modify_user_hw_breakpoint(bp, &attr); } @@ -634,67 +652,50 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { - struct thread_struct *thread = &(tsk->thread); + struct thread_struct *thread = &tsk->thread; unsigned long old_dr7; - int i, orig_ret = 0, rc = 0; - int enabled, second_pass = 0; - unsigned len, type; - struct perf_event *bp; - - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; + bool second_pass = false; + int i, rc, ret = 0; data &= ~DR_CONTROL_RESERVED; old_dr7 = ptrace_get_dr7(thread->ptrace_bps); + restore: - /* - * Loop through all the hardware breakpoints, making the - * appropriate changes to each. - */ + rc = 0; for (i = 0; i < HBP_NUM; i++) { - enabled = decode_dr7(data, i, &len, &type); - bp = thread->ptrace_bps[i]; - - if (!enabled) { - if (bp) { - /* - * Don't unregister the breakpoints right-away, - * unless all register_user_hw_breakpoint() - * requests have succeeded. This prevents - * any window of opportunity for debug - * register grabbing by other users. - */ - if (!second_pass) - continue; - - rc = ptrace_modify_breakpoint(bp, len, type, - tsk, 1); - if (rc) - break; + unsigned len, type; + bool disabled = !decode_dr7(data, i, &len, &type); + struct perf_event *bp = thread->ptrace_bps[i]; + + if (!bp) { + if (disabled) + continue; + + bp = ptrace_register_breakpoint(tsk, + len, type, 0, disabled); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + break; } + + thread->ptrace_bps[i] = bp; continue; } - rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + rc = ptrace_modify_breakpoint(bp, len, type, disabled); if (rc) break; } - /* - * Make a second pass to free the remaining unused breakpoints - * or to restore the original breakpoints if an error occurred. - */ - if (!second_pass) { - second_pass = 1; - if (rc < 0) { - orig_ret = rc; - data = old_dr7; - } + + /* Restore if the first pass failed, second_pass shouldn't fail. */ + if (rc && !WARN_ON(second_pass)) { + ret = rc; + data = old_dr7; + second_pass = true; goto restore; } - ptrace_put_breakpoints(tsk); - - return ((orig_ret < 0) ? orig_ret : rc); + return ret; } /* @@ -702,25 +703,17 @@ restore: */ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { - struct thread_struct *thread = &(tsk->thread); + struct thread_struct *thread = &tsk->thread; unsigned long val = 0; if (n < HBP_NUM) { - struct perf_event *bp; + struct perf_event *bp = thread->ptrace_bps[n]; - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; - - bp = thread->ptrace_bps[n]; - if (!bp) - val = 0; - else + if (bp) val = bp->hw.info.address; - - ptrace_put_breakpoints(tsk); } else if (n == 6) { val = thread->debugreg6; - } else if (n == 7) { + } else if (n == 7) { val = thread->ptrace_dr7; } return val; @@ -729,29 +722,14 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, unsigned long addr) { - struct perf_event *bp; struct thread_struct *t = &tsk->thread; - struct perf_event_attr attr; + struct perf_event *bp = t->ptrace_bps[nr]; int err = 0; - if (ptrace_get_breakpoints(tsk) < 0) - return -ESRCH; - - if (!t->ptrace_bps[nr]) { - ptrace_breakpoint_init(&attr); - /* - * Put stub len and type to register (reserve) an inactive but - * correct bp - */ - attr.bp_addr = addr; - attr.bp_len = HW_BREAKPOINT_LEN_1; - attr.bp_type = HW_BREAKPOINT_W; - attr.disabled = 1; - - bp = register_user_hw_breakpoint(&attr, ptrace_triggered, - NULL, tsk); - + if (!bp) { /* + * Put stub len and type to create an inactive but correct bp. + * * CHECKME: the previous code returned -EIO if the addr wasn't * a valid task virtual addr. The new one will return -EINVAL in * this case. @@ -760,22 +738,20 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, * writing for the user. And anyway this is the previous * behaviour. */ - if (IS_ERR(bp)) { + bp = ptrace_register_breakpoint(tsk, + X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE, + addr, true); + if (IS_ERR(bp)) err = PTR_ERR(bp); - goto put; - } - - t->ptrace_bps[nr] = bp; + else + t->ptrace_bps[nr] = bp; } else { - bp = t->ptrace_bps[nr]; + struct perf_event_attr attr = bp->attr; - attr = bp->attr; attr.bp_addr = addr; err = modify_user_hw_breakpoint(bp, &attr); } -put: - ptrace_put_breakpoints(tsk); return err; } @@ -785,30 +761,20 @@ put: static int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) { - struct thread_struct *thread = &(tsk->thread); - int rc = 0; - + struct thread_struct *thread = &tsk->thread; /* There are no DR4 or DR5 registers */ - if (n == 4 || n == 5) - return -EIO; + int rc = -EIO; - if (n == 6) { - thread->debugreg6 = val; - goto ret_path; - } if (n < HBP_NUM) { rc = ptrace_set_breakpoint_addr(tsk, n, val); - if (rc) - return rc; - } - /* All that's left is DR7 */ - if (n == 7) { + } else if (n == 6) { + thread->debugreg6 = val; + rc = 0; + } else if (n == 7) { rc = ptrace_write_dr7(tsk, val); if (!rc) thread->ptrace_dr7 = val; } - -ret_path: return rc; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 76fa1e9a2b39..563ed91e6faa 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -36,22 +36,6 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; -static int reboot_mode; -enum reboot_type reboot_type = BOOT_ACPI; -int reboot_force; - -/* - * This variable is used privately to keep track of whether or not - * reboot_type is still set to its default value (i.e., reboot= hasn't - * been set on the command line). This is needed so that we can - * suppress DMI scanning for reboot quirks. Without it, it's - * impossible to override a faulty reboot quirk without recompiling. - */ -static int reboot_default = 1; - -#ifdef CONFIG_SMP -static int reboot_cpu = -1; -#endif /* * This is set if we need to go through the 'emergency' path. @@ -64,79 +48,6 @@ static int reboot_emergency; bool port_cf9_safe = false; /* - * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] - * warm Don't set the cold reboot flag - * cold Set the cold reboot flag - * bios Reboot by jumping through the BIOS - * smp Reboot by executing reset on BSP or other CPU - * triple Force a triple fault (init) - * kbd Use the keyboard controller. cold reset (default) - * acpi Use the RESET_REG in the FADT - * efi Use efi reset_system runtime service - * pci Use the so-called "PCI reset register", CF9 - * force Avoid anything that could hang. - */ -static int __init reboot_setup(char *str) -{ - for (;;) { - /* - * Having anything passed on the command line via - * reboot= will cause us to disable DMI checking - * below. - */ - reboot_default = 0; - - switch (*str) { - case 'w': - reboot_mode = 0x1234; - break; - - case 'c': - reboot_mode = 0; - break; - -#ifdef CONFIG_SMP - case 's': - if (isdigit(*(str+1))) { - reboot_cpu = (int) (*(str+1) - '0'); - if (isdigit(*(str+2))) - reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); - } - /* - * We will leave sorting out the final value - * when we are ready to reboot, since we might not - * have detected BSP APIC ID or smp_num_cpu - */ - break; -#endif /* CONFIG_SMP */ - - case 'b': - case 'a': - case 'k': - case 't': - case 'e': - case 'p': - reboot_type = *str; - break; - - case 'f': - reboot_force = 1; - break; - } - - str = strchr(str, ','); - if (str) - str++; - else - break; - } - return 1; -} - -__setup("reboot=", reboot_setup); - - -/* * Reboot options and system auto-detection code provided by * Dell Inc. so their systems "just work". :-) */ @@ -536,6 +447,7 @@ static void native_machine_emergency_restart(void) int i; int attempt = 0; int orig_reboot_type = reboot_type; + unsigned short mode; if (reboot_emergency) emergency_vmx_disable_all(); @@ -543,7 +455,8 @@ static void native_machine_emergency_restart(void) tboot_shutdown(TB_SHUTDOWN_REBOOT); /* Tell the BIOS if we want cold or warm reboot */ - *((unsigned short *)__va(0x472)) = reboot_mode; + mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0; + *((unsigned short *)__va(0x472)) = mode; for (;;) { /* Could also try the reset bit in the Hammer NB */ @@ -585,7 +498,7 @@ static void native_machine_emergency_restart(void) case BOOT_EFI: if (efi_enabled(EFI_RUNTIME_SERVICES)) - efi.reset_system(reboot_mode ? + efi.reset_system(reboot_mode == REBOOT_WARM ? EFI_RESET_WARM : EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); @@ -614,26 +527,10 @@ void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_SMP - - /* The boot cpu is always logical cpu 0 */ - int reboot_cpu_id = 0; - - /* See if there has been given a command line override */ - if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && - cpu_online(reboot_cpu)) - reboot_cpu_id = reboot_cpu; - - /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) - reboot_cpu_id = smp_processor_id(); - - /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); - /* - * O.K Now that I'm on the appropriate processor, stop all of the - * others. Also disable the local irq to not receive the per-cpu - * timer interrupt which may trigger scheduler's load balance. + * Stop all of the others. Also disable the local irq to + * not receive the per-cpu timer interrupt which may trigger + * scheduler's load balance. */ local_irq_disable(); stop_other_cpus(); diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 198eb201ed3b..0aa29394ed6f 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -38,8 +38,9 @@ EXPORT_SYMBOL(rtc_lock); * jump to the next second precisely 500 ms later. Check the Motorola * MC146818A or Dallas DS12887 data sheet for details. */ -int mach_set_rtc_mmss(unsigned long nowtime) +int mach_set_rtc_mmss(const struct timespec *now) { + unsigned long nowtime = now->tv_sec; struct rtc_time tm; int retval = 0; @@ -58,7 +59,7 @@ int mach_set_rtc_mmss(unsigned long nowtime) return retval; } -unsigned long mach_get_cmos_time(void) +void mach_get_cmos_time(struct timespec *now) { unsigned int status, year, mon, day, hour, min, sec, century = 0; unsigned long flags; @@ -107,7 +108,8 @@ unsigned long mach_get_cmos_time(void) } else year += CMOS_YEARS_OFFS; - return mktime(year, mon, day, hour, min, sec); + now->tv_sec = mktime(year, mon, day, hour, min, sec); + now->tv_nsec = 0; } /* Routines for accessing the CMOS RAM/RTC. */ @@ -135,18 +137,13 @@ EXPORT_SYMBOL(rtc_cmos_write); int update_persistent_clock(struct timespec now) { - return x86_platform.set_wallclock(now.tv_sec); + return x86_platform.set_wallclock(&now); } /* not static: needed by APM */ void read_persistent_clock(struct timespec *ts) { - unsigned long retval; - - retval = x86_platform.get_wallclock(); - - ts->tv_sec = retval; - ts->tv_nsec = 0; + x86_platform.get_wallclock(ts); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 56f7fcfe7fa2..e68709da8251 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1040,8 +1040,6 @@ void __init setup_arch(char **cmdline_p) /* max_low_pfn get updated here */ find_low_pfn_range(); #else - num_physpages = max_pfn; - check_x2apic(); /* How many end-of-memory variables you have, grandma! */ diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index f4fe0b8879e0..cdaa347dfcad 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -265,23 +265,30 @@ void smp_reschedule_interrupt(struct pt_regs *regs) */ } -void smp_trace_reschedule_interrupt(struct pt_regs *regs) +static inline void smp_entering_irq(void) { ack_APIC_irq(); + irq_enter(); +} + +void smp_trace_reschedule_interrupt(struct pt_regs *regs) +{ + /* + * Need to call irq_enter() before calling the trace point. + * __smp_reschedule_interrupt() calls irq_enter/exit() too (in + * scheduler_ipi(). This is OK, since those functions are allowed + * to nest. + */ + smp_entering_irq(); trace_reschedule_entry(RESCHEDULE_VECTOR); __smp_reschedule_interrupt(); trace_reschedule_exit(RESCHEDULE_VECTOR); + exiting_irq(); /* * KVM uses this interrupt to force a cpu out of guest mode */ } -static inline void call_function_entering_irq(void) -{ - ack_APIC_irq(); - irq_enter(); -} - static inline void __smp_call_function_interrupt(void) { generic_smp_call_function_interrupt(); @@ -290,14 +297,14 @@ static inline void __smp_call_function_interrupt(void) void smp_call_function_interrupt(struct pt_regs *regs) { - call_function_entering_irq(); + smp_entering_irq(); __smp_call_function_interrupt(); exiting_irq(); } void smp_trace_call_function_interrupt(struct pt_regs *regs) { - call_function_entering_irq(); + smp_entering_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); __smp_call_function_interrupt(); trace_call_function_exit(CALL_FUNCTION_VECTOR); @@ -312,14 +319,14 @@ static inline void __smp_call_function_single_interrupt(void) void smp_call_function_single_interrupt(struct pt_regs *regs) { - call_function_entering_irq(); + smp_entering_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } void smp_trace_call_function_single_interrupt(struct pt_regs *regs) { - call_function_entering_irq(); + smp_entering_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); __smp_call_function_single_interrupt(); trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d609e1d84048..bf4fb04d0112 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -5,12 +5,13 @@ CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. CFLAGS_vmx.o := -I. -kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o eventfd.o \ - irqchip.o) -kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ - assigned-dev.o iommu.o) -kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) +KVM := ../../../virt/kvm + +kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ + $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ + $(KVM)/eventfd.o $(KVM)/irqchip.o +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o +kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ i8254.o cpuid.o pmu.o diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5953dcea752d..2bc1e81045b0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -61,6 +61,8 @@ #define OpMem8 26ull /* 8-bit zero extended memory operand */ #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */ #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */ +#define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */ +#define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */ #define OpBits 5 /* Width of operand field */ #define OpMask ((1ull << OpBits) - 1) @@ -86,6 +88,7 @@ #define DstMem64 (OpMem64 << DstShift) #define DstImmUByte (OpImmUByte << DstShift) #define DstDX (OpDX << DstShift) +#define DstAccLo (OpAccLo << DstShift) #define DstMask (OpMask << DstShift) /* Source operand type. */ #define SrcShift 6 @@ -108,6 +111,7 @@ #define SrcImm64 (OpImm64 << SrcShift) #define SrcDX (OpDX << SrcShift) #define SrcMem8 (OpMem8 << SrcShift) +#define SrcAccHi (OpAccHi << SrcShift) #define SrcMask (OpMask << SrcShift) #define BitOp (1<<11) #define MemAbs (1<<12) /* Memory operand is absolute displacement */ @@ -138,6 +142,7 @@ /* Source 2 operand type */ #define Src2Shift (31) #define Src2None (OpNone << Src2Shift) +#define Src2Mem (OpMem << Src2Shift) #define Src2CL (OpCL << Src2Shift) #define Src2ImmByte (OpImmByte << Src2Shift) #define Src2One (OpOne << Src2Shift) @@ -155,6 +160,9 @@ #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ #define NoWrite ((u64)1 << 45) /* No writeback */ +#define SrcWrite ((u64)1 << 46) /* Write back src operand */ + +#define DstXacc (DstAccLo | SrcAccHi | SrcWrite) #define X2(x...) x, x #define X3(x...) X2(x), x @@ -171,10 +179,11 @@ /* * fastop functions have a special calling convention: * - * dst: [rdx]:rax (in/out) - * src: rbx (in/out) + * dst: rax (in/out) + * src: rdx (in/out) * src2: rcx (in) * flags: rflags (in/out) + * ex: rsi (in:fastop pointer, out:zero if exception) * * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * different operand sizes can be reached by calculation, rather than a jump @@ -276,174 +285,17 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) } /* - * Instruction emulation: - * Most instructions are emulated directly via a fragment of inline assembly - * code. This allows us to save/restore EFLAGS and thus very easily pick up - * any modified flags. - */ - -#if defined(CONFIG_X86_64) -#define _LO32 "k" /* force 32-bit operand */ -#define _STK "%%rsp" /* stack pointer */ -#elif defined(__i386__) -#define _LO32 "" /* force 32-bit operand */ -#define _STK "%%esp" /* stack pointer */ -#endif - -/* * These EFLAGS bits are restored from saved value during emulation, and * any changes are written back to the saved value after emulation. */ #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) -/* Before executing instruction: restore necessary bits in EFLAGS. */ -#define _PRE_EFLAGS(_sav, _msk, _tmp) \ - /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \ - "movl %"_sav",%"_LO32 _tmp"; " \ - "push %"_tmp"; " \ - "push %"_tmp"; " \ - "movl %"_msk",%"_LO32 _tmp"; " \ - "andl %"_LO32 _tmp",("_STK"); " \ - "pushf; " \ - "notl %"_LO32 _tmp"; " \ - "andl %"_LO32 _tmp",("_STK"); " \ - "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \ - "pop %"_tmp"; " \ - "orl %"_LO32 _tmp",("_STK"); " \ - "popf; " \ - "pop %"_sav"; " - -/* After executing instruction: write-back necessary bits in EFLAGS. */ -#define _POST_EFLAGS(_sav, _msk, _tmp) \ - /* _sav |= EFLAGS & _msk; */ \ - "pushf; " \ - "pop %"_tmp"; " \ - "andl %"_msk",%"_LO32 _tmp"; " \ - "orl %"_LO32 _tmp",%"_sav"; " - #ifdef CONFIG_X86_64 #define ON64(x) x #else #define ON64(x) #endif -#define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype) \ - do { \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op _suffix " %"_x"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" ((ctxt)->eflags), \ - "+q" (*(_dsttype*)&(ctxt)->dst.val), \ - "=&r" (_tmp) \ - : _y ((ctxt)->src.val), "i" (EFLAGS_MASK)); \ - } while (0) - - -/* Raw emulation: instruction has two explicit operands. */ -#define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy) \ - do { \ - unsigned long _tmp; \ - \ - switch ((ctxt)->dst.bytes) { \ - case 2: \ - ____emulate_2op(ctxt,_op,_wx,_wy,"w",u16); \ - break; \ - case 4: \ - ____emulate_2op(ctxt,_op,_lx,_ly,"l",u32); \ - break; \ - case 8: \ - ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \ - break; \ - } \ - } while (0) - -#define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ - do { \ - unsigned long _tmp; \ - switch ((ctxt)->dst.bytes) { \ - case 1: \ - ____emulate_2op(ctxt,_op,_bx,_by,"b",u8); \ - break; \ - default: \ - __emulate_2op_nobyte(ctxt, _op, \ - _wx, _wy, _lx, _ly, _qx, _qy); \ - break; \ - } \ - } while (0) - -/* Source operand is byte-sized and may be restricted to just %cl. */ -#define emulate_2op_SrcB(ctxt, _op) \ - __emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c") - -/* Source operand is byte, word, long or quad sized. */ -#define emulate_2op_SrcV(ctxt, _op) \ - __emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r") - -/* Source operand is word, long or quad sized. */ -#define emulate_2op_SrcV_nobyte(ctxt, _op) \ - __emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r") - -/* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(ctxt, _op, _suffix, _type) \ - do { \ - unsigned long _tmp; \ - _type _clv = (ctxt)->src2.val; \ - _type _srcv = (ctxt)->src.val; \ - _type _dstv = (ctxt)->dst.val; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "2") \ - _op _suffix " %4,%1 \n" \ - _POST_EFLAGS("0", "5", "2") \ - : "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \ - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ - ); \ - \ - (ctxt)->src2.val = (unsigned long) _clv; \ - (ctxt)->src2.val = (unsigned long) _srcv; \ - (ctxt)->dst.val = (unsigned long) _dstv; \ - } while (0) - -#define emulate_2op_cl(ctxt, _op) \ - do { \ - switch ((ctxt)->dst.bytes) { \ - case 2: \ - __emulate_2op_cl(ctxt, _op, "w", u16); \ - break; \ - case 4: \ - __emulate_2op_cl(ctxt, _op, "l", u32); \ - break; \ - case 8: \ - ON64(__emulate_2op_cl(ctxt, _op, "q", ulong)); \ - break; \ - } \ - } while (0) - -#define __emulate_1op(ctxt, _op, _suffix) \ - do { \ - unsigned long _tmp; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op _suffix " %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - } while (0) - -/* Instruction has only one explicit operand (no source operand). */ -#define emulate_1op(ctxt, _op) \ - do { \ - switch ((ctxt)->dst.bytes) { \ - case 1: __emulate_1op(ctxt, _op, "b"); break; \ - case 2: __emulate_1op(ctxt, _op, "w"); break; \ - case 4: __emulate_1op(ctxt, _op, "l"); break; \ - case 8: ON64(__emulate_1op(ctxt, _op, "q")); break; \ - } \ - } while (0) - static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" @@ -462,7 +314,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); #define FOPNOP() FOP_ALIGN FOP_RET #define FOP1E(op, dst) \ - FOP_ALIGN #op " %" #dst " \n\t" FOP_RET + FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET + +#define FOP1EEX(op, dst) \ + FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception) #define FASTOP1(op) \ FOP_START(op) \ @@ -472,24 +327,42 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); ON64(FOP1E(op##q, rax)) \ FOP_END +/* 1-operand, using src2 (for MUL/DIV r/m) */ +#define FASTOP1SRC2(op, name) \ + FOP_START(name) \ + FOP1E(op, cl) \ + FOP1E(op, cx) \ + FOP1E(op, ecx) \ + ON64(FOP1E(op, rcx)) \ + FOP_END + +/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */ +#define FASTOP1SRC2EX(op, name) \ + FOP_START(name) \ + FOP1EEX(op, cl) \ + FOP1EEX(op, cx) \ + FOP1EEX(op, ecx) \ + ON64(FOP1EEX(op, rcx)) \ + FOP_END + #define FOP2E(op, dst, src) \ FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET #define FASTOP2(op) \ FOP_START(op) \ - FOP2E(op##b, al, bl) \ - FOP2E(op##w, ax, bx) \ - FOP2E(op##l, eax, ebx) \ - ON64(FOP2E(op##q, rax, rbx)) \ + FOP2E(op##b, al, dl) \ + FOP2E(op##w, ax, dx) \ + FOP2E(op##l, eax, edx) \ + ON64(FOP2E(op##q, rax, rdx)) \ FOP_END /* 2 operand, word only */ #define FASTOP2W(op) \ FOP_START(op) \ FOPNOP() \ - FOP2E(op##w, ax, bx) \ - FOP2E(op##l, eax, ebx) \ - ON64(FOP2E(op##q, rax, rbx)) \ + FOP2E(op##w, ax, dx) \ + FOP2E(op##l, eax, edx) \ + ON64(FOP2E(op##q, rax, rdx)) \ FOP_END /* 2 operand, src is CL */ @@ -508,14 +381,17 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); #define FASTOP3WCL(op) \ FOP_START(op) \ FOPNOP() \ - FOP3E(op##w, ax, bx, cl) \ - FOP3E(op##l, eax, ebx, cl) \ - ON64(FOP3E(op##q, rax, rbx, cl)) \ + FOP3E(op##w, ax, dx, cl) \ + FOP3E(op##l, eax, edx, cl) \ + ON64(FOP3E(op##q, rax, rdx, cl)) \ FOP_END /* Special case for SETcc - 1 instruction per cc */ #define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" +asm(".global kvm_fastop_exception \n" + "kvm_fastop_exception: xor %esi, %esi; ret"); + FOP_START(setcc) FOP_SETCC(seto) FOP_SETCC(setno) @@ -538,47 +414,6 @@ FOP_END; FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET FOP_END; -#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ - do { \ - unsigned long _tmp; \ - ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \ - ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "1") \ - "1: \n\t" \ - _op _suffix " %6; " \ - "2: \n\t" \ - _POST_EFLAGS("0", "5", "1") \ - ".pushsection .fixup,\"ax\" \n\t" \ - "3: movb $1, %4 \n\t" \ - "jmp 2b \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(1b, 3b) \ - : "=m" ((ctxt)->eflags), "=&r" (_tmp), \ - "+a" (*rax), "+d" (*rdx), "+qm"(_ex) \ - : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val)); \ - } while (0) - -/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ -#define emulate_1op_rax_rdx(ctxt, _op, _ex) \ - do { \ - switch((ctxt)->src.bytes) { \ - case 1: \ - __emulate_1op_rax_rdx(ctxt, _op, "b", _ex); \ - break; \ - case 2: \ - __emulate_1op_rax_rdx(ctxt, _op, "w", _ex); \ - break; \ - case 4: \ - __emulate_1op_rax_rdx(ctxt, _op, "l", _ex); \ - break; \ - case 8: ON64( \ - __emulate_1op_rax_rdx(ctxt, _op, "q", _ex)); \ - break; \ - } \ - } while (0) - static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, enum x86_intercept intercept, enum x86_intercept_stage stage) @@ -988,6 +823,11 @@ FASTOP2(xor); FASTOP2(cmp); FASTOP2(test); +FASTOP1SRC2(mul, mul_ex); +FASTOP1SRC2(imul, imul_ex); +FASTOP1SRC2EX(div, div_ex); +FASTOP1SRC2EX(idiv, idiv_ex); + FASTOP3WCL(shld); FASTOP3WCL(shrd); @@ -1013,6 +853,8 @@ FASTOP2W(bts); FASTOP2W(btr); FASTOP2W(btc); +FASTOP2(xadd); + static u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -1726,45 +1568,42 @@ static void write_register_operand(struct operand *op) } } -static int writeback(struct x86_emulate_ctxt *ctxt) +static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) { int rc; - if (ctxt->d & NoWrite) - return X86EMUL_CONTINUE; - - switch (ctxt->dst.type) { + switch (op->type) { case OP_REG: - write_register_operand(&ctxt->dst); + write_register_operand(op); break; case OP_MEM: if (ctxt->lock_prefix) rc = segmented_cmpxchg(ctxt, - ctxt->dst.addr.mem, - &ctxt->dst.orig_val, - &ctxt->dst.val, - ctxt->dst.bytes); + op->addr.mem, + &op->orig_val, + &op->val, + op->bytes); else rc = segmented_write(ctxt, - ctxt->dst.addr.mem, - &ctxt->dst.val, - ctxt->dst.bytes); + op->addr.mem, + &op->val, + op->bytes); if (rc != X86EMUL_CONTINUE) return rc; break; case OP_MEM_STR: rc = segmented_write(ctxt, - ctxt->dst.addr.mem, - ctxt->dst.data, - ctxt->dst.bytes * ctxt->dst.count); + op->addr.mem, + op->data, + op->bytes * op->count); if (rc != X86EMUL_CONTINUE) return rc; break; case OP_XMM: - write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); + write_sse_reg(ctxt, &op->vec_val, op->addr.xmm); break; case OP_MM: - write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm); + write_mmx_reg(ctxt, &op->mm_val, op->addr.mm); break; case OP_NONE: /* no writeback */ @@ -2117,42 +1956,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_mul_ex(struct x86_emulate_ctxt *ctxt) -{ - u8 ex = 0; - - emulate_1op_rax_rdx(ctxt, "mul", ex); - return X86EMUL_CONTINUE; -} - -static int em_imul_ex(struct x86_emulate_ctxt *ctxt) -{ - u8 ex = 0; - - emulate_1op_rax_rdx(ctxt, "imul", ex); - return X86EMUL_CONTINUE; -} - -static int em_div_ex(struct x86_emulate_ctxt *ctxt) -{ - u8 de = 0; - - emulate_1op_rax_rdx(ctxt, "div", de); - if (de) - return emulate_de(ctxt); - return X86EMUL_CONTINUE; -} - -static int em_idiv_ex(struct x86_emulate_ctxt *ctxt) -{ - u8 de = 0; - - emulate_1op_rax_rdx(ctxt, "idiv", de); - if (de) - return emulate_de(ctxt); - return X86EMUL_CONTINUE; -} - static int em_grp45(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; @@ -3734,10 +3537,10 @@ static const struct opcode group3[] = { F(DstMem | SrcImm | NoWrite, em_test), F(DstMem | SrcNone | Lock, em_not), F(DstMem | SrcNone | Lock, em_neg), - I(SrcMem, em_mul_ex), - I(SrcMem, em_imul_ex), - I(SrcMem, em_div_ex), - I(SrcMem, em_idiv_ex), + F(DstXacc | Src2Mem, em_mul_ex), + F(DstXacc | Src2Mem, em_imul_ex), + F(DstXacc | Src2Mem, em_div_ex), + F(DstXacc | Src2Mem, em_idiv_ex), }; static const struct opcode group4[] = { @@ -4064,7 +3867,7 @@ static const struct opcode twobyte_table[256] = { F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ - D2bv(DstMem | SrcReg | ModRM | Lock), + F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), N, D(DstMem | SrcReg | ModRM | Mov), N, N, N, GD(0, &group9), /* 0xC8 - 0xCF */ @@ -4172,6 +3975,24 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, fetch_register_operand(op); op->orig_val = op->val; break; + case OpAccLo: + op->type = OP_REG; + op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes; + op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); + fetch_register_operand(op); + op->orig_val = op->val; + break; + case OpAccHi: + if (ctxt->d & ByteOp) { + op->type = OP_NONE; + break; + } + op->type = OP_REG; + op->bytes = ctxt->op_bytes; + op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); + fetch_register_operand(op); + op->orig_val = op->val; + break; case OpDI: op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; @@ -4553,11 +4374,15 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) { ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; - fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; + if (!(ctxt->d & ByteOp)) + fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" - : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) - : "c"(ctxt->src2.val), [fastop]"S"(fop)); + : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), + [fastop]"+S"(fop) + : "c"(ctxt->src2.val)); ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); + if (!fop) /* exception is returned in fop variable */ + return emulate_de(ctxt); return X86EMUL_CONTINUE; } @@ -4773,9 +4598,17 @@ special_insn: goto done; writeback: - rc = writeback(ctxt); - if (rc != X86EMUL_CONTINUE) - goto done; + if (!(ctxt->d & NoWrite)) { + rc = writeback(ctxt, &ctxt->dst); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if (ctxt->d & SrcWrite) { + BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR); + rc = writeback(ctxt, &ctxt->src); + if (rc != X86EMUL_CONTINUE) + goto done; + } /* * restore dst type in case the decoding will be reused @@ -4872,12 +4705,6 @@ twobyte_insn: ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : (s16) ctxt->src.val; break; - case 0xc0 ... 0xc1: /* xadd */ - fastop(ctxt, em_add); - /* Write back the register source. */ - ctxt->src.val = ctxt->dst.orig_val; - write_register_operand(&ctxt->src); - break; case 0xc3: /* movnti */ ctxt->dst.bytes = ctxt->op_bytes; ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0eee2c8b64d1..afc11245827c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1608,8 +1608,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) return; if (atomic_read(&apic->lapic_timer.pending) > 0) { - if (kvm_apic_local_deliver(apic, APIC_LVTT)) - atomic_dec(&apic->lapic_timer.pending); + kvm_apic_local_deliver(apic, APIC_LVTT); + atomic_set(&apic->lapic_timer.pending, 0); } } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 004cc87b781c..0d094da49541 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -197,15 +197,63 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); -static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) +/* + * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, + * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation + * number. + */ +#define MMIO_SPTE_GEN_LOW_SHIFT 3 +#define MMIO_SPTE_GEN_HIGH_SHIFT 52 + +#define MMIO_GEN_SHIFT 19 +#define MMIO_GEN_LOW_SHIFT 9 +#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) +#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) +#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) + +static u64 generation_mmio_spte_mask(unsigned int gen) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); + u64 mask; + + WARN_ON(gen > MMIO_MAX_GEN); + + mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; + mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; + return mask; +} + +static unsigned int get_mmio_spte_generation(u64 spte) +{ + unsigned int gen; + + spte &= ~shadow_mmio_mask; + + gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK; + gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT; + return gen; +} + +static unsigned int kvm_current_mmio_generation(struct kvm *kvm) +{ + /* + * Init kvm generation close to MMIO_MAX_GEN to easily test the + * code of handling generation number wrap-around. + */ + return (kvm_memslots(kvm)->generation + + MMIO_MAX_GEN - 150) & MMIO_GEN_MASK; +} + +static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, + unsigned access) +{ + unsigned int gen = kvm_current_mmio_generation(kvm); + u64 mask = generation_mmio_spte_mask(gen); access &= ACC_WRITE_MASK | ACC_USER_MASK; + mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT; - sp->mmio_cached = true; - trace_mark_mmio_spte(sptep, gfn, access); - mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); + trace_mark_mmio_spte(sptep, gfn, access, gen); + mmu_spte_set(sptep, mask); } static bool is_mmio_spte(u64 spte) @@ -215,24 +263,38 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - return (spte & ~shadow_mmio_mask) >> PAGE_SHIFT; + u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; + return (spte & ~mask) >> PAGE_SHIFT; } static unsigned get_mmio_spte_access(u64 spte) { - return (spte & ~shadow_mmio_mask) & ~PAGE_MASK; + u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; + return (spte & ~mask) & ~PAGE_MASK; } -static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access) +static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, + pfn_t pfn, unsigned access) { if (unlikely(is_noslot_pfn(pfn))) { - mark_mmio_spte(sptep, gfn, access); + mark_mmio_spte(kvm, sptep, gfn, access); return true; } return false; } +static bool check_mmio_spte(struct kvm *kvm, u64 spte) +{ + unsigned int kvm_gen, spte_gen; + + kvm_gen = kvm_current_mmio_generation(kvm); + spte_gen = get_mmio_spte_generation(spte); + + trace_check_mmio_spte(spte, kvm_gen, spte_gen); + return likely(kvm_gen == spte_gen); +} + static inline u64 rsvd_bits(int s, int e) { return ((1ULL << (e - s + 1)) - 1) << s; @@ -404,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) /* * The idea using the light way get the spte on x86_32 guest is from * gup_get_pte(arch/x86/mm/gup.c). - * The difference is we can not catch the spte tlb flush if we leave - * guest mode, so we emulate it by increase clear_spte_count when spte - * is cleared. + * + * An spte tlb flush may be pending, because kvm_set_pte_rmapp + * coalesces them and we are running out of the MMU lock. Therefore + * we need to protect against in-progress updates of the spte. + * + * Reading the spte while an update is in progress may get the old value + * for the high part of the spte. The race is fine for a present->non-present + * change (because the high part of the spte is ignored for non-present spte), + * but for a present->present change we must reread the spte. + * + * All such changes are done in two steps (present->non-present and + * non-present->present), hence it is enough to count the number of + * present->non-present updates: if it changed while reading the spte, + * we might have hit the race. This is done using clear_spte_count. */ static u64 __get_spte_lockless(u64 *sptep) { @@ -1511,6 +1584,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, if (!direct) sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); + + /* + * The active_mmu_pages list is the FIFO list, do not move the + * page until it is zapped. kvm_zap_obsolete_pages depends on + * this feature. See the comments in kvm_zap_obsolete_pages(). + */ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); sp->parent_ptes = 0; mmu_page_add_parent_pte(vcpu, sp, parent_pte); @@ -1648,6 +1727,16 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); +/* + * NOTE: we should pay more attention on the zapped-obsolete page + * (is_obsolete_sp(sp) && sp->role.invalid) when you do hash list walk + * since it has been deleted from active_mmu_pages but still can be found + * at hast list. + * + * for_each_gfn_indirect_valid_sp has skipped that kind of page and + * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped + * all the obsolete pages. + */ #define for_each_gfn_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ @@ -1838,6 +1927,11 @@ static void clear_sp_write_flooding_count(u64 *spte) __clear_sp_write_flooding_count(sp); } +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); +} + static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gaddr, @@ -1864,6 +1958,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role.quadrant = quadrant; } for_each_gfn_sp(vcpu->kvm, sp, gfn) { + if (is_obsolete_sp(vcpu->kvm, sp)) + continue; + if (!need_sync && sp->unsync) need_sync = true; @@ -1900,6 +1997,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, account_shadowed(vcpu->kvm, gfn); } + sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; init_shadow_page_table(sp); trace_kvm_mmu_get_page(sp, true); return sp; @@ -2070,8 +2168,10 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, ret = mmu_zap_unsync_children(kvm, sp, invalid_list); kvm_mmu_page_unlink_children(kvm, sp); kvm_mmu_unlink_parents(kvm, sp); + if (!sp->role.invalid && !sp->role.direct) unaccount_shadowed(kvm, sp->gfn); + if (sp->unsync) kvm_unlink_unsync_page(kvm, sp); if (!sp->root_count) { @@ -2081,7 +2181,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, kvm_mod_used_mmu_pages(kvm, -1); } else { list_move(&sp->link, &kvm->arch.active_mmu_pages); - kvm_reload_remote_mmus(kvm); + + /* + * The obsolete pages can not be used on any vcpus. + * See the comments in kvm_mmu_invalidate_zap_all_pages(). + */ + if (!sp->role.invalid && !is_obsolete_sp(kvm, sp)) + kvm_reload_remote_mmus(kvm); } sp->role.invalid = 1; @@ -2331,7 +2437,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte; int ret = 0; - if (set_mmio_spte(sptep, gfn, pfn, pte_access)) + if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access)) return 0; spte = PT_PRESENT_MASK; @@ -2869,22 +2975,25 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - spin_lock(&vcpu->kvm->mmu_lock); + if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || vcpu->arch.mmu.direct_map)) { hpa_t root = vcpu->arch.mmu.root_hpa; + spin_lock(&vcpu->kvm->mmu_lock); sp = page_header(root); --sp->root_count; if (!sp->root_count && sp->role.invalid) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); } - vcpu->arch.mmu.root_hpa = INVALID_PAGE; spin_unlock(&vcpu->kvm->mmu_lock); + vcpu->arch.mmu.root_hpa = INVALID_PAGE; return; } + + spin_lock(&vcpu->kvm->mmu_lock); for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; @@ -3148,17 +3257,12 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) return spte; } -/* - * If it is a real mmio page fault, return 1 and emulat the instruction - * directly, return 0 to let CPU fault again on the address, -1 is - * returned if bug is detected. - */ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; if (quickly_check_mmio_pf(vcpu, addr, direct)) - return 1; + return RET_MMIO_PF_EMULATE; spte = walk_shadow_page_get_mmio_spte(vcpu, addr); @@ -3166,12 +3270,15 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) gfn_t gfn = get_mmio_spte_gfn(spte); unsigned access = get_mmio_spte_access(spte); + if (!check_mmio_spte(vcpu->kvm, spte)) + return RET_MMIO_PF_INVALID; + if (direct) addr = 0; trace_handle_mmio_page_fault(addr, gfn, access); vcpu_cache_mmio_info(vcpu, addr, gfn, access); - return 1; + return RET_MMIO_PF_EMULATE; } /* @@ -3179,13 +3286,13 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) * it's a BUG if the gfn is not a mmio page. */ if (direct && !check_direct_spte_mmio_pf(spte)) - return -1; + return RET_MMIO_PF_BUG; /* * If the page table is zapped by other cpus, let CPU fault again on * the address. */ - return 0; + return RET_MMIO_PF_RETRY; } EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); @@ -3195,7 +3302,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, int ret; ret = handle_mmio_page_fault_common(vcpu, addr, direct); - WARN_ON(ret < 0); + WARN_ON(ret == RET_MMIO_PF_BUG); return ret; } @@ -3207,8 +3314,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); - if (unlikely(error_code & PFERR_RSVD_MASK)) - return handle_mmio_page_fault(vcpu, gva, error_code, true); + if (unlikely(error_code & PFERR_RSVD_MASK)) { + r = handle_mmio_page_fault(vcpu, gva, error_code, true); + + if (likely(r != RET_MMIO_PF_INVALID)) + return r; + } r = mmu_topup_memory_caches(vcpu); if (r) @@ -3284,8 +3395,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); - if (unlikely(error_code & PFERR_RSVD_MASK)) - return handle_mmio_page_fault(vcpu, gpa, error_code, true); + if (unlikely(error_code & PFERR_RSVD_MASK)) { + r = handle_mmio_page_fault(vcpu, gpa, error_code, true); + + if (likely(r != RET_MMIO_PF_INVALID)) + return r; + } r = mmu_topup_memory_caches(vcpu); if (r) @@ -3391,8 +3506,8 @@ static inline void protect_clean_gpte(unsigned *access, unsigned gpte) *access &= mask; } -static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, - int *nr_present) +static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, + unsigned access, int *nr_present) { if (unlikely(is_mmio_spte(*sptep))) { if (gfn != get_mmio_spte_gfn(*sptep)) { @@ -3401,7 +3516,7 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, } (*nr_present)++; - mark_mmio_spte(sptep, gfn, access); + mark_mmio_spte(kvm, sptep, gfn, access); return true; } @@ -3764,9 +3879,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) if (r) goto out; r = mmu_alloc_roots(vcpu); - spin_lock(&vcpu->kvm->mmu_lock); - mmu_sync_roots(vcpu); - spin_unlock(&vcpu->kvm->mmu_lock); + kvm_mmu_sync_roots(vcpu); if (r) goto out; /* set_cr3() should ensure TLB has been flushed */ @@ -4179,39 +4292,107 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) spin_unlock(&kvm->mmu_lock); } -void kvm_mmu_zap_all(struct kvm *kvm) +#define BATCH_ZAP_PAGES 10 +static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; - LIST_HEAD(invalid_list); + int batch = 0; - spin_lock(&kvm->mmu_lock); restart: - list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) - if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) + list_for_each_entry_safe_reverse(sp, node, + &kvm->arch.active_mmu_pages, link) { + int ret; + + /* + * No obsolete page exists before new created page since + * active_mmu_pages is the FIFO list. + */ + if (!is_obsolete_sp(kvm, sp)) + break; + + /* + * Since we are reversely walking the list and the invalid + * list will be moved to the head, skip the invalid page + * can help us to avoid the infinity list walking. + */ + if (sp->role.invalid) + continue; + + /* + * Need not flush tlb since we only zap the sp with invalid + * generation number. + */ + if (batch >= BATCH_ZAP_PAGES && + cond_resched_lock(&kvm->mmu_lock)) { + batch = 0; + goto restart; + } + + ret = kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages); + batch += ret; + + if (ret) goto restart; + } - kvm_mmu_commit_zap_page(kvm, &invalid_list); - spin_unlock(&kvm->mmu_lock); + /* + * Should flush tlb before free page tables since lockless-walking + * may use the pages. + */ + kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); } -void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) +/* + * Fast invalidate all shadow pages and use lock-break technique + * to zap obsolete pages. + * + * It's required when memslot is being deleted or VM is being + * destroyed, in these cases, we should ensure that KVM MMU does + * not use any resource of the being-deleted slot or all slots + * after calling the function. + */ +void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm) { - struct kvm_mmu_page *sp, *node; - LIST_HEAD(invalid_list); - spin_lock(&kvm->mmu_lock); -restart: - list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { - if (!sp->mmio_cached) - continue; - if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) - goto restart; - } + trace_kvm_mmu_invalidate_zap_all_pages(kvm); + kvm->arch.mmu_valid_gen++; - kvm_mmu_commit_zap_page(kvm, &invalid_list); + /* + * Notify all vcpus to reload its shadow page table + * and flush TLB. Then all vcpus will switch to new + * shadow page table with the new mmu_valid_gen. + * + * Note: we should do this under the protection of + * mmu-lock, otherwise, vcpu would purge shadow page + * but miss tlb flush. + */ + kvm_reload_remote_mmus(kvm); + + kvm_zap_obsolete_pages(kvm); spin_unlock(&kvm->mmu_lock); } +static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) +{ + return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); +} + +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) +{ + /* + * The very rare case: if the generation-number is round, + * zap all shadow pages. + * + * The max value is MMIO_MAX_GEN - 1 since it is not called + * when mark memslot invalid. + */ + if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) { + printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); + kvm_mmu_invalidate_zap_all_pages(kvm); + } +} + static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; @@ -4240,15 +4421,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) * want to shrink a VM that only started to populate its MMU * anyway. */ - if (!kvm->arch.n_used_mmu_pages) + if (!kvm->arch.n_used_mmu_pages && + !kvm_has_zapped_obsolete_pages(kvm)) continue; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); + if (kvm_has_zapped_obsolete_pages(kvm)) { + kvm_mmu_commit_zap_page(kvm, + &kvm->arch.zapped_obsolete_pages); + goto unlock; + } + prepare_zap_oldest_mmu_page(kvm, &invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list); +unlock: spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 2adcbc2cac6d..5b59c573aba7 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -52,6 +52,23 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); + +/* + * Return values of handle_mmio_page_fault_common: + * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction + * directly. + * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page + * fault path update the mmio spte. + * RET_MMIO_PF_RETRY: let CPU fault again on the address. + * RET_MMIO_PF_BUG: bug is detected. + */ +enum { + RET_MMIO_PF_EMULATE = 1, + RET_MMIO_PF_INVALID = 2, + RET_MMIO_PF_RETRY = 0, + RET_MMIO_PF_BUG = -1 +}; + int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); @@ -97,4 +114,5 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access, return (mmu->permissions[pfec >> 1] >> pte_access) & 1; } +void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); #endif diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index b8f6172f4174..9d2e0ffcb190 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -7,16 +7,18 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu -#define KVM_MMU_PAGE_FIELDS \ - __field(__u64, gfn) \ - __field(__u32, role) \ - __field(__u32, root_count) \ +#define KVM_MMU_PAGE_FIELDS \ + __field(unsigned long, mmu_valid_gen) \ + __field(__u64, gfn) \ + __field(__u32, role) \ + __field(__u32, root_count) \ __field(bool, unsync) -#define KVM_MMU_PAGE_ASSIGN(sp) \ - __entry->gfn = sp->gfn; \ - __entry->role = sp->role.word; \ - __entry->root_count = sp->root_count; \ +#define KVM_MMU_PAGE_ASSIGN(sp) \ + __entry->mmu_valid_gen = sp->mmu_valid_gen; \ + __entry->gfn = sp->gfn; \ + __entry->role = sp->role.word; \ + __entry->root_count = sp->root_count; \ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ @@ -28,8 +30,8 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ - " %snxe root %u %s%c", \ + trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s" \ + " %snxe root %u %s%c", __entry->mmu_valid_gen, \ __entry->gfn, role.level, \ role.cr4_pae ? " pae" : "", \ role.quadrant, \ @@ -197,23 +199,25 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, TRACE_EVENT( mark_mmio_spte, - TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), - TP_ARGS(sptep, gfn, access), + TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen), + TP_ARGS(sptep, gfn, access, gen), TP_STRUCT__entry( __field(void *, sptep) __field(gfn_t, gfn) __field(unsigned, access) + __field(unsigned int, gen) ), TP_fast_assign( __entry->sptep = sptep; __entry->gfn = gfn; __entry->access = access; + __entry->gen = gen; ), - TP_printk("sptep:%p gfn %llx access %x", __entry->sptep, __entry->gfn, - __entry->access) + TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep, + __entry->gfn, __entry->access, __entry->gen) ); TRACE_EVENT( @@ -274,6 +278,50 @@ TRACE_EVENT( __spte_satisfied(old_spte), __spte_satisfied(new_spte) ) ); + +TRACE_EVENT( + kvm_mmu_invalidate_zap_all_pages, + TP_PROTO(struct kvm *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(unsigned long, mmu_valid_gen) + __field(unsigned int, mmu_used_pages) + ), + + TP_fast_assign( + __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; + __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; + ), + + TP_printk("kvm-mmu-valid-gen %lx used_pages %x", + __entry->mmu_valid_gen, __entry->mmu_used_pages + ) +); + + +TRACE_EVENT( + check_mmio_spte, + TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), + TP_ARGS(spte, kvm_gen, spte_gen), + + TP_STRUCT__entry( + __field(unsigned int, kvm_gen) + __field(unsigned int, spte_gen) + __field(u64, spte) + ), + + TP_fast_assign( + __entry->kvm_gen = kvm_gen; + __entry->spte_gen = spte_gen; + __entry->spte = spte; + ), + + TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte, + __entry->kvm_gen, __entry->spte_gen, + __entry->kvm_gen == __entry->spte_gen + ) +); #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index da20860b457a..7769699d48a8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -552,9 +552,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); - if (unlikely(error_code & PFERR_RSVD_MASK)) - return handle_mmio_page_fault(vcpu, addr, error_code, + if (unlikely(error_code & PFERR_RSVD_MASK)) { + r = handle_mmio_page_fault(vcpu, addr, error_code, mmu_is_nested(vcpu)); + if (likely(r != RET_MMIO_PF_INVALID)) + return r; + }; r = mmu_topup_memory_caches(vcpu); if (r) @@ -792,7 +795,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) pte_access &= gpte_access(vcpu, gpte); protect_clean_gpte(&pte_access, gpte); - if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) + if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, + &nr_present)) continue; if (gfn != sp->gfns[i]) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a14a6eaf871d..c0bc80391e40 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1026,7 +1026,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) g_tsc_offset = svm->vmcb->control.tsc_offset - svm->nested.hsave->control.tsc_offset; svm->nested.hsave->control.tsc_offset = offset; - } + } else + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + svm->vmcb->control.tsc_offset, + offset); svm->vmcb->control.tsc_offset = offset + g_tsc_offset; @@ -1044,6 +1047,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho svm->vmcb->control.tsc_offset += adjustment; if (is_guest_mode(vcpu)) svm->nested.hsave->control.tsc_offset += adjustment; + else + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + svm->vmcb->control.tsc_offset - adjustment, + svm->vmcb->control.tsc_offset); + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index fe5e00ed7036..545245d7cc63 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -756,6 +756,27 @@ TRACE_EVENT( __entry->gpa_match ? "GPA" : "GVA") ); +TRACE_EVENT(kvm_write_tsc_offset, + TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset, + __u64 next_tsc_offset), + TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset), + + TP_STRUCT__entry( + __field( unsigned int, vcpu_id ) + __field( __u64, previous_tsc_offset ) + __field( __u64, next_tsc_offset ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->previous_tsc_offset = previous_tsc_offset; + __entry->next_tsc_offset = next_tsc_offset; + ), + + TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id, + __entry->previous_tsc_offset, __entry->next_tsc_offset) +); + #ifdef CONFIG_X86_64 #define host_clocks \ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b30f5a54a2ab..a7e18551c968 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2096,6 +2096,8 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? vmcs12->tsc_offset : 0)); } else { + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + vmcs_read64(TSC_OFFSET), offset); vmcs_write64(TSC_OFFSET, offset); } } @@ -2103,11 +2105,14 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) { u64 offset = vmcs_read64(TSC_OFFSET); + vmcs_write64(TSC_OFFSET, offset + adjustment); if (is_guest_mode(vcpu)) { /* Even when running L2, the adjustment needs to apply to L1 */ to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; - } + } else + trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset, + offset + adjustment); } static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) @@ -4176,10 +4181,10 @@ static void ept_set_mmio_spte_mask(void) /* * EPT Misconfigurations can be generated if the value of bits 2:0 * of an EPT paging-structure entry is 110b (write/execute). - * Also, magic bits (0xffull << 49) is set to quickly identify mmio + * Also, magic bits (0x3ull << 62) is set to quickly identify mmio * spte. */ - kvm_mmu_set_mmio_spte_mask(0xffull << 49 | 0x6ull); + kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull); } /* @@ -5366,10 +5371,14 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); ret = handle_mmio_page_fault_common(vcpu, gpa, true); - if (likely(ret == 1)) + if (likely(ret == RET_MMIO_PF_EMULATE)) return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == EMULATE_DONE; - if (unlikely(!ret)) + + if (unlikely(ret == RET_MMIO_PF_INVALID)) + return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0); + + if (unlikely(ret == RET_MMIO_PF_RETRY)) return 1; /* It is the real ept misconfig */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 292e6ca89f42..d21bce505315 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1193,20 +1193,37 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { + int faulted = 0; + /* n.b - signed multiplication and division required */ usdiff = data - kvm->arch.last_tsc_write; #ifdef CONFIG_X86_64 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; #else /* do_div() only does unsigned */ - asm("idivl %2; xor %%edx, %%edx" - : "=A"(usdiff) - : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); + asm("1: idivl %[divisor]\n" + "2: xor %%edx, %%edx\n" + " movl $0, %[faulted]\n" + "3:\n" + ".section .fixup,\"ax\"\n" + "4: movl $1, %[faulted]\n" + " jmp 3b\n" + ".previous\n" + + _ASM_EXTABLE(1b, 4b) + + : "=A"(usdiff), [faulted] "=r" (faulted) + : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz)); + #endif do_div(elapsed, 1000); usdiff -= elapsed; if (usdiff < 0) usdiff = -usdiff; + + /* idivl overflow => difference is larger than USEC_PER_SEC */ + if (faulted) + usdiff = USEC_PER_SEC; } else usdiff = USEC_PER_SEC; /* disable TSC match window below */ @@ -1587,6 +1604,30 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) return 0; } +/* + * kvmclock updates which are isolated to a given vcpu, such as + * vcpu->cpu migration, should not allow system_timestamp from + * the rest of the vcpus to remain static. Otherwise ntp frequency + * correction applies to one vcpu's system_timestamp but not + * the others. + * + * So in those cases, request a kvmclock update for all vcpus. + * The worst case for a remote vcpu to update its kvmclock + * is then bounded by maximum nohz sleep latency. + */ + +static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) +{ + int i; + struct kvm *kvm = v->kvm; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_vcpu_kick(vcpu); + } +} + static bool msr_mtrr_valid(unsigned msr) { switch (msr) { @@ -1984,7 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvmclock_reset(vcpu); vcpu->arch.time = data; - kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); /* we verify if the enable bit is set... */ if (!(data & 1)) @@ -2701,7 +2742,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * kvmclock on vcpu->cpu migration */ if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) - kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); if (vcpu->cpu != cpu) kvm_migrate_timers(vcpu); vcpu->cpu = cpu; @@ -5238,7 +5279,13 @@ static void kvm_set_mmio_spte_mask(void) * Set the reserved bits and the present bit of an paging-structure * entry to generate page fault with PFER.RSV = 1. */ - mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr; + /* Mask the reserved physical address bits. */ + mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr; + + /* Bit 62 is always reserved for 32bit host. */ + mask |= 0x3ull << 62; + + /* Set the present bit. */ mask |= 1ull; #ifdef CONFIG_X86_64 @@ -5498,13 +5545,6 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) char instruction[3]; unsigned long rip = kvm_rip_read(vcpu); - /* - * Blow out the MMU to ensure that no other VCPU has an active mapping - * to ensure that the updated hypercall appears atomically across all - * VCPUs. - */ - kvm_mmu_zap_all(vcpu->kvm); - kvm_x86_ops->patch_hypercall(vcpu, instruction); return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); @@ -5702,6 +5742,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) __kvm_migrate_timers(vcpu); if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu)) kvm_gen_update_masterclock(vcpu->kvm); + if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu)) + kvm_gen_kvmclock_update(vcpu); if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { r = kvm_guest_time_update(vcpu); if (unlikely(r)) @@ -6812,6 +6854,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return -EINVAL; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ @@ -7040,22 +7083,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * If memory slot is created, or moved, we need to clear all * mmio sptes. */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { - kvm_mmu_zap_mmio_sptes(kvm); - kvm_reload_remote_mmus(kvm); - } + kvm_mmu_invalidate_mmio_sptes(kvm); } void kvm_arch_flush_shadow_all(struct kvm *kvm) { - kvm_mmu_zap_all(kvm); - kvm_reload_remote_mmus(kvm); + kvm_mmu_invalidate_zap_all_pages(kvm); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { - kvm_arch_flush_shadow_all(kvm); + kvm_mmu_invalidate_zap_all_pages(kvm); } int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) @@ -7263,3 +7302,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile index 94e0e54056a9..8f38d577a2fa 100644 --- a/arch/x86/lguest/Makefile +++ b/arch/x86/lguest/Makefile @@ -1,2 +1,2 @@ -obj-y := i386_head.o boot.o +obj-y := head_32.o boot.o CFLAGS_boot.o := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index d482bcaf61c1..6a22c19da663 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -882,9 +882,9 @@ int lguest_setup_irq(unsigned int irq) * It would be far better for everyone if the Guest had its own clock, but * until then the Host gives us the time on every interrupt. */ -static unsigned long lguest_get_wallclock(void) +static void lguest_get_wallclock(struct timespec *now) { - return lguest_data.time.tv_sec; + *now = lguest_data.time; } /* diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/head_32.S index 6ddfe4fc23c3..6ddfe4fc23c3 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/head_32.S diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 252b8f5489ba..4500142bc4aa 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -1,6 +1,7 @@ #include <linux/highmem.h> #include <linux/module.h> #include <linux/swap.h> /* for totalram_pages */ +#include <linux/bootmem.h> void *kmap(struct page *page) { @@ -121,6 +122,11 @@ void __init set_highmem_pages_init(void) struct zone *zone; int nid; + /* + * Explicitly reset zone->managed_pages because set_highmem_pages_init() + * is invoked before free_all_bootmem() + */ + reset_all_zones_managed_pages(); for_each_zone(zone) { unsigned long zone_start_pfn, zone_end_pfn; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index ae1aa71d0115..7e73e8c69096 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,169 +16,6 @@ #include <asm/tlbflush.h> #include <asm/pgalloc.h> -static unsigned long page_table_shareable(struct vm_area_struct *svma, - struct vm_area_struct *vma, - unsigned long addr, pgoff_t idx) -{ - unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + - svma->vm_start; - unsigned long sbase = saddr & PUD_MASK; - unsigned long s_end = sbase + PUD_SIZE; - - /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; - - /* - * match the virtual addresses, permission and the alignment of the - * page table page. - */ - if (pmd_index(addr) != pmd_index(saddr) || - vm_flags != svm_flags || - sbase < svma->vm_start || svma->vm_end < s_end) - return 0; - - return saddr; -} - -static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) -{ - unsigned long base = addr & PUD_MASK; - unsigned long end = base + PUD_SIZE; - - /* - * check on proper vm_flags and page table alignment - */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) - return 1; - return 0; -} - -/* - * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() - * and returns the corresponding pte. While this is not necessary for the - * !shared pmd case because we can allocate the pmd later as well, it makes the - * code much cleaner. pmd allocation is essential for the shared case because - * pud has to be populated inside the same i_mmap_mutex section - otherwise - * racing tasks could either miss the sharing (see huge_pte_offset) or select a - * bad pmd for sharing. - */ -static pte_t * -huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) -{ - struct vm_area_struct *vma = find_vma(mm, addr); - struct address_space *mapping = vma->vm_file->f_mapping; - pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - struct vm_area_struct *svma; - unsigned long saddr; - pte_t *spte = NULL; - pte_t *pte; - - if (!vma_shareable(vma, addr)) - return (pte_t *)pmd_alloc(mm, pud, addr); - - mutex_lock(&mapping->i_mmap_mutex); - vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { - if (svma == vma) - continue; - - saddr = page_table_shareable(svma, vma, addr, idx); - if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr); - if (spte) { - get_page(virt_to_page(spte)); - break; - } - } - } - - if (!spte) - goto out; - - spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) - pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); - else - put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); -out: - pte = (pte_t *)pmd_alloc(mm, pud, addr); - mutex_unlock(&mapping->i_mmap_mutex); - return pte; -} - -/* - * unmap huge page backed by shared pte. - * - * Hugetlb pte page is ref counted at the time of mapping. If pte is shared - * indicated by page_count > 1, unmap is achieved by clearing pud and - * decrementing the ref count. If count == 1, the pte page is not shared. - * - * called with vma->vm_mm->page_table_lock held. - * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user - */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); - - BUG_ON(page_count(virt_to_page(ptep)) == 0); - if (page_count(virt_to_page(ptep)) == 1) - return 0; - - pud_clear(pud); - put_page(virt_to_page(ptep)); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; - return 1; -} - -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { - if (sz == PUD_SIZE) { - pte = (pte_t *)pud; - } else { - BUG_ON(sz != PMD_SIZE); - if (pud_none(*pud)) - pte = huge_pmd_share(mm, addr, pud); - else - pte = (pte_t *)pmd_alloc(mm, pud, addr); - } - } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - - return pte; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_large(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } - return (pte_t *) pmd; -} - #if 0 /* This is just for testing */ struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) @@ -240,30 +77,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_PSE); } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - #endif /* x86_64 also uses this file */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1f34e9219775..2ec29ac78ae6 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -494,7 +494,6 @@ int devmem_is_allowed(unsigned long pagenr) void free_init_pages(char *what, unsigned long begin, unsigned long end) { - unsigned long addr; unsigned long begin_aligned, end_aligned; /* Make sure boundaries are page aligned */ @@ -509,8 +508,6 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) if (begin >= end) return; - addr = begin; - /* * If debugging page accesses then do not free this memory but * mark them not present - any buggy init-section access will @@ -529,18 +526,13 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - - for (; addr < end; addr += PAGE_SIZE) { - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_reserved_page(virt_to_page(addr)); - } + free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what); #endif } void free_initmem(void) { - free_init_pages("unused kernel memory", + free_init_pages("unused kernel", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); } @@ -566,7 +558,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) * - relocate_initrd() * So here We can do PAGE_ALIGN() safely to get partial page to be freed */ - free_init_pages("initrd memory", start, PAGE_ALIGN(end)); + free_init_pages("initrd", start, PAGE_ALIGN(end)); } #endif diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 3ac7e319918d..4287f1ffba7e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -660,10 +660,8 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else - num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif @@ -671,7 +669,7 @@ void __init initmem_init(void) sparse_memory_present_with_active_regions(0); #ifdef CONFIG_FLATMEM - max_mapnr = num_physpages; + max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn; #endif __vmalloc_start_set = true; @@ -739,9 +737,6 @@ static void __init test_wp_bit(void) void __init mem_init(void) { - int codesize, reservedpages, datasize, initsize; - int tmp; - pci_iommu_alloc(); #ifdef CONFIG_FLATMEM @@ -759,32 +754,11 @@ void __init mem_init(void) set_highmem_pages_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - - reservedpages = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - /* - * Only count reserved RAM pages: - */ - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) - reservedpages++; + free_all_bootmem(); after_bootmem = 1; - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " - "%dk reserved, %dk data, %dk init, %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - totalhigh_pages << (PAGE_SHIFT-10)); - + mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b3940b6b4d7e..104d56a9245f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -712,36 +712,22 @@ EXPORT_SYMBOL_GPL(arch_add_memory); static void __meminit free_pagetable(struct page *page, int order) { - struct zone *zone; - bool bootmem = false; unsigned long magic; unsigned int nr_pages = 1 << order; /* bootmem page has reserved flag */ if (PageReserved(page)) { __ClearPageReserved(page); - bootmem = true; magic = (unsigned long)page->lru.next; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); } else - __free_pages_bootmem(page, order); + while (nr_pages--) + free_reserved_page(page++); } else free_pages((unsigned long)page_address(page), order); - - /* - * SECTION_INFO pages and MIX_SECTION_INFO pages - * are all allocated by bootmem. - */ - if (bootmem) { - zone = page_zone(page); - zone_span_writelock(zone); - zone->present_pages += nr_pages; - zone_span_writeunlock(zone); - totalram_pages += nr_pages; - } } static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) @@ -1058,9 +1044,6 @@ static void __init register_page_bootmem_info(void) void __init mem_init(void) { - long codesize, reservedpages, datasize, initsize; - unsigned long absent_pages; - pci_iommu_alloc(); /* clear_bss() already clear the empty_zero_page */ @@ -1068,29 +1051,14 @@ void __init mem_init(void) register_page_bootmem_info(); /* this will put all memory onto the freelists */ - totalram_pages = free_all_bootmem(); - - absent_pages = absent_pages_in_range(0, max_pfn); - reservedpages = max_pfn - totalram_pages - absent_pages; + free_all_bootmem(); after_bootmem = 1; - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - /* Register memory areas for /proc/kcore */ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, VSYSCALL_END - VSYSCALL_START, KCORE_OTHER); - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_pfn << (PAGE_SHIFT-10), - codesize >> 10, - absent_pages << (PAGE_SHIFT-10), - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); + mem_init_print_info(NULL); } #ifdef CONFIG_DEBUG_RODATA @@ -1166,11 +1134,10 @@ void mark_rodata_ro(void) set_memory_ro(start, (end-start) >> PAGE_SHIFT); #endif - free_init_pages("unused kernel memory", + free_init_pages("unused kernel", (unsigned long) __va(__pa_symbol(text_end)), (unsigned long) __va(__pa_symbol(rodata_start))); - - free_init_pages("unused kernel memory", + free_init_pages("unused kernel", (unsigned long) __va(__pa_symbol(rodata_end)), (unsigned long) __va(__pa_symbol(_sdata))); } diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 73a6d7395bd3..0342d27ca798 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -83,10 +83,8 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else - num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 17fda6a8b3c2..dfa537a03be1 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -240,7 +240,6 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) { pud_t *pud; - unsigned long addr; int i; if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ @@ -248,8 +247,7 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) pud = pud_offset(pgd, 0); - for (addr = i = 0; i < PREALLOCATED_PMDS; - i++, pud++, addr += PUD_SIZE) { + for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) { pmd_t *pmd = pmds[i]; if (i >= KERNEL_PGD_BOUNDARY) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index f66b54086ce5..79c216aa0e2b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -12,6 +12,7 @@ #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/if_vlan.h> +#include <linux/random.h> /* * Conventions : @@ -144,6 +145,39 @@ static int pkt_type_offset(void) return -1; } +struct bpf_binary_header { + unsigned int pages; + /* Note : for security reasons, bpf code will follow a randomly + * sized amount of int3 instructions + */ + u8 image[]; +}; + +static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, + u8 **image_ptr) +{ + unsigned int sz, hole; + struct bpf_binary_header *header; + + /* Most of BPF filters are really small, + * but if some of them fill a page, allow at least + * 128 extra bytes to insert a random section of int3 + */ + sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE); + header = module_alloc(sz); + if (!header) + return NULL; + + memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ + + header->pages = sz / PAGE_SIZE; + hole = sz - (proglen + sizeof(*header)); + + /* insert a random number of int3 instructions before BPF code */ + *image_ptr = &header->image[prandom_u32() % hole]; + return header; +} + void bpf_jit_compile(struct sk_filter *fp) { u8 temp[64]; @@ -153,6 +187,7 @@ void bpf_jit_compile(struct sk_filter *fp) int t_offset, f_offset; u8 t_op, f_op, seen = 0, pass; u8 *image = NULL; + struct bpf_binary_header *header = NULL; u8 *func; int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ unsigned int cleanup_addr; /* epilogue code offset */ @@ -693,7 +728,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; if (unlikely(proglen + ilen > oldproglen)) { pr_err("bpb_jit_compile fatal error\n"); kfree(addrs); - module_free(NULL, image); + module_free(NULL, header); return; } memcpy(image + proglen, temp, ilen); @@ -717,10 +752,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; break; } if (proglen == oldproglen) { - image = module_alloc(max_t(unsigned int, - proglen, - sizeof(struct work_struct))); - if (!image) + header = bpf_alloc_binary(proglen, &image); + if (!header) goto out; } oldproglen = proglen; @@ -730,7 +763,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; bpf_jit_dump(flen, proglen, pass, image); if (image) { - bpf_flush_icache(image, image + proglen); + bpf_flush_icache(header, image + proglen); + set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)image; } out: @@ -738,20 +772,13 @@ out: return; } -static void jit_free_defer(struct work_struct *arg) -{ - module_free(NULL, arg); -} - -/* run from softirq, we must use a work_struct to call - * module_free() from process context - */ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) { - struct work_struct *work = (struct work_struct *)fp->bpf_func; + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; - INIT_WORK(work, jit_free_defer); - schedule_work(work); + set_memory_rw(addr, header->pages); + module_free(NULL, header); } } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 3e724256dbee..d641897a1f4e 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -324,14 +324,11 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->start = start; res->end = end; info->res_offset[info->res_num] = addr.translation_offset; + info->res_num++; - if (!pci_use_crs) { + if (!pci_use_crs) dev_printk(KERN_DEBUG, &info->bridge->dev, "host bridge window %pR (ignored)\n", res); - return AE_OK; - } - - info->res_num++; return AE_OK; } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b410b71bdcf7..c8d5577044bb 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -274,8 +274,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm, return status; } -int efi_set_rtc_mmss(unsigned long nowtime) +int efi_set_rtc_mmss(const struct timespec *now) { + unsigned long nowtime = now->tv_sec; efi_status_t status; efi_time_t eft; efi_time_cap_t cap; @@ -310,7 +311,7 @@ int efi_set_rtc_mmss(unsigned long nowtime) return 0; } -unsigned long efi_get_time(void) +void efi_get_time(struct timespec *now) { efi_status_t status; efi_time_t eft; @@ -320,8 +321,9 @@ unsigned long efi_get_time(void) if (status != EFI_SUCCESS) pr_err("Oops: efitime: can't read time!\n"); - return mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); + now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, + eft.minute, eft.second); + now->tv_nsec = 0; } /* diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c index d62b0a3b5c14..5e355b134ba4 100644 --- a/arch/x86/platform/mrst/vrtc.c +++ b/arch/x86/platform/mrst/vrtc.c @@ -56,7 +56,7 @@ void vrtc_cmos_write(unsigned char val, unsigned char reg) } EXPORT_SYMBOL_GPL(vrtc_cmos_write); -unsigned long vrtc_get_time(void) +void vrtc_get_time(struct timespec *now) { u8 sec, min, hour, mday, mon; unsigned long flags; @@ -82,17 +82,18 @@ unsigned long vrtc_get_time(void) printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d " "mon: %d year: %d\n", sec, min, hour, mday, mon, year); - return mktime(year, mon, mday, hour, min, sec); + now->tv_sec = mktime(year, mon, mday, hour, min, sec); + now->tv_nsec = 0; } -int vrtc_set_mmss(unsigned long nowtime) +int vrtc_set_mmss(const struct timespec *now) { unsigned long flags; struct rtc_time tm; int year; int retval = 0; - rtc_time_to_tm(nowtime, &tm); + rtc_time_to_tm(now->tv_sec, &tm); if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) { /* * tm.year is the number of years since 1900, and the @@ -110,7 +111,7 @@ int vrtc_set_mmss(unsigned long nowtime) } else { printk(KERN_ERR "%s: Invalid vRTC value: write of %lx to vRTC failed\n", - __FUNCTION__, nowtime); + __FUNCTION__, now->tv_sec); retval = -EINVAL; } return retval; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d99cae8147d1..c1367b29c3b1 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -40,11 +40,15 @@ cpumask_var_t xen_cpu_initialized_map; -static DEFINE_PER_CPU(int, xen_resched_irq); -static DEFINE_PER_CPU(int, xen_callfunc_irq); -static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); -static DEFINE_PER_CPU(int, xen_irq_work); -static DEFINE_PER_CPU(int, xen_debug_irq) = -1; +struct xen_common_irq { + int irq; + char *name; +}; +static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 }; +static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 }; +static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 }; +static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; +static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 }; static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); @@ -99,10 +103,47 @@ static void __cpuinit cpu_bringup_and_idle(void) cpu_startup_entry(CPUHP_ONLINE); } +static void xen_smp_intr_free(unsigned int cpu) +{ + if (per_cpu(xen_resched_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL); + per_cpu(xen_resched_irq, cpu).irq = -1; + kfree(per_cpu(xen_resched_irq, cpu).name); + per_cpu(xen_resched_irq, cpu).name = NULL; + } + if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL); + per_cpu(xen_callfunc_irq, cpu).irq = -1; + kfree(per_cpu(xen_callfunc_irq, cpu).name); + per_cpu(xen_callfunc_irq, cpu).name = NULL; + } + if (per_cpu(xen_debug_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL); + per_cpu(xen_debug_irq, cpu).irq = -1; + kfree(per_cpu(xen_debug_irq, cpu).name); + per_cpu(xen_debug_irq, cpu).name = NULL; + } + if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq, + NULL); + per_cpu(xen_callfuncsingle_irq, cpu).irq = -1; + kfree(per_cpu(xen_callfuncsingle_irq, cpu).name); + per_cpu(xen_callfuncsingle_irq, cpu).name = NULL; + } + if (xen_hvm_domain()) + return; + + if (per_cpu(xen_irq_work, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL); + per_cpu(xen_irq_work, cpu).irq = -1; + kfree(per_cpu(xen_irq_work, cpu).name); + per_cpu(xen_irq_work, cpu).name = NULL; + } +}; static int xen_smp_intr_init(unsigned int cpu) { int rc; - const char *resched_name, *callfunc_name, *debug_name; + char *resched_name, *callfunc_name, *debug_name; resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, @@ -113,7 +154,8 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(xen_resched_irq, cpu) = rc; + per_cpu(xen_resched_irq, cpu).irq = rc; + per_cpu(xen_resched_irq, cpu).name = resched_name; callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, @@ -124,7 +166,8 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(xen_callfunc_irq, cpu) = rc; + per_cpu(xen_callfunc_irq, cpu).irq = rc; + per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, @@ -132,7 +175,8 @@ static int xen_smp_intr_init(unsigned int cpu) debug_name, NULL); if (rc < 0) goto fail; - per_cpu(xen_debug_irq, cpu) = rc; + per_cpu(xen_debug_irq, cpu).irq = rc; + per_cpu(xen_debug_irq, cpu).name = debug_name; callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, @@ -143,7 +187,8 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(xen_callfuncsingle_irq, cpu) = rc; + per_cpu(xen_callfuncsingle_irq, cpu).irq = rc; + per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; /* * The IRQ worker on PVHVM goes through the native path and uses the @@ -161,26 +206,13 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(xen_irq_work, cpu) = rc; + per_cpu(xen_irq_work, cpu).irq = rc; + per_cpu(xen_irq_work, cpu).name = callfunc_name; return 0; fail: - if (per_cpu(xen_resched_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); - if (per_cpu(xen_callfunc_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); - if (per_cpu(xen_debug_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); - if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), - NULL); - if (xen_hvm_domain()) - return rc; - - if (per_cpu(xen_irq_work, cpu) >= 0) - unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); - + xen_smp_intr_free(cpu); return rc; } @@ -433,12 +465,7 @@ static void xen_cpu_die(unsigned int cpu) current->state = TASK_UNINTERRUPTIBLE; schedule_timeout(HZ/10); } - unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); - if (!xen_hvm_domain()) - unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); + xen_smp_intr_free(cpu); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); } diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 3002ec1bb71a..a40f8508e760 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -7,6 +7,7 @@ #include <linux/debugfs.h> #include <linux/log2.h> #include <linux/gfp.h> +#include <linux/slab.h> #include <asm/paravirt.h> @@ -165,6 +166,7 @@ static int xen_spin_trylock(struct arch_spinlock *lock) return old == 0; } +static DEFINE_PER_CPU(char *, irq_name); static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); @@ -362,7 +364,7 @@ static irqreturn_t dummy_handler(int irq, void *dev_id) void __cpuinit xen_init_lock_cpu(int cpu) { int irq; - const char *name; + char *name; WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", cpu, per_cpu(lock_kicker_irq, cpu)); @@ -385,6 +387,7 @@ void __cpuinit xen_init_lock_cpu(int cpu) if (irq >= 0) { disable_irq(irq); /* make sure it's never delivered */ per_cpu(lock_kicker_irq, cpu) = irq; + per_cpu(irq_name, cpu) = name; } printk("cpu %d spinlock event irq %d\n", cpu, irq); @@ -401,6 +404,8 @@ void xen_uninit_lock_cpu(int cpu) unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); per_cpu(lock_kicker_irq, cpu) = -1; + kfree(per_cpu(irq_name, cpu)); + per_cpu(irq_name, cpu) = NULL; } void __init xen_init_spinlocks(void) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 3d88bfdf9e1c..ee365895b06b 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -14,6 +14,8 @@ #include <linux/kernel_stat.h> #include <linux/math64.h> #include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/pvclock_gtod.h> #include <asm/pvclock.h> #include <asm/xen/hypervisor.h> @@ -36,9 +38,8 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); /* snapshots of runstate info */ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); -/* unused ns of stolen and blocked time */ +/* unused ns of stolen time */ static DEFINE_PER_CPU(u64, xen_residual_stolen); -static DEFINE_PER_CPU(u64, xen_residual_blocked); /* return an consistent snapshot of 64-bit time/counter value */ static u64 get64(const u64 *p) @@ -115,7 +116,7 @@ static void do_stolen_accounting(void) { struct vcpu_runstate_info state; struct vcpu_runstate_info *snap; - s64 blocked, runnable, offline, stolen; + s64 runnable, offline, stolen; cputime_t ticks; get_runstate_snapshot(&state); @@ -125,7 +126,6 @@ static void do_stolen_accounting(void) snap = &__get_cpu_var(xen_runstate_snapshot); /* work out how much time the VCPU has not been runn*ing* */ - blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; @@ -141,17 +141,6 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __this_cpu_write(xen_residual_stolen, stolen); account_steal_ticks(ticks); - - /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. */ - blocked += __this_cpu_read(xen_residual_blocked); - - if (blocked < 0) - blocked = 0; - - ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); - __this_cpu_write(xen_residual_blocked, blocked); - account_idle_ticks(ticks); } /* Get the TSC speed from Xen */ @@ -191,34 +180,56 @@ static void xen_read_wallclock(struct timespec *ts) put_cpu_var(xen_vcpu); } -static unsigned long xen_get_wallclock(void) +static void xen_get_wallclock(struct timespec *now) { - struct timespec ts; + xen_read_wallclock(now); +} - xen_read_wallclock(&ts); - return ts.tv_sec; +static int xen_set_wallclock(const struct timespec *now) +{ + return -1; } -static int xen_set_wallclock(unsigned long now) +static int xen_pvclock_gtod_notify(struct notifier_block *nb, + unsigned long was_set, void *priv) { + /* Protected by the calling core code serialization */ + static struct timespec next_sync; + struct xen_platform_op op; - int rc; + struct timespec now; - /* do nothing for domU */ - if (!xen_initial_domain()) - return -1; + now = __current_kernel_time(); + + /* + * We only take the expensive HV call when the clock was set + * or when the 11 minutes RTC synchronization time elapsed. + */ + if (!was_set && timespec_compare(&now, &next_sync) < 0) + return NOTIFY_OK; op.cmd = XENPF_settime; - op.u.settime.secs = now; - op.u.settime.nsecs = 0; + op.u.settime.secs = now.tv_sec; + op.u.settime.nsecs = now.tv_nsec; op.u.settime.system_time = xen_clocksource_read(); - rc = HYPERVISOR_dom0_op(&op); - WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); + (void)HYPERVISOR_dom0_op(&op); + + /* + * Move the next drift compensation time 11 minutes + * ahead. That's emulating the sync_cmos_clock() update for + * the hardware RTC. + */ + next_sync = now; + next_sync.tv_sec += 11 * 60; - return rc; + return NOTIFY_OK; } +static struct notifier_block xen_pvclock_gtod_notifier = { + .notifier_call = xen_pvclock_gtod_notify, +}; + static struct clocksource xen_clocksource __read_mostly = { .name = "xen", .rating = 400, @@ -377,11 +388,16 @@ static const struct clock_event_device xen_vcpuop_clockevent = { static const struct clock_event_device *xen_clockevent = &xen_timerop_clockevent; -static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events) = { .irq = -1 }; + +struct xen_clock_event_device { + struct clock_event_device evt; + char *name; +}; +static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 }; static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) { - struct clock_event_device *evt = &__get_cpu_var(xen_clock_events); + struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt; irqreturn_t ret; ret = IRQ_NONE; @@ -395,14 +411,30 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) return ret; } +void xen_teardown_timer(int cpu) +{ + struct clock_event_device *evt; + BUG_ON(cpu == 0); + evt = &per_cpu(xen_clock_events, cpu).evt; + + if (evt->irq >= 0) { + unbind_from_irqhandler(evt->irq, NULL); + evt->irq = -1; + kfree(per_cpu(xen_clock_events, cpu).name); + per_cpu(xen_clock_events, cpu).name = NULL; + } +} + void xen_setup_timer(int cpu) { - const char *name; + char *name; struct clock_event_device *evt; int irq; - evt = &per_cpu(xen_clock_events, cpu); + evt = &per_cpu(xen_clock_events, cpu).evt; WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu); + if (evt->irq >= 0) + xen_teardown_timer(cpu); printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); @@ -420,22 +452,15 @@ void xen_setup_timer(int cpu) evt->cpumask = cpumask_of(cpu); evt->irq = irq; + per_cpu(xen_clock_events, cpu).name = name; } -void xen_teardown_timer(int cpu) -{ - struct clock_event_device *evt; - BUG_ON(cpu == 0); - evt = &per_cpu(xen_clock_events, cpu); - unbind_from_irqhandler(evt->irq, NULL); - evt->irq = -1; -} void xen_setup_cpu_clockevents(void) { BUG_ON(preemptible()); - clockevents_register_device(&__get_cpu_var(xen_clock_events)); + clockevents_register_device(&__get_cpu_var(xen_clock_events).evt); } void xen_timer_resume(void) @@ -480,6 +505,9 @@ static void __init xen_time_init(void) xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + + if (xen_initial_domain()) + pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } void __init xen_init_time_ops(void) @@ -492,7 +520,9 @@ void __init xen_init_time_ops(void) x86_platform.calibrate_tsc = xen_tsc_khz; x86_platform.get_wallclock = xen_get_wallclock; - x86_platform.set_wallclock = xen_set_wallclock; + /* Dom0 uses the native method to set the hardware RTC. */ + if (!xen_initial_domain()) + x86_platform.set_wallclock = xen_set_wallclock; } #ifdef CONFIG_XEN_PVHVM diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index a8f44f50e651..b21ace4fc9ba 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -85,4 +85,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_LL 46 + #endif /* _XTENSA_SOCKET_H */ diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index bba125b4bb06..479d7537a32a 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -173,39 +173,16 @@ void __init zones_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - unsigned long highmemsize, tmp, ram; - - max_mapnr = num_physpages = max_low_pfn - ARCH_PFN_OFFSET; + max_mapnr = max_low_pfn - ARCH_PFN_OFFSET; high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - highmemsize = 0; #ifdef CONFIG_HIGHMEM #error HIGHGMEM not implemented in init.c #endif - totalram_pages += free_all_bootmem(); - - reservedpages = ram = 0; - for (tmp = 0; tmp < max_mapnr; tmp++) { - ram++; - if (PageReserved(mem_map+tmp)) - reservedpages++; - } + free_all_bootmem(); - codesize = (unsigned long) _etext - (unsigned long) _stext; - datasize = (unsigned long) _edata - (unsigned long) _sdata; - initsize = (unsigned long) __init_end - (unsigned long) __init_begin; - - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, " - "%ldk data, %ldk init %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - ram << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - highmemsize >> 10); + mem_init_print_info(NULL); } #ifdef CONFIG_BLK_DEV_INITRD @@ -214,11 +191,11 @@ extern int initrd_is_mapped; void free_initrd_mem(unsigned long start, unsigned long end) { if (initrd_is_mapped) - free_reserved_area(start, end, 0, "initrd"); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(-1); } |