diff options
Diffstat (limited to 'arch')
494 files changed, 5733 insertions, 3948 deletions
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 2f24447fef92..46bf263c3153 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -168,7 +168,7 @@ smp_callin(void) cpuid, current, current->active_mm)); preempt_disable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */ diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 76dde9db7934..8a188bc1786a 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -12,8 +12,6 @@ config ARC select BUILDTIME_EXTABLE_SORT select COMMON_CLK select CLONE_BACKWARDS - # ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev - select DEVTMPFS if !INITRAMFS_SOURCE="" select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT @@ -275,14 +273,6 @@ config ARC_DCCM_BASE default "0xA0000000" depends on ARC_HAS_DCCM -config ARC_HAS_HW_MPY - bool "Use Hardware Multiplier (Normal or Faster XMAC)" - default y - help - Influences how gcc generates code for MPY operations. - If enabled, MPYxx insns are generated, provided by Standard/XMAC - Multipler. Otherwise software multipy lib is used - choice prompt "MMU Version" default ARC_MMU_V3 if ARC_CPU_770 @@ -338,6 +328,19 @@ config ARC_PAGE_SIZE_4K endchoice +choice + prompt "MMU Super Page Size" + depends on ISA_ARCV2 && TRANSPARENT_HUGEPAGE + default ARC_HUGEPAGE_2M + +config ARC_HUGEPAGE_2M + bool "2MB" + +config ARC_HUGEPAGE_16M + bool "16MB" + +endchoice + if ISA_ARCOMPACT config ARC_COMPACT_IRQ_LEVELS @@ -410,7 +413,7 @@ config ARC_HAS_RTC default n depends on !SMP -config ARC_HAS_GRTC +config ARC_HAS_GFRC bool "SMP synchronized 64-bit cycle counter" default y depends on SMP @@ -529,14 +532,6 @@ config ARC_DBG_TLB_MISS_COUNT Counts number of I and D TLB Misses and exports them via Debugfs The counters can be cleared via Debugfs as well -if SMP - -config ARC_IPI_DBG - bool "Debug Inter Core interrupts" - default n - -endif - endif config ARC_UBOOT_SUPPORT @@ -566,6 +561,12 @@ endmenu endmenu # "ARC Architecture Configuration" source "mm/Kconfig" + +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + default "12" if ARC_HUGEPAGE_16M + default "11" + source "net/Kconfig" source "drivers/Kconfig" source "fs/Kconfig" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index aeb19021099e..c8230f3395f2 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -74,10 +74,6 @@ ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB # --build-id w/o "-marclinux". Default arc-elf32-ld is OK ldflags-$(upto_gcc44) += -marclinux -ifndef CONFIG_ARC_HAS_HW_MPY - cflags-y += -mno-mpy -endif - LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index f1ac9818b751..5d4e2a07ad3e 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -39,6 +39,7 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set @@ -73,7 +74,6 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_DESIGNWARE_PLATFORM=y # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_LOGO=y @@ -91,12 +91,10 @@ CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT3_FS=y -CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 323486d6ee83..87ee46b237ef 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -39,14 +39,10 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_AXS=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y @@ -78,14 +74,12 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_DESIGNWARE_PLATFORM=y # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set # CONFIG_LOGO_LINUX_CLUT224 is not set -CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y @@ -97,12 +91,10 @@ CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT3_FS=y -CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 66191cd0447e..d80daf4f7e73 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -40,14 +40,10 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_AXS=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y @@ -79,14 +75,12 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_DESIGNWARE_PLATFORM=y # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set # CONFIG_LOGO_LINUX_CLUT224 is not set -CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y @@ -98,12 +92,10 @@ CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT3_FS=y -CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 138f9d887957..f41095340b6a 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -4,6 +4,7 @@ CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -26,7 +27,6 @@ CONFIG_ARC_PLAT_SIM=y CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set -# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -34,6 +34,7 @@ CONFIG_UNIX_DIAG=y CONFIG_NET_KEY=y CONFIG_INET=y # CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set @@ -51,7 +52,6 @@ CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set @@ -63,4 +63,3 @@ CONFIG_NFS_FS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_DEBUG_PREEMPT is not set -CONFIG_XZ_DEC=y diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index f68838e8068a..cfaa33cb5921 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -35,6 +35,7 @@ CONFIG_UNIX_DIAG=y CONFIG_NET_KEY=y CONFIG_INET=y # CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set @@ -49,7 +50,6 @@ CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set @@ -61,4 +61,3 @@ CONFIG_NFS_FS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_DEBUG_PREEMPT is not set -CONFIG_XZ_DEC=y diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index 96bd1c20fb0b..bb2a8dc778b5 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -2,6 +2,7 @@ CONFIG_CROSS_COMPILE="arc-linux-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -21,13 +22,11 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_ARC_PLAT_SIM=y -CONFIG_ARC_BOARD_ML509=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set -# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -35,6 +34,7 @@ CONFIG_UNIX_DIAG=y CONFIG_NET_KEY=y CONFIG_INET=y # CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set @@ -49,7 +49,6 @@ CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set @@ -60,4 +59,3 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_XZ_DEC=y diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 31e1d95764ff..646182e93753 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -33,6 +33,7 @@ CONFIG_UNIX_DIAG=y CONFIG_NET_KEY=y CONFIG_INET=y # CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set @@ -58,7 +59,6 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_HID is not set diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index fcae66683ca0..ceca2541950d 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -34,12 +34,12 @@ CONFIG_UNIX_DIAG=y CONFIG_NET_KEY=y CONFIG_INET=y # CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y -CONFIG_NET_OSCI_LAN=y CONFIG_INPUT_EVDEV=y # CONFIG_MOUSE_PS2_ALPS is not set # CONFIG_MOUSE_PS2_LOGIPS2PP is not set @@ -58,7 +58,6 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_FB=y -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_HID is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index b01b659168ea..4b6da90f6f26 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -2,6 +2,7 @@ CONFIG_CROSS_COMPILE="arc-linux-" CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y @@ -18,15 +19,11 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_ARC_PLAT_SIM=y -CONFIG_ARC_BOARD_ML509=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y -CONFIG_ARC_HAS_LL64=y -# CONFIG_ARC_HAS_RTSC is not set CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set -# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=y @@ -40,6 +37,7 @@ CONFIG_INET=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FIRMWARE_IN_KERNEL is not set @@ -56,14 +54,11 @@ CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set -CONFIG_NET_OSCI_LAN=y # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set -CONFIG_SERIO_LIBPS2=y CONFIG_SERIO_ARC_PS2=y -CONFIG_VT_HW_CONSOLE_BINDING=y # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y @@ -75,9 +70,6 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_FB=y -CONFIG_ARCPGU_RGB888=y -CONFIG_ARCPGU_DISPTYPE=0 -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_HID is not set diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 3b4dc9cebcf1..9b342eaf95ae 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -3,6 +3,7 @@ CONFIG_CROSS_COMPILE="arc-linux-" CONFIG_DEFAULT_HOSTNAME="tb10x" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y @@ -26,12 +27,10 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLOCK is not set CONFIG_ARC_PLAT_TB10X=y CONFIG_ARC_CACHE_LINE_SHIFT=5 -CONFIG_ARC_STACK_NONEXEC=y CONFIG_HZ=250 CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_COMPACTION is not set -# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -44,8 +43,8 @@ CONFIG_IP_MULTICAST=y # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y # CONFIG_FIRMWARE_IN_KERNEL is not set -CONFIG_PROC_DEVICETREE=y CONFIG_NETDEVICES=y # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -55,9 +54,6 @@ CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_SEEQ is not set CONFIG_STMMAC_ETH=y -CONFIG_STMMAC_DEBUG_FS=y -CONFIG_STMMAC_DA=y -CONFIG_STMMAC_CHAINED=y # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_WLAN is not set # CONFIG_INPUT is not set @@ -91,7 +87,6 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_LEDS_TRIGGER_TRANSIENT=y CONFIG_DMADEVICES=y CONFIG_DW_DMAC=y -CONFIG_NET_DMA=y CONFIG_ASYNC_TX_DMA=y # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set @@ -100,17 +95,16 @@ CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set -CONFIG_MAGIC_SYSRQ=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_MEMORY_INIT=y -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index f36c047b33ca..735985974a31 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -16,7 +16,7 @@ CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y -# CONFIG_ARC_HAS_GRTC is not set +# CONFIG_ARC_HAS_GFRC is not set CONFIG_ARC_UBOOT_SUPPORT=y CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp" CONFIG_PREEMPT=y diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 7fac7d85ed6a..f9f4c6f59fdb 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -10,7 +10,8 @@ #define _ASM_ARC_ARCREGS_H /* Build Configuration Registers */ -#define ARC_REG_DCCMBASE_BCR 0x61 /* DCCM Base Addr */ +#define ARC_REG_AUX_DCCM 0x18 /* DCCM Base Addr ARCv2 */ +#define ARC_REG_DCCM_BASE_BUILD 0x61 /* DCCM Base Addr ARCompact */ #define ARC_REG_CRC_BCR 0x62 #define ARC_REG_VECBASE_BCR 0x68 #define ARC_REG_PERIBASE_BCR 0x69 @@ -18,10 +19,10 @@ #define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */ #define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */ #define ARC_REG_SLC_BCR 0xce -#define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ +#define ARC_REG_DCCM_BUILD 0x74 /* DCCM size (common) */ #define ARC_REG_TIMERS_BCR 0x75 #define ARC_REG_AP_BCR 0x76 -#define ARC_REG_ICCM_BCR 0x78 +#define ARC_REG_ICCM_BUILD 0x78 /* ICCM size (common) */ #define ARC_REG_XY_MEM_BCR 0x79 #define ARC_REG_MAC_BCR 0x7a #define ARC_REG_MUL_BCR 0x7b @@ -36,6 +37,7 @@ #define ARC_REG_IRQ_BCR 0xF3 #define ARC_REG_SMART_BCR 0xFF #define ARC_REG_CLUSTER_BCR 0xcf +#define ARC_REG_AUX_ICCM 0x208 /* ICCM Base Addr (ARCv2) */ /* status32 Bits Positions */ #define STATUS_AE_BIT 5 /* Exception active */ @@ -246,7 +248,7 @@ struct bcr_perip { #endif }; -struct bcr_iccm { +struct bcr_iccm_arcompact { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int base:16, pad:5, sz:3, ver:8; #else @@ -254,17 +256,15 @@ struct bcr_iccm { #endif }; -/* DCCM Base Address Register: ARC_REG_DCCMBASE_BCR */ -struct bcr_dccm_base { +struct bcr_iccm_arcv2 { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int addr:24, ver:8; + unsigned int pad:8, sz11:4, sz01:4, sz10:4, sz00:4, ver:8; #else - unsigned int ver:8, addr:24; + unsigned int ver:8, sz00:4, sz10:4, sz01:4, sz11:4, pad:8; #endif }; -/* DCCM RAM Configuration Register: ARC_REG_DCCM_BCR */ -struct bcr_dccm { +struct bcr_dccm_arcompact { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int res:21, sz:3, ver:8; #else @@ -272,6 +272,14 @@ struct bcr_dccm { #endif }; +struct bcr_dccm_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:12, cyc:3, pad1:1, sz1:4, sz0:4, ver:8; +#else + unsigned int ver:8, sz0:4, sz1:4, pad1:1, cyc:3, pad2:12; +#endif +}; + /* ARCompact: Both SP and DP FPU BCRs have same format */ struct bcr_fp_arcompact { #ifdef CONFIG_CPU_BIG_ENDIAN @@ -315,9 +323,9 @@ struct bcr_bpu_arcv2 { struct bcr_generic { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:24, ver:8; + unsigned int info:24, ver:8; #else - unsigned int ver:8, pad:24; + unsigned int ver:8, info:24; #endif }; @@ -349,14 +357,13 @@ struct cpuinfo_arc { struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa isa; - struct bcr_timer timers; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3, fpu_sp:1, fpu_dp:1, pad2:6, debug:1, ap:1, smart:1, rtt:1, pad3:4, - pad4:8; + timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; } extn; struct bcr_mpy extn_mpy; struct bcr_extn_xymem extn_xymem; diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index 4fd7d62a6e30..49014f0ef36d 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -16,11 +16,9 @@ #ifdef CONFIG_ISA_ARCOMPACT #define TIMER0_IRQ 3 #define TIMER1_IRQ 4 -#define IPI_IRQ (NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */ #else #define TIMER0_IRQ 16 #define TIMER1_IRQ 17 -#define IPI_IRQ 19 #endif #include <linux/interrupt.h> diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index 258b0e5ad332..37c2f751eebf 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -22,6 +22,7 @@ #define AUX_IRQ_CTRL 0x00E #define AUX_IRQ_ACT 0x043 /* Active Intr across all levels */ #define AUX_IRQ_LVL_PEND 0x200 /* Pending Intr across all levels */ +#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */ #define AUX_IRQ_PRIORITY 0x206 #define ICAUSE 0x40a #define AUX_IRQ_SELECT 0x40b @@ -30,8 +31,11 @@ /* Was Intr taken in User Mode */ #define AUX_IRQ_ACT_BIT_U 31 -/* 0 is highest level, but taken by FIRQs, if present in design */ -#define ARCV2_IRQ_DEF_PRIO 0 +/* + * User space should be interruptable even by lowest prio interrupt + * Safe even if actual interrupt priorities is fewer or even one + */ +#define ARCV2_IRQ_DEF_PRIO 15 /* seed value for status register */ #define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | STATUS_AD_MASK | \ @@ -112,6 +116,16 @@ static inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } +static inline void arc_softirq_trigger(int irq) +{ + write_aux_reg(AUX_IRQ_HINT, irq); +} + +static inline void arc_softirq_clear(int irq) +{ + write_aux_reg(AUX_IRQ_HINT, 0); +} + #else .macro IRQ_DISABLE scratch diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h index 46f4e5351b2a..847e3bbe387f 100644 --- a/arch/arc/include/asm/mcip.h +++ b/arch/arc/include/asm/mcip.h @@ -39,8 +39,8 @@ struct mcip_cmd { #define CMD_DEBUG_SET_MASK 0x34 #define CMD_DEBUG_SET_SELECT 0x36 -#define CMD_GRTC_READ_LO 0x42 -#define CMD_GRTC_READ_HI 0x43 +#define CMD_GFRC_READ_LO 0x42 +#define CMD_GFRC_READ_HI 0x43 #define CMD_IDU_ENABLE 0x71 #define CMD_IDU_DISABLE 0x72 diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 57af2f05ae84..d426d4215513 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -179,37 +179,44 @@ #define __S111 PAGE_U_X_W_R /**************************************************************** - * Page Table Lookup split + * 2 tier (PGD:PTE) software page walker * - * We implement 2 tier paging and since this is all software, we are free - * to customize the span of a PGD / PTE entry to suit us - * - * 32 bit virtual address + * [31] 32 bit virtual address [0] * ------------------------------------------------------- - * | BITS_FOR_PGD | BITS_FOR_PTE | BITS_IN_PAGE | + * | | <------------ PGDIR_SHIFT ----------> | + * | | | + * | BITS_FOR_PGD | BITS_FOR_PTE | <-- PAGE_SHIFT --> | * ------------------------------------------------------- * | | | * | | --> off in page frame - * | | * | ---> index into Page Table - * | * ----> index into Page Directory + * + * In a single page size configuration, only PAGE_SHIFT is fixed + * So both PGD and PTE sizing can be tweaked + * e.g. 8K page (PAGE_SHIFT 13) can have + * - PGDIR_SHIFT 21 -> 11:8:13 address split + * - PGDIR_SHIFT 24 -> 8:11:13 address split + * + * If Super Page is configured, PGDIR_SHIFT becomes fixed too, + * so the sizing flexibility is gone. */ -#define BITS_IN_PAGE PAGE_SHIFT - -/* Optimal Sizing of Pg Tbl - based on MMU page size */ -#if defined(CONFIG_ARC_PAGE_SIZE_8K) -#define BITS_FOR_PTE 8 /* 11:8:13 */ -#elif defined(CONFIG_ARC_PAGE_SIZE_16K) -#define BITS_FOR_PTE 8 /* 10:8:14 */ -#elif defined(CONFIG_ARC_PAGE_SIZE_4K) -#define BITS_FOR_PTE 9 /* 11:9:12 */ +#if defined(CONFIG_ARC_HUGEPAGE_16M) +#define PGDIR_SHIFT 24 +#elif defined(CONFIG_ARC_HUGEPAGE_2M) +#define PGDIR_SHIFT 21 +#else +/* + * Only Normal page support so "hackable" (see comment above) + * Default value provides 11:8:13 (8K), 11:9:12 (4K) + */ +#define PGDIR_SHIFT 21 #endif -#define BITS_FOR_PGD (32 - BITS_FOR_PTE - BITS_IN_PAGE) +#define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT) +#define BITS_FOR_PGD (32 - PGDIR_SHIFT) -#define PGDIR_SHIFT (32 - BITS_FOR_PGD) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) /* vaddr span, not PDG sz */ #define PGDIR_MASK (~(PGDIR_SIZE-1)) diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index cbfec79137bf..c1264607bbff 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -45,11 +45,12 @@ VECTOR reserved ; Reserved slots VECTOR handle_interrupt ; (16) Timer0 VECTOR handle_interrupt ; unused (Timer1) VECTOR handle_interrupt ; unused (WDT) -VECTOR handle_interrupt ; (19) ICI (inter core interrupt) -VECTOR handle_interrupt -VECTOR handle_interrupt -VECTOR handle_interrupt -VECTOR handle_interrupt ; (23) End of fixed IRQs +VECTOR handle_interrupt ; (19) Inter core Interrupt (IPI) +VECTOR handle_interrupt ; (20) perf Interrupt +VECTOR handle_interrupt ; (21) Software Triggered Intr (Self IPI) +VECTOR handle_interrupt ; unused +VECTOR handle_interrupt ; (23) unused +# End of fixed IRQs .rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8 VECTOR handle_interrupt @@ -211,7 +212,11 @@ debug_marker_syscall: ; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig ; entry was via Exception in DS which got preempted in kernel). ; -; IRQ RTIE won't reliably restore DE bit and/or BTA, needs handling +; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround +; +; Solution is return from Intr w/o any delay slot quirks into a kernel trampoline +; and from pure kernel mode return to delay slot which handles DS bit/BTA correctly + .Lintr_ret_to_delay_slot: debug_marker_ds: @@ -222,18 +227,23 @@ debug_marker_ds: ld r2, [sp, PT_ret] ld r3, [sp, PT_status32] + ; STAT32 for Int return created from scratch + ; (No delay dlot, disable Further intr in trampoline) + bic r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK st r0, [sp, PT_status32] mov r1, .Lintr_ret_to_delay_slot_2 st r1, [sp, PT_ret] + ; Orig exception PC/STAT32 safekept @orig_r0 and @event stack slots st r2, [sp, 0] st r3, [sp, 4] b .Lisr_ret_fast_path .Lintr_ret_to_delay_slot_2: + ; Trampoline to restore orig exception PC/STAT32/BTA/AUX_USER_SP sub sp, sp, SZ_PT_REGS st r9, [sp, -4] @@ -243,11 +253,19 @@ debug_marker_ds: ld r9, [sp, 4] sr r9, [erstatus] + ; restore AUX_USER_SP if returning to U mode + bbit0 r9, STATUS_U_BIT, 1f + ld r9, [sp, PT_sp] + sr r9, [AUX_USER_SP] + +1: ld r9, [sp, 8] sr r9, [erbta] ld r9, [sp, -4] add sp, sp, SZ_PT_REGS + + ; return from pure kernel mode to delay slot rtie END(ret_from_exception) diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 0394f9f61b46..942526322ae7 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -14,6 +14,8 @@ #include <linux/irqchip.h> #include <asm/irq.h> +static int irq_prio; + /* * Early Hardware specific Interrupt setup * -Called very early (start_kernel -> setup_arch -> setup_processor) @@ -24,6 +26,14 @@ void arc_init_IRQ(void) { unsigned int tmp; + struct irq_build { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:3, firq:1, prio:4, exts:8, irqs:8, ver:8; +#else + unsigned int ver:8, irqs:8, exts:8, prio:4, firq:1, pad:3; +#endif + } irq_bcr; + struct aux_irq_ctrl { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int res3:18, save_idx_regs:1, res2:1, @@ -46,28 +56,25 @@ void arc_init_IRQ(void) WRITE_AUX(AUX_IRQ_CTRL, ictrl); - /* setup status32, don't enable intr yet as kernel doesn't want */ - tmp = read_aux_reg(0xa); - tmp |= ISA_INIT_STATUS_BITS; - tmp &= ~STATUS_IE_MASK; - asm volatile("flag %0 \n"::"r"(tmp)); - /* * ARCv2 core intc provides multiple interrupt priorities (upto 16). * Typical builds though have only two levels (0-high, 1-low) * Linux by default uses lower prio 1 for most irqs, reserving 0 for * NMI style interrupts in future (say perf) - * - * Read the intc BCR to confirm that Linux default priority is avail - * in h/w - * - * Note: - * IRQ_BCR[27..24] contains N-1 (for N priority levels) and prio level - * is 0 based. */ - tmp = (read_aux_reg(ARC_REG_IRQ_BCR) >> 24 ) & 0xF; - if (ARCV2_IRQ_DEF_PRIO > tmp) - panic("Linux default irq prio incorrect\n"); + + READ_BCR(ARC_REG_IRQ_BCR, irq_bcr); + + irq_prio = irq_bcr.prio; /* Encoded as N-1 for N levels */ + pr_info("archs-intc\t: %d priority levels (default %d)%s\n", + irq_prio + 1, irq_prio, + irq_bcr.firq ? " FIRQ (not used)":""); + + /* setup status32, don't enable intr yet as kernel doesn't want */ + tmp = read_aux_reg(0xa); + tmp |= STATUS_AD_MASK | (irq_prio << 1); + tmp &= ~STATUS_IE_MASK; + asm volatile("flag %0 \n"::"r"(tmp)); } static void arcv2_irq_mask(struct irq_data *data) @@ -86,7 +93,7 @@ void arcv2_irq_enable(struct irq_data *data) { /* set default priority */ write_aux_reg(AUX_IRQ_SELECT, data->irq); - write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO); + write_aux_reg(AUX_IRQ_PRIORITY, irq_prio); /* * hw auto enables (linux unmask) all by default diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index 06bcedf19b62..224d1c3aa9c4 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -81,9 +81,6 @@ static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, { switch (irq) { case TIMER0_IRQ: -#ifdef CONFIG_SMP - case IPI_IRQ: -#endif irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); break; default: diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index bd237acdf4f2..c41c364b926c 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -11,9 +11,13 @@ #include <linux/smp.h> #include <linux/irq.h> #include <linux/spinlock.h> +#include <asm/irqflags-arcv2.h> #include <asm/mcip.h> #include <asm/setup.h> +#define IPI_IRQ 19 +#define SOFTIRQ_IRQ 21 + static char smp_cpuinfo_buf[128]; static int idu_detected; @@ -22,6 +26,7 @@ static DEFINE_RAW_SPINLOCK(mcip_lock); static void mcip_setup_per_cpu(int cpu) { smp_ipi_irq_setup(cpu, IPI_IRQ); + smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ); } static void mcip_ipi_send(int cpu) @@ -29,46 +34,44 @@ static void mcip_ipi_send(int cpu) unsigned long flags; int ipi_was_pending; + /* ARConnect can only send IPI to others */ + if (unlikely(cpu == raw_smp_processor_id())) { + arc_softirq_trigger(SOFTIRQ_IRQ); + return; + } + + raw_spin_lock_irqsave(&mcip_lock, flags); + /* - * NOTE: We must spin here if the other cpu hasn't yet - * serviced a previous message. This can burn lots - * of time, but we MUST follows this protocol or - * ipi messages can be lost!!! - * Also, we must release the lock in this loop because - * the other side may get to this same loop and not - * be able to ack -- thus causing deadlock. + * If receiver already has a pending interrupt, elide sending this one. + * Linux cross core calling works well with concurrent IPIs + * coalesced into one + * see arch/arc/kernel/smp.c: ipi_send_msg_one() */ + __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu); + ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK); + if (!ipi_was_pending) + __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu); - do { - raw_spin_lock_irqsave(&mcip_lock, flags); - __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu); - ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK); - if (ipi_was_pending == 0) - break; /* break out but keep lock */ - raw_spin_unlock_irqrestore(&mcip_lock, flags); - } while (1); - - __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu); raw_spin_unlock_irqrestore(&mcip_lock, flags); - -#ifdef CONFIG_ARC_IPI_DBG - if (ipi_was_pending) - pr_info("IPI ACK delayed from cpu %d\n", cpu); -#endif } static void mcip_ipi_clear(int irq) { unsigned int cpu, c; unsigned long flags; - unsigned int __maybe_unused copy; + + if (unlikely(irq == SOFTIRQ_IRQ)) { + arc_softirq_clear(irq); + return; + } raw_spin_lock_irqsave(&mcip_lock, flags); /* Who sent the IPI */ __mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0); - copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... */ + cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... */ /* * In rare case, multiple concurrent IPIs sent to same target can @@ -82,12 +85,6 @@ static void mcip_ipi_clear(int irq) } while (cpu); raw_spin_unlock_irqrestore(&mcip_lock, flags); - -#ifdef CONFIG_ARC_IPI_DBG - if (c != __ffs(copy)) - pr_info("IPIs from %x coalesced to %x\n", - copy, raw_smp_processor_id()); -#endif } static void mcip_probe_n_setup(void) @@ -96,13 +93,13 @@ static void mcip_probe_n_setup(void) #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad3:8, idu:1, llm:1, num_cores:6, - iocoh:1, grtc:1, dbg:1, pad2:1, + iocoh:1, gfrc:1, dbg:1, pad2:1, msg:1, sem:1, ipi:1, pad:1, ver:8; #else unsigned int ver:8, pad:1, ipi:1, sem:1, msg:1, - pad2:1, dbg:1, grtc:1, iocoh:1, + pad2:1, dbg:1, gfrc:1, iocoh:1, num_cores:6, llm:1, idu:1, pad3:8; #endif @@ -111,12 +108,13 @@ static void mcip_probe_n_setup(void) READ_BCR(ARC_REG_MCIP_BCR, mp); sprintf(smp_cpuinfo_buf, - "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n", + "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s%s\n", mp.ver, mp.num_cores, IS_AVAIL1(mp.ipi, "IPI "), IS_AVAIL1(mp.idu, "IDU "), + IS_AVAIL1(mp.llm, "LLM "), IS_AVAIL1(mp.dbg, "DEBUG "), - IS_AVAIL1(mp.grtc, "GRTC")); + IS_AVAIL1(mp.gfrc, "GFRC")); idu_detected = mp.idu; @@ -125,8 +123,8 @@ static void mcip_probe_n_setup(void) __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf); } - if (IS_ENABLED(CONFIG_ARC_HAS_GRTC) && !mp.grtc) - panic("kernel trying to use non-existent GRTC\n"); + if (IS_ENABLED(CONFIG_ARC_HAS_GFRC) && !mp.gfrc) + panic("kernel trying to use non-existent GFRC\n"); } struct plat_smp_ops plat_smp_ops = { diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index e1b87444ea9a..cdc821df1809 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -42,9 +42,57 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; +static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) +{ + if (is_isa_arcompact()) { + struct bcr_iccm_arcompact iccm; + struct bcr_dccm_arcompact dccm; + + READ_BCR(ARC_REG_ICCM_BUILD, iccm); + if (iccm.ver) { + cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */ + cpu->iccm.base_addr = iccm.base << 16; + } + + READ_BCR(ARC_REG_DCCM_BUILD, dccm); + if (dccm.ver) { + unsigned long base; + cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */ + + base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD); + cpu->dccm.base_addr = base & ~0xF; + } + } else { + struct bcr_iccm_arcv2 iccm; + struct bcr_dccm_arcv2 dccm; + unsigned long region; + + READ_BCR(ARC_REG_ICCM_BUILD, iccm); + if (iccm.ver) { + cpu->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */ + if (iccm.sz00 == 0xF && iccm.sz01 > 0) + cpu->iccm.sz <<= iccm.sz01; + + region = read_aux_reg(ARC_REG_AUX_ICCM); + cpu->iccm.base_addr = region & 0xF0000000; + } + + READ_BCR(ARC_REG_DCCM_BUILD, dccm); + if (dccm.ver) { + cpu->dccm.sz = 256 << dccm.sz0; + if (dccm.sz0 == 0xF && dccm.sz1 > 0) + cpu->dccm.sz <<= dccm.sz1; + + region = read_aux_reg(ARC_REG_AUX_DCCM); + cpu->dccm.base_addr = region & 0xF0000000; + } + } +} + static void read_arc_build_cfg_regs(void) { struct bcr_perip uncached_space; + struct bcr_timer timer; struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; unsigned long perip_space; @@ -53,7 +101,11 @@ static void read_arc_build_cfg_regs(void) READ_BCR(AUX_IDENTITY, cpu->core); READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); - READ_BCR(ARC_REG_TIMERS_BCR, cpu->timers); + READ_BCR(ARC_REG_TIMERS_BCR, timer); + cpu->extn.timer0 = timer.t0; + cpu->extn.timer1 = timer.t1; + cpu->extn.rtc = timer.rtc; + cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); @@ -71,36 +123,11 @@ static void read_arc_build_cfg_regs(void) cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0; /* 1,3 */ cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0; cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */ - - /* Note that we read the CCM BCRs independent of kernel config - * This is to catch the cases where user doesn't know that - * CCMs are present in hardware build - */ - { - struct bcr_iccm iccm; - struct bcr_dccm dccm; - struct bcr_dccm_base dccm_base; - unsigned int bcr_32bit_val; - - bcr_32bit_val = read_aux_reg(ARC_REG_ICCM_BCR); - if (bcr_32bit_val) { - iccm = *((struct bcr_iccm *)&bcr_32bit_val); - cpu->iccm.base_addr = iccm.base << 16; - cpu->iccm.sz = 0x2000 << (iccm.sz - 1); - } - - bcr_32bit_val = read_aux_reg(ARC_REG_DCCM_BCR); - if (bcr_32bit_val) { - dccm = *((struct bcr_dccm *)&bcr_32bit_val); - cpu->dccm.sz = 0x800 << (dccm.sz); - - READ_BCR(ARC_REG_DCCMBASE_BCR, dccm_base); - cpu->dccm.base_addr = dccm_base.addr << 8; - } - } - READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem); + /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */ + read_decode_ccm_bcr(cpu); + read_decode_mmu_bcr(); read_decode_cache_bcr(); @@ -208,9 +235,9 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) (unsigned int)(arc_get_core_freq() / 10000) % 100); n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ", - IS_AVAIL1(cpu->timers.t0, "Timer0 "), - IS_AVAIL1(cpu->timers.t1, "Timer1 "), - IS_AVAIL2(cpu->timers.rtc, "64-bit RTC ", + IS_AVAIL1(cpu->extn.timer0, "Timer0 "), + IS_AVAIL1(cpu->extn.timer1, "Timer1 "), + IS_AVAIL2(cpu->extn.rtc, "Local-64-bit-Ctr ", CONFIG_ARC_HAS_RTC)); n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s", @@ -232,8 +259,6 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt); } - n += scnprintf(buf + n, len - n, "%s", - IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY)); } n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", @@ -293,13 +318,13 @@ static void arc_chk_core_config(void) struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; int fpu_enabled; - if (!cpu->timers.t0) + if (!cpu->extn.timer0) panic("Timer0 is not present!\n"); - if (!cpu->timers.t1) + if (!cpu->extn.timer1) panic("Timer1 is not present!\n"); - if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->timers.rtc) + if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->extn.rtc) panic("RTC is not present\n"); #ifdef CONFIG_ARC_HAS_DCCM @@ -334,6 +359,7 @@ static void arc_chk_core_config(void) panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n"); if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic && + IS_ENABLED(CONFIG_ARC_HAS_LLSC) && !IS_ENABLED(CONFIG_ARC_STAR_9000923308)) panic("llock/scond livelock workaround missing\n"); } diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index ef6e9e15b82a..4cb3add77c75 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -142,7 +142,7 @@ void start_kernel_secondary(void) local_irq_enable(); preempt_disable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* @@ -336,11 +336,8 @@ irqreturn_t do_IPI(int irq, void *dev_id) int rc; rc = __do_IPI(msg); -#ifdef CONFIG_ARC_IPI_DBG - /* IPI received but no valid @msg */ if (rc) pr_info("IPI with bogus msg %ld in %ld\n", msg, copy); -#endif pending &= ~(1U << msg); } while (pending); diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index dfad287f1db1..156d9833ff84 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -62,7 +62,7 @@ /********** Clock Source Device *********/ -#ifdef CONFIG_ARC_HAS_GRTC +#ifdef CONFIG_ARC_HAS_GFRC static int arc_counter_setup(void) { @@ -83,10 +83,10 @@ static cycle_t arc_counter_read(struct clocksource *cs) local_irq_save(flags); - __mcip_cmd(CMD_GRTC_READ_LO, 0); + __mcip_cmd(CMD_GFRC_READ_LO, 0); stamp.l = read_aux_reg(ARC_REG_MCIP_READBACK); - __mcip_cmd(CMD_GRTC_READ_HI, 0); + __mcip_cmd(CMD_GFRC_READ_HI, 0); stamp.h = read_aux_reg(ARC_REG_MCIP_READBACK); local_irq_restore(flags); @@ -95,7 +95,7 @@ static cycle_t arc_counter_read(struct clocksource *cs) } static struct clocksource arc_counter = { - .name = "ARConnect GRTC", + .name = "ARConnect GFRC", .rating = 400, .read = arc_counter_read, .mask = CLOCKSOURCE_MASK(64), diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 7a6a58ef8aaf..43788b1a1ac5 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -195,5 +195,7 @@ CFLAGS_font.o := -Dstatic= $(obj)/font.c: $(FONTC) $(call cmd,shipped) +AFLAGS_hyp-stub.o := -Wa,-march=armv7-a + $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S $(call cmd,shipped) diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index f3db13d2d90e..0cc150b87b86 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -285,8 +285,10 @@ }; }; + +/include/ "tps65217.dtsi" + &tps { - compatible = "ti,tps65217"; /* * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only * mode") at poweroff. Most BeagleBone versions do not support RTC-only @@ -307,17 +309,12 @@ ti,pmic-shutdown-controller; regulators { - #address-cells = <1>; - #size-cells = <0>; - dcdc1_reg: regulator@0 { - reg = <0>; regulator-name = "vdds_dpr"; regulator-always-on; }; dcdc2_reg: regulator@1 { - reg = <1>; /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ regulator-name = "vdd_mpu"; regulator-min-microvolt = <925000>; @@ -327,7 +324,6 @@ }; dcdc3_reg: regulator@2 { - reg = <2>; /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ regulator-name = "vdd_core"; regulator-min-microvolt = <925000>; @@ -337,25 +333,21 @@ }; ldo1_reg: regulator@3 { - reg = <3>; regulator-name = "vio,vrtc,vdds"; regulator-always-on; }; ldo2_reg: regulator@4 { - reg = <4>; regulator-name = "vdd_3v3aux"; regulator-always-on; }; ldo3_reg: regulator@5 { - reg = <5>; regulator-name = "vdd_1v8"; regulator-always-on; }; ldo4_reg: regulator@6 { - reg = <6>; regulator-name = "vdd_3v3a"; regulator-always-on; }; diff --git a/arch/arm/boot/dts/am335x-chilisom.dtsi b/arch/arm/boot/dts/am335x-chilisom.dtsi index fda457b07e15..857d9894103a 100644 --- a/arch/arm/boot/dts/am335x-chilisom.dtsi +++ b/arch/arm/boot/dts/am335x-chilisom.dtsi @@ -128,21 +128,16 @@ }; -&tps { - compatible = "ti,tps65217"; +/include/ "tps65217.dtsi" +&tps { regulators { - #address-cells = <1>; - #size-cells = <0>; - dcdc1_reg: regulator@0 { - reg = <0>; regulator-name = "vdds_dpr"; regulator-always-on; }; dcdc2_reg: regulator@1 { - reg = <1>; /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ regulator-name = "vdd_mpu"; regulator-min-microvolt = <925000>; @@ -152,7 +147,6 @@ }; dcdc3_reg: regulator@2 { - reg = <2>; /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ regulator-name = "vdd_core"; regulator-min-microvolt = <925000>; @@ -162,28 +156,24 @@ }; ldo1_reg: regulator@3 { - reg = <3>; regulator-name = "vio,vrtc,vdds"; regulator-boot-on; regulator-always-on; }; ldo2_reg: regulator@4 { - reg = <4>; regulator-name = "vdd_3v3aux"; regulator-boot-on; regulator-always-on; }; ldo3_reg: regulator@5 { - reg = <5>; regulator-name = "vdd_1v8"; regulator-boot-on; regulator-always-on; }; ldo4_reg: regulator@6 { - reg = <6>; regulator-name = "vdd_3v3d"; regulator-boot-on; regulator-always-on; diff --git a/arch/arm/boot/dts/am335x-nano.dts b/arch/arm/boot/dts/am335x-nano.dts index 77559a1ded60..f313999c503e 100644 --- a/arch/arm/boot/dts/am335x-nano.dts +++ b/arch/arm/boot/dts/am335x-nano.dts @@ -375,15 +375,11 @@ wp-gpios = <&gpio3 18 0>; }; -&tps { - compatible = "ti,tps65217"; +#include "tps65217.dtsi" +&tps { regulators { - #address-cells = <1>; - #size-cells = <0>; - dcdc1_reg: regulator@0 { - reg = <0>; /* +1.5V voltage with ±4% tolerance */ regulator-min-microvolt = <1450000>; regulator-max-microvolt = <1550000>; @@ -392,7 +388,6 @@ }; dcdc2_reg: regulator@1 { - reg = <1>; /* VDD_MPU voltage limits 0.95V - 1.1V with ±4% tolerance */ regulator-name = "vdd_mpu"; regulator-min-microvolt = <915000>; @@ -402,7 +397,6 @@ }; dcdc3_reg: regulator@2 { - reg = <2>; /* VDD_CORE voltage limits 0.95V - 1.1V with ±4% tolerance */ regulator-name = "vdd_core"; regulator-min-microvolt = <915000>; @@ -412,7 +406,6 @@ }; ldo1_reg: regulator@3 { - reg = <3>; /* +1.8V voltage with ±4% tolerance */ regulator-min-microvolt = <1750000>; regulator-max-microvolt = <1870000>; @@ -421,7 +414,6 @@ }; ldo2_reg: regulator@4 { - reg = <4>; /* +3.3V voltage with ±4% tolerance */ regulator-min-microvolt = <3175000>; regulator-max-microvolt = <3430000>; @@ -430,7 +422,6 @@ }; ldo3_reg: regulator@5 { - reg = <5>; /* +1.8V voltage with ±4% tolerance */ regulator-min-microvolt = <1750000>; regulator-max-microvolt = <1870000>; @@ -439,7 +430,6 @@ }; ldo4_reg: regulator@6 { - reg = <6>; /* +3.3V voltage with ±4% tolerance */ regulator-min-microvolt = <3175000>; regulator-max-microvolt = <3430000>; diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts index 471a3a70ea1f..8867aaaec54d 100644 --- a/arch/arm/boot/dts/am335x-pepper.dts +++ b/arch/arm/boot/dts/am335x-pepper.dts @@ -420,9 +420,9 @@ vin-supply = <&vbat>; }; -&tps { - compatible = "ti,tps65217"; +/include/ "tps65217.dtsi" +&tps { backlight { isel = <1>; /* ISET1 */ fdim = <200>; /* TPS65217_BL_FDIM_200HZ */ @@ -430,17 +430,12 @@ }; regulators { - #address-cells = <1>; - #size-cells = <0>; - dcdc1_reg: regulator@0 { - reg = <0>; /* VDD_1V8 system supply */ regulator-always-on; }; dcdc2_reg: regulator@1 { - reg = <1>; /* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */ regulator-name = "vdd_core"; regulator-min-microvolt = <925000>; @@ -450,7 +445,6 @@ }; dcdc3_reg: regulator@2 { - reg = <2>; /* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */ regulator-name = "vdd_mpu"; regulator-min-microvolt = <925000>; @@ -460,21 +454,18 @@ }; ldo1_reg: regulator@3 { - reg = <3>; /* VRTC 1.8V always-on supply */ regulator-name = "vrtc,vdds"; regulator-always-on; }; ldo2_reg: regulator@4 { - reg = <4>; /* 3.3V rail */ regulator-name = "vdd_3v3aux"; regulator-always-on; }; ldo3_reg: regulator@5 { - reg = <5>; /* VDD_3V3A 3.3V rail */ regulator-name = "vdd_3v3a"; regulator-min-microvolt = <3300000>; @@ -482,7 +473,6 @@ }; ldo4_reg: regulator@6 { - reg = <6>; /* VDD_3V3B 3.3V rail */ regulator-name = "vdd_3v3b"; regulator-always-on; diff --git a/arch/arm/boot/dts/am335x-shc.dts b/arch/arm/boot/dts/am335x-shc.dts index 1b5b044fcd91..865de8500f1c 100644 --- a/arch/arm/boot/dts/am335x-shc.dts +++ b/arch/arm/boot/dts/am335x-shc.dts @@ -46,7 +46,7 @@ gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>; linux,code = <KEY_BACK>; debounce-interval = <1000>; - gpio-key,wakeup; + wakeup-source; }; front_button { @@ -54,7 +54,7 @@ gpios = <&gpio1 25 GPIO_ACTIVE_HIGH>; linux,code = <KEY_FRONT>; debounce-interval = <1000>; - gpio-key,wakeup; + wakeup-source; }; }; diff --git a/arch/arm/boot/dts/am335x-sl50.dts b/arch/arm/boot/dts/am335x-sl50.dts index d38edfa53bb9..3303c281697b 100644 --- a/arch/arm/boot/dts/am335x-sl50.dts +++ b/arch/arm/boot/dts/am335x-sl50.dts @@ -375,19 +375,16 @@ pinctrl-0 = <&uart4_pins>; }; +#include "tps65217.dtsi" + &tps { - compatible = "ti,tps65217"; ti,pmic-shutdown-controller; interrupt-parent = <&intc>; interrupts = <7>; /* NNMI */ regulators { - #address-cells = <1>; - #size-cells = <0>; - dcdc1_reg: regulator@0 { - reg = <0>; /* VDDS_DDR */ regulator-min-microvolt = <1500000>; regulator-max-microvolt = <1500000>; @@ -395,7 +392,6 @@ }; dcdc2_reg: regulator@1 { - reg = <1>; /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ regulator-name = "vdd_mpu"; regulator-min-microvolt = <925000>; @@ -405,7 +401,6 @@ }; dcdc3_reg: regulator@2 { - reg = <2>; /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ regulator-name = "vdd_core"; regulator-min-microvolt = <925000>; @@ -415,7 +410,6 @@ }; ldo1_reg: regulator@3 { - reg = <3>; /* VRTC / VIO / VDDS*/ regulator-always-on; regulator-min-microvolt = <1800000>; @@ -423,7 +417,6 @@ }; ldo2_reg: regulator@4 { - reg = <4>; /* VDD_3V3AUX */ regulator-always-on; regulator-min-microvolt = <3300000>; @@ -431,7 +424,6 @@ }; ldo3_reg: regulator@5 { - reg = <5>; /* VDD_1V8 */ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -439,7 +431,6 @@ }; ldo4_reg: regulator@6 { - reg = <6>; /* VDD_3V3A */ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 04885f9f959e..1fafaad516ba 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -439,6 +439,7 @@ ti,mbox-num-users = <4>; ti,mbox-num-fifos = <8>; mbox_wkupm3: wkup_m3 { + ti,mbox-send-noirq; ti,mbox-tx = <0 0 0>; ti,mbox-rx = <0 0 3>; }; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index df955ba4dc62..92068fbf8b57 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -73,7 +73,7 @@ global_timer: timer@48240200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x48240200 0x100>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; interrupt-parent = <&gic>; clocks = <&mpu_periphclk>; }; @@ -81,7 +81,7 @@ local_timer: timer@48240600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x48240600 0x100>; - interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>; interrupt-parent = <&gic>; clocks = <&mpu_periphclk>; }; @@ -290,6 +290,7 @@ ti,mbox-num-users = <4>; ti,mbox-num-fifos = <8>; mbox_wkupm3: wkup_m3 { + ti,mbox-send-noirq; ti,mbox-tx = <0 0 0>; ti,mbox-rx = <0 0 3>; }; diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index 64d43325bcbc..ecd09ab6d581 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -590,8 +590,6 @@ pinctrl-names = "default"; pinctrl-0 = <&pixcir_ts_pins>; reg = <0x5c>; - interrupt-parent = <&gpio3>; - interrupts = <22 0>; attb-gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>; @@ -599,7 +597,7 @@ * 0x264 represents the offset of padconf register of * gpio3_22 from am43xx_pinmux base. */ - interrupts-extended = <&gpio3 22 IRQ_TYPE_NONE>, + interrupts-extended = <&gpio3 22 IRQ_TYPE_EDGE_FALLING>, <&am43xx_pinmux 0x264>; interrupt-names = "tsc", "wakeup"; diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 746fd2b17958..d580e2b70f9a 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -491,7 +491,7 @@ pinctrl-0 = <&pixcir_ts_pins>; reg = <0x5c>; interrupt-parent = <&gpio1>; - interrupts = <17 0>; + interrupts = <17 IRQ_TYPE_EDGE_FALLING>; attb-gpio = <&gpio1 17 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 36c0fa6c362a..a0986c65be0c 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -173,6 +173,8 @@ sound0_master: simple-audio-card,codec { sound-dai = <&tlv320aic3104>; + assigned-clocks = <&clkoutmux2_clk_mux>; + assigned-clock-parents = <&sys_clk2_dclk_div>; clocks = <&clkout2_clk>; }; }; @@ -796,6 +798,8 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&mcasp3_pins_default>; pinctrl-1 = <&mcasp3_pins_sleep>; + assigned-clocks = <&mcasp3_ahclkx_mux>; + assigned-clock-parents = <&sys_clkin2>; status = "okay"; op-mode = <0>; /* MCASP_IIS_MODE */ diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts index c53882643ae9..1c06cb76da07 100644 --- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts +++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts @@ -167,7 +167,7 @@ DRA7XX_CORE_IOPAD(0x35b8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d20.rgmii1_rd3 */ DRA7XX_CORE_IOPAD(0x35bc, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d21.rgmii1_rd2 */ DRA7XX_CORE_IOPAD(0x35c0, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d22.rgmii1_rd1 */ - DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLUP | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */ + DRA7XX_CORE_IOPAD(0x35c4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* vin2a_d23.rgmii1_rd0 */ >; }; @@ -492,14 +492,14 @@ pinctrl-names = "default"; pinctrl-0 = <&qspi1_pins>; - spi-max-frequency = <20000000>; + spi-max-frequency = <48000000>; spi_flash: spi_flash@0 { #address-cells = <1>; #size-cells = <1>; compatible = "spansion,m25p80", "jedec,spi-nor"; reg = <0>; /* CS0 */ - spi-max-frequency = <20000000>; + spi-max-frequency = <48000000>; partition@0 { label = "uboot"; @@ -545,7 +545,7 @@ ti,debounce-tol = /bits/ 16 <10>; ti,debounce-rep = /bits/ 16 <1>; - linux,wakeup; + wakeup-source; }; }; @@ -559,13 +559,13 @@ &cpsw_emac0 { phy_id = <&davinci_mdio>, <0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; dual_emac_res_vlan = <0>; }; &cpsw_emac1 { phy_id = <&davinci_mdio>, <1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; dual_emac_res_vlan = <1>; }; @@ -588,7 +588,7 @@ }; &usb2 { - dr_mode = "peripheral"; + dr_mode = "host"; }; &mcasp3 { diff --git a/arch/arm/boot/dts/am57xx-sbc-am57x.dts b/arch/arm/boot/dts/am57xx-sbc-am57x.dts index 77bb8e17401a..988e99632d49 100644 --- a/arch/arm/boot/dts/am57xx-sbc-am57x.dts +++ b/arch/arm/boot/dts/am57xx-sbc-am57x.dts @@ -25,8 +25,8 @@ &dra7_pmx_core { uart3_pins_default: uart3_pins_default { pinctrl-single,pins = < - DRA7XX_CORE_IOPAD(0x37f8, PIN_INPUT_SLEW | MUX_MODE2) /* uart2_ctsn.uart3_rxd */ - DRA7XX_CORE_IOPAD(0x37fc, PIN_INPUT_SLEW | MUX_MODE1) /* uart2_rtsn.uart3_txd */ + DRA7XX_CORE_IOPAD(0x3648, PIN_INPUT_SLEW | MUX_MODE0) /* uart3_rxd */ + DRA7XX_CORE_IOPAD(0x364c, PIN_INPUT_SLEW | MUX_MODE0) /* uart3_txd */ >; }; @@ -108,9 +108,9 @@ pinctrl-0 = <&i2c5_pins_default>; clock-frequency = <400000>; - eeprom_base: atmel@50 { + eeprom_base: atmel@54 { compatible = "atmel,24c08"; - reg = <0x50>; + reg = <0x54>; pagesize = <16>; }; diff --git a/arch/arm/boot/dts/armada-xp-axpwifiap.dts b/arch/arm/boot/dts/armada-xp-axpwifiap.dts index 23fc670c0427..5c21b236721f 100644 --- a/arch/arm/boot/dts/armada-xp-axpwifiap.dts +++ b/arch/arm/boot/dts/armada-xp-axpwifiap.dts @@ -70,8 +70,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts index f774101416a5..ebe1d267406d 100644 --- a/arch/arm/boot/dts/armada-xp-db.dts +++ b/arch/arm/boot/dts/armada-xp-db.dts @@ -76,8 +76,8 @@ ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; devbus-bootcs { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts index 4878d7353069..5730b875c4f5 100644 --- a/arch/arm/boot/dts/armada-xp-gp.dts +++ b/arch/arm/boot/dts/armada-xp-gp.dts @@ -95,8 +95,8 @@ ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; devbus-bootcs { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts index 13cf69a8d0fb..8af463f26ea1 100644 --- a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts +++ b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts @@ -65,8 +65,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; @@ -152,6 +152,7 @@ nand-on-flash-bbt; partitions { + compatible = "fixed-partitions"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts index 6e9820e141f8..b89e6cf1271a 100644 --- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts +++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts @@ -70,8 +70,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts index 6ab33837a2b6..6522b04f4a8e 100644 --- a/arch/arm/boot/dts/armada-xp-matrix.dts +++ b/arch/arm/boot/dts/armada-xp-matrix.dts @@ -68,8 +68,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; internal-regs { serial@12000 { diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts index 62175a8848bc..d19f44c70925 100644 --- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts +++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts @@ -64,8 +64,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index a5db17782e08..853bd392a4fe 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -65,9 +65,9 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000 + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; devbus-bootcs { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-synology-ds414.dts b/arch/arm/boot/dts/armada-xp-synology-ds414.dts index 2391b11dc546..d17dab0a6f51 100644 --- a/arch/arm/boot/dts/armada-xp-synology-ds414.dts +++ b/arch/arm/boot/dts/armada-xp-synology-ds414.dts @@ -78,8 +78,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts index 77ddff036409..e683856c507c 100644 --- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts @@ -114,9 +114,15 @@ macb0: ethernet@f8008000 { pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_macb0_default>; + pinctrl-0 = <&pinctrl_macb0_default &pinctrl_macb0_phy_irq>; phy-mode = "rmii"; status = "okay"; + + ethernet-phy@1 { + reg = <0x1>; + interrupt-parent = <&pioA>; + interrupts = <73 IRQ_TYPE_LEVEL_LOW>; + }; }; pdmic@f8018000 { @@ -300,6 +306,10 @@ bias-disable; }; + pinctrl_macb0_phy_irq: macb0_phy_irq { + pinmux = <PIN_PC9__GPIO>; + }; + pinctrl_pdmic_default: pdmic_default { pinmux = <PIN_PB26__PDMIC_DAT>, <PIN_PB27__PDMIC_CLK>; diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index 131614f28e75..569026e8f96c 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -86,10 +86,12 @@ macb0: ethernet@f8020000 { phy-mode = "rmii"; status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>; phy0: ethernet-phy@1 { interrupt-parent = <&pioE>; - interrupts = <1 IRQ_TYPE_EDGE_FALLING>; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; reg = <1>; }; }; @@ -152,6 +154,10 @@ atmel,pins = <AT91_PIOE 8 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>; }; + pinctrl_macb0_phy_irq: macb0_phy_irq_0 { + atmel,pins = + <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>; + }; }; }; }; diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts index 2d4a33100af6..4e98cda97403 100644 --- a/arch/arm/boot/dts/at91-sama5d4ek.dts +++ b/arch/arm/boot/dts/at91-sama5d4ek.dts @@ -160,8 +160,15 @@ }; macb0: ethernet@f8020000 { + pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>; phy-mode = "rmii"; status = "okay"; + + ethernet-phy@1 { + reg = <0x1>; + interrupt-parent = <&pioE>; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; + }; }; mmc1: mmc@fc000000 { @@ -193,6 +200,10 @@ pinctrl@fc06a000 { board { + pinctrl_macb0_phy_irq: macb0_phy_irq { + atmel,pins = + <AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>; + }; pinctrl_mmc0_cd: mmc0_cd { atmel,pins = <AT91_PIOE 5 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>; diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts index ca4ddf86817a..626c67d66626 100644 --- a/arch/arm/boot/dts/at91sam9n12ek.dts +++ b/arch/arm/boot/dts/at91sam9n12ek.dts @@ -215,7 +215,7 @@ }; panel: panel { - compatible = "qd,qd43003c0-40", "simple-panel"; + compatible = "qiaodian,qd43003c0-40", "simple-panel"; backlight = <&backlight>; power-supply = <&panel_reg>; #address-cells = <1>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index c4d9175b90dc..f82aa44c3cee 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1500,6 +1500,16 @@ 0x48485200 0x2E00>; #address-cells = <1>; #size-cells = <1>; + + /* + * Do not allow gating of cpsw clock as workaround + * for errata i877. Keeping internal clock disabled + * causes the device switching characteristics + * to degrade over time and eventually fail to meet + * the data manual delay time/skew specs. + */ + ti,no-idle; + /* * rx_thresh_pend * rx_pend diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 4f6ae921656f..f74d3db4846d 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -896,7 +896,6 @@ #size-cells = <1>; reg = <0x2100000 0x10000>; ranges = <0 0x2100000 0x10000>; - interrupt-parent = <&intc>; clocks = <&clks IMX6QDL_CLK_CAAM_MEM>, <&clks IMX6QDL_CLK_CAAM_ACLK>, <&clks IMX6QDL_CLK_CAAM_IPG>, diff --git a/arch/arm/boot/dts/kirkwood-ds112.dts b/arch/arm/boot/dts/kirkwood-ds112.dts index bf4143c6cb8f..b84af3da8c84 100644 --- a/arch/arm/boot/dts/kirkwood-ds112.dts +++ b/arch/arm/boot/dts/kirkwood-ds112.dts @@ -14,7 +14,7 @@ #include "kirkwood-synology.dtsi" / { - model = "Synology DS111"; + model = "Synology DS112"; compatible = "synology,ds111", "marvell,kirkwood"; memory { diff --git a/arch/arm/boot/dts/kirkwood-lswvl.dts b/arch/arm/boot/dts/kirkwood-lswvl.dts index 09eed3cea0af..36eec7392ab4 100644 --- a/arch/arm/boot/dts/kirkwood-lswvl.dts +++ b/arch/arm/boot/dts/kirkwood-lswvl.dts @@ -1,7 +1,8 @@ /* * Device Tree file for Buffalo Linkstation LS-WVL/VL * - * Copyright (C) 2015, rogershimizu@gmail.com + * Copyright (C) 2015, 2016 + * Roger Shimizu <rogershimizu@gmail.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -156,21 +157,21 @@ button@1 { label = "Function Button"; linux,code = <KEY_OPTION>; - gpios = <&gpio0 45 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 13 GPIO_ACTIVE_LOW>; }; button@2 { label = "Power-on Switch"; linux,code = <KEY_RESERVED>; linux,input-type = <5>; - gpios = <&gpio0 46 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; }; button@3 { label = "Power-auto Switch"; linux,code = <KEY_ESC>; linux,input-type = <5>; - gpios = <&gpio0 47 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 15 GPIO_ACTIVE_LOW>; }; }; @@ -185,38 +186,38 @@ led@1 { label = "lswvl:red:alarm"; - gpios = <&gpio0 36 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; }; led@2 { label = "lswvl:red:func"; - gpios = <&gpio0 37 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 5 GPIO_ACTIVE_HIGH>; }; led@3 { label = "lswvl:amber:info"; - gpios = <&gpio0 38 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>; }; led@4 { label = "lswvl:blue:func"; - gpios = <&gpio0 39 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>; }; led@5 { label = "lswvl:blue:power"; - gpios = <&gpio0 40 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 8 GPIO_ACTIVE_LOW>; default-state = "keep"; }; led@6 { label = "lswvl:red:hdderr0"; - gpios = <&gpio0 34 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; }; led@7 { label = "lswvl:red:hdderr1"; - gpios = <&gpio0 35 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>; }; }; @@ -233,7 +234,7 @@ 3250 1 5000 0>; - alarm-gpios = <&gpio0 43 GPIO_ACTIVE_HIGH>; + alarm-gpios = <&gpio1 11 GPIO_ACTIVE_HIGH>; }; restart_poweroff { diff --git a/arch/arm/boot/dts/kirkwood-lswxl.dts b/arch/arm/boot/dts/kirkwood-lswxl.dts index f5db16a08597..b13ec20a7088 100644 --- a/arch/arm/boot/dts/kirkwood-lswxl.dts +++ b/arch/arm/boot/dts/kirkwood-lswxl.dts @@ -1,7 +1,8 @@ /* * Device Tree file for Buffalo Linkstation LS-WXL/WSXL * - * Copyright (C) 2015, rogershimizu@gmail.com + * Copyright (C) 2015, 2016 + * Roger Shimizu <rogershimizu@gmail.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -156,21 +157,21 @@ button@1 { label = "Function Button"; linux,code = <KEY_OPTION>; - gpios = <&gpio1 41 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 9 GPIO_ACTIVE_LOW>; }; button@2 { label = "Power-on Switch"; linux,code = <KEY_RESERVED>; linux,input-type = <5>; - gpios = <&gpio1 42 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 10 GPIO_ACTIVE_LOW>; }; button@3 { label = "Power-auto Switch"; linux,code = <KEY_ESC>; linux,input-type = <5>; - gpios = <&gpio1 43 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 11 GPIO_ACTIVE_LOW>; }; }; @@ -185,12 +186,12 @@ led@1 { label = "lswxl:blue:func"; - gpios = <&gpio1 36 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; }; led@2 { label = "lswxl:red:alarm"; - gpios = <&gpio1 49 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 17 GPIO_ACTIVE_LOW>; }; led@3 { @@ -200,23 +201,23 @@ led@4 { label = "lswxl:blue:power"; - gpios = <&gpio1 8 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>; + default-state = "keep"; }; led@5 { label = "lswxl:red:func"; - gpios = <&gpio1 5 GPIO_ACTIVE_LOW>; - default-state = "keep"; + gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; }; led@6 { label = "lswxl:red:hdderr0"; - gpios = <&gpio1 2 GPIO_ACTIVE_LOW>; + gpios = <&gpio0 8 GPIO_ACTIVE_HIGH>; }; led@7 { label = "lswxl:red:hdderr1"; - gpios = <&gpio1 3 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>; }; }; @@ -225,15 +226,15 @@ pinctrl-0 = <&pmx_fan_low &pmx_fan_high &pmx_fan_lock>; pinctrl-names = "default"; - gpios = <&gpio0 47 GPIO_ACTIVE_LOW - &gpio0 48 GPIO_ACTIVE_LOW>; + gpios = <&gpio1 16 GPIO_ACTIVE_LOW + &gpio1 15 GPIO_ACTIVE_LOW>; gpio-fan,speed-map = <0 3 1500 2 3250 1 5000 0>; - alarm-gpios = <&gpio1 49 GPIO_ACTIVE_HIGH>; + alarm-gpios = <&gpio1 8 GPIO_ACTIVE_HIGH>; }; restart_poweroff { @@ -256,7 +257,7 @@ enable-active-high; regulator-always-on; regulator-boot-on; - gpio = <&gpio0 37 GPIO_ACTIVE_HIGH>; + gpio = <&gpio1 5 GPIO_ACTIVE_HIGH>; }; hdd_power0: regulator@2 { compatible = "regulator-fixed"; diff --git a/arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts b/arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts index 1db6f2c506cc..8082d64266a3 100644 --- a/arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts +++ b/arch/arm/boot/dts/kirkwood-pogoplug-series-4.dts @@ -131,6 +131,7 @@ chip-delay = <40>; status = "okay"; partitions { + compatible = "fixed-partitions"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 7fed0bd4f3de..00805322367e 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -112,14 +112,6 @@ clock-frequency = <400000>; }; -&i2c2 { - clock-frequency = <400000>; -}; - -&i2c3 { - clock-frequency = <400000>; -}; - /* * Only found on the wireless SOM. For the SOM without wireless, the pins for * MMC3 can be routed with jumpers to the second MMC slot on the devkit and @@ -143,6 +135,7 @@ interrupt-parent = <&gpio5>; interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */ ref-clock-frequency = <26000000>; + tcxo-clock-frequency = <26000000>; }; }; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 888412c63f97..902657d6713b 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -130,6 +130,16 @@ }; }; +&gpio8 { + /* TI trees use GPIO instead of msecure, see also muxing */ + p234 { + gpio-hog; + gpios = <10 GPIO_ACTIVE_HIGH>; + output-high; + line-name = "gpio8_234/msecure"; + }; +}; + &omap5_pmx_core { pinctrl-names = "default"; pinctrl-0 = < @@ -213,6 +223,13 @@ >; }; + /* TI trees use GPIO mode; msecure mode does not work reliably? */ + palmas_msecure_pins: palmas_msecure_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x180, PIN_OUTPUT | MUX_MODE6) /* gpio8_234 */ + >; + }; + usbhost_pins: pinmux_usbhost_pins { pinctrl-single,pins = < OMAP5_IOPAD(0x0c4, PIN_INPUT | MUX_MODE0) /* usbb2_hsic_strobe */ @@ -278,6 +295,12 @@ &usbhost_wkup_pins >; + palmas_sys_nirq_pins: pinmux_palmas_sys_nirq_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) /* sys_nirq1 */ + >; + }; + usbhost_wkup_pins: pinmux_usbhost_wkup_pins { pinctrl-single,pins = < OMAP5_IOPAD(0x05a, PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */ @@ -345,6 +368,8 @@ interrupt-controller; #interrupt-cells = <2>; ti,system-power-controller; + pinctrl-names = "default"; + pinctrl-0 = <&palmas_sys_nirq_pins &palmas_msecure_pins>; extcon_usb3: palmas_usb { compatible = "ti,palmas-usb-vid"; @@ -358,6 +383,14 @@ #clock-cells = <0>; }; + rtc { + compatible = "ti,palmas-rtc"; + interrupt-parent = <&palmas>; + interrupts = <8 IRQ_TYPE_NONE>; + ti,backup-battery-chargeable; + ti,backup-battery-charge-high-current; + }; + palmas_pmic { compatible = "ti,palmas-pmic"; interrupt-parent = <&palmas>; diff --git a/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts b/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts index 3daec912b4bf..aae8a7aceab7 100644 --- a/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts +++ b/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts @@ -1,7 +1,8 @@ /* * Device Tree file for Buffalo Linkstation LS-WTGL * - * Copyright (C) 2015, Roger Shimizu <rogershimizu@gmail.com> + * Copyright (C) 2015, 2016 + * Roger Shimizu <rogershimizu@gmail.com> * * This file is dual-licensed: you can use it either under the terms * of the GPL or the X11 license, at your option. Note that this dual @@ -69,8 +70,6 @@ internal-regs { pinctrl: pinctrl@10000 { - pinctrl-0 = <&pmx_usb_power &pmx_power_hdd - &pmx_fan_low &pmx_fan_high &pmx_fan_lock>; pinctrl-names = "default"; pmx_led_power: pmx-leds { @@ -162,6 +161,7 @@ led@1 { label = "lswtgl:blue:power"; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; + default-state = "keep"; }; led@2 { @@ -188,7 +188,7 @@ 3250 1 5000 0>; - alarm-gpios = <&gpio0 2 GPIO_ACTIVE_HIGH>; + alarm-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; }; restart_poweroff { @@ -228,6 +228,37 @@ }; }; +&devbus_bootcs { + status = "okay"; + devbus,keep-config; + + flash@0 { + compatible = "jedec-flash"; + reg = <0 0x40000>; + bank-width = <1>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + header@0 { + reg = <0 0x30000>; + read-only; + }; + + uboot@30000 { + reg = <0x30000 0xF000>; + read-only; + }; + + uboot_env@3F000 { + reg = <0x3F000 0x1000>; + }; + }; + }; +}; + &mdio { status = "okay"; diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts index 6713b1ea732b..01d239c3eaaa 100644 --- a/arch/arm/boot/dts/r8a7791-porter.dts +++ b/arch/arm/boot/dts/r8a7791-porter.dts @@ -283,7 +283,6 @@ pinctrl-names = "default"; status = "okay"; - renesas,enable-gpio = <&gpio5 31 GPIO_ACTIVE_HIGH>; }; &usbphy { diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h index 1afe24629d1f..b0c912feaa2f 100644 --- a/arch/arm/boot/dts/sama5d2-pinfunc.h +++ b/arch/arm/boot/dts/sama5d2-pinfunc.h @@ -90,7 +90,7 @@ #define PIN_PA14__I2SC1_MCK PINMUX_PIN(PIN_PA14, 4, 2) #define PIN_PA14__FLEXCOM3_IO2 PINMUX_PIN(PIN_PA14, 5, 1) #define PIN_PA14__D9 PINMUX_PIN(PIN_PA14, 6, 2) -#define PIN_PA15 14 +#define PIN_PA15 15 #define PIN_PA15__GPIO PINMUX_PIN(PIN_PA15, 0, 0) #define PIN_PA15__SPI0_MOSI PINMUX_PIN(PIN_PA15, 1, 1) #define PIN_PA15__TF1 PINMUX_PIN(PIN_PA15, 2, 1) diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index b8032bca4621..db1151c18466 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -1342,7 +1342,7 @@ dbgu: serial@fc069000 { compatible = "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; reg = <0xfc069000 0x200>; - interrupts = <2 IRQ_TYPE_LEVEL_HIGH 7>; + interrupts = <45 IRQ_TYPE_LEVEL_HIGH 7>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_dbgu>; clocks = <&dbgu_clk>; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index d0c743853318..27a333eb8987 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -127,22 +127,14 @@ }; mmcsd_default_mode: mmcsd_default { mmcsd_default_cfg1 { - /* MCCLK */ - pins = "GPIO8_B10"; - ste,output = <0>; - }; - mmcsd_default_cfg2 { - /* MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 */ - pins = "GPIO10_C11", "GPIO15_A12", - "GPIO16_C13", "GPIO23_D15"; - ste,output = <1>; - }; - mmcsd_default_cfg3 { - /* MCCMD, MCDAT3-0, MCMSFBCLK */ - pins = "GPIO9_A10", "GPIO11_B11", - "GPIO12_A11", "GPIO13_C12", - "GPIO14_B12", "GPIO24_C15"; - ste,input = <1>; + /* + * MCCLK, MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 + * MCCMD, MCDAT3-0, MCMSFBCLK + */ + pins = "GPIO8_B10", "GPIO9_A10", "GPIO10_C11", "GPIO11_B11", + "GPIO12_A11", "GPIO13_C12", "GPIO14_B12", "GPIO15_A12", + "GPIO16_C13", "GPIO23_D15", "GPIO24_C15"; + ste,output = <2>; }; }; }; @@ -802,10 +794,21 @@ clock-names = "mclk", "apb_pclk"; interrupt-parent = <&vica>; interrupts = <22>; - max-frequency = <48000000>; + max-frequency = <400000>; bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + full-pwr-cycle; + /* + * The STw4811 circuit used with the Nomadik strictly + * requires that all of these signal direction pins be + * routed and used for its 4-bit levelshifter. + */ + st,sig-dir-dat0; + st,sig-dir-dat2; + st,sig-dir-dat31; + st,sig-dir-cmd; + st,sig-pin-fbclk; pinctrl-names = "default"; pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>; vmmc-supply = <&vmmc_regulator>; diff --git a/arch/arm/boot/dts/tps65217.dtsi b/arch/arm/boot/dts/tps65217.dtsi new file mode 100644 index 000000000000..a63272422d76 --- /dev/null +++ b/arch/arm/boot/dts/tps65217.dtsi @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Integrated Power Management Chip + * http://www.ti.com/lit/ds/symlink/tps65217.pdf + */ + +&tps { + compatible = "ti,tps65217"; + + regulators { + #address-cells = <1>; + #size-cells = <0>; + + dcdc1_reg: regulator@0 { + reg = <0>; + regulator-compatible = "dcdc1"; + }; + + dcdc2_reg: regulator@1 { + reg = <1>; + regulator-compatible = "dcdc2"; + }; + + dcdc3_reg: regulator@2 { + reg = <2>; + regulator-compatible = "dcdc3"; + }; + + ldo1_reg: regulator@3 { + reg = <3>; + regulator-compatible = "ldo1"; + }; + + ldo2_reg: regulator@4 { + reg = <4>; + regulator-compatible = "ldo2"; + }; + + ldo3_reg: regulator@5 { + reg = <5>; + regulator-compatible = "ldo3"; + }; + + ldo4_reg: regulator@6 { + reg = <6>; + regulator-compatible = "ldo4"; + }; + }; +}; diff --git a/arch/arm/common/icst.c b/arch/arm/common/icst.c index 2dc6da70ae59..d7ed252708c5 100644 --- a/arch/arm/common/icst.c +++ b/arch/arm/common/icst.c @@ -16,7 +16,7 @@ */ #include <linux/module.h> #include <linux/kernel.h> - +#include <asm/div64.h> #include <asm/hardware/icst.h> /* @@ -29,7 +29,11 @@ EXPORT_SYMBOL(icst525_s2div); unsigned long icst_hz(const struct icst_params *p, struct icst_vco vco) { - return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * p->s2div[vco.s]); + u64 dividend = p->ref * 2 * (u64)(vco.v + 8); + u32 divisor = (vco.r + 2) * p->s2div[vco.s]; + + do_div(dividend, divisor); + return (unsigned long)dividend; } EXPORT_SYMBOL(icst_hz); @@ -58,6 +62,7 @@ icst_hz_to_vco(const struct icst_params *p, unsigned long freq) if (f > p->vco_min && f <= p->vco_max) break; + i++; } while (i < 8); if (i >= 8) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 314f6be2dca2..8e8b2ace9b7c 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -426,6 +426,7 @@ CONFIG_SUNXI_WATCHDOG=y CONFIG_IMX2_WDT=y CONFIG_TEGRA_WATCHDOG=m CONFIG_MESON_WATCHDOG=y +CONFIG_DW_WATCHDOG=y CONFIG_DIGICOLOR_WATCHDOG=y CONFIG_MFD_AS3711=y CONFIG_MFD_AS3722=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index c5e1943e5427..d18d6b42fcf5 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -50,6 +50,7 @@ CONFIG_SOC_AM33XX=y CONFIG_SOC_AM43XX=y CONFIG_SOC_DRA7XX=y CONFIG_ARM_THUMBEE=y +CONFIG_ARM_KERNMEM_PERMS=y CONFIG_ARM_ERRATA_411920=y CONFIG_ARM_ERRATA_430973=y CONFIG_SMP=y @@ -177,6 +178,7 @@ CONFIG_TI_CPTS=y CONFIG_AT803X_PHY=y CONFIG_SMSC_PHY=y CONFIG_USB_USBNET=m +CONFIG_USB_NET_SMSC75XX=m CONFIG_USB_NET_SMSC95XX=m CONFIG_USB_ALI_M5632=y CONFIG_USB_AN2720=y @@ -290,24 +292,23 @@ CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y -CONFIG_OMAP2_DSS=m -CONFIG_OMAP5_DSS_HDMI=y -CONFIG_OMAP2_DSS_SDI=y -CONFIG_OMAP2_DSS_DSI=y +CONFIG_FB_OMAP5_DSS_HDMI=y +CONFIG_FB_OMAP2_DSS_SDI=y +CONFIG_FB_OMAP2_DSS_DSI=y CONFIG_FB_OMAP2=m -CONFIG_DISPLAY_ENCODER_TFP410=m -CONFIG_DISPLAY_ENCODER_TPD12S015=m -CONFIG_DISPLAY_CONNECTOR_DVI=m -CONFIG_DISPLAY_CONNECTOR_HDMI=m -CONFIG_DISPLAY_CONNECTOR_ANALOG_TV=m -CONFIG_DISPLAY_PANEL_DPI=m -CONFIG_DISPLAY_PANEL_DSI_CM=m -CONFIG_DISPLAY_PANEL_SONY_ACX565AKM=m -CONFIG_DISPLAY_PANEL_LGPHILIPS_LB035Q02=m -CONFIG_DISPLAY_PANEL_SHARP_LS037V7DW01=m -CONFIG_DISPLAY_PANEL_TPO_TD028TTEC1=m -CONFIG_DISPLAY_PANEL_TPO_TD043MTEA1=m -CONFIG_DISPLAY_PANEL_NEC_NL8048HL11=m +CONFIG_FB_OMAP2_ENCODER_TFP410=m +CONFIG_FB_OMAP2_ENCODER_TPD12S015=m +CONFIG_FB_OMAP2_CONNECTOR_DVI=m +CONFIG_FB_OMAP2_CONNECTOR_HDMI=m +CONFIG_FB_OMAP2_CONNECTOR_ANALOG_TV=m +CONFIG_FB_OMAP2_PANEL_DPI=m +CONFIG_FB_OMAP2_PANEL_DSI_CM=m +CONFIG_FB_OMAP2_PANEL_SONY_ACX565AKM=m +CONFIG_FB_OMAP2_PANEL_LGPHILIPS_LB035Q02=m +CONFIG_FB_OMAP2_PANEL_SHARP_LS037V7DW01=m +CONFIG_FB_OMAP2_PANEL_TPO_TD028TTEC1=m +CONFIG_FB_OMAP2_PANEL_TPO_TD043MTEA1=m +CONFIG_FB_OMAP2_PANEL_NEC_NL8048HL11=m CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=y @@ -354,6 +355,11 @@ CONFIG_USB_MUSB_DSPS=m CONFIG_USB_INVENTRA_DMA=y CONFIG_USB_TI_CPPI41_DMA=y CONFIG_USB_DWC3=m +CONFIG_USB_SERIAL=m +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_SIMPLE=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_TEST=m CONFIG_AM335X_PHY_USB=y CONFIG_USB_GADGET=m @@ -387,6 +393,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=m CONFIG_LEDS_GPIO=m CONFIG_LEDS_PWM=m +CONFIG_LEDS_PCA963X=m CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_ONESHOT=m @@ -449,6 +456,8 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_SPLIT=y +CONFIG_DEBUG_INFO_DWARF4=y CONFIG_MAGIC_SYSRQ=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index b445a5d56f43..89a3a3e592d6 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -364,7 +364,7 @@ static struct crypto_alg aes_algs[] = { { .cra_blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, + .ivsize = 0, .setkey = ce_aes_setkey, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, @@ -441,7 +441,7 @@ static struct crypto_alg aes_algs[] = { { .cra_ablkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, + .ivsize = 0, .setkey = ablk_set_key, .encrypt = ablk_encrypt, .decrypt = ablk_decrypt, diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 7da5503c0591..e08d15184056 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -117,6 +117,7 @@ static inline u32 gic_read_iar(void) u32 irqstat; asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat)); + dsb(sy); return irqstat; } diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index d5525bfc7e3e..9156fc303afd 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif #ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); void set_kernel_text_rw(void); void set_kernel_text_ro(void); #else diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index 0375c8caa061..9408a994cc91 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -35,14 +35,21 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page); + unsigned long page_pfn = page_to_xen_pfn(page); + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); + unsigned long compound_pages = + (1<<compound_order(page)) * XEN_PFN_PER_PAGE; + bool local = (page_pfn <= dev_pfn) && + (dev_pfn - page_pfn < compound_pages); + /* - * Dom0 is mapped 1:1, while the Linux page can be spanned accross - * multiple Xen page, it's not possible to have a mix of local and - * foreign Xen page. So if the first xen_pfn == mfn the page is local - * otherwise it's a foreign page grant-mapped in dom0. If the page is - * local we can safely call the native dma_ops function, otherwise we - * call the xen specific function. + * Dom0 is mapped 1:1, while the Linux page can span across + * multiple Xen pages, it's not possible for it to contain a + * mix of local and foreign Xen pages. So if the first xen_pfn + * == mfn the page is local otherwise it's a foreign page + * grant-mapped in dom0. If the page is local we can safely + * call the native dma_ops function, otherwise we call the xen + * specific function. */ if (local) __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 2c5f160be65e..ad325a8c7e1e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o +AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a ifeq ($(CONFIG_ARM_PSCI),y) obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 7d0cba6f1cc5..139791ed473d 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -176,13 +176,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -851,7 +851,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) res->name = "System RAM"; res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 37312f6749f3..baee70267f29 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void) /* * OK, it's off to the idle thread for us */ - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index dda1959f0dde..08e49c423c24 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -506,18 +506,18 @@ static void kvm_arm_resume_guest(struct kvm *kvm) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) { - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); vcpu->arch.pause = false; - wake_up_interruptible(wq); + swake_up(wq); } } static void vcpu_sleep(struct kvm_vcpu *vcpu) { - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); - wait_event_interruptible(*wq, ((!vcpu->arch.power_off) && + swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && (!vcpu->arch.pause))); } diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 5fa69d7bae58..99361f11354a 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -161,7 +161,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } static unsigned long num_core_regs(void) diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 7f33b2056ae6..0f6600f05137 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -206,7 +206,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, run->mmio.is_write = is_write; run->mmio.phys_addr = fault_ipa; run->mmio.len = len; - memcpy(run->mmio.data, data_buf, len); + if (is_write) + memcpy(run->mmio.data, data_buf, len); if (!ret) { /* We handled the access successfully in the kernel. */ diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index a9b3b905e661..c2b131527a64 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -70,7 +70,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; - wait_queue_head_t *wq; + struct swait_queue_head *wq; unsigned long cpu_id; unsigned long context_id; phys_addr_t target_pc; @@ -119,7 +119,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) smp_mb(); /* Make sure the above is visible */ wq = kvm_arch_vcpu_wq(vcpu); - wake_up_interruptible(wq); + swake_up(wq); return PSCI_RET_SUCCESS; } diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c index 77d6b1bab278..ee06fabdf60e 100644 --- a/arch/arm/mach-lpc32xx/phy3250.c +++ b/arch/arm/mach-lpc32xx/phy3250.c @@ -86,8 +86,8 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb) { dma_addr_t dma; - fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, - PANEL_SIZE, &dma, GFP_KERNEL); + fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, PANEL_SIZE, &dma, + GFP_KERNEL); if (!fb->fb.screen_base) { printk(KERN_ERR "CLCD: unable to map framebuffer\n"); return -ENOMEM; @@ -116,15 +116,14 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb) static int lpc32xx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma) { - return dma_mmap_writecombine(&fb->dev->dev, vma, - fb->fb.screen_base, fb->fb.fix.smem_start, - fb->fb.fix.smem_len); + return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base, + fb->fb.fix.smem_start, fb->fb.fix.smem_len); } static void lpc32xx_clcd_remove(struct clcd_fb *fb) { - dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, - fb->fb.screen_base, fb->fb.fix.smem_start); + dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base, + fb->fb.fix.smem_start); } /* diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 64e3d2ce9a07..b003e3afd693 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -3,7 +3,6 @@ menuconfig ARCH_MVEBU depends on ARCH_MULTI_V7 || ARCH_MULTI_V5 select ARCH_SUPPORTS_BIG_ENDIAN select CLKSRC_MMIO - select GENERIC_IRQ_CHIP select PINCTRL select PLAT_ORION select SOC_BUS @@ -29,6 +28,7 @@ config MACH_ARMADA_370 bool "Marvell Armada 370 boards" depends on ARCH_MULTI_V7 select ARMADA_370_CLK + select ARMADA_370_XP_IRQ select CPU_PJ4B select MACH_MVEBU_V7 select PINCTRL_ARMADA_370 @@ -39,6 +39,7 @@ config MACH_ARMADA_370 config MACH_ARMADA_375 bool "Marvell Armada 375 boards" depends on ARCH_MULTI_V7 + select ARMADA_370_XP_IRQ select ARM_ERRATA_720789 select ARM_ERRATA_753970 select ARM_GIC @@ -58,6 +59,7 @@ config MACH_ARMADA_38X select ARM_ERRATA_720789 select ARM_ERRATA_753970 select ARM_GIC + select ARMADA_370_XP_IRQ select ARMADA_38X_CLK select HAVE_ARM_SCU select HAVE_ARM_TWD if SMP @@ -72,6 +74,7 @@ config MACH_ARMADA_39X bool "Marvell Armada 39x boards" depends on ARCH_MULTI_V7 select ARM_GIC + select ARMADA_370_XP_IRQ select ARMADA_39X_CLK select CACHE_L2X0 select HAVE_ARM_SCU @@ -86,6 +89,7 @@ config MACH_ARMADA_39X config MACH_ARMADA_XP bool "Marvell Armada XP boards" depends on ARCH_MULTI_V7 + select ARMADA_370_XP_IRQ select ARMADA_XP_CLK select CPU_PJ4B select MACH_MVEBU_V7 diff --git a/arch/arm/mach-netx/fb.c b/arch/arm/mach-netx/fb.c index d122ee6ab991..8814ee5e98fd 100644 --- a/arch/arm/mach-netx/fb.c +++ b/arch/arm/mach-netx/fb.c @@ -42,8 +42,8 @@ int netx_clcd_setup(struct clcd_fb *fb) fb->panel = netx_panel; - fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, 1024*1024, - &dma, GFP_KERNEL); + fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, 1024 * 1024, &dma, + GFP_KERNEL); if (!fb->fb.screen_base) { printk(KERN_ERR "CLCD: unable to map framebuffer\n"); return -ENOMEM; @@ -57,16 +57,14 @@ int netx_clcd_setup(struct clcd_fb *fb) int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma) { - return dma_mmap_writecombine(&fb->dev->dev, vma, - fb->fb.screen_base, - fb->fb.fix.smem_start, - fb->fb.fix.smem_len); + return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base, + fb->fb.fix.smem_start, fb->fb.fix.smem_len); } void netx_clcd_remove(struct clcd_fb *fb) { - dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, - fb->fb.screen_base, fb->fb.fix.smem_start); + dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base, + fb->fb.fix.smem_start); } static AMBA_AHB_DEVICE(fb, "fb", 0, 0x00104000, { NETX_IRQ_LCD }, NULL); diff --git a/arch/arm/mach-nspire/clcd.c b/arch/arm/mach-nspire/clcd.c index abea12617b17..ea0e5b2ca1cd 100644 --- a/arch/arm/mach-nspire/clcd.c +++ b/arch/arm/mach-nspire/clcd.c @@ -90,8 +90,8 @@ int nspire_clcd_setup(struct clcd_fb *fb) panel_size = ((panel->mode.xres * panel->mode.yres) * panel->bpp) / 8; panel_size = ALIGN(panel_size, PAGE_SIZE); - fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, - panel_size, &dma, GFP_KERNEL); + fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, panel_size, &dma, + GFP_KERNEL); if (!fb->fb.screen_base) { pr_err("CLCD: unable to map framebuffer\n"); @@ -107,13 +107,12 @@ int nspire_clcd_setup(struct clcd_fb *fb) int nspire_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma) { - return dma_mmap_writecombine(&fb->dev->dev, vma, - fb->fb.screen_base, fb->fb.fix.smem_start, - fb->fb.fix.smem_len); + return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base, + fb->fb.fix.smem_start, fb->fb.fix.smem_len); } void nspire_clcd_remove(struct clcd_fb *fb) { - dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, - fb->fb.screen_base, fb->fb.fix.smem_start); + dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base, + fb->fb.fix.smem_start); } diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 809827265fb3..bab814d2f37d 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -18,6 +18,7 @@ #include <asm/setup.h> #include <asm/mach/arch.h> +#include <asm/system_info.h> #include "common.h" @@ -77,12 +78,31 @@ static const char *const n900_boards_compat[] __initconst = { NULL, }; +/* Set system_rev from atags */ +static void __init rx51_set_system_rev(const struct tag *tags) +{ + const struct tag *tag; + + if (tags->hdr.tag != ATAG_CORE) + return; + + for_each_tag(tag, tags) { + if (tag->hdr.tag == ATAG_REVISION) { + system_rev = tag->u.revision.rev; + break; + } + } +} + /* Legacy userspace on Nokia N900 needs ATAGS exported in /proc/atags, * save them while the data is still not overwritten */ static void __init rx51_reserve(void) { - save_atags((const struct tag *)(PAGE_OFFSET + 0x100)); + const struct tag *tags = (const struct tag *)(PAGE_OFFSET + 0x100); + + save_atags(tags); + rx51_set_system_rev(tags); omap_reserve(); } diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 9cda974a3009..d7f1d69daf6d 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -18,7 +18,6 @@ #include <linux/slab.h> #include <linux/of.h> #include <linux/pinctrl/machine.h> -#include <linux/platform_data/mailbox-omap.h> #include <asm/mach-types.h> #include <asm/mach/map.h> @@ -66,32 +65,6 @@ static int __init omap3_l3_init(void) } omap_postcore_initcall(omap3_l3_init); -#if defined(CONFIG_OMAP2PLUS_MBOX) || defined(CONFIG_OMAP2PLUS_MBOX_MODULE) -static inline void __init omap_init_mbox(void) -{ - struct omap_hwmod *oh; - struct platform_device *pdev; - struct omap_mbox_pdata *pdata; - - oh = omap_hwmod_lookup("mailbox"); - if (!oh) { - pr_err("%s: unable to find hwmod\n", __func__); - return; - } - if (!oh->dev_attr) { - pr_err("%s: hwmod doesn't have valid attrs\n", __func__); - return; - } - - pdata = (struct omap_mbox_pdata *)oh->dev_attr; - pdev = omap_device_build("omap-mailbox", -1, oh, pdata, sizeof(*pdata)); - WARN(IS_ERR(pdev), "%s: could not build device, err %ld\n", - __func__, PTR_ERR(pdev)); -} -#else -static inline void omap_init_mbox(void) { } -#endif /* CONFIG_OMAP2PLUS_MBOX */ - static inline void omap_init_sti(void) {} #if defined(CONFIG_SPI_OMAP24XX) || defined(CONFIG_SPI_OMAP24XX_MODULE) @@ -229,7 +202,6 @@ static int __init omap2_init_devices(void) * please keep these calls, and their implementations above, * in alphabetical order so they're easier to sort through. */ - omap_init_mbox(); omap_init_mcspi(); omap_init_sham(); omap_init_aes(); diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 7b76ce01c21d..8633c703546a 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -101,10 +101,8 @@ static void omap2_onenand_set_async_mode(void __iomem *onenand_base) static void set_onenand_cfg(void __iomem *onenand_base) { - u32 reg; + u32 reg = ONENAND_SYS_CFG1_RDY | ONENAND_SYS_CFG1_INT; - reg = readw(onenand_base + ONENAND_REG_SYS_CFG1); - reg &= ~((0x7 << ONENAND_SYS_CFG1_BRL_SHIFT) | (0x7 << 9)); reg |= (latency << ONENAND_SYS_CFG1_BRL_SHIFT) | ONENAND_SYS_CFG1_BL_16; if (onenand_flags & ONENAND_FLAG_SYNCREAD) @@ -123,6 +121,7 @@ static void set_onenand_cfg(void __iomem *onenand_base) reg |= ONENAND_SYS_CFG1_VHF; else reg &= ~ONENAND_SYS_CFG1_VHF; + writew(reg, onenand_base + ONENAND_REG_SYS_CFG1); } @@ -289,6 +288,7 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base) } } + onenand_async.sync_write = true; omap2_onenand_calc_async_timings(&t); ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async); diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index 0437537751bc..f7ff3b9dad87 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -191,12 +191,22 @@ static int _omap_device_notifier_call(struct notifier_block *nb, { struct platform_device *pdev = to_platform_device(dev); struct omap_device *od; + int err; switch (event) { case BUS_NOTIFY_DEL_DEVICE: if (pdev->archdata.od) omap_device_delete(pdev->archdata.od); break; + case BUS_NOTIFY_UNBOUND_DRIVER: + od = to_omap_device(pdev); + if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED)) { + dev_info(dev, "enabled after unload, idling\n"); + err = omap_device_idle(pdev); + if (err) + dev_err(dev, "failed to idle\n"); + } + break; case BUS_NOTIFY_ADD_DEVICE: if (pdev->dev.of_node) omap_device_build_from_dt(pdev); @@ -602,8 +612,10 @@ static int _od_runtime_resume(struct device *dev) int ret; ret = omap_device_enable(pdev); - if (ret) + if (ret) { + dev_err(dev, "use pm_runtime_put_sync_suspend() in driver?\n"); return ret; + } return pm_generic_runtime_resume(dev); } diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index e9f65fec55c0..b6d62e4cdfdd 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2200,6 +2200,11 @@ static int _enable(struct omap_hwmod *oh) */ static int _idle(struct omap_hwmod *oh) { + if (oh->flags & HWMOD_NO_IDLE) { + oh->_int_flags |= _HWMOD_SKIP_ENABLE; + return 0; + } + pr_debug("omap_hwmod: %s: idling\n", oh->name); if (oh->_state != _HWMOD_STATE_ENABLED) { @@ -2504,6 +2509,8 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->flags |= HWMOD_INIT_NO_RESET; if (of_find_property(np, "ti,no-idle-on-init", NULL)) oh->flags |= HWMOD_INIT_NO_IDLE; + if (of_find_property(np, "ti,no-idle", NULL)) + oh->flags |= HWMOD_NO_IDLE; } oh->_state = _HWMOD_STATE_INITIALIZED; @@ -2630,7 +2637,7 @@ static void __init _setup_postsetup(struct omap_hwmod *oh) * XXX HWMOD_INIT_NO_IDLE does not belong in hwmod data - * it should be set by the core code as a runtime flag during startup */ - if ((oh->flags & HWMOD_INIT_NO_IDLE) && + if ((oh->flags & (HWMOD_INIT_NO_IDLE | HWMOD_NO_IDLE)) && (postsetup_state == _HWMOD_STATE_IDLE)) { oh->_int_flags |= _HWMOD_SKIP_ENABLE; postsetup_state = _HWMOD_STATE_ENABLED; diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index 76bce11c85a4..7c7a31169475 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -525,6 +525,8 @@ struct omap_hwmod_omap4_prcm { * or idled. * HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to * operate and they need to be handled at the same time as the main_clk. + * HWMOD_NO_IDLE: Do not idle the hwmod at all. Useful to handle certain + * IPs like CPSW on DRA7, where clocks to this module cannot be disabled. */ #define HWMOD_SWSUP_SIDLE (1 << 0) #define HWMOD_SWSUP_MSTANDBY (1 << 1) @@ -541,6 +543,7 @@ struct omap_hwmod_omap4_prcm { #define HWMOD_SWSUP_SIDLE_ACT (1 << 12) #define HWMOD_RECONFIG_IO_CHAIN (1 << 13) #define HWMOD_OPT_CLKS_NEEDED (1 << 14) +#define HWMOD_NO_IDLE (1 << 15) /* * omap_hwmod._int_flags definitions diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index e781e4fae13a..a935d28443da 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -23,6 +23,8 @@ #include <linux/platform_data/pinctrl-single.h> #include <linux/platform_data/iommu-omap.h> #include <linux/platform_data/wkup_m3.h> +#include <linux/platform_data/pwm_omap_dmtimer.h> +#include <plat/dmtimer.h> #include "common.h" #include "common-board-devices.h" @@ -449,6 +451,24 @@ void omap_auxdata_legacy_init(struct device *dev) dev->platform_data = &twl_gpio_auxdata; } +/* Dual mode timer PWM callbacks platdata */ +#if IS_ENABLED(CONFIG_OMAP_DM_TIMER) +struct pwm_omap_dmtimer_pdata pwm_dmtimer_pdata = { + .request_by_node = omap_dm_timer_request_by_node, + .free = omap_dm_timer_free, + .enable = omap_dm_timer_enable, + .disable = omap_dm_timer_disable, + .get_fclk = omap_dm_timer_get_fclk, + .start = omap_dm_timer_start, + .stop = omap_dm_timer_stop, + .set_load = omap_dm_timer_set_load, + .set_match = omap_dm_timer_set_match, + .set_pwm = omap_dm_timer_set_pwm, + .set_prescaler = omap_dm_timer_set_prescaler, + .write_counter = omap_dm_timer_write_counter, +}; +#endif + /* * Few boards still need auxdata populated before we populate * the dev entries in of_platform_populate(). @@ -502,6 +522,9 @@ static struct of_dev_auxdata omap_auxdata_lookup[] __initdata = { OF_DEV_AUXDATA("ti,am4372-wkup-m3", 0x44d00000, "44d00000.wkup_m3", &wkup_m3_data), #endif +#if IS_ENABLED(CONFIG_OMAP_DM_TIMER) + OF_DEV_AUXDATA("ti,omap-dmtimer-pwm", 0, NULL, &pwm_dmtimer_pdata), +#endif #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) OF_DEV_AUXDATA("ti,omap4-iommu", 0x4a066000, "4a066000.mmu", &omap4_iommu_pdata), diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index eafd120b53f1..1b9f0520dea9 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -86,13 +86,18 @@ ENTRY(enable_omap3630_toggle_l2_on_restore) stmfd sp!, {lr} @ save registers on stack /* Setup so that we will disable and enable l2 */ mov r1, #0x1 - adrl r2, l2dis_3630 @ may be too distant for plain adr - str r1, [r2] + adrl r3, l2dis_3630_offset @ may be too distant for plain adr + ldr r2, [r3] @ value for offset + str r1, [r2, r3] @ write to l2dis_3630 ldmfd sp!, {pc} @ restore regs and return ENDPROC(enable_omap3630_toggle_l2_on_restore) - .text -/* Function to call rom code to save secure ram context */ +/* + * Function to call rom code to save secure ram context. This gets + * relocated to SRAM, so it can be all in .data section. Otherwise + * we need to initialize api_params separately. + */ + .data .align 3 ENTRY(save_secure_ram_context) stmfd sp!, {r4 - r11, lr} @ save registers on stack @@ -126,6 +131,8 @@ ENDPROC(save_secure_ram_context) ENTRY(save_secure_ram_context_sz) .word . - save_secure_ram_context + .text + /* * ====================== * == Idle entry point == @@ -289,12 +296,6 @@ wait_sdrc_ready: bic r5, r5, #0x40 str r5, [r4] -/* - * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a - * base instead. - * Be careful not to clobber r7 when maintaing this code. - */ - is_dll_in_lock_mode: /* Is dll in lock mode? */ ldr r4, sdrc_dlla_ctrl @@ -302,11 +303,7 @@ is_dll_in_lock_mode: tst r5, #0x4 bne exit_nonoff_modes @ Return if locked /* wait till dll locks */ - adr r7, kick_counter wait_dll_lock_timed: - ldr r4, wait_dll_lock_counter - add r4, r4, #1 - str r4, [r7, #wait_dll_lock_counter - kick_counter] ldr r4, sdrc_dlla_status /* Wait 20uS for lock */ mov r6, #8 @@ -330,9 +327,6 @@ kick_dll: orr r6, r6, #(1<<3) @ enable dll str r6, [r4] dsb - ldr r4, kick_counter - add r4, r4, #1 - str r4, [r7] @ kick_counter b wait_dll_lock_timed exit_nonoff_modes: @@ -360,15 +354,6 @@ sdrc_dlla_status: .word SDRC_DLLA_STATUS_V sdrc_dlla_ctrl: .word SDRC_DLLA_CTRL_V - /* - * When exporting to userspace while the counters are in SRAM, - * these 2 words need to be at the end to facilitate retrival! - */ -kick_counter: - .word 0 -wait_dll_lock_counter: - .word 0 - ENTRY(omap3_do_wfi_sz) .word . - omap3_do_wfi @@ -437,7 +422,9 @@ ENTRY(omap3_restore) cmp r2, #0x0 @ Check if target power state was OFF or RET bne logic_l1_restore - ldr r0, l2dis_3630 + adr r1, l2dis_3630_offset @ address for offset + ldr r0, [r1] @ value for offset + ldr r0, [r1, r0] @ value at l2dis_3630 cmp r0, #0x1 @ should we disable L2 on 3630? bne skipl2dis mrc p15, 0, r0, c1, c0, 1 @@ -449,12 +436,14 @@ skipl2dis: and r1, #0x700 cmp r1, #0x300 beq l2_inv_gp + adr r0, l2_inv_api_params_offset + ldr r3, [r0] + add r3, r3, r0 @ r3 points to dummy parameters mov r0, #40 @ set service ID for PPA mov r12, r0 @ copy secure Service ID in r12 mov r1, #0 @ set task id for ROM code in r1 mov r2, #4 @ set some flags in r2, r6 mov r6, #0xff - adr r3, l2_inv_api_params @ r3 points to dummy parameters dsb @ data write barrier dmb @ data memory barrier smc #1 @ call SMI monitor (smi #1) @@ -488,8 +477,8 @@ skipl2dis: b logic_l1_restore .align -l2_inv_api_params: - .word 0x1, 0x00 +l2_inv_api_params_offset: + .long l2_inv_api_params - . l2_inv_gp: /* Execute smi to invalidate L2 cache */ mov r12, #0x1 @ set up to invalidate L2 @@ -506,7 +495,9 @@ l2_inv_gp: mov r12, #0x2 smc #0 @ Call SMI monitor (smieq) logic_l1_restore: - ldr r1, l2dis_3630 + adr r0, l2dis_3630_offset @ adress for offset + ldr r1, [r0] @ value for offset + ldr r1, [r0, r1] @ value at l2dis_3630 cmp r1, #0x1 @ Test if L2 re-enable needed on 3630 bne skipl2reen mrc p15, 0, r1, c1, c0, 1 @@ -535,9 +526,17 @@ control_stat: .word CONTROL_STAT control_mem_rta: .word CONTROL_MEM_RTA_CTRL +l2dis_3630_offset: + .long l2dis_3630 - . + + .data l2dis_3630: .word 0 + .data +l2_inv_api_params: + .word 0x1, 0x00 + /* * Internal functions */ diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index 9b09d85d811a..c7a3b4aab4b5 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -29,12 +29,6 @@ dsb .endm -ppa_zero_params: - .word 0x0 - -ppa_por_params: - .word 1, 0 - #ifdef CONFIG_ARCH_OMAP4 /* @@ -266,7 +260,9 @@ ENTRY(omap4_cpu_resume) beq skip_ns_smp_enable ppa_actrl_retry: mov r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX - adr r3, ppa_zero_params @ Pointer to parameters + adr r1, ppa_zero_params_offset + ldr r3, [r1] + add r3, r3, r1 @ Pointer to ppa_zero_params mov r1, #0x0 @ Process ID mov r2, #0x4 @ Flag mov r6, #0xff @@ -303,7 +299,9 @@ skip_ns_smp_enable: ldr r0, =OMAP4_PPA_L2_POR_INDEX ldr r1, =OMAP44XX_SAR_RAM_BASE ldr r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET] - adr r3, ppa_por_params + adr r1, ppa_por_params_offset + ldr r3, [r1] + add r3, r3, r1 @ Pointer to ppa_por_params str r4, [r3, #0x04] mov r1, #0x0 @ Process ID mov r2, #0x4 @ Flag @@ -328,6 +326,8 @@ skip_l2en: #endif b cpu_resume @ Jump to generic resume +ppa_por_params_offset: + .long ppa_por_params - . ENDPROC(omap4_cpu_resume) #endif /* CONFIG_ARCH_OMAP4 */ @@ -380,4 +380,13 @@ ENTRY(omap_do_wfi) nop ldmfd sp!, {pc} +ppa_zero_params_offset: + .long ppa_zero_params - . ENDPROC(omap_do_wfi) + + .data +ppa_zero_params: + .word 0 + +ppa_por_params: + .word 1, 0 diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig index def40a0dd60c..70ab4a25a5f8 100644 --- a/arch/arm/mach-realview/Kconfig +++ b/arch/arm/mach-realview/Kconfig @@ -1,5 +1,6 @@ menuconfig ARCH_REALVIEW - bool "ARM Ltd. RealView family" if ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7 + bool "ARM Ltd. RealView family" + depends on ARCH_MULTI_V5 || ARCH_MULTI_V6 || ARCH_MULTI_V7 select ARM_AMBA select ARM_TIMER_SP804 select COMMON_CLK_VERSATILE diff --git a/arch/arm/mach-realview/platsmp-dt.c b/arch/arm/mach-realview/platsmp-dt.c index 65585392655b..6964e8876061 100644 --- a/arch/arm/mach-realview/platsmp-dt.c +++ b/arch/arm/mach-realview/platsmp-dt.c @@ -80,7 +80,7 @@ static void __init realview_smp_prepare_cpus(unsigned int max_cpus) virt_to_phys(versatile_secondary_startup)); } -struct smp_operations realview_dt_smp_ops __initdata = { +static const struct smp_operations realview_dt_smp_ops __initconst = { .smp_prepare_cpus = realview_smp_prepare_cpus, .smp_secondary_init = versatile_secondary_init, .smp_boot_secondary = versatile_boot_secondary, diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index 9cb11215ceba..b3a4ed5289ec 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -4,7 +4,6 @@ extern void shmobile_init_delay(void); extern void shmobile_boot_vector(void); extern unsigned long shmobile_boot_fn; -extern unsigned long shmobile_boot_arg; extern unsigned long shmobile_boot_size; extern void shmobile_smp_boot(void); extern void shmobile_smp_sleep(void); diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S index fa5248c52399..5e503d91ad70 100644 --- a/arch/arm/mach-shmobile/headsmp-scu.S +++ b/arch/arm/mach-shmobile/headsmp-scu.S @@ -38,9 +38,3 @@ ENTRY(shmobile_boot_scu) b secondary_startup ENDPROC(shmobile_boot_scu) - - .text - .align 2 - .globl shmobile_scu_base -shmobile_scu_base: - .space 4 diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S index 330c1fc63197..32e0bf6e3ccb 100644 --- a/arch/arm/mach-shmobile/headsmp.S +++ b/arch/arm/mach-shmobile/headsmp.S @@ -24,7 +24,6 @@ .arm .align 12 ENTRY(shmobile_boot_vector) - ldr r0, 2f ldr r1, 1f bx r1 @@ -34,9 +33,6 @@ ENDPROC(shmobile_boot_vector) .globl shmobile_boot_fn shmobile_boot_fn: 1: .space 4 - .globl shmobile_boot_arg -shmobile_boot_arg: -2: .space 4 .globl shmobile_boot_size shmobile_boot_size: .long . - shmobile_boot_vector @@ -46,13 +42,15 @@ shmobile_boot_size: */ ENTRY(shmobile_smp_boot) - @ r0 = MPIDR_HWID_BITMASK mrc p15, 0, r1, c0, c0, 5 @ r1 = MPIDR - and r0, r1, r0 @ r0 = cpu_logical_map() value + and r0, r1, #0xffffff @ MPIDR_HWID_BITMASK + @ r0 = cpu_logical_map() value mov r1, #0 @ r1 = CPU index - adr r5, 1f @ array of per-cpu mpidr values - adr r6, 2f @ array of per-cpu functions - adr r7, 3f @ array of per-cpu arguments + adr r2, 1f + ldmia r2, {r5, r6, r7} + add r5, r5, r2 @ array of per-cpu mpidr values + add r6, r6, r2 @ array of per-cpu functions + add r7, r7, r2 @ array of per-cpu arguments shmobile_smp_boot_find_mpidr: ldr r8, [r5, r1, lsl #2] @@ -80,12 +78,18 @@ ENTRY(shmobile_smp_sleep) b shmobile_smp_boot ENDPROC(shmobile_smp_sleep) + .align 2 +1: .long shmobile_smp_mpidr - . + .long shmobile_smp_fn - 1b + .long shmobile_smp_arg - 1b + + .bss .globl shmobile_smp_mpidr shmobile_smp_mpidr: -1: .space NR_CPUS * 4 + .space NR_CPUS * 4 .globl shmobile_smp_fn shmobile_smp_fn: -2: .space NR_CPUS * 4 + .space NR_CPUS * 4 .globl shmobile_smp_arg shmobile_smp_arg: -3: .space NR_CPUS * 4 + .space NR_CPUS * 4 diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index 911884f7e28b..aba75c89f9c1 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -123,7 +123,6 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, { /* install boot code shared by all CPUs */ shmobile_boot_fn = virt_to_phys(shmobile_smp_boot); - shmobile_boot_arg = MPIDR_HWID_BITMASK; /* perform per-cpu setup */ apmu_parse_cfg(apmu_init_cpu, apmu_config, num); diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c index 64663110ab6c..081a097c9219 100644 --- a/arch/arm/mach-shmobile/platsmp-scu.c +++ b/arch/arm/mach-shmobile/platsmp-scu.c @@ -17,6 +17,9 @@ #include <asm/smp_scu.h> #include "common.h" + +void __iomem *shmobile_scu_base; + static int shmobile_smp_scu_notifier_call(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -41,7 +44,6 @@ void __init shmobile_smp_scu_prepare_cpus(unsigned int max_cpus) { /* install boot code shared by all CPUs */ shmobile_boot_fn = virt_to_phys(shmobile_smp_boot); - shmobile_boot_arg = MPIDR_HWID_BITMASK; /* enable SCU and cache coherency on booting CPU */ scu_enable(shmobile_scu_base); diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c index b854fe2095ad..0b024a9dbd43 100644 --- a/arch/arm/mach-shmobile/smp-r8a7779.c +++ b/arch/arm/mach-shmobile/smp-r8a7779.c @@ -92,8 +92,6 @@ static void __init r8a7779_smp_prepare_cpus(unsigned int max_cpus) { /* Map the reset vector (in headsmp-scu.S, headsmp.S) */ __raw_writel(__pa(shmobile_boot_vector), AVECR); - shmobile_boot_fn = virt_to_phys(shmobile_boot_scu); - shmobile_boot_arg = (unsigned long)shmobile_scu_base; /* setup r8a7779 specific SCU bits */ shmobile_scu_base = IOMEM(R8A7779_SCU_BASE); diff --git a/arch/arm/mach-tango/Kconfig b/arch/arm/mach-tango/Kconfig index d6a3714b096e..ebe15b93bbe8 100644 --- a/arch/arm/mach-tango/Kconfig +++ b/arch/arm/mach-tango/Kconfig @@ -1,5 +1,6 @@ config ARCH_TANGO - bool "Sigma Designs Tango4 (SMP87xx)" if ARCH_MULTI_V7 + bool "Sigma Designs Tango4 (SMP87xx)" + depends on ARCH_MULTI_V7 # Cortex-A9 MPCore r3p0, PL310 r3p2 select ARCH_HAS_HOLES_MEMORYMODEL select ARM_ERRATA_754322 diff --git a/arch/arm/mach-tango/platsmp.c b/arch/arm/mach-tango/platsmp.c index a18d5a34e2f5..a21f55e000d2 100644 --- a/arch/arm/mach-tango/platsmp.c +++ b/arch/arm/mach-tango/platsmp.c @@ -9,7 +9,7 @@ static int tango_boot_secondary(unsigned int cpu, struct task_struct *idle) return 0; } -static struct smp_operations tango_smp_ops __initdata = { +static const struct smp_operations tango_smp_ops __initconst = { .smp_boot_secondary = tango_boot_secondary, }; diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 4b4058db0781..66353caa35b9 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -173,7 +173,7 @@ unsigned long arch_mmap_rnd(void) { unsigned long rnd; - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index cf30daff8932..d19b1ad29b07 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -49,6 +49,9 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } + if (!numpages) + return 0; + if (start < MODULES_VADDR || start >= MODULES_END) return -EINVAL; diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c index 04aff2c31b46..70f2f699bed3 100644 --- a/arch/arm/plat-samsung/pm-check.c +++ b/arch/arm/plat-samsung/pm-check.c @@ -53,8 +53,8 @@ static void s3c_pm_run_res(struct resource *ptr, run_fn_t fn, u32 *arg) if (ptr->child != NULL) s3c_pm_run_res(ptr->child, fn, arg); - if ((ptr->flags & IORESOURCE_MEM) && - strcmp(ptr->name, "System RAM") == 0) { + if ((ptr->flags & IORESOURCE_SYSTEM_RAM) + == IORESOURCE_SYSTEM_RAM) { S3C_PMDBG("Found system RAM at %08lx..%08lx\n", (unsigned long)ptr->start, (unsigned long)ptr->end); diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S index b2b97e3e7bab..a62a7b64f49c 100644 --- a/arch/arm/vdso/vdso.S +++ b/arch/arm/vdso/vdso.S @@ -23,9 +23,8 @@ #include <linux/const.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .data..ro_after_init .balign PAGE_SIZE vdso_start: .incbin "arch/arm/vdso/vdso.so" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 307237cfe728..b5e3f6d42b88 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -88,7 +88,7 @@ Image: vmlinux Image.%: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -zinstall install: vmlinux +zinstall install: $(Q)$(MAKE) $(build)=$(boot) $@ %.dtb: scripts diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index abcbba2f01ba..305c552b5ec1 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -34,10 +34,10 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) -install: $(obj)/Image +install: $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)" -zinstall: $(obj)/Image.gz +zinstall: $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image.gz System.map "$(INSTALL_PATH)" diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index dd5158eb5872..e5b59ca9debb 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -115,6 +115,7 @@ <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi index da7b6e613257..933cba359918 100644 --- a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi @@ -23,9 +23,8 @@ soc0: soc@000000000 { }; }; - dsa: dsa@c7000000 { + dsaf0: dsa@c7000000 { compatible = "hisilicon,hns-dsaf-v1"; - dsa_name = "dsaf0"; mode = "6port-16rss"; interrupt-parent = <&mbigen_dsa>; @@ -127,7 +126,7 @@ soc0: soc@000000000 { eth0: ethernet@0{ compatible = "hisilicon,hns-nic-v1"; - ae-name = "dsaf0"; + ae-handle = <&dsaf0>; port-id = <0>; local-mac-address = [00 00 00 01 00 58]; status = "disabled"; @@ -135,14 +134,14 @@ soc0: soc@000000000 { }; eth1: ethernet@1{ compatible = "hisilicon,hns-nic-v1"; - ae-name = "dsaf0"; + ae-handle = <&dsaf0>; port-id = <1>; status = "disabled"; dma-coherent; }; eth2: ethernet@2{ compatible = "hisilicon,hns-nic-v1"; - ae-name = "dsaf0"; + ae-handle = <&dsaf0>; port-id = <2>; local-mac-address = [00 00 00 01 00 5a]; status = "disabled"; @@ -150,7 +149,7 @@ soc0: soc@000000000 { }; eth3: ethernet@3{ compatible = "hisilicon,hns-nic-v1"; - ae-name = "dsaf0"; + ae-handle = <&dsaf0>; port-id = <3>; local-mac-address = [00 00 00 01 00 5b]; status = "disabled"; @@ -158,7 +157,7 @@ soc0: soc@000000000 { }; eth4: ethernet@4{ compatible = "hisilicon,hns-nic-v1"; - ae-name = "dsaf0"; + ae-handle = <&dsaf0>; port-id = <4>; local-mac-address = [00 00 00 01 00 5c]; status = "disabled"; @@ -166,7 +165,7 @@ soc0: soc@000000000 { }; eth5: ethernet@5{ compatible = "hisilicon,hns-nic-v1"; - ae-name = "dsaf0"; + ae-handle = <&dsaf0>; port-id = <5>; local-mac-address = [00 00 00 01 00 5d]; status = "disabled"; @@ -174,7 +173,7 @@ soc0: soc@000000000 { }; eth6: ethernet@6{ compatible = "hisilicon,hns-nic-v1"; - ae-name = "dsaf0"; + ae-handle = <&dsaf0>; port-id = <6>; local-mac-address = [00 00 00 01 00 5e]; status = "disabled"; @@ -182,7 +181,7 @@ soc0: soc@000000000 { }; eth7: ethernet@7{ compatible = "hisilicon,hns-nic-v1"; - ae-name = "dsaf0"; + ae-handle = <&dsaf0>; port-id = <7>; local-mac-address = [00 00 00 01 00 5f]; status = "disabled"; diff --git a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts index 7dfe1c085966..62f33fc84e3e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts +++ b/arch/arm64/boot/dts/nvidia/tegra132-norrin.dts @@ -12,6 +12,8 @@ rtc1 = "/rtc@0,7000e000"; }; + chosen { }; + memory { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x80000000>; diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh index 12ed78aa6f0c..d91e1f022573 100644 --- a/arch/arm64/boot/install.sh +++ b/arch/arm64/boot/install.sh @@ -20,6 +20,20 @@ # $4 - default install path (blank if root directory) # +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + # User may have a custom install script if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 05d9e16c0dfd..7a3d22a46faf 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -294,7 +294,7 @@ static struct crypto_alg aes_algs[] = { { .cra_blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, + .ivsize = 0, .setkey = aes_setkey, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, @@ -371,7 +371,7 @@ static struct crypto_alg aes_algs[] = { { .cra_ablkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, + .ivsize = 0, .setkey = ablk_set_key, .encrypt = ablk_encrypt, .decrypt = ablk_decrypt, diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 2731d3b25ed2..8ec88e5b290f 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -103,6 +103,7 @@ static inline u64 gic_read_iar_common(void) u64 irqstat; asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + dsb(sy); return irqstat; } diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 7fc294c3bc5b..22dda613f9c9 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -156,8 +156,4 @@ int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #endif diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 007a69fc4f40..5f3ab8c1db55 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .align 3\n" " .quad 1b, 4b, 2b, 4b\n" " .popsection\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 738a95f93e49..d201d4b396d1 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -107,8 +107,6 @@ #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) -#define TCR_EL2_FLAGS (TCR_EL2_RES1 | TCR_EL2_PS_40B) - /* VTCR_EL2 Registers bits */ #define VTCR_EL2_RES1 (1 << 31) #define VTCR_EL2_PS_MASK (7 << 16) @@ -182,6 +180,7 @@ #define CPTR_EL2_TCPAC (1 << 31) #define CPTR_EL2_TTA (1 << 20) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) +#define CPTR_EL2_DEFAULT 0x000033ff /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_TDRA (1 << 11) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3066328cd86b..779a5872a2c5 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -127,10 +127,14 @@ static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { - u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + u32 mode; - if (vcpu_mode_is_32bit(vcpu)) + if (vcpu_mode_is_32bit(vcpu)) { + mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; return mode > COMPAT_PSR_MODE_USR; + } + + mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; return mode != PSR_MODE_EL0t; } diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 9b2f5a9d019d..ae615b9d9a55 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -39,6 +39,7 @@ #ifndef __ASSEMBLY__ +#include <linux/personality.h> /* for READ_IMPLIES_EXEC */ #include <asm/pgtable-types.h> extern void __cpu_clear_user_page(void *p, unsigned long user); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index bf464de33f52..819aff5d593f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -34,7 +34,7 @@ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. * - * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array + * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array * (rounded up to PUD_SIZE). * VMALLOC_START: beginning of the kernel VA space * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, @@ -51,7 +51,9 @@ #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) -#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) +#define VMEMMAP_START (VMALLOC_END + SZ_64K) +#define vmemmap ((struct page *)VMEMMAP_START - \ + SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT)) #define FIRST_USER_ADDRESS 0UL diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 8aee3aeec3e6..c536c9e307b9 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -226,11 +226,28 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) return retval; } +static void send_user_sigtrap(int si_code) +{ + struct pt_regs *regs = current_pt_regs(); + siginfo_t info = { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = si_code, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + if (WARN_ON(!user_mode(regs))) + return; + + if (interrupts_enabled(regs)) + local_irq_enable(); + + force_sig_info(SIGTRAP, &info, current); +} + static int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - siginfo_t info; - /* * If we are stepping a pending breakpoint, call the hw_breakpoint * handler first. @@ -239,11 +256,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; if (user_mode(regs)) { - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)instruction_pointer(regs); - force_sig_info(SIGTRAP, &info, current); + send_user_sigtrap(TRAP_HWBKPT); /* * ptrace will disable single step unless explicitly @@ -307,17 +320,8 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) static int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - siginfo_t info; - if (user_mode(regs)) { - info = (siginfo_t) { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); + send_user_sigtrap(TRAP_BRKPT); } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { pr_warning("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; @@ -328,7 +332,6 @@ static int brk_handler(unsigned long addr, unsigned int esr, int aarch32_break_handler(struct pt_regs *regs) { - siginfo_t info; u32 arm_instr; u16 thumb_instr; bool bp = false; @@ -359,14 +362,7 @@ int aarch32_break_handler(struct pt_regs *regs) if (!bp) return -EFAULT; - info = (siginfo_t) { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = pc, - }; - - force_sig_info(SIGTRAP, &info, current); + send_user_sigtrap(TRAP_BRKPT); return 0; } diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 999633bd7294..352f7abd91c9 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -89,6 +89,7 @@ __efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy); __efistub_memmove = KALLSYMS_HIDE(__pi_memmove); __efistub_memset = KALLSYMS_HIDE(__pi_memset); __efistub_strlen = KALLSYMS_HIDE(__pi_strlen); +__efistub_strnlen = KALLSYMS_HIDE(__pi_strnlen); __efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp); __efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp); __efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 8119479147db..450987d99b9b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -73,13 +73,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -210,7 +210,7 @@ static void __init request_standard_resources(void) res->name = "System RAM"; res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index e33fe33876ab..fd10eb663868 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -145,6 +145,10 @@ ENTRY(cpu_resume_mmu) ENDPROC(cpu_resume_mmu) .popsection cpu_resume_after_mmu: +#ifdef CONFIG_KASAN + mov x0, sp + bl kasan_unpoison_remaining_stack +#endif mov x0, #0 // return zero on success ldp x19, x20, [sp, #16] ldp x21, x22, [sp, #32] diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b1adc51b2c2e..460765799c64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -195,7 +195,7 @@ asmlinkage void secondary_start_kernel(void) /* * OK, it's off to the idle thread for us */ - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 4fad9787ab46..d9751a4769e7 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -44,14 +44,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long irq_stack_ptr; /* - * Use raw_smp_processor_id() to avoid false-positives from - * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping - * task stacks, we can be pre-empted in this case, so - * {raw_,}smp_processor_id() may give us the wrong value. Sleeping - * tasks can't ever be on an interrupt stack, so regardless of cpu, - * the checks will always fail. + * Switching between stacks is valid when tracing current and in + * non-preemptible context. */ - irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id()); + if (tsk == current && !preemptible()) + irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + else + irq_stack_ptr = 0; low = frame->sp; /* irq stacks are not THREAD_SIZE aligned */ @@ -64,8 +63,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) return -EINVAL; frame->sp = fp + 0x10; - frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 8); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk && tsk->ret_stack && diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index cbedd724f48e..c5392081b49b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -146,9 +146,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + unsigned long irq_stack_ptr; int skip; + /* + * Switching between stacks is valid when tracing current and in + * non-preemptible context. + */ + if (tsk == current && !preemptible()) + irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + else + irq_stack_ptr = 0; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); if (!tsk) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index fcb778899a38..9e54ad7c240a 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -194,7 +194,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } /** diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 3e568dcd907b..d073b5a216f7 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -64,7 +64,7 @@ __do_hyp_init: mrs x4, tcr_el1 ldr x5, =TCR_EL2_MASK and x4, x4, x5 - ldr x5, =TCR_EL2_FLAGS + mov x5, #TCR_EL2_RES1 orr x4, x4, x5 #ifndef CONFIG_ARM64_VA_BITS_48 @@ -85,15 +85,17 @@ __do_hyp_init: ldr_l x5, idmap_t0sz bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH #endif - msr tcr_el2, x4 - - ldr x4, =VTCR_EL2_FLAGS /* * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in - * VTCR_EL2. + * TCR_EL2 and VTCR_EL2. */ mrs x5, ID_AA64MMFR0_EL1 bfi x4, x5, #16, #3 + + msr tcr_el2, x4 + + ldr x4, =VTCR_EL2_FLAGS + bfi x4, x5, #16, #3 /* * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in * VTCR_EL2. diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index ca8f5a5e2f96..f0e7bdfae134 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -36,7 +36,11 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(val, hcr_el2); /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); - write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2); + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TFP; + write_sysreg(val, cptr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); } @@ -45,7 +49,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(HCR_RW, hcr_el2); write_sysreg(0, hstr_el2); write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); - write_sysreg(0, cptr_el2); + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); } static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 9142e082f5f3..5dd2a26444ec 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -149,16 +149,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) switch (nr_pri_bits) { case 7: - write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); - write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); - case 6: - write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); - default: - write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); - } - - switch (nr_pri_bits) { - case 7: write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); case 6: @@ -167,6 +157,16 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); } + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + } + switch (max_lr_idx) { case 15: write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 648112e90ed5..4d1ac81870d2 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -27,7 +27,11 @@ #define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ PSR_I_BIT | PSR_D_BIT) -#define EL1_EXCEPT_SYNC_OFFSET 0x200 + +#define CURRENT_EL_SP_EL0_VECTOR 0x0 +#define CURRENT_EL_SP_ELx_VECTOR 0x200 +#define LOWER_EL_AArch64_VECTOR 0x400 +#define LOWER_EL_AArch32_VECTOR 0x600 static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { @@ -97,6 +101,34 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, *fsr = 0x14; } +enum exception_type { + except_type_sync = 0, + except_type_irq = 0x80, + except_type_fiq = 0x100, + except_type_serror = 0x180, +}; + +static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) +{ + u64 exc_offset; + + switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) { + case PSR_MODE_EL1t: + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + break; + case PSR_MODE_EL1h: + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + break; + case PSR_MODE_EL0t: + exc_offset = LOWER_EL_AArch64_VECTOR; + break; + default: + exc_offset = LOWER_EL_AArch32_VECTOR; + } + + return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -108,8 +140,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; - *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; vcpu_sys_reg(vcpu, FAR_EL1) = addr; @@ -143,8 +175,8 @@ static void inject_undef64(struct kvm_vcpu *vcpu) *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; - *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; /* * Build an unknown exception, depending on the instruction diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index eec3598b4184..2e90371cfb37 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1007,10 +1007,9 @@ static int emulate_cp(struct kvm_vcpu *vcpu, if (likely(r->access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + /* Handled */ + return 0; } - - /* Handled */ - return 0; } /* Not handled */ @@ -1043,7 +1042,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, } /** - * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access * @vcpu: The VCPU pointer * @run: The kvm_run struct */ @@ -1095,7 +1094,7 @@ out: } /** - * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access + * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access * @vcpu: The VCPU pointer * @run: The kvm_run struct */ diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index 2ca665711bf2..eae38da6e0bb 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -168,4 +168,4 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ .Lhit_limit: mov len, limit ret -ENDPROC(strnlen) +ENDPIPROC(strnlen) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 331c4ca6205c..a6e757cbab77 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -933,6 +933,10 @@ static int __init __iommu_dma_init(void) ret = register_iommu_dma_ops_notifier(&platform_bus_type); if (!ret) ret = register_iommu_dma_ops_notifier(&amba_bustype); + + /* handle devices queued before this arch_initcall */ + if (!ret) + __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL); return ret; } arch_initcall(__iommu_dma_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 92ddac1e8ca2..abe2a9542b3a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -371,6 +371,13 @@ static int __kprobes do_translation_fault(unsigned long addr, return 0; } +static int do_alignment_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + do_bad_area(addr, esr, regs); + return 0; +} + /* * This abort handler always returns "fault". */ @@ -418,7 +425,7 @@ static struct fault_info { { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, { do_bad, SIGBUS, 0, "unknown 32" }, - { do_bad, SIGBUS, BUS_ADRALN, "alignment fault" }, + { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" }, { do_bad, SIGBUS, 0, "unknown 34" }, { do_bad, SIGBUS, 0, "unknown 35" }, { do_bad, SIGBUS, 0, "unknown 36" }, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 82d607c3614e..da30529bb1f6 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt) hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else if (ps == (PAGE_SIZE * CONT_PTES)) { - hugetlb_add_hstate(CONT_PTE_SHIFT); - } else if (ps == (PMD_SIZE * CONT_PMDS)) { - hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); return 0; @@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); - -#ifdef CONFIG_ARM64_64K_PAGES -static __init int add_default_hugepagesz(void) -{ - if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) - hugetlb_add_hstate(CONT_PMD_SHIFT); - return 0; -} -arch_initcall(add_default_hugepagesz); -#endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f3b061e67bfe..7802f216a67a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -319,8 +319,8 @@ void __init mem_init(void) #endif MLG(VMALLOC_START, VMALLOC_END), #ifdef CONFIG_SPARSEMEM_VMEMMAP - MLG((unsigned long)vmemmap, - (unsigned long)vmemmap + VMEMMAP_SIZE), + MLG(VMEMMAP_START, + VMEMMAP_START + VMEMMAP_SIZE), MLM((unsigned long)virt_to_page(PAGE_OFFSET), (unsigned long)virt_to_page(high_memory)), #endif diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 4c893b5189dd..232f787a088a 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -53,10 +53,10 @@ unsigned long arch_mmap_rnd(void) #ifdef CONFIG_COMPAT if (test_thread_flag(TIF_32BIT)) - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); else #endif - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index cf6240741134..0795c3a36d8f 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -14,6 +14,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/vmalloc.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -44,6 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages, unsigned long end = start + size; int ret; struct page_change_data data; + struct vm_struct *area; if (!PAGE_ALIGNED(addr)) { start &= PAGE_MASK; @@ -51,10 +53,23 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } - if (start < MODULES_VADDR || start >= MODULES_END) - return -EINVAL; - - if (end < MODULES_VADDR || end >= MODULES_END) + /* + * Kernel VA mappings are always live, and splitting live section + * mappings into page mappings may cause TLB conflicts. This means + * we have to ensure that changing the permission bits of the range + * we are operating on does not result in such splitting. + * + * Let's restrict ourselves to mappings created by vmalloc (or vmap). + * Those are guaranteed to consist entirely of page mappings, and + * splitting is never needed. + * + * So check whether the [addr, addr + size) interval is entirely + * covered by precisely one VM area that has the VM_ALLOC flag set. + */ + area = find_vm_area((void *)addr); + if (!area || + end > (unsigned long)area->addr + area->size || + !(area->flags & VM_ALLOC)) return -EINVAL; if (!numpages) diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c index 209ae5ad3495..e6928896da2a 100644 --- a/arch/avr32/kernel/setup.c +++ b/arch/avr32/kernel/setup.c @@ -49,13 +49,13 @@ static struct resource __initdata kernel_data = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM, + .flags = IORESOURCE_SYSTEM_RAM, }; static struct resource __initdata kernel_code = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM, + .flags = IORESOURCE_SYSTEM_RAM, .sibling = &kernel_data, }; @@ -134,7 +134,7 @@ add_physical_memory(resource_size_t start, resource_size_t end) new->start = start; new->end = end; new->name = "System RAM"; - new->flags = IORESOURCE_MEM; + new->flags = IORESOURCE_SYSTEM_RAM; *pprev = new; } diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 0030e21cfceb..23c4ef5f8bdc 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -333,7 +333,7 @@ void secondary_start_kernel(void) /* We are done with local CPU inits, unblock the boot CPU. */ set_cpu_online(cpu, true); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void __init smp_prepare_boot_cpu(void) diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c index 72e17f7ebd6f..786e36e2f61d 100644 --- a/arch/c6x/kernel/setup.c +++ b/arch/c6x/kernel/setup.c @@ -281,8 +281,6 @@ notrace void __init machine_init(unsigned long dt_ptr) */ set_ist(_vectors_start); - lockdep_init(); - /* * dtb is passed in from bootloader. * fdt is linked in blob. diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index ff759f26b96a..983bae7d2665 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -180,7 +180,7 @@ void start_secondary(void) local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index caae3f4e4341..300dac3702f1 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1178,7 +1178,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, efi_memory_desc_t *md; u64 efi_desc_size; char *name; - unsigned long flags; + unsigned long flags, desc; efi_map_start = __va(ia64_boot_param->efi_memmap); efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; @@ -1193,6 +1193,8 @@ efi_initialize_iomem_resources(struct resource *code_resource, continue; flags = IORESOURCE_MEM | IORESOURCE_BUSY; + desc = IORES_DESC_NONE; + switch (md->type) { case EFI_MEMORY_MAPPED_IO: @@ -1207,14 +1209,17 @@ efi_initialize_iomem_resources(struct resource *code_resource, if (md->attribute & EFI_MEMORY_WP) { name = "System ROM"; flags |= IORESOURCE_READONLY; - } else if (md->attribute == EFI_MEMORY_UC) + } else if (md->attribute == EFI_MEMORY_UC) { name = "Uncached RAM"; - else + } else { name = "System RAM"; + flags |= IORESOURCE_SYSRAM; + } break; case EFI_ACPI_MEMORY_NVS: name = "ACPI Non-volatile Storage"; + desc = IORES_DESC_ACPI_NV_STORAGE; break; case EFI_UNUSABLE_MEMORY: @@ -1224,6 +1229,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, case EFI_PERSISTENT_MEMORY: name = "Persistent Memory"; + desc = IORES_DESC_PERSISTENT_MEMORY; break; case EFI_RESERVED_TYPE: @@ -1246,6 +1252,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, res->start = md->phys_addr; res->end = md->phys_addr + efi_md_size(md) - 1; res->flags = flags; + res->desc = desc; if (insert_resource(&iomem_resource, res) < 0) kfree(res); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4f118b0d3091..2029a38a72ae 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -80,17 +80,17 @@ unsigned long vga_console_membase; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; unsigned long ia64_max_cacheline_size; diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 0e76fad27975..74fe317477e6 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -454,7 +454,7 @@ start_secondary (void *unused) preempt_disable(); smp_callin(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); return 0; } diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 836ac5a963c8..2841c0a3fd3b 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -276,6 +276,7 @@ source "kernel/Kconfig.preempt" config SMP bool "Symmetric multi-processing support" + depends on MMU ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, say N. If you have a system with more diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index a5ecef7188ba..136c69f1fb8a 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -70,14 +70,14 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; unsigned long memory_start; diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index a468467542f4..f98d2f6519d6 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -432,7 +432,7 @@ int __init start_secondary(void *unused) */ local_flush_tlb_all(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); return 0; } diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index fc96e814188e..d1fc4796025e 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -108,6 +108,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -266,6 +268,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -366,6 +374,7 @@ CONFIG_ARIADNE=y # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NETRONOME is not set CONFIG_HYDRA=y CONFIG_APNE=y CONFIG_ZORRO8390=y diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 05c904f08d9d..9bfe8be3658c 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -106,6 +106,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -264,6 +266,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -344,6 +352,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index d572b731c510..ebdcfae55580 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -106,6 +106,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -264,6 +266,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -353,6 +361,7 @@ CONFIG_ATARILANCE=y # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NETRONOME is not set CONFIG_NE2000=y # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 11a30c65ad44..8acc65e54995 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -104,6 +104,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -262,6 +264,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -343,6 +351,7 @@ CONFIG_BVME6000_NET=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 6630a5154b9d..0c6a3d52b26e 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -106,6 +106,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -264,6 +266,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -345,6 +353,7 @@ CONFIG_HPLANCE=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 1d90b71d0903..12a8a6cb32f4 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -105,6 +105,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -266,6 +268,12 @@ CONFIG_DEV_APPLETALK=m CONFIG_IPDDP=m CONFIG_IPDDP_ENCAP=y CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -362,6 +370,7 @@ CONFIG_MAC89x0=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set CONFIG_MACSONIC=y +# CONFIG_NET_VENDOR_NETRONOME is not set CONFIG_MAC8390=y # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 1fd21c1ca87f..64ff2dcb34c8 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -115,6 +115,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -276,6 +278,12 @@ CONFIG_DEV_APPLETALK=m CONFIG_IPDDP=m CONFIG_IPDDP_ENCAP=y CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -404,6 +412,7 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set CONFIG_MACSONIC=y +# CONFIG_NET_VENDOR_NETRONOME is not set CONFIG_HYDRA=y CONFIG_MAC8390=y CONFIG_NE2000=y diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 74e10f79d7b1..07fc6abcfe0c 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -103,6 +103,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -261,6 +263,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -343,6 +351,7 @@ CONFIG_MVME147_NET=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 7034e716f166..69903ded88f7 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -104,6 +104,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -262,6 +264,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -343,6 +351,7 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index f7deb5f702a6..bd8401686dde 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -104,6 +104,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -262,6 +264,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -352,6 +360,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NETRONOME is not set CONFIG_NE2000=y # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 0ce79eb0d805..5f9fb3ab9636 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -101,6 +101,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -259,6 +261,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -340,6 +348,7 @@ CONFIG_SUN3_82586=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 4cb787e4991f..5d1c674530e2 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -101,6 +101,8 @@ CONFIG_NFT_NAT=m CONFIG_NFT_QUEUE=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m +CONFIG_NFT_DUP_NETDEV=m +CONFIG_NFT_FWD_NETDEV=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m @@ -259,6 +261,12 @@ CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m CONFIG_6LOWPAN=m +CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m +CONFIG_6LOWPAN_GHC_UDP=m +CONFIG_6LOWPAN_GHC_ICMPV6=m +CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m +CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m +CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m CONFIG_BATMAN_ADV_DAT=y @@ -341,6 +349,7 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index f9d96bf86910..bafaff6dcd7b 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 376 +#define NR_syscalls 377 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 36cf129de663..0ca729665f29 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -381,5 +381,6 @@ #define __NR_userfaultfd 373 #define __NR_membarrier 374 #define __NR_mlock2 375 +#define __NR_copy_file_range 376 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index 282cd903f4c4..8bb94261ff97 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -396,3 +396,4 @@ ENTRY(sys_call_table) .long sys_userfaultfd .long sys_membarrier .long sys_mlock2 /* 375 */ + .long sys_copy_file_range diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index c3c6f0864881..bad13232de51 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void) /* * OK, it's off to the idle thread for us */ - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 89a2a9394927..f31ebb5dc26c 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -130,8 +130,6 @@ void __init machine_early_init(const char *cmdline, unsigned int ram, memset(__bss_start, 0, __bss_stop-__bss_start); memset(_ssbss, 0, _esbss-_ssbss); - lockdep_init(); - /* initialize device tree for usage in early_printk */ early_init_devtree(_fdt_start); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 57a945e832f4..a65eacf59918 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -151,6 +151,7 @@ config BMIPS_GENERIC select CSRC_R4K select SYNC_R4K select COMMON_CLK + select BCM6345_L1_IRQ select BCM7038_L1_IRQ select BCM7120_L2_IRQ select BRCMSTB_L2_IRQ @@ -2085,7 +2086,7 @@ config PAGE_SIZE_32KB config PAGE_SIZE_64KB bool "64kB" - depends on !CPU_R3000 && !CPU_TX39XX + depends on !CPU_R3000 && !CPU_TX39XX && !CPU_R6000 help Using 64kB page size will result in higher performance kernel at the price of higher memory consumption. This option is available on @@ -2169,7 +2170,6 @@ config MIPS_MT_SMP select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select SYNC_R4K - select MIPS_GIC_IPI select MIPS_MT select SMP select SMP_UP @@ -2267,7 +2267,6 @@ config MIPS_VPE_APSP_API_MT config MIPS_CMP bool "MIPS CMP framework support (DEPRECATED)" depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6 - select MIPS_GIC_IPI select SMP select SYNC_R4K select SYS_SUPPORTS_SMP @@ -2287,7 +2286,6 @@ config MIPS_CPS select MIPS_CM select MIPS_CPC select MIPS_CPS_PM if HOTPLUG_CPU - select MIPS_GIC_IPI select SMP select SYNC_R4K if (CEVT_R4K || CSRC_R4K) select SYS_SUPPORTS_HOTPLUG_CPU @@ -2305,9 +2303,6 @@ config MIPS_CPS_PM select MIPS_CPC bool -config MIPS_GIC_IPI - bool - config MIPS_CM bool diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 511c06560dc1..2dfff1f19004 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -26,90 +26,6 @@ #include "common.h" #include "machtypes.h" -static void __init ath79_misc_intc_domain_init( - struct device_node *node, int irq); - -static void ath79_misc_irq_handler(struct irq_desc *desc) -{ - struct irq_domain *domain = irq_desc_get_handler_data(desc); - void __iomem *base = domain->host_data; - u32 pending; - - pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) & - __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); - - if (!pending) { - spurious_interrupt(); - return; - } - - while (pending) { - int bit = __ffs(pending); - - generic_handle_irq(irq_linear_revmap(domain, bit)); - pending &= ~BIT(bit); - } -} - -static void ar71xx_misc_irq_unmask(struct irq_data *d) -{ - void __iomem *base = irq_data_get_irq_chip_data(d); - unsigned int irq = d->hwirq; - u32 t; - - t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); - __raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE); - - /* flush write */ - __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); -} - -static void ar71xx_misc_irq_mask(struct irq_data *d) -{ - void __iomem *base = irq_data_get_irq_chip_data(d); - unsigned int irq = d->hwirq; - u32 t; - - t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); - __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE); - - /* flush write */ - __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); -} - -static void ar724x_misc_irq_ack(struct irq_data *d) -{ - void __iomem *base = irq_data_get_irq_chip_data(d); - unsigned int irq = d->hwirq; - u32 t; - - t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS); - __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS); - - /* flush write */ - __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS); -} - -static struct irq_chip ath79_misc_irq_chip = { - .name = "MISC", - .irq_unmask = ar71xx_misc_irq_unmask, - .irq_mask = ar71xx_misc_irq_mask, -}; - -static void __init ath79_misc_irq_init(void) -{ - if (soc_is_ar71xx() || soc_is_ar913x()) - ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; - else if (soc_is_ar724x() || - soc_is_ar933x() || - soc_is_ar934x() || - soc_is_qca955x()) - ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; - else - BUG(); - - ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6)); -} static void ar934x_ip2_irq_dispatch(struct irq_desc *desc) { @@ -212,142 +128,12 @@ static void qca955x_irq_init(void) irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch); } -/* - * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for - * these devices typically allocate coherent DMA memory, however the - * DMA controller may still have some unsynchronized data in the FIFO. - * Issue a flush in the handlers to ensure that the driver sees - * the update. - * - * This array map the interrupt lines to the DDR write buffer channels. - */ - -static unsigned irq_wb_chan[8] = { - -1, -1, -1, -1, -1, -1, -1, -1, -}; - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned long pending; - int irq; - - pending = read_c0_status() & read_c0_cause() & ST0_IM; - - if (!pending) { - spurious_interrupt(); - return; - } - - pending >>= CAUSEB_IP; - while (pending) { - irq = fls(pending) - 1; - if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1) - ath79_ddr_wb_flush(irq_wb_chan[irq]); - do_IRQ(MIPS_CPU_IRQ_BASE + irq); - pending &= ~BIT(irq); - } -} - -static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) -{ - irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq); - irq_set_chip_data(irq, d->host_data); - return 0; -} - -static const struct irq_domain_ops misc_irq_domain_ops = { - .xlate = irq_domain_xlate_onecell, - .map = misc_map, -}; - -static void __init ath79_misc_intc_domain_init( - struct device_node *node, int irq) -{ - void __iomem *base = ath79_reset_base; - struct irq_domain *domain; - - domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT, - ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base); - if (!domain) - panic("Failed to add MISC irqdomain"); - - /* Disable and clear all interrupts */ - __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); - __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS); - - irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain); -} - -static int __init ath79_misc_intc_of_init( - struct device_node *node, struct device_node *parent) -{ - int irq; - - irq = irq_of_parse_and_map(node, 0); - if (!irq) - panic("Failed to get MISC IRQ"); - - ath79_misc_intc_domain_init(node, irq); - return 0; -} - -static int __init ar7100_misc_intc_of_init( - struct device_node *node, struct device_node *parent) -{ - ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; - return ath79_misc_intc_of_init(node, parent); -} - -IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc", - ar7100_misc_intc_of_init); - -static int __init ar7240_misc_intc_of_init( - struct device_node *node, struct device_node *parent) -{ - ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; - return ath79_misc_intc_of_init(node, parent); -} - -IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", - ar7240_misc_intc_of_init); - -static int __init ar79_cpu_intc_of_init( - struct device_node *node, struct device_node *parent) -{ - int err, i, count; - - /* Fill the irq_wb_chan table */ - count = of_count_phandle_with_args( - node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells"); - - for (i = 0; i < count; i++) { - struct of_phandle_args args; - u32 irq = i; - - of_property_read_u32_index( - node, "qca,ddr-wb-channel-interrupts", i, &irq); - if (irq >= ARRAY_SIZE(irq_wb_chan)) - continue; - - err = of_parse_phandle_with_args( - node, "qca,ddr-wb-channels", - "#qca,ddr-wb-channel-cells", - i, &args); - if (err) - return err; - - irq_wb_chan[irq] = args.args[0]; - pr_info("IRQ: Set flush channel of IRQ%d to %d\n", - irq, args.args[0]); - } - - return mips_cpu_irq_of_init(node, parent); -} -IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc", - ar79_cpu_intc_of_init); - void __init arch_init_irq(void) { + unsigned irq_wb_chan2 = -1; + unsigned irq_wb_chan3 = -1; + bool misc_is_ar71xx; + if (mips_machtype == ATH79_MACH_GENERIC_OF) { irqchip_init(); return; @@ -355,14 +141,26 @@ void __init arch_init_irq(void) if (soc_is_ar71xx() || soc_is_ar724x() || soc_is_ar913x() || soc_is_ar933x()) { - irq_wb_chan[2] = 3; - irq_wb_chan[3] = 2; + irq_wb_chan2 = 3; + irq_wb_chan3 = 2; } else if (soc_is_ar934x()) { - irq_wb_chan[3] = 2; + irq_wb_chan3 = 2; } - mips_cpu_irq_init(); - ath79_misc_irq_init(); + ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3); + + if (soc_is_ar71xx() || soc_is_ar913x()) + misc_is_ar71xx = true; + else if (soc_is_ar724x() || + soc_is_ar933x() || + soc_is_ar934x() || + soc_is_qca955x()) + misc_is_ar71xx = false; + else + BUG(); + ath79_misc_irq_init( + ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS, + ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx); if (soc_is_ar934x()) ar934x_ip2_irq_init(); diff --git a/arch/mips/bmips/irq.c b/arch/mips/bmips/irq.c index e7fc6f9348ba..7efefcf44033 100644 --- a/arch/mips/bmips/irq.c +++ b/arch/mips/bmips/irq.c @@ -15,6 +15,12 @@ #include <asm/irq_cpu.h> #include <asm/time.h> +static const struct of_device_id smp_intc_dt_match[] = { + { .compatible = "brcm,bcm7038-l1-intc" }, + { .compatible = "brcm,bcm6345-l1-intc" }, + {} +}; + unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; @@ -24,8 +30,8 @@ void __init arch_init_irq(void) { struct device_node *dn; - /* Only the STB (bcm7038) controller supports SMP IRQ affinity */ - dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc"); + /* Only these controllers support SMP IRQ affinity */ + dn = of_find_matching_node(NULL, smp_intc_dt_match); if (dn) of_node_put(dn); else diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index 408799a839b4..f7521142deda 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -17,7 +17,7 @@ #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) #endif -#ifdef CONFIG_MACH_JZ4740 +#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780) #include <asm/mach-jz4740/base.h> #define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset)) #endif diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi index 459b9b252c3b..d61b1616b604 100644 --- a/arch/mips/boot/dts/brcm/bcm6328.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi @@ -74,6 +74,7 @@ timer: timer@10000040 { compatible = "syscon"; reg = <0x10000040 0x2c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi index 4fc7ecee273c..1a7efa883c5e 100644 --- a/arch/mips/boot/dts/brcm/bcm7125.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi @@ -98,6 +98,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7125-sun-top-ctrl", "syscon"; reg = <0x404000 0x60c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi index a3039bb53477..d4bf52cfcf17 100644 --- a/arch/mips/boot/dts/brcm/bcm7346.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi @@ -118,6 +118,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7346-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi index 4274ff41ec21..8e2501694d03 100644 --- a/arch/mips/boot/dts/brcm/bcm7358.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi @@ -112,6 +112,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7358-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi index 0dcc9163c27b..7e5f76040fb8 100644 --- a/arch/mips/boot/dts/brcm/bcm7360.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi @@ -112,6 +112,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7360-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi index 2f3f9fc2c478..c739ea77acb0 100644 --- a/arch/mips/boot/dts/brcm/bcm7362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi @@ -118,6 +118,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7362-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi index bee221b3b568..5f55d0a50a28 100644 --- a/arch/mips/boot/dts/brcm/bcm7420.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi @@ -99,6 +99,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7420-sun-top-ctrl", "syscon"; reg = <0x404000 0x60c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi index 571f30f52e3f..e24d41ab4e30 100644 --- a/arch/mips/boot/dts/brcm/bcm7425.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi @@ -100,6 +100,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi index 614ee211f71a..8b9432cc062b 100644 --- a/arch/mips/boot/dts/brcm/bcm7435.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi @@ -114,6 +114,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index cefb7a596878..e090fc388e02 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -227,7 +227,7 @@ struct mips_elf_abiflags_v0 { int __res = 1; \ struct elfhdr *__h = (hdr); \ \ - if (__h->e_machine != EM_MIPS) \ + if (!mips_elf_check_machine(__h)) \ __res = 0; \ if (__h->e_ident[EI_CLASS] != ELFCLASS32) \ __res = 0; \ @@ -258,7 +258,7 @@ struct mips_elf_abiflags_v0 { int __res = 1; \ struct elfhdr *__h = (hdr); \ \ - if (__h->e_machine != EM_MIPS) \ + if (!mips_elf_check_machine(__h)) \ __res = 0; \ if (__h->e_ident[EI_CLASS] != ELFCLASS64) \ __res = 0; \ @@ -285,6 +285,11 @@ struct mips_elf_abiflags_v0 { #endif /* !defined(ELF_ARCH) */ +#define mips_elf_check_machine(x) ((x)->e_machine == EM_MIPS) + +#define vmcore_elf32_check_arch mips_elf_check_machine +#define vmcore_elf64_check_arch mips_elf_check_machine + struct mips_abi; extern struct mips_abi mips_abi; diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 9cbf383b8834..f06f97bd62df 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -179,6 +179,10 @@ static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) if (save) _save_fp(tsk); __disable_fpu(); + } else { + /* FPU should not have been left enabled with no owner */ + WARN(read_c0_status() & ST0_CU1, + "Orphaned FPU left enabled"); } KSTK_STATUS(tsk) &= ~ST0_CU1; clear_tsk_thread_flag(tsk, TIF_USEDFPU); diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h index 2b3487213d1e..441faa92c3cd 100644 --- a/arch/mips/include/asm/mach-ath79/ath79.h +++ b/arch/mips/include/asm/mach-ath79/ath79.h @@ -144,4 +144,8 @@ static inline u32 ath79_reset_rr(unsigned reg) void ath79_device_reset_set(u32 mask); void ath79_device_reset_clear(u32 mask); +void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3); +void ath79_misc_irq_init(void __iomem *regs, int irq, + int irq_base, bool is_ar71xx); + #endif /* __ASM_MACH_ATH79_H */ diff --git a/arch/mips/include/asm/octeon/octeon-feature.h b/arch/mips/include/asm/octeon/octeon-feature.h index 8ebd3f579b84..3ed10a8d7865 100644 --- a/arch/mips/include/asm/octeon/octeon-feature.h +++ b/arch/mips/include/asm/octeon/octeon-feature.h @@ -128,7 +128,8 @@ static inline int octeon_has_feature(enum octeon_feature feature) case OCTEON_FEATURE_PCIE: return OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN52XX) - || OCTEON_IS_MODEL(OCTEON_CN6XXX); + || OCTEON_IS_MODEL(OCTEON_CN6XXX) + || OCTEON_IS_MODEL(OCTEON_CN7XXX); case OCTEON_FEATURE_SRIO: return OCTEON_IS_MODEL(OCTEON_CN63XX) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 3f832c3dd8f5..041153f5cf93 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -45,7 +45,7 @@ extern unsigned int vced_count, vcei_count; * User space process size: 2GB. This is hardcoded into a few places, * so don't change it unless you know what you are doing. */ -#define TASK_SIZE 0x7fff8000UL +#define TASK_SIZE 0x80000000UL #endif #define STACK_TOP_MAX TASK_SIZE diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 6ba1fb8b11e2..db7c322f057f 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -44,8 +44,9 @@ static inline void plat_smp_setup(void) mp_ops->smp_setup(); } -extern void gic_send_ipi_single(int cpu, unsigned int action); -extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action); +extern void mips_smp_send_ipi_single(int cpu, unsigned int action); +extern void mips_smp_send_ipi_mask(const struct cpumask *mask, + unsigned int action); #else /* !CONFIG_SMP */ diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index a71da576883c..eebf39549606 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -289,7 +289,7 @@ .set reorder .set noat mfc0 a0, CP0_STATUS - li v1, 0xff00 + li v1, ST0_CU1 | ST0_IM ori a0, STATMASK xori a0, STATMASK mtc0 a0, CP0_STATUS @@ -330,7 +330,7 @@ ori a0, STATMASK xori a0, STATMASK mtc0 a0, CP0_STATUS - li v1, 0xff00 + li v1, ST0_CU1 | ST0_FR | ST0_IM and a0, v1 LONG_L v0, PT_STATUS(sp) nor v1, $0, v1 diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 6499d93ae68d..47bc45a67e9b 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -101,10 +101,8 @@ static inline void syscall_get_arguments(struct task_struct *task, /* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */ if ((config_enabled(CONFIG_32BIT) || test_tsk_thread_flag(task, TIF_32BIT_REGS)) && - (regs->regs[2] == __NR_syscall)) { + (regs->regs[2] == __NR_syscall)) i++; - n++; - } while (n--) ret |= mips_get_syscall_arg(args++, task, regs, i++); diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 90f03a7da665..3129795de940 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -380,16 +380,17 @@ #define __NR_userfaultfd (__NR_Linux + 357) #define __NR_membarrier (__NR_Linux + 358) #define __NR_mlock2 (__NR_Linux + 359) +#define __NR_copy_file_range (__NR_Linux + 360) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 359 +#define __NR_Linux_syscalls 360 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 359 +#define __NR_O32_Linux_syscalls 360 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -717,16 +718,17 @@ #define __NR_userfaultfd (__NR_Linux + 317) #define __NR_membarrier (__NR_Linux + 318) #define __NR_mlock2 (__NR_Linux + 319) +#define __NR_copy_file_range (__NR_Linux + 320) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 319 +#define __NR_Linux_syscalls 320 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 319 +#define __NR_64_Linux_syscalls 320 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1058,15 +1060,16 @@ #define __NR_userfaultfd (__NR_Linux + 321) #define __NR_membarrier (__NR_Linux + 322) #define __NR_mlock2 (__NR_Linux + 323) +#define __NR_copy_file_range (__NR_Linux + 324) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 323 +#define __NR_Linux_syscalls 324 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 323 +#define __NR_N32_Linux_syscalls 324 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c index 8c6d76c9b2d6..d9907e57e9b9 100644 --- a/arch/mips/jz4740/gpio.c +++ b/arch/mips/jz4740/gpio.c @@ -270,7 +270,7 @@ uint32_t jz_gpio_port_get_value(int port, uint32_t mask) } EXPORT_SYMBOL(jz_gpio_port_get_value); -#define IRQ_TO_BIT(irq) BIT(irq_to_gpio(irq) & 0x1f) +#define IRQ_TO_BIT(irq) BIT((irq - JZ4740_IRQ_GPIO(0)) & 0x1f) static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int irq) { diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 68e2b7db9348..b0988fd62fcc 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -52,7 +52,6 @@ obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o obj-$(CONFIG_MIPS_CMP) += smp-cmp.o obj-$(CONFIG_MIPS_CPS) += smp-cps.o cps-vec.o obj-$(CONFIG_MIPS_CPS_NS16550) += cps-vec-ns16550.o -obj-$(CONFIG_MIPS_GIC_IPI) += smp-gic.o obj-$(CONFIG_MIPS_SPRAM) += spram.o obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 1188e00bb120..1b992c6e3d8e 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -35,7 +35,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; int __res = 1; \ struct elfhdr *__h = (hdr); \ \ - if (__h->e_machine != EM_MIPS) \ + if (!mips_elf_check_machine(__h)) \ __res = 0; \ if (__h->e_ident[EI_CLASS] != ELFCLASS32) \ __res = 0; \ diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index 928767858b86..abd3affe5fb3 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -47,7 +47,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; int __res = 1; \ struct elfhdr *__h = (hdr); \ \ - if (__h->e_machine != EM_MIPS) \ + if (!mips_elf_check_machine(__h)) \ __res = 0; \ if (__h->e_ident[EI_CLASS] != ELFCLASS32) \ __res = 0; \ diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index f2975d4d1e44..eddd5fd6fdfa 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -65,12 +65,10 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK); status |= KU_USER; regs->cp0_status = status; + lose_fpu(0); + clear_thread_flag(TIF_MSA_CTX_LIVE); clear_used_math(); - clear_fpu_owner(); init_dsp(); - clear_thread_flag(TIF_USEDMSA); - clear_thread_flag(TIF_MSA_CTX_LIVE); - disable_msa(); regs->cp0_epc = pc; regs->regs[29] = sp; } diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 5ce3b746cedc..b4ac6374a38f 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -125,7 +125,7 @@ LEAF(_restore_fp_context) END(_restore_fp_context) .set reorder - .type fault@function + .type fault, @function .ent fault fault: li v0, -EFAULT jr ra diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index f09546ee2cdc..17732f876eff 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -358,7 +358,7 @@ LEAF(_restore_msa_all_upper) .set reorder - .type fault@function + .type fault, @function .ent fault fault: li v0, -EFAULT # failure jr ra diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 2d23c834ba96..a56317444bda 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -595,3 +595,4 @@ EXPORT(sys_call_table) PTR sys_userfaultfd PTR sys_membarrier PTR sys_mlock2 + PTR sys_copy_file_range /* 4360 */ diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index deac63315d0e..2b2dc14610d0 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -433,4 +433,5 @@ EXPORT(sys_call_table) PTR sys_userfaultfd PTR sys_membarrier PTR sys_mlock2 + PTR sys_copy_file_range /* 5320 */ .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 5a69eb48d0a8..2bf5c8593d91 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -423,4 +423,5 @@ EXPORT(sysn32_call_table) PTR sys_userfaultfd PTR sys_membarrier PTR sys_mlock2 + PTR sys_copy_file_range .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index e4b6d7c97822..c5b759e584c7 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -578,4 +578,5 @@ EXPORT(sys32_call_table) PTR sys_userfaultfd PTR sys_membarrier PTR sys_mlock2 + PTR sys_copy_file_range /* 4360 */ .size sys32_call_table,.-sys32_call_table diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 569a7d5242dd..4f607341a793 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -732,21 +732,23 @@ static void __init resource_init(void) end = HIGHMEM_START - 1; res = alloc_bootmem(sizeof(struct resource)); + + res->start = start; + res->end = end; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + switch (boot_mem_map.map[i].type) { case BOOT_MEM_RAM: case BOOT_MEM_INIT_RAM: case BOOT_MEM_ROM_DATA: res->name = "System RAM"; + res->flags |= IORESOURCE_SYSRAM; break; case BOOT_MEM_RESERVED: default: res->name = "reserved"; } - res->start = start; - res->end = end; - - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); /* @@ -782,6 +784,7 @@ static inline void prefill_possible_map(void) {} void __init setup_arch(char **cmdline_p) { cpu_probe(); + mips_cm_probe(); prom_init(); setup_early_fdc_console(); diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index d5e0f949dc48..76923349b4fe 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -149,8 +149,8 @@ void __init cmp_prepare_cpus(unsigned int max_cpus) } struct plat_smp_ops cmp_smp_ops = { - .send_ipi_single = gic_send_ipi_single, - .send_ipi_mask = gic_send_ipi_mask, + .send_ipi_single = mips_smp_send_ipi_single, + .send_ipi_mask = mips_smp_send_ipi_mask, .init_secondary = cmp_init_secondary, .smp_finish = cmp_smp_finish, .boot_secondary = cmp_boot_secondary, diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 2ad4e4c96d61..253e1409338c 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -472,8 +472,8 @@ static struct plat_smp_ops cps_smp_ops = { .boot_secondary = cps_boot_secondary, .init_secondary = cps_init_secondary, .smp_finish = cps_smp_finish, - .send_ipi_single = gic_send_ipi_single, - .send_ipi_mask = gic_send_ipi_mask, + .send_ipi_single = mips_smp_send_ipi_single, + .send_ipi_mask = mips_smp_send_ipi_mask, #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cps_cpu_disable, .cpu_die = cps_cpu_die, diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 86311a164ef1..4f9570a57e8d 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -121,7 +121,7 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action) #ifdef CONFIG_MIPS_GIC if (gic_present) { - gic_send_ipi_single(cpu, action); + mips_smp_send_ipi_single(cpu, action); return; } #endif diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index bd4385a8e6e8..37708d9af638 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -33,12 +33,16 @@ #include <linux/cpu.h> #include <linux/err.h> #include <linux/ftrace.h> +#include <linux/irqdomain.h> +#include <linux/of.h> +#include <linux/of_irq.h> #include <linux/atomic.h> #include <asm/cpu.h> #include <asm/processor.h> #include <asm/idle.h> #include <asm/r4k-timer.h> +#include <asm/mips-cpc.h> #include <asm/mmu_context.h> #include <asm/time.h> #include <asm/setup.h> @@ -79,6 +83,11 @@ static cpumask_t cpu_core_setup_map; cpumask_t cpu_coherent_mask; +#ifdef CONFIG_GENERIC_IRQ_IPI +static struct irq_desc *call_desc; +static struct irq_desc *sched_desc; +#endif + static inline void set_cpu_sibling_map(int cpu) { int i; @@ -121,6 +130,7 @@ static inline void calculate_cpu_foreign_map(void) cpumask_t temp_foreign_map; /* Re-calculate the mask */ + cpumask_clear(&temp_foreign_map); for_each_online_cpu(i) { core_present = 0; for_each_cpu(k, &temp_foreign_map) @@ -145,6 +155,133 @@ void register_smp_ops(struct plat_smp_ops *ops) mp_ops = ops; } +#ifdef CONFIG_GENERIC_IRQ_IPI +void mips_smp_send_ipi_single(int cpu, unsigned int action) +{ + mips_smp_send_ipi_mask(cpumask_of(cpu), action); +} + +void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + unsigned long flags; + unsigned int core; + int cpu; + + local_irq_save(flags); + + switch (action) { + case SMP_CALL_FUNCTION: + __ipi_send_mask(call_desc, mask); + break; + + case SMP_RESCHEDULE_YOURSELF: + __ipi_send_mask(sched_desc, mask); + break; + + default: + BUG(); + } + + if (mips_cpc_present()) { + for_each_cpu(cpu, mask) { + core = cpu_data[cpu].core; + + if (core == current_cpu_data.core) + continue; + + while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) { + mips_cpc_lock_other(core); + write_cpc_co_cmd(CPC_Cx_CMD_PWRUP); + mips_cpc_unlock_other(); + } + } + } + + local_irq_restore(flags); +} + + +static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) +{ + scheduler_ipi(); + + return IRQ_HANDLED; +} + +static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) +{ + generic_smp_call_function_interrupt(); + + return IRQ_HANDLED; +} + +static struct irqaction irq_resched = { + .handler = ipi_resched_interrupt, + .flags = IRQF_PERCPU, + .name = "IPI resched" +}; + +static struct irqaction irq_call = { + .handler = ipi_call_interrupt, + .flags = IRQF_PERCPU, + .name = "IPI call" +}; + +static __init void smp_ipi_init_one(unsigned int virq, + struct irqaction *action) +{ + int ret; + + irq_set_handler(virq, handle_percpu_irq); + ret = setup_irq(virq, action); + BUG_ON(ret); +} + +static int __init mips_smp_ipi_init(void) +{ + unsigned int call_virq, sched_virq; + struct irq_domain *ipidomain; + struct device_node *node; + + node = of_irq_find_parent(of_root); + ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI); + + /* + * Some platforms have half DT setup. So if we found irq node but + * didn't find an ipidomain, try to search for one that is not in the + * DT. + */ + if (node && !ipidomain) + ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI); + + BUG_ON(!ipidomain); + + call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask); + BUG_ON(!call_virq); + + sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask); + BUG_ON(!sched_virq); + + if (irq_domain_is_ipi_per_cpu(ipidomain)) { + int cpu; + + for_each_cpu(cpu, cpu_possible_mask) { + smp_ipi_init_one(call_virq + cpu, &irq_call); + smp_ipi_init_one(sched_virq + cpu, &irq_resched); + } + } else { + smp_ipi_init_one(call_virq, &irq_call); + smp_ipi_init_one(sched_virq, &irq_resched); + } + + call_desc = irq_to_desc(call_virq); + sched_desc = irq_to_desc(sched_virq); + + return 0; +} +early_initcall(mips_smp_ipi_init); +#endif + /* * First C code run on the secondary CPUs after being started up by * the master. @@ -191,7 +328,7 @@ asmlinkage void start_secondary(void) WARN_ON_ONCE(!irqs_disabled()); mp_ops->smp_finish(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } static void stop_this_cpu(void *dummy) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index bafcb7ad5c85..bf14da9f3e33 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -663,7 +663,7 @@ static int simulate_rdhwr_normal(struct pt_regs *regs, unsigned int opcode) return -1; } -static int simulate_rdhwr_mm(struct pt_regs *regs, unsigned short opcode) +static int simulate_rdhwr_mm(struct pt_regs *regs, unsigned int opcode) { if ((opcode & MM_POOL32A_FUNC) == MM_RDHWR) { int rd = (opcode & MM_RS) >> 16; @@ -690,15 +690,15 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) asmlinkage void do_ov(struct pt_regs *regs) { enum ctx_state prev_state; - siginfo_t info; + siginfo_t info = { + .si_signo = SIGFPE, + .si_code = FPE_INTOVF, + .si_addr = (void __user *)regs->cp0_epc, + }; prev_state = exception_enter(); die_if_kernel("Integer overflow", regs); - info.si_code = FPE_INTOVF; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); exception_exit(prev_state); } @@ -874,7 +874,7 @@ out: void do_trap_or_bp(struct pt_regs *regs, unsigned int code, const char *str) { - siginfo_t info; + siginfo_t info = { 0 }; char b[40]; #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -903,7 +903,6 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, else info.si_code = FPE_INTOVF; info.si_signo = SIGFPE; - info.si_errno = 0; info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); break; @@ -1119,11 +1118,12 @@ no_r2_instr: if (get_isa16_mode(regs->cp0_epc)) { unsigned short mmop[2] = { 0 }; - if (unlikely(get_user(mmop[0], epc) < 0)) + if (unlikely(get_user(mmop[0], (u16 __user *)epc + 0) < 0)) status = SIGSEGV; - if (unlikely(get_user(mmop[1], epc) < 0)) + if (unlikely(get_user(mmop[1], (u16 __user *)epc + 1) < 0)) status = SIGSEGV; - opcode = (mmop[0] << 16) | mmop[1]; + opcode = mmop[0]; + opcode = (opcode << 16) | mmop[1]; if (status < 0) status = simulate_rdhwr_mm(regs, opcode); @@ -1369,26 +1369,12 @@ asmlinkage void do_cpu(struct pt_regs *regs) if (unlikely(compute_return_epc(regs) < 0)) break; - if (get_isa16_mode(regs->cp0_epc)) { - unsigned short mmop[2] = { 0 }; - - if (unlikely(get_user(mmop[0], epc) < 0)) - status = SIGSEGV; - if (unlikely(get_user(mmop[1], epc) < 0)) - status = SIGSEGV; - opcode = (mmop[0] << 16) | mmop[1]; - - if (status < 0) - status = simulate_rdhwr_mm(regs, opcode); - } else { + if (!get_isa16_mode(regs->cp0_epc)) { if (unlikely(get_user(opcode, epc) < 0)) status = SIGSEGV; if (!cpu_has_llsc && status < 0) status = simulate_llsc(regs, opcode); - - if (status < 0) - status = simulate_rdhwr_normal(regs, opcode); } if (status < 0) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 8bc3977576e6..70ef1a43c114 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -445,8 +445,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, dvcpu->arch.wait = 0; - if (waitqueue_active(&dvcpu->wq)) - wake_up_interruptible(&dvcpu->wq); + if (swait_active(&dvcpu->wq)) + swake_up(&dvcpu->wq); return 0; } @@ -702,7 +702,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_to_user(uaddr, vs, 16); + return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0; } else { return -EINVAL; } @@ -732,7 +732,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_from_user(vs, uaddr, 16); + return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0; } else { return -EINVAL; } @@ -1174,8 +1174,8 @@ static void kvm_mips_comparecount_func(unsigned long data) kvm_mips_callbacks->queue_timer_int(vcpu); vcpu->arch.wait = 0; - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); + if (swait_active(&vcpu->wq)) + swake_up(&vcpu->wq); } /* low level hrtimer wake routine */ diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 5c81fdd032c3..353037699512 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -146,7 +146,7 @@ unsigned long arch_mmap_rnd(void) { unsigned long rnd; - rnd = (unsigned long)get_random_int(); + rnd = get_random_long(); rnd <<= PAGE_SHIFT; if (TASK_IS_32BIT_ADDR) rnd &= 0xfffffful; @@ -174,7 +174,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) static inline unsigned long brk_rnd(void) { - unsigned long rnd = get_random_int(); + unsigned long rnd = get_random_long(); rnd = rnd << PAGE_SHIFT; /* 8MB for 32bit, 256MB for 64bit */ diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 3bd0597d9c3d..91dec32c77b7 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -164,11 +164,13 @@ static int __init mips_sc_probe_cm3(void) sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE_MSK; sets >>= CM_GCR_L2_CONFIG_SET_SIZE_SHF; - c->scache.sets = 64 << sets; + if (sets) + c->scache.sets = 64 << sets; line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE_MSK; line_sz >>= CM_GCR_L2_CONFIG_LINE_SIZE_SHF; - c->scache.linesz = 2 << line_sz; + if (line_sz) + c->scache.linesz = 2 << line_sz; assoc = cfg & CM_GCR_L2_CONFIG_ASSOC_MSK; assoc >>= CM_GCR_L2_CONFIG_ASSOC_SHF; @@ -176,13 +178,12 @@ static int __init mips_sc_probe_cm3(void) c->scache.waysize = c->scache.sets * c->scache.linesz; c->scache.waybit = __ffs(c->scache.waysize); - c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + if (c->scache.linesz) { + c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + return 1; + } - return 1; -} - -void __weak platform_early_l2_init(void) -{ + return 0; } static inline int __init mips_sc_probe(void) @@ -194,12 +195,6 @@ static inline int __init mips_sc_probe(void) /* Mark as not present until probe completed */ c->scache.flags |= MIPS_CACHE_NOT_PRESENT; - /* - * Do we need some platform specific probing before - * we configure L2? - */ - platform_early_l2_init(); - if (mips_cm_revision() >= CM_REV_CM3) return mips_sc_probe_cm3(); diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c index 571148c5fd0b..dc2c5214809d 100644 --- a/arch/mips/mti-malta/malta-init.c +++ b/arch/mips/mti-malta/malta-init.c @@ -293,7 +293,6 @@ mips_pci_controller: console_config(); #endif /* Early detection of CMP support */ - mips_cm_probe(); mips_cpc_probe(); if (!register_cps_smp_ops()) @@ -304,10 +303,3 @@ mips_pci_controller: return; register_up_smp_ops(); } - -void platform_early_l2_init(void) -{ - /* L2 configuration lives in the CM3 */ - if (mips_cm_revision() >= CM_REV_CM3) - mips_cm_probe(); -} diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c index a009ee458934..1ae932c2d78b 100644 --- a/arch/mips/pci/pci-mt7620.c +++ b/arch/mips/pci/pci-mt7620.c @@ -297,12 +297,12 @@ static int mt7620_pci_probe(struct platform_device *pdev) return PTR_ERR(rstpcie0); bridge_base = devm_ioremap_resource(&pdev->dev, bridge_res); - if (!bridge_base) - return -ENOMEM; + if (IS_ERR(bridge_base)) + return PTR_ERR(bridge_base); pcie_base = devm_ioremap_resource(&pdev->dev, pcie_res); - if (!pcie_base) - return -ENOMEM; + if (IS_ERR(pcie_base)) + return PTR_ERR(pcie_base); iomem_resource.start = 0; iomem_resource.end = ~0; diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index f984193718b1..426173c4b0b9 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -675,7 +675,7 @@ int __init start_secondary(void *unused) #ifdef CONFIG_GENERIC_CLOCKEVENTS init_clockevents(); #endif - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); return 0; } diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 3d0e17bcc8e9..df0f52bd18b4 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -22,6 +22,9 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +/* Read-only memory is marked before mark_rodata_ro() is called. */ +#define __ro_after_init __read_mostly + void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ void disable_sr_hashing(void); /* turns off space register hashing */ diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 845272ce9cc5..7bd69bd43a01 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma } } -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #include <asm/kmap_types.h> #define ARCH_HAS_KMAP diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h index f84ff12574b7..6d8276cd25ca 100644 --- a/arch/parisc/include/asm/floppy.h +++ b/arch/parisc/include/asm/floppy.h @@ -33,7 +33,7 @@ * floppy accesses go through the track buffer. */ #define _CROSS_64KB(a,s,vdma) \ -(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) +(!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) #define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1) diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 35bdccbb2036..b75039f92116 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -361,8 +361,9 @@ #define __NR_membarrier (__NR_Linux + 343) #define __NR_userfaultfd (__NR_Linux + 344) #define __NR_mlock2 (__NR_Linux + 345) +#define __NR_copy_file_range (__NR_Linux + 346) -#define __NR_Linux_syscalls (__NR_mlock2 + 1) +#define __NR_Linux_syscalls (__NR_copy_file_range + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 9585c81f755f..ce0b2b4075c7 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -269,14 +269,19 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { - long ret = 0; - /* Do the secure computing check first. */ secure_computing_strict(regs->gr[20]); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) - ret = -1L; + tracehook_report_syscall_entry(regs)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + regs->gr[20] = -1UL; + goto out; + } #ifdef CONFIG_64BIT if (!is_compat_task()) @@ -290,7 +295,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gr[24] & 0xffffffff, regs->gr[23] & 0xffffffff); - return ret ? : regs->gr[20]; +out: + return regs->gr[20]; } void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 52e85973a283..c2a9cc55a62f 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -305,7 +305,7 @@ void __init smp_callin(void) local_irq_enable(); /* Interrupts have been off until now */ - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); /* NOTREACHED */ panic("smp_callin() AAAAaaaaahhhh....\n"); diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 3fbd7252a4b2..fbafa0d0e2bf 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -343,7 +343,7 @@ tracesys_next: #endif comiclr,>>= __NR_Linux_syscalls, %r20, %r0 - b,n .Lsyscall_nosys + b,n .Ltracesys_nosys LDREGX %r20(%r19), %r19 @@ -359,6 +359,9 @@ tracesys_next: be 0(%sr7,%r19) ldo R%tracesys_exit(%r2),%r2 +.Ltracesys_nosys: + ldo -ENOSYS(%r0),%r28 /* set errno */ + /* Do *not* call this function on the gateway page, because it makes a direct call to syscall_trace. */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index d4ffcfbc9885..585d50fc75c0 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -441,6 +441,7 @@ ENTRY_SAME(membarrier) ENTRY_SAME(userfaultfd) ENTRY_SAME(mlock2) /* 345 */ + ENTRY_SAME(copy_file_range) .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 1b366c477687..3c07d6b96877 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -55,12 +55,12 @@ signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource pdcdata_resource = { @@ -201,7 +201,7 @@ static void __init setup_bootmem(void) res->name = "System RAM"; res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT; res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e4824fd04bb7..9faa18c4f3f7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -557,7 +557,7 @@ choice config PPC_4K_PAGES bool "4k page size" - select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S + select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64 config PPC_16K_PAGES bool "16k page size" @@ -566,7 +566,7 @@ config PPC_16K_PAGES config PPC_64K_PAGES bool "64k page size" depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64) - select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S + select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64 config PPC_256K_PAGES bool "256k page size" diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 8d1c41d28318..ac07a30a7934 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -281,6 +281,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE +extern void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index c5eb86f3d452..867c39b45df6 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -81,6 +81,7 @@ struct pci_dn; #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ #define EEH_PE_REMOVED (1 << 10) /* Removed permanently */ +#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */ struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9d08d8cbed1a..c98afa538b3a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -289,7 +289,7 @@ struct kvmppc_vcore { struct list_head runnable_threads; struct list_head preempt_list; spinlock_t lock; - wait_queue_head_t wq; + struct swait_queue_head wq; spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; @@ -629,7 +629,7 @@ struct kvm_vcpu_arch { u8 prodded; u32 last_inst; - wait_queue_head_t *wqp; + struct swait_queue_head *wqp; struct kvmppc_vcore *vcore; int ret; int trap; diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 8e86b48d0369..32e36b16773f 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -57,12 +57,14 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, extern void hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_unregfunc(void); -TRACE_EVENT_FN(hcall_entry, +TRACE_EVENT_FN_COND(hcall_entry, TP_PROTO(unsigned long opcode, unsigned long *args), TP_ARGS(opcode, args), + TP_CONDITION(cpu_online(raw_smp_processor_id())), + TP_STRUCT__entry( __field(unsigned long, opcode) ), @@ -76,13 +78,15 @@ TRACE_EVENT_FN(hcall_entry, hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc ); -TRACE_EVENT_FN(hcall_exit, +TRACE_EVENT_FN_COND(hcall_exit, TP_PROTO(unsigned long opcode, unsigned long retval, unsigned long *retbuf), TP_ARGS(opcode, retval, retbuf), + TP_CONDITION(cpu_online(raw_smp_processor_id())), + TP_STRUCT__entry( __field(unsigned long, opcode) __field(unsigned long, retval) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 938742135ee0..650cfb31ea3d 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -418,8 +418,7 @@ static void *eeh_rmv_device(void *data, void *userdata) eeh_pcid_put(dev); if (driver->err_handler && driver->err_handler->error_detected && - driver->err_handler->slot_reset && - driver->err_handler->resume) + driver->err_handler->slot_reset) return NULL; } @@ -564,6 +563,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pcibios_remove_pci_devices(bus); pci_unlock_rescan_remove(); @@ -803,6 +803,7 @@ perm_error: * the their PCI config any more. */ if (frozen_bus) { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); pci_lock_rescan_remove(); @@ -886,6 +887,7 @@ static void eeh_handle_special_event(void) continue; /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); bus = eeh_pe_bus_get(phb_pe); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index ca9e5371930e..98f81800e00c 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -928,7 +928,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) bus = pe->phb->bus; } else if (pe->type & EEH_PE_BUS || pe->type & EEH_PE_DEVICE) { - if (pe->bus) { + if (pe->state & EEH_PE_PRI_BUS) { bus = pe->bus; goto out; } diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 05e804cdecaa..aec9a1b1d25b 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -109,8 +109,9 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp) * If the breakpoint is unregistered between a hw_breakpoint_handler() * and the single_step_dabr_instruction(), then cleanup the breakpoint * restoration variables to prevent dangling pointers. + * FIXME, this should not be using bp->ctx at all! Sayeth peterz. */ - if (bp->ctx && bp->ctx->task) + if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) bp->ctx->task->thread.last_hit_ubp = NULL; } diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ac64ffdb52c8..08b7a40de5f8 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -340,7 +340,7 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) if (name[0] == '.') { if (strcmp(name+1, "TOC.") == 0) syms[i].st_shndx = SHN_ABS; - memmove(name, name+1, strlen(name)); + syms[i].st_name++; } } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dccc87e8fee5..3c5736e52a14 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1768,9 +1768,9 @@ static inline unsigned long brk_rnd(void) /* 8MB for 32bit, 1GB for 64bit */ if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); + rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT))); else - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); + rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT))); return rnd << PAGE_SHIFT; } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ad8c9db61237..d544fa311757 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -114,8 +114,6 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ notrace void __init machine_init(u64 dt_ptr) { - lockdep_init(); - /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5c03a6a9b054..f98be8383a39 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -255,9 +255,6 @@ void __init early_setup(unsigned long dt_ptr) setup_paca(&boot_paca); fixup_boot_paca(); - /* Initialize lockdep early or else spinlocks will blow */ - lockdep_init(); - /* -------- printk is now safe to use ------- */ /* Enable early debugging if any specified (see udbg.h) */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ec9ec2058d2d..cc13d4c83291 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -727,7 +727,7 @@ void start_secondary(void *unused) local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); BUG(); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index baeddb06811d..f1187bb6dd4d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -114,11 +114,11 @@ static bool kvmppc_ipi_thread(int cpu) static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) { int cpu; - wait_queue_head_t *wqp; + struct swait_queue_head *wqp; wqp = kvm_arch_vcpu_wq(vcpu); - if (waitqueue_active(wqp)) { - wake_up_interruptible(wqp); + if (swait_active(wqp)) { + swake_up(wqp); ++vcpu->stat.halt_wakeup; } @@ -701,8 +701,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) tvcpu->arch.prodded = 1; smp_mb(); if (vcpu->arch.ceded) { - if (waitqueue_active(&vcpu->wq)) { - wake_up_interruptible(&vcpu->wq); + if (swait_active(&vcpu->wq)) { + swake_up(&vcpu->wq); vcpu->stat.halt_wakeup++; } } @@ -1459,7 +1459,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); - init_waitqueue_head(&vcore->wq); + init_swait_queue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; vcore->first_vcpuid = core * threads_per_subcore; @@ -2531,10 +2531,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { struct kvm_vcpu *vcpu; int do_sleep = 1; + DECLARE_SWAITQUEUE(wait); - DEFINE_WAIT(wait); - - prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); /* * Check one last time for pending exceptions and ceded state after @@ -2548,7 +2547,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) } if (!do_sleep) { - finish_wait(&vc->wq, &wait); + finish_swait(&vc->wq, &wait); return; } @@ -2556,7 +2555,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); - finish_wait(&vc->wq, &wait); + finish_swait(&vc->wq, &wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); @@ -2612,7 +2611,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { - wake_up(&vc->wq); + swake_up(&vc->wq); } } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 6ee26de9a1de..25ae2c9913c3 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1370,6 +1370,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r6, VCPU_ACOP(r9) stw r7, VCPU_GUEST_PID(r9) std r8, VCPU_WORT(r9) + /* + * Restore various registers to 0, where non-zero values + * set by the guest could disrupt the host. + */ + li r0, 0 + mtspr SPRN_IAMR, r0 + mtspr SPRN_CIABR, r0 + mtspr SPRN_DAWRX, r0 + mtspr SPRN_TCSCR, r0 + mtspr SPRN_WORT, r0 + /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ + li r0, 1 + sldi r0, r0, 31 + mtspr SPRN_MMCRS, r0 8: /* Save and reset AMR and UAMOR before turning on the MMU */ diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 0762c1e08c88..edb09912f0c9 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, */ if (!(old_pte & _PAGE_COMBO)) { flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags); - old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND; + /* + * clear the old slot details from the old and new pte. + * On hash insert failure we use old pte value and we don't + * want slot information there if we have a insert failure. + */ + old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND); + new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND); goto htab_insert_hpte; } /* diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 49b152b0f926..eb2accdd76fd 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * base page size. This is because demote_segment won't flush * hash page table entries. */ - if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) + if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) { flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, ssize, flags); + /* + * With THP, we also clear the slot information with + * respect to all the 64K hash pte mapping the 16MB + * page. They are all invalid now. This make sure we + * don't find the slot valid when we fault with 4k + * base page size. + * + */ + memset(hpte_slot_array, 0, PTE_FRAG_SIZE); + } } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 7e6d0880813f..83a8be791e06 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -8,6 +8,8 @@ #include <linux/mm.h> #include <linux/hugetlb.h> +#include <asm/mmu.h> + #ifdef CONFIG_PPC_FSL_BOOK3E #ifdef CONFIG_PPC64 static inline int tlb1_next(void) @@ -60,6 +62,14 @@ static inline void book3e_tlb_lock(void) unsigned long tmp; int token = smp_processor_id() + 1; + /* + * Besides being unnecessary in the absence of SMT, this + * check prevents trying to do lbarx/stbcx. on e5500 which + * doesn't implement either feature. + */ + if (!cpu_has_feature(CPU_FTR_SMT)) + return; + asm volatile("1: lbarx %0, 0, %1;" "cmpwi %0, 0;" "bne 2f;" @@ -80,6 +90,9 @@ static inline void book3e_tlb_unlock(void) { struct paca_struct *paca = get_paca(); + if (!cpu_has_feature(CPU_FTR_SMT)) + return; + isync(); paca->tcd_ptr->lock = 0; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d0f0a514b04e..f078a1f94fc2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -541,7 +541,7 @@ static int __init add_system_ram_resources(void) res->name = "System RAM"; res->start = base; res->end = base + size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; WARN_ON(request_resource(&iomem_resource, res) < 0); } } diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 0f0502e12f6c..4087705ba90f 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void) /* 8MB for 32bit, 1GB for 64bit */ if (is_32bit_task()) - rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT)); + rnd = get_random_long() % (1<<(23-PAGE_SHIFT)); else - rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT)); + rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT)); return rnd << PAGE_SHIFT; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 3124a20d0fab..cdf2123d46db 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) return pgtable; } +void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + VM_BUG_ON(REGION_ID(address) != USER_REGION_ID); + + /* + * We can't mark the pmd none here, because that will cause a race + * against exit_mmap. We need to continue mark pmd TRANS HUGE, while + * we spilt, but at the same time we wan't rest of the ppc64 code + * not to insert hash pte on this, because we will be modifying + * the deposited pgtable in the caller of this function. Hence + * clear the _PAGE_USER so that we move the fault handling to + * higher level function and that will serialize against ptl. + * We need to flush existing hash pte entries here even though, + * the translation is still valid, because we will withdraw + * pgtable_t after this. + */ + pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0); +} + + /* * set a new huge pmd. We should not be called for updating * an existing pmd entry. That should go via pmd_hugepage_update. @@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } +/* + * We use this to invalidate a pmdp entry before switching from a + * hugepte to regular pmd entry. + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); + + /* + * This ensures that generic code that rely on IRQ disabling + * to prevent a parallel THP split work as expected. + */ + kick_all_cpus_sync(); } /* diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5f152b95ca0c..87f47e55aab6 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -444,9 +444,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) * PCI devices of the PE are expected to be removed prior * to PE reset. */ - if (!edev->pe->bus) + if (!(edev->pe->state & EEH_PE_PRI_BUS)) { edev->pe->bus = pci_find_bus(hose->global_number, pdn->busno); + if (edev->pe->bus) + edev->pe->state |= EEH_PE_PRI_BUS; + } /* * Enable EEH explicitly so that we will do EEH check diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 573ae1994097..f90dc04395bf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3180,6 +3180,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, + .dma_bus_setup = pnv_pci_dma_bus_setup, #ifdef CONFIG_PCI_MSI .setup_msi_irqs = pnv_setup_msi_irqs, .teardown_msi_irqs = pnv_teardown_msi_irqs, diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2f55c86df703..b1ef84a6c9d1 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -599,6 +599,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, u64 rpn = __pa(uaddr) >> tbl->it_page_shift; long i; + if (proto_tce & TCE_PCI_WRITE) + proto_tce |= TCE_PCI_READ; + for (i = 0; i < npages; i++) { unsigned long newtce = proto_tce | ((rpn + i) << tbl->it_page_shift); @@ -620,6 +623,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + if (newtce & TCE_PCI_WRITE) + newtce |= TCE_PCI_READ; + oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)); *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE); *direction = iommu_tce_direction(oldtce); @@ -760,6 +766,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) phb->dma_dev_setup(phb, pdev); } +void pnv_pci_dma_bus_setup(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))) + continue; + + if (!pe->pbus) + continue; + + if (bus->number == ((pe->rid >> 8) & 0xFF)) { + pe->pbus = bus; + break; + } + } +} + void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 7f56313e8d72..00691a9b99af 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -242,6 +242,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); +extern void pnv_pci_dma_bus_setup(struct pci_bus *bus); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h index ea91ddfe54eb..629c90865a07 100644 --- a/arch/s390/include/asm/fpu/internal.h +++ b/arch/s390/include/asm/fpu/internal.h @@ -40,6 +40,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) { fpregs->pad = 0; + fpregs->fpc = fpu->fpc; if (MACHINE_HAS_VX) convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); else @@ -49,6 +50,7 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) { + fpu->fpc = fpregs->fpc; if (MACHINE_HAS_VX) convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); else diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8959ebb6d2c9..b0c8ad0799c7 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -467,7 +467,7 @@ struct kvm_s390_irq_payload { struct kvm_s390_local_interrupt { spinlock_t lock; struct kvm_s390_float_interrupt *float_int; - wait_queue_head_t *wq; + struct swait_queue_head *wq; atomic_t *cpuflags; DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); struct kvm_s390_irq_payload irq; diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 7aa799134a11..a52b6cca873d 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -37,7 +37,7 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) regs->psw.addr = ip; } #else -#error Live patching support is disabled; check CONFIG_LIVEPATCH +#error Include linux/livepatch.h, not asm/livepatch.h #endif #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index fb1b93ea3e3f..e485817f7b1a 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -15,17 +15,25 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + spin_lock_init(&mm->context.list_lock); + INIT_LIST_HEAD(&mm->context.pgtable_list); + INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.attach_count, 0); mm->context.flush_mm = 0; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; - mm->context.asce_bits |= _ASCE_TYPE_REGION3; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; #endif - mm->context.asce_limit = STACK_TOP_MAX; + if (mm->context.asce_limit == 0) { + /* context created by exec, set asce limit to 4TB */ + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION3; + mm->context.asce_limit = STACK_TOP_MAX; + } else if (mm->context.asce_limit == (1UL << 31)) { + mm_inc_nr_pmds(mm); + } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; } @@ -111,8 +119,6 @@ static inline void activate_mm(struct mm_struct *prev, static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - if (oldmm->context.asce_limit < mm->context.asce_limit) - crst_table_downgrade(mm, oldmm->context.asce_limit); } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 7b7858f158b4..d7cc79fb6191 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -100,12 +100,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.pgtable_list); - INIT_LIST_HEAD(&mm->context.gmap_list); - return (pgd_t *) crst_table_alloc(mm); + unsigned long *table = crst_table_alloc(mm); + + if (!table) + return NULL; + if (mm->context.asce_limit == (1UL << 31)) { + /* Forking a compat process with 2 page table levels */ + if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + crst_table_free(mm, table); + return NULL; + } + } + return (pgd_t *) table; +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + if (mm->context.asce_limit == (1UL << 31)) + pgtable_pmd_page_dtor(virt_to_page(pgd)); + crst_table_free(mm, (unsigned long *) pgd); } -#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 66c94417c0ba..4af60374eba0 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -271,7 +271,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs, /* Restore high gprs from signal stack */ if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, - sizeof(&sregs_ext->gprs_high))) + sizeof(sregs_ext->gprs_high))) return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)®s->gprs[i] = gprs_high[i]; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index c55576bbaa1f..a0684de5a93b 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -448,7 +448,6 @@ void __init startup_init(void) rescue_initrd(); clear_bss_section(); init_kernel_storage_key(); - lockdep_init(); lockdep_off(); setup_lowcore_early(); setup_facility_list(); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index c5febe84eba6..03c2b469c472 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,7 @@ __HEAD ENTRY(startup_continue) - tm __LC_STFLE_FAC_LIST+6,0x80 # LPP available ? + tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? jz 0f xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid mvi __LC_LPP,0x80 # and set LPP_MAGIC diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index cfcba2dd9bb5..0943b11a2f6e 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -260,12 +260,13 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry, void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - unsigned long head; + unsigned long head, frame_size; struct stack_frame *head_sf; if (user_mode(regs)) return; + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); head = regs->gprs[15]; head_sf = (struct stack_frame *) head; @@ -273,8 +274,9 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, return; head = head_sf->back_chain; - head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); + head = __store_trace(entry, head, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size); __store_trace(entry, head, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9220db5c996a..cedb0198675f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -374,17 +374,17 @@ static void __init setup_lowcore(void) static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource __initdata *standard_resources[] = { @@ -408,7 +408,7 @@ static void __init setup_resources(void) for_each_memblock(memory, reg) { res = alloc_bootmem_low(sizeof(*res)); - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; res->start = reg->base; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3c65a8eae34d..40a6b4f9c36c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); inc_irq_stat(CPU_RST); local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* Upping and downing of CPUs */ diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 5acba3cb7220..8f64ebd63767 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -59,26 +59,32 @@ static unsigned long save_context_stack(struct stack_trace *trace, } } -void save_stack_trace(struct stack_trace *trace) +static void __save_stack_trace(struct stack_trace *trace, unsigned long sp) { - register unsigned long sp asm ("15"); - unsigned long orig_sp, new_sp; + unsigned long new_sp, frame_size; - orig_sp = sp; - new_sp = save_context_stack(trace, orig_sp, - S390_lowcore.panic_stack - PAGE_SIZE, - S390_lowcore.panic_stack, 1); - if (new_sp != orig_sp) - return; + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + new_sp = save_context_stack(trace, sp, + S390_lowcore.panic_stack + frame_size - PAGE_SIZE, + S390_lowcore.panic_stack + frame_size, 1); new_sp = save_context_stack(trace, new_sp, - S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack, 1); - if (new_sp != orig_sp) - return; + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size, 1); save_context_stack(trace, new_sp, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE, 1); } + +void save_stack_trace(struct stack_trace *trace) +{ + register unsigned long r15 asm ("15"); + unsigned long sp; + + sp = r15; + __save_stack_trace(trace, sp); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) @@ -86,6 +92,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) unsigned long sp, low, high; sp = tsk->thread.ksp; + if (tsk == current) { + /* Get current stack pointer. */ + asm volatile("la %0,0(15)" : "=a" (sp)); + } low = (unsigned long) task_stack_page(tsk); high = (unsigned long) task_pt_regs(tsk); save_context_stack(trace, sp, low, high, 0); @@ -93,3 +103,14 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + unsigned long sp; + + sp = kernel_stack_pointer(regs); + __save_stack_trace(trace, sp); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} +EXPORT_SYMBOL_GPL(save_stack_trace_regs); diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c index 21a5df99552b..dde7654f5c68 100644 --- a/arch/s390/kernel/trace.c +++ b/arch/s390/kernel/trace.c @@ -18,6 +18,9 @@ void trace_s390_diagnose_norecursion(int diag_nr) unsigned long flags; unsigned int *depth; + /* Avoid lockdep recursion. */ + if (IS_ENABLED(CONFIG_LOCKDEP)) + return; local_irq_save(flags); depth = this_cpu_ptr(&diagnose_trace_depth); if (*depth == 0) { diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f88ca72c3a77..9ffc73221792 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -966,13 +966,13 @@ no_timer: void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { - if (waitqueue_active(&vcpu->wq)) { + if (swait_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, mark it as a good * yield-candidate. */ vcpu->preempted = true; - wake_up_interruptible(&vcpu->wq); + swake_up(&vcpu->wq); vcpu->stat.halt_wakeup++; } } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4af21c771f9b..03dfe9c667f4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2381,7 +2381,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) /* manually convert vector registers if necessary */ if (MACHINE_HAS_VX) { - convert_vx_to_fp(fprs, current->thread.fpu.vxrs); + convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, fprs, 128); } else { diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index fec59c067d0d..792f9c63fbca 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -93,15 +93,19 @@ static int __memcpy_real(void *dest, void *src, size_t count) */ int memcpy_real(void *dest, void *src, size_t count) { + int irqs_disabled, rc; unsigned long flags; - int rc; if (!count) return 0; - local_irq_save(flags); - __arch_local_irq_stnsm(0xfbUL); + flags = __arch_local_irq_stnsm(0xf8UL); + irqs_disabled = arch_irqs_disabled_flags(flags); + if (!irqs_disabled) + trace_hardirqs_off(); rc = __memcpy_real(dest, src, count); - local_irq_restore(flags); + if (!irqs_disabled) + trace_hardirqs_on(); + __arch_local_irq_ssm(flags); return rc; } diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c index fe0bfe370c45..1884e1759529 100644 --- a/arch/s390/oprofile/backtrace.c +++ b/arch/s390/oprofile/backtrace.c @@ -54,12 +54,13 @@ __show_trace(unsigned int *depth, unsigned long sp, void s390_backtrace(struct pt_regs * const regs, unsigned int depth) { - unsigned long head; + unsigned long head, frame_size; struct stack_frame* head_sf; if (user_mode(regs)) return; + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); head = regs->gprs[15]; head_sf = (struct stack_frame*)head; @@ -68,8 +69,9 @@ void s390_backtrace(struct pt_regs * const regs, unsigned int depth) head = head_sf->back_chain; - head = __show_trace(&depth, head, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); + head = __show_trace(&depth, head, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size); __show_trace(&depth, head, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c index b48459afefdd..f3a0649ab521 100644 --- a/arch/score/kernel/setup.c +++ b/arch/score/kernel/setup.c @@ -101,7 +101,7 @@ static void __init resource_init(void) res->name = "System RAM"; res->start = MEMORY_START; res->end = MEMORY_START + MEMORY_SIZE - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); request_resource(res, &code_resource); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index de19cfa768f2..3f1c18b28e8a 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -78,17 +78,17 @@ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0, }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; unsigned long memory_start; @@ -202,7 +202,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, res->name = "System RAM"; res->start = start; res->end = end - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, res)) { pr_err("unable to request memory_resource 0x%lx 0x%lx\n", diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index de6be008fc01..13f633add29a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -203,7 +203,7 @@ asmlinkage void start_secondary(void) set_cpu_online(cpu, true); per_cpu(cpu_state, cpu) = CPU_ONLINE; - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } extern struct { diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index eaee14637d93..8496a074bd0e 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -24,7 +24,13 @@ LDFLAGS := -m elf32_sparc export BITS := 32 UTS_MACHINE := sparc +# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some +# versions of gcc. Some gcc versions won't pass -Av8 to binutils when you +# give -mcpu=v8. This silently worked with older bintutils versions but +# does not any more. KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 +KBUILD_CFLAGS += -Wa,-Av8 + KBUILD_AFLAGS += -m32 -Wa,-Av8 else diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 1c26d440d288..b6de8b10a55b 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -422,8 +422,9 @@ #define __NR_listen 354 #define __NR_setsockopt 355 #define __NR_mlock2 356 +#define __NR_copy_file_range 357 -#define NR_syscalls 357 +#define NR_syscalls 358 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 33c02b15f478..a83707c83be8 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -948,7 +948,24 @@ linux_syscall_trace: cmp %o0, 0 bne 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ld [%sp + STACKFRAME_SZ + PT_G1], %g1 + sethi %hi(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I0], %i0 + or %l7, %lo(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I1], %i1 + ld [%sp + STACKFRAME_SZ + PT_I2], %i2 + ld [%sp + STACKFRAME_SZ + PT_I3], %i3 + ld [%sp + STACKFRAME_SZ + PT_I4], %i4 + ld [%sp + STACKFRAME_SZ + PT_I5], %i5 + cmp %g1, NR_syscalls + bgeu 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + ld [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index f2d30cab5b3f..cd1f592cd347 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -696,14 +696,6 @@ tlb_fixup_done: call __bzero sub %o1, %o0, %o1 -#ifdef CONFIG_LOCKDEP - /* We have this call this super early, as even prom_init can grab - * spinlocks and thus call into the lockdep code. - */ - call lockdep_init - nop -#endif - call prom_init mov %l7, %o0 ! OpenPROM cif handler diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index afbaba52d2f1..d127130bf424 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -338,8 +338,9 @@ ENTRY(sun4v_mach_set_watchdog) mov %o1, %o4 mov HV_FAST_MACH_SET_WATCHDOG, %o5 ta HV_FAST_TRAP + brnz,a,pn %o4, 0f stx %o1, [%o4] - retl +0: retl nop ENDPROC(sun4v_mach_set_watchdog) diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index d88beff47bab..39aaec173f66 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) unsigned char fenab; int err; - flush_user_windows(); + synchronize_user_stack(); if (get_thread_wsaved() || (((unsigned long)ucp) & (sizeof(unsigned long)-1)) || (!__access_ok(ucp, sizeof(*ucp)))) diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index b3a5d81b20f0..fb30e7c6a5b1 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg) local_irq_enable(); wmb(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); /* We should never reach here! */ BUG(); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 19cd08d18672..8a6151a628ce 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -134,7 +134,7 @@ void smp_callin(void) local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } void cpu_panic(void) diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c index a92d5d2c46a3..9e034f29dcc5 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -37,6 +37,7 @@ EXPORT_SYMBOL(sun4v_niagara_getperf); EXPORT_SYMBOL(sun4v_niagara_setperf); EXPORT_SYMBOL(sun4v_niagara2_getperf); EXPORT_SYMBOL(sun4v_niagara2_setperf); +EXPORT_SYMBOL(sun4v_mach_set_watchdog); /* from hweight.S */ EXPORT_SYMBOL(__arch_hweight8); diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index c690c8e16a96..b489e9759518 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -264,7 +264,7 @@ static unsigned long mmap_rnd(void) unsigned long rnd = 0UL; if (current->flags & PF_RANDOMIZE) { - unsigned long val = get_random_int(); + unsigned long val = get_random_long(); if (test_thread_flag(TIF_32BIT)) rnd = (val % (1UL << (23UL-PAGE_SHIFT))); else diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index bb0008927598..c4a1b5c40e4e 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -158,7 +158,25 @@ linux_syscall_trace32: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 srl %i0, 0, %o0 + lduw [%l7 + %l4], %l7 srl %i4, 0, %o4 srl %i1, 0, %o1 srl %i2, 0, %o2 @@ -170,7 +188,25 @@ linux_syscall_trace: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + lduw [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index e663b6c78de2..6c3dd6c52f8b 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -88,4 +88,4 @@ sys_call_table: /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen -/*355*/ .long sys_setsockopt, sys_mlock2 +/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 1557121f4cdc..12b524cfcfa0 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -89,7 +89,7 @@ sys_call_table32: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word compat_sys_setsockopt, sys_mlock2 + .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range #endif /* CONFIG_COMPAT */ @@ -170,4 +170,4 @@ sys_call_table: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word sys_setsockopt, sys_mlock2 + .word sys_setsockopt, sys_mlock2, sys_copy_file_range diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 6f216853f272..1cfe6aab7a11 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2863,17 +2863,17 @@ void hugetlb_setup(struct pt_regs *regs) static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static inline resource_size_t compute_kern_paddr(void *addr) @@ -2909,7 +2909,7 @@ static int __init report_memory(void) res->name = "System RAM"; res->start = pavail[i].phys_addr; res->end = pavail[i].phys_addr + pavail[i].reg_size - 1; - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; if (insert_resource(&iomem_resource, res) < 0) { pr_warn("Resource insertion failed.\n"); diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index bbb855de6569..a992238e9b58 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1632,14 +1632,14 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; /* @@ -1673,10 +1673,15 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved) kzalloc(sizeof(struct resource), GFP_ATOMIC); if (!res) return NULL; - res->name = reserved ? "Reserved" : "System RAM"; res->start = start_pfn << PAGE_SHIFT; res->end = (end_pfn << PAGE_SHIFT) - 1; res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + if (reserved) { + res->name = "Reserved"; + } else { + res->name = "System RAM"; + res->flags |= IORESOURCE_SYSRAM; + } if (insert_resource(&iomem_resource, res)) { kfree(res); return NULL; diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 20d52a98e171..6c0abaacec33 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -208,7 +208,7 @@ void online_secondary(void) /* Set up tile-timer clock-event device on this cpu */ setup_tile_timer(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } int __cpu_up(unsigned int cpu, struct task_struct *tidle) diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index e13d41c392ae..f878bec23576 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -34,21 +34,18 @@ struct page; #if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT) -typedef struct { unsigned long pte_low, pte_high; } pte_t; +typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; -#define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32)) - -#define pte_get_bits(pte, bits) ((pte).pte_low & (bits)) -#define pte_set_bits(pte, bits) ((pte).pte_low |= (bits)) -#define pte_clear_bits(pte, bits) ((pte).pte_low &= ~(bits)) -#define pte_copy(to, from) ({ (to).pte_high = (from).pte_high; \ - smp_wmb(); \ - (to).pte_low = (from).pte_low; }) -#define pte_is_zero(pte) (!((pte).pte_low & ~_PAGE_NEWPAGE) && !(pte).pte_high) -#define pte_set_val(pte, phys, prot) \ - ({ (pte).pte_high = (phys) >> 32; \ - (pte).pte_low = (phys) | pgprot_val(prot); }) +#define pte_val(p) ((p).pte) + +#define pte_get_bits(p, bits) ((p).pte & (bits)) +#define pte_set_bits(p, bits) ((p).pte |= (bits)) +#define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) +#define pte_copy(to, from) ({ (to).pte = (from).pte; }) +#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) +#define pte_set_val(p, phys, prot) \ + ({ (p).pte = (phys) | pgprot_val(prot); }) #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 9bdf67a092a5..b60a9f8cda75 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -12,6 +12,7 @@ #include <skas.h> void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); static void kill_off_processes(void) { diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index fc8be0e3a4ff..57acbd67d85d 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -69,7 +69,7 @@ void do_signal(struct pt_regs *regs) struct ksignal ksig; int handled_sig = 0; - if (get_signal(&ksig)) { + while (get_signal(&ksig)) { handled_sig = 1; /* Whee! Actually deliver the signal. */ handle_signal(&ksig, regs); diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 3fa317f96122..c2bffa5614a4 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -72,13 +72,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -211,7 +211,7 @@ request_standard_resources(struct meminfo *mi) res->name = "System RAM"; res->start = mi->bank[i].start; res->end = mi->bank[i].start + mi->bank[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 1538562cc720..eb3abf8ac44e 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,6 +1,7 @@ - obj-y += entry/ +obj-$(CONFIG_PERF_EVENTS) += events/ + obj-$(CONFIG_KVM) += kvm/ # Xen paravirtualization support diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9af2e6338400..8f2e6659281b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -303,6 +303,9 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config DEBUG_RODATA + def_bool y + config PGTABLE_LEVELS int default 4 if X86_64 @@ -475,6 +478,7 @@ config X86_UV depends on X86_64 depends on X86_EXTENDED_PLATFORM depends on NUMA + depends on EFI depends on X86_X2APIC depends on PCI ---help--- @@ -777,8 +781,8 @@ config HPET_TIMER HPET is the next generation timer replacing legacy 8254s. The HPET provides a stable time base on SMP systems, unlike the TSC, but it is more expensive to access, - as it is off-chip. You can find the HPET spec at - <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>. + as it is off-chip. The interface used is documented + in the HPET spec, revision 1. You can safely choose Y here. However, HPET will only be activated if the platform and the BIOS support this feature. @@ -1159,22 +1163,23 @@ config MICROCODE bool "CPU microcode loading support" default y depends on CPU_SUP_AMD || CPU_SUP_INTEL - depends on BLK_DEV_INITRD select FW_LOADER ---help--- - If you say Y here, you will be able to update the microcode on - certain Intel and AMD processors. The Intel support is for the - IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, - Xeon etc. The AMD support is for families 0x10 and later. You will - obviously need the actual microcode binary data itself which is not - shipped with the Linux kernel. - - This option selects the general module only, you need to select - at least one vendor specific module as well. - - To compile this driver as a module, choose M here: the module - will be called microcode. + Intel and AMD processors. The Intel support is for the IA32 family, + e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The + AMD support is for families 0x10 and later. You will obviously need + the actual microcode binary data itself which is not shipped with + the Linux kernel. + + The preferred method to load microcode from a detached initrd is described + in Documentation/x86/early-microcode.txt. For that you need to enable + CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the + initrd for microcode blobs. + + In addition, you can build-in the microcode into the kernel. For that you + need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode + to the CONFIG_EXTRA_FIRMWARE config option. config MICROCODE_INTEL bool "Intel microcode loading support" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9b18ed97a8a2..67eec55093a5 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -74,28 +74,16 @@ config EFI_PGT_DUMP issues with the mapping of the EFI runtime regions into that table. -config DEBUG_RODATA - bool "Write protect kernel read-only data structures" - default y - depends on DEBUG_KERNEL - ---help--- - Mark the kernel read-only data as write-protected in the pagetables, - in order to catch accidental (and incorrect) writes to such const - data. This is recommended so that we can catch kernel bugs sooner. - If in doubt, say "Y". - config DEBUG_RODATA_TEST - bool "Testcase for the DEBUG_RODATA feature" - depends on DEBUG_RODATA + bool "Testcase for the marking rodata read-only" default y ---help--- - This option enables a testcase for the DEBUG_RODATA - feature as well as for the change_page_attr() infrastructure. + This option enables a testcase for the setting rodata read-only + as well as for the change_page_attr() infrastructure. If in doubt, say "N" config DEBUG_WX bool "Warn on W+X mappings at boot" - depends on DEBUG_RODATA select X86_PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. @@ -350,16 +338,6 @@ config DEBUG_IMR_SELFTEST If unsure say N here. -config X86_DEBUG_STATIC_CPU_HAS - bool "Debug alternatives" - depends on DEBUG_KERNEL - ---help--- - This option causes additional code to be generated which - fails if static_cpu_has() is used before alternatives have - run. - - If unsure, say N. - config X86_DEBUG_FPU bool "Debug the x86 FPU code" depends on DEBUG_KERNEL diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h index ea97697e51e4..4cb404fd45ce 100644 --- a/arch/x86/boot/cpuflags.h +++ b/arch/x86/boot/cpuflags.h @@ -1,7 +1,7 @@ #ifndef BOOT_CPUFLAGS_H #define BOOT_CPUFLAGS_H -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/processor-flags.h> struct cpu_features { diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 637097e66a62..f72498dc90d2 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -17,7 +17,7 @@ #include "../include/asm/required-features.h" #include "../include/asm/disabled-features.h" -#include "../include/asm/cpufeature.h" +#include "../include/asm/cpufeatures.h" #include "../kernel/cpu/capflags.c" int main(void) diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a7661c430cd9..0702d2531bc7 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -49,7 +49,6 @@ typedef unsigned int u32; /* This must be large enough to hold the entire setup */ u8 buf[SETUP_SECT_MAX*512]; -int is_big_kernel; #define PECOFF_RELOC_RESERVE 0x20 diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 028be48c8839..e25a1630320c 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -288,7 +288,7 @@ CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_WARN_DEPRECATED is not set -CONFIG_FRAME_WARN=2048 +CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S index 712b13047b41..3a33124e9112 100644 --- a/arch/x86/crypto/chacha20-ssse3-x86_64.S +++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S @@ -157,7 +157,9 @@ ENTRY(chacha20_4block_xor_ssse3) # done with the slightly better performing SSSE3 byte shuffling, # 7/12-bit word rotation uses traditional shift+OR. - sub $0x40,%rsp + mov %rsp,%r11 + sub $0x80,%rsp + and $~63,%rsp # x0..15[0-3] = s0..3[0..3] movq 0x00(%rdi),%xmm1 @@ -620,6 +622,6 @@ ENTRY(chacha20_4block_xor_ssse3) pxor %xmm1,%xmm15 movdqu %xmm15,0xf0(%rsi) - add $0x40,%rsp + mov %r11,%rsp ret ENDPROC(chacha20_4block_xor_ssse3) diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 07d2c6c86a54..27226df3f7d8 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -33,7 +33,7 @@ #include <linux/crc32.h> #include <crypto/internal/hash.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> #include <asm/fpu/api.h> diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 0e9871693f24..0857b1a1de3b 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -30,7 +30,7 @@ #include <linux/kernel.h> #include <crypto/internal/hash.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> #include <asm/fpu/internal.h> diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index a3fcfc97a311..cd4df9322501 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -30,7 +30,7 @@ #include <linux/string.h> #include <linux/kernel.h> #include <asm/fpu/api.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index e32206e09868..9a9e5884066c 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with .byte 0xf1 .endm -#else /* CONFIG_X86_64 */ - -/* - * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These - * are different from the entry_32.S versions in not changing the segment - * registers. So only suitable for in kernel use, not when transitioning - * from or to user space. The resulting stack frame is not a standard - * pt_regs frame. The main use case is calling C code from assembler - * when all the registers need to be preserved. - */ - - .macro SAVE_ALL - pushl %eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - .endm - - .macro RESTORE_ALL - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax - .endm - #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03663740c866..e79d93d44ecd 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include <asm/traps.h> #include <asm/vdso.h> #include <asm/uaccess.h> +#include <asm/cpufeature.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void) CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit(); } +#else +static inline void enter_from_user_mode(void) {} #endif static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) @@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; -#ifdef CONFIG_CONTEXT_TRACKING - /* - * If TIF_NOHZ is set, we are required to call user_exit() before - * doing anything that could touch RCU. - */ - if (work & _TIF_NOHZ) { - enter_from_user_mode(); - work &= ~_TIF_NOHZ; - } -#endif - #ifdef CONFIG_SECCOMP /* * Do seccomp first -- it should minimize exposure of other @@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - /* - * If we stepped into a sysenter/syscall insn, it trapped in - * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. - * If user-mode had set TF itself, then it's still clear from - * do_debug() and we need to set it again to restore the user - * state. If we entered on the slow path, TF was already set. - */ - if (work & _TIF_SINGLESTEP) - regs->flags |= X86_EFLAGS_TF; - #ifdef CONFIG_SECCOMP /* * Call seccomp_phase2 before running the other hooks so that @@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) /* Called with IRQs disabled. */ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) { + struct thread_info *ti = pt_regs_to_thread_info(regs); u32 cached_flags; if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) @@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) lockdep_sys_exit(); - cached_flags = - READ_ONCE(pt_regs_to_thread_info(regs)->flags); + cached_flags = READ_ONCE(ti->flags); if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) exit_to_usermode_loop(regs, cached_flags); +#ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. Make sure we clear it before + * returning to user mode. We need to clear it *after* signal + * handling, because syscall restart has a fixup for compat + * syscalls. The fixup is exercised by the ptrace_syscall_32 + * selftest. + */ + ti->status &= ~TS_COMPAT; +#endif + user_enter(); } @@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) syscall_slow_exit_work(regs, cached_flags); -#ifdef CONFIG_COMPAT + local_irq_disable(); + prepare_exit_to_usermode(regs); +} + +#ifdef CONFIG_X86_64 +__visible void do_syscall_64(struct pt_regs *regs) +{ + struct thread_info *ti = pt_regs_to_thread_info(regs); + unsigned long nr = regs->orig_ax; + + enter_from_user_mode(); + local_irq_enable(); + + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) + nr = syscall_trace_enter(regs); + /* - * Compat syscalls set TS_COMPAT. Make sure we clear it before - * returning to user mode. + * NB: Native and x32 syscalls are dispatched from the same + * table. The only functional difference is the x32 bit in + * regs->orig_ax, which changes the behavior of some syscalls. */ - ti->status &= ~TS_COMPAT; -#endif + if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { + regs->ax = sys_call_table[nr & __SYSCALL_MASK]( + regs->di, regs->si, regs->dx, + regs->r10, regs->r8, regs->r9); + } - local_irq_disable(); - prepare_exit_to_usermode(regs); + syscall_return_slowpath(regs); } +#endif #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) /* - * Does a 32-bit syscall. Called with IRQs on and does all entry and - * exit work and returns with IRQs off. This function is extremely hot - * in workloads that use it, and it's usually called from + * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does + * all entry and exit work and returns with IRQs off. This function is + * extremely hot in workloads that use it, and it's usually called from * do_fast_syscall_32, so forcibly inline it to improve performance. */ -#ifdef CONFIG_X86_32 -/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */ -__visible -#else -/* 64-bit kernels use do_syscall_32_irqs_off() instead. */ -static -#endif -__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) +static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) { struct thread_info *ti = pt_regs_to_thread_info(regs); unsigned int nr = (unsigned int)regs->orig_ax; @@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) syscall_return_slowpath(regs); } -#ifdef CONFIG_X86_64 -/* Handles INT80 on 64-bit kernels */ -__visible void do_syscall_32_irqs_off(struct pt_regs *regs) +/* Handles int $0x80 */ +__visible void do_int80_syscall_32(struct pt_regs *regs) { + enter_from_user_mode(); local_irq_enable(); do_syscall_32_irqs_on(regs); } -#endif /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ __visible long do_fast_syscall_32(struct pt_regs *regs) @@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) */ regs->ip = landing_pad; - /* - * Fetch EBP from where the vDSO stashed it. - * - * WARNING: We are in CONTEXT_USER and RCU isn't paying attention! - */ + enter_from_user_mode(); + local_irq_enable(); + + /* Fetch EBP from where the vDSO stashed it. */ if ( #ifdef CONFIG_X86_64 /* @@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) /* User code screwed up. */ local_irq_disable(); regs->ax = -EFAULT; -#ifdef CONFIG_CONTEXT_TRACKING - enter_from_user_mode(); -#endif prepare_exit_to_usermode(regs); return 0; /* Keep it simple: use IRET. */ } diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 77d8c5112900..10868aa734dc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -40,7 +40,7 @@ #include <asm/processor-flags.h> #include <asm/ftrace.h> #include <asm/irq_vectors.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/smap.h> @@ -287,7 +287,58 @@ need_resched: END(resume_kernel) #endif - # SYSENTER call handler stub +GLOBAL(__begin_SYSENTER_singlestep_region) +/* + * All code from here through __end_SYSENTER_singlestep_region is subject + * to being single-stepped if a user program sets TF and executes SYSENTER. + * There is absolutely nothing that we can do to prevent this from happening + * (thanks Intel!). To keep our handling of this situation as simple as + * possible, we handle TF just like AC and NT, except that our #DB handler + * will ignore all of the single-step traps generated in this range. + */ + +#ifdef CONFIG_XEN +/* + * Xen doesn't set %esp to be precisely what the normal SYSENTER + * entry point expects, so fix it up before using the normal path. + */ +ENTRY(xen_sysenter_target) + addl $5*4, %esp /* remove xen-provided frame */ + jmp sysenter_past_esp +#endif + +/* + * 32-bit SYSENTER entry. + * + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * if X86_FEATURE_SEP is available. This is the preferred system call + * entry on 32-bit systems. + * + * The SYSENTER instruction, in principle, should *only* occur in the + * vDSO. In practice, a small number of Android devices were shipped + * with a copy of Bionic that inlined a SYSENTER instruction. This + * never happened in any of Google's Bionic versions -- it only happened + * in a narrow range of Intel-provided versions. + * + * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs. + * IF and VM in RFLAGS are cleared (IOW: interrupts are off). + * SYSENTER does not save anything on the stack, + * and does not save old EIP (!!!), ESP, or EFLAGS. + * + * To avoid losing track of EFLAGS.VM (and thus potentially corrupting + * user and/or vm86 state), we explicitly disable the SYSENTER + * instruction in vm86 mode by reprogramming the MSRs. + * + * Arguments: + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp user stack + * 0(%ebp) arg6 + */ ENTRY(entry_SYSENTER_32) movl TSS_sysenter_sp0(%esp), %esp sysenter_past_esp: @@ -301,6 +352,29 @@ sysenter_past_esp: SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ /* + * SYSENTER doesn't filter flags, so we need to clear NT, AC + * and TF ourselves. To save a few cycles, we can check whether + * either was set instead of doing an unconditional popfq. + * This needs to happen before enabling interrupts so that + * we don't get preempted with NT set. + * + * If TF is set, we will single-step all the way to here -- do_debug + * will ignore all the traps. (Yes, this is slow, but so is + * single-stepping in general. This allows us to avoid having + * a more complicated code to handle the case where a user program + * forces us to single-step through the SYSENTER entry code.) + * + * NB.: .Lsysenter_fix_flags is a label with the code under it moved + * out-of-line as an optimization: NT is unlikely to be set in the + * majority of the cases and instead of polluting the I$ unnecessarily, + * we're keeping that code behind a branch which will predict as + * not-taken and therefore its instructions won't be fetched. + */ + testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp) + jnz .Lsysenter_fix_flags +.Lsysenter_flags_fixed: + + /* * User mode is traced as though IRQs are on, and SYSENTER * turned them off. */ @@ -326,6 +400,15 @@ sysenter_past_esp: popl %eax /* pt_regs->ax */ /* + * Restore all flags except IF. (We restore IF separately because + * STI gives a one-instruction window in which we won't be interrupted, + * whereas POPF does not.) + */ + addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */ + btr $X86_EFLAGS_IF_BIT, (%esp) + popfl + + /* * Return back to the vDSO, which will pop ecx and edx. * Don't bother with DS and ES (they already contain __USER_DS). */ @@ -338,28 +421,63 @@ sysenter_past_esp: .popsection _ASM_EXTABLE(1b, 2b) PTGS_TO_GS_EX + +.Lsysenter_fix_flags: + pushl $X86_EFLAGS_FIXED + popfl + jmp .Lsysenter_flags_fixed +GLOBAL(__end_SYSENTER_singlestep_region) ENDPROC(entry_SYSENTER_32) - # system call handler stub +/* + * 32-bit legacy system call entry. + * + * 32-bit x86 Linux system calls traditionally used the INT $0x80 + * instruction. INT $0x80 lands here. + * + * This entry point can be used by any 32-bit perform system calls. + * Instances of INT $0x80 can be found inline in various programs and + * libraries. It is also used by the vDSO's __kernel_vsyscall + * fallback for hardware that doesn't support a faster entry method. + * Restarted 32-bit system calls also fall back to INT $0x80 + * regardless of what instruction was originally used to do the system + * call. (64-bit programs can use INT $0x80 as well, but they can + * only run on 64-bit kernels and therefore land in + * entry_INT80_compat.) + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * Arguments: + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp arg6 + */ ENTRY(entry_INT80_32) ASM_CLAC pushl %eax /* pt_regs->orig_ax */ SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ /* - * User mode is traced as though IRQs are on. Unlike the 64-bit - * case, INT80 is a trap gate on 32-bit kernels, so interrupts - * are already on (unless user code is messing around with iopl). + * User mode is traced as though IRQs are on, and the interrupt gate + * turned them off. */ + TRACE_IRQS_OFF movl %esp, %eax - call do_syscall_32_irqs_on + call do_int80_syscall_32 .Lsyscall_32_done: restore_all: TRACE_IRQS_IRET restore_all_notrace: #ifdef CONFIG_X86_ESPFIX32 + ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS /* * Warning: PT_OLDSS(%esp) contains the wrong/random values if we @@ -386,19 +504,6 @@ ENTRY(iret_exc ) #ifdef CONFIG_X86_ESPFIX32 ldt_ss: -#ifdef CONFIG_PARAVIRT - /* - * The kernel can't run on a non-flat stack if paravirt mode - * is active. Rather than try to fixup the high bits of - * ESP, bypass this code entirely. This may break DOSemu - * and/or Wine support in a paravirt VM, although the option - * is still available to implement the setting of the high - * 16-bits in the INTERRUPT_RETURN paravirt-op. - */ - cmpl $0, pv_info+PARAVIRT_enabled - jne restore_nocheck -#endif - /* * Setup and switch to ESPFIX stack * @@ -631,14 +736,6 @@ ENTRY(spurious_interrupt_bug) END(spurious_interrupt_bug) #ifdef CONFIG_XEN -/* - * Xen doesn't set %esp to be precisely what the normal SYSENTER - * entry point expects, so fix it up before using the normal path. - */ -ENTRY(xen_sysenter_target) - addl $5*4, %esp /* remove xen-provided frame */ - jmp sysenter_past_esp - ENTRY(xen_hypervisor_callback) pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL @@ -938,51 +1035,48 @@ error_code: jmp ret_from_exception END(page_fault) -/* - * Debug traps and NMI can happen at the one SYSENTER instruction - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * - * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. - * - * We just load the right stack, and push the three (known) values - * by hand onto the new stack - while updating the return eip past - * the instruction that would have done it for sysenter. - */ -.macro FIX_STACK offset ok label - cmpw $__KERNEL_CS, 4(%esp) - jne \ok -\label: - movl TSS_sysenter_sp0 + \offset(%esp), %esp - pushfl - pushl $__KERNEL_CS - pushl $sysenter_past_esp -.endm - ENTRY(debug) + /* + * #DB can happen at the first instruction of + * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this + * happens, then we will be running on a very small stack. We + * need to detect this condition and switch to the thread + * stack before calling any C code at all. + * + * If you edit this code, keep in mind that NMIs can happen in here. + */ ASM_CLAC - cmpl $entry_SYSENTER_32, (%esp) - jne debug_stack_correct - FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn -debug_stack_correct: pushl $-1 # mark this as an int SAVE_ALL - TRACE_IRQS_OFF xorl %edx, %edx # error code 0 movl %esp, %eax # pt_regs pointer + + /* Are we currently on the SYSENTER stack? */ + PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Ldebug_from_sysenter_stack + + TRACE_IRQS_OFF + call do_debug + jmp ret_from_exception + +.Ldebug_from_sysenter_stack: + /* We're on the SYSENTER stack. Switch off. */ + movl %esp, %ebp + movl PER_CPU_VAR(cpu_current_top_of_stack), %esp + TRACE_IRQS_OFF call do_debug + movl %ebp, %esp jmp ret_from_exception END(debug) /* - * NMI is doubly nasty. It can happen _while_ we're handling - * a debug fault, and the debug fault hasn't yet been able to - * clear up the stack. So we first check whether we got an - * NMI on the sysenter entry path, but after that we need to - * check whether we got an NMI on the debug path where the debug - * fault happened on the sysenter path. + * NMI is doubly nasty. It can happen on the first instruction of + * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning + * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32 + * switched stacks. We handle both conditions by simply checking whether we + * interrupted kernel code running on the SYSENTER stack. */ ENTRY(nmi) ASM_CLAC @@ -993,41 +1087,32 @@ ENTRY(nmi) popl %eax je nmi_espfix_stack #endif - cmpl $entry_SYSENTER_32, (%esp) - je nmi_stack_fixup - pushl %eax - movl %esp, %eax - /* - * Do not access memory above the end of our stack page, - * it might not exist. - */ - andl $(THREAD_SIZE-1), %eax - cmpl $(THREAD_SIZE-20), %eax - popl %eax - jae nmi_stack_correct - cmpl $entry_SYSENTER_32, 12(%esp) - je nmi_debug_stack_check -nmi_stack_correct: - pushl %eax + + pushl %eax # pt_regs->orig_ax SAVE_ALL xorl %edx, %edx # zero error code movl %esp, %eax # pt_regs pointer + + /* Are we currently on the SYSENTER stack? */ + PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) + subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ + cmpl $SIZEOF_SYSENTER_stack, %ecx + jb .Lnmi_from_sysenter_stack + + /* Not on SYSENTER stack. */ call do_nmi jmp restore_all_notrace -nmi_stack_fixup: - FIX_STACK 12, nmi_stack_correct, 1 - jmp nmi_stack_correct - -nmi_debug_stack_check: - cmpw $__KERNEL_CS, 16(%esp) - jne nmi_stack_correct - cmpl $debug, (%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn, (%esp) - ja nmi_stack_correct - FIX_STACK 24, nmi_stack_correct, 1 - jmp nmi_stack_correct +.Lnmi_from_sysenter_stack: + /* + * We're on the SYSENTER stack. Switch off. No one (not even debug) + * is using the thread stack right now, so it's safe for us to use it. + */ + movl %esp, %ebp + movl PER_CPU_VAR(cpu_current_top_of_stack), %esp + call do_nmi + movl %ebp, %esp + jmp restore_all_notrace #ifdef CONFIG_X86_ESPFIX32 nmi_espfix_stack: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9d34d3cfceb6..858b555e274b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64) /* * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. * + * This is the only entry point used for 64-bit system calls. The + * hardware interface is reasonably well designed and the register to + * argument mapping Linux uses fits well with the registers that are + * available when SYSCALL is used. + * + * SYSCALL instructions can be found inlined in libc implementations as + * well as some other programs and libraries. There are also a handful + * of SYSCALL instructions in the vDSO used, for example, as a + * clock_gettimeofday fallback. + * * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, * then loads new ss, cs, and rip from previously programmed MSRs. * rflags gets masked by a value from another MSR (so CLD and CLAC @@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + TRACE_IRQS_OFF + /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ - /* - * Re-enable interrupts. - * We use 'rsp_scratch' as a scratch space, hence irq-off block above - * must execute atomically in the face of possible interrupt-driven - * task preemption. We must enable interrupts only after we're done - * with using rsp_scratch: - */ - ENABLE_INTERRUPTS(CLBR_NONE) pushq %r11 /* pt_regs->flags */ pushq $__USER_CS /* pt_regs->cs */ pushq %rcx /* pt_regs->ip */ @@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r11 /* pt_regs->r11 */ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz tracesys + /* + * If we need to do entry work or if we guess we'll need to do + * exit work, go straight to the slow path. + */ + testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz entry_SYSCALL64_slow_path + entry_SYSCALL_64_fastpath: + /* + * Easy case: enable interrupts and issue the syscall. If the syscall + * needs pt_regs, we'll call a stub that disables interrupts again + * and jumps to the slow path. + */ + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) #if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max, %rax #else @@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath: #endif ja 1f /* return -ENOSYS (already in pt_regs->ax) */ movq %r10, %rcx + + /* + * This call instruction is handled specially in stub_ptregs_64. + * It might end up jumping to the slow path. If it jumps, RAX + * and all argument registers are clobbered. + */ call *sys_call_table(, %rax, 8) +.Lentry_SYSCALL_64_after_fastpath_call: + movq %rax, RAX(%rsp) 1: -/* - * Syscall return path ending with SYSRET (fast path). - * Has incompletely filled pt_regs. - */ - LOCKDEP_SYS_EXIT - /* - * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, - * it is too small to ever cause noticeable irq latency. - */ - DISABLE_INTERRUPTS(CLBR_NONE) /* - * We must check ti flags with interrupts (or at least preemption) - * off because we must *never* return to userspace without - * processing exit work that is enqueued if we're preempted here. - * In particular, returning to userspace with any of the one-shot - * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is - * very bad. + * If we get here, then we know that pt_regs is clean for SYSRET64. + * If we see that no exit work is required (which we are required + * to check with IRQs off), then we can go straight to SYSRET64. */ + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ + jnz 1f - RESTORE_C_REGS_EXCEPT_RCX_R11 + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON /* user mode is traced as IRQs on */ movq RIP(%rsp), %rcx movq EFLAGS(%rsp), %r11 + RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp), %rsp - /* - * 64-bit SYSRET restores rip from rcx, - * rflags from r11 (but RF and VM bits are forced to 0), - * cs and ss are loaded from MSRs. - * Restoration of rflags re-enables interrupts. - * - * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss - * descriptor is not reinitialized. This means that we should - * avoid SYSRET with SS == NULL, which could happen if we schedule, - * exit the kernel, and re-enter using an interrupt vector. (All - * interrupt entries on x86_64 set SS to NULL.) We prevent that - * from happening by reloading SS in __switch_to. (Actually - * detecting the failure in 64-bit userspace is tricky but can be - * done.) - */ USERGS_SYSRET64 -GLOBAL(int_ret_from_sys_call_irqs_off) +1: + /* + * The fast path looked good when we started, but something changed + * along the way and we need to switch to the slow path. Calling + * raise(3) will trigger this, for example. IRQs are off. + */ TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - jmp int_ret_from_sys_call - - /* Do syscall entry tracing */ -tracesys: - movq %rsp, %rdi - movl $AUDIT_ARCH_X86_64, %esi - call syscall_trace_enter_phase1 - test %rax, %rax - jnz tracesys_phase2 /* if needed, run the slow path */ - RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ - movq ORIG_RAX(%rsp), %rax - jmp entry_SYSCALL_64_fastpath /* and return to the fast path */ - -tracesys_phase2: SAVE_EXTRA_REGS movq %rsp, %rdi - movl $AUDIT_ARCH_X86_64, %esi - movq %rax, %rdx - call syscall_trace_enter_phase2 - - /* - * Reload registers from stack in case ptrace changed them. - * We don't reload %rax because syscall_trace_entry_phase2() returned - * the value it wants us to use in the table lookup. - */ - RESTORE_C_REGS_EXCEPT_RAX - RESTORE_EXTRA_REGS -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max, %rax -#else - andl $__SYSCALL_MASK, %eax - cmpl $__NR_syscall_max, %eax -#endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10, %rcx /* fixup for C */ - call *sys_call_table(, %rax, 8) - movq %rax, RAX(%rsp) -1: - /* Use IRET because user could have changed pt_regs->foo */ + call syscall_return_slowpath /* returns with IRQs disabled */ + jmp return_from_SYSCALL_64 -/* - * Syscall return path ending with IRET. - * Has correct iret frame. - */ -GLOBAL(int_ret_from_sys_call) +entry_SYSCALL64_slow_path: + /* IRQs are off. */ SAVE_EXTRA_REGS movq %rsp, %rdi - call syscall_return_slowpath /* returns with IRQs disabled */ + call do_syscall_64 /* returns with IRQs disabled */ + +return_from_SYSCALL_64: RESTORE_EXTRA_REGS TRACE_IRQS_IRETQ /* we're about to change IF */ @@ -355,83 +324,45 @@ opportunistic_sysret_failed: jmp restore_c_regs_and_iret END(entry_SYSCALL_64) +ENTRY(stub_ptregs_64) + /* + * Syscalls marked as needing ptregs land here. + * If we are on the fast path, we need to save the extra regs, + * which we achieve by trying again on the slow path. If we are on + * the slow path, the extra regs are already saved. + * + * RAX stores a pointer to the C function implementing the syscall. + * IRQs are on. + */ + cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) + jne 1f - .macro FORK_LIKE func -ENTRY(stub_\func) - SAVE_EXTRA_REGS 8 - jmp sys_\func -END(stub_\func) - .endm - - FORK_LIKE clone - FORK_LIKE fork - FORK_LIKE vfork - -ENTRY(stub_execve) - call sys_execve -return_from_execve: - testl %eax, %eax - jz 1f - /* exec failed, can use fast SYSRET code path in this case */ - ret -1: - /* must use IRET code path (pt_regs->cs may have changed) */ - addq $8, %rsp - ZERO_EXTRA_REGS - movq %rax, RAX(%rsp) - jmp int_ret_from_sys_call -END(stub_execve) -/* - * Remaining execve stubs are only 7 bytes long. - * ENTRY() often aligns to 16 bytes, which in this case has no benefits. - */ - .align 8 -GLOBAL(stub_execveat) - call sys_execveat - jmp return_from_execve -END(stub_execveat) - -#if defined(CONFIG_X86_X32_ABI) - .align 8 -GLOBAL(stub_x32_execve) - call compat_sys_execve - jmp return_from_execve -END(stub_x32_execve) - .align 8 -GLOBAL(stub_x32_execveat) - call compat_sys_execveat - jmp return_from_execve -END(stub_x32_execveat) -#endif - -/* - * sigreturn is special because it needs to restore all registers on return. - * This cannot be done with SYSRET, so use the IRET return path instead. - */ -ENTRY(stub_rt_sigreturn) /* - * SAVE_EXTRA_REGS result is not normally needed: - * sigreturn overwrites all pt_regs->GPREGS. - * But sigreturn can fail (!), and there is no easy way to detect that. - * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error, - * we SAVE_EXTRA_REGS here. + * Called from fast path -- disable IRQs again, pop return address + * and jump to slow path */ - SAVE_EXTRA_REGS 8 - call sys_rt_sigreturn -return_from_stub: - addq $8, %rsp - RESTORE_EXTRA_REGS - movq %rax, RAX(%rsp) - jmp int_ret_from_sys_call -END(stub_rt_sigreturn) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + popq %rax + jmp entry_SYSCALL64_slow_path -#ifdef CONFIG_X86_X32_ABI -ENTRY(stub_x32_rt_sigreturn) - SAVE_EXTRA_REGS 8 - call sys32_x32_rt_sigreturn - jmp return_from_stub -END(stub_x32_rt_sigreturn) -#endif +1: + /* Called from C */ + jmp *%rax /* called from C */ +END(stub_ptregs_64) + +.macro ptregs_stub func +ENTRY(ptregs_\func) + leaq \func(%rip), %rax + jmp stub_ptregs_64 +END(ptregs_\func) +.endm + +/* Instantiate ptregs_stub for each ptregs-using syscall */ +#define __SYSCALL_64_QUAL_(sym) +#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym +#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) +#include <asm/syscalls_64.h> /* * A newly forked process directly context switches into this address. @@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn) * rdi: prev task we switched from */ ENTRY(ret_from_fork) - LOCK ; btr $TIF_FORK, TI_flags(%r8) pushq $0x0002 @@ -447,28 +377,32 @@ ENTRY(ret_from_fork) call schedule_tail /* rdi: 'prev' task parameter */ - RESTORE_EXTRA_REGS - testb $3, CS(%rsp) /* from kernel_thread? */ + jnz 1f /* - * By the time we get here, we have no idea whether our pt_regs, - * ti flags, and ti status came from the 64-bit SYSCALL fast path, - * the slow path, or one of the 32-bit compat paths. - * Use IRET code path to return, since it can safely handle - * all of the above. + * We came from kernel_thread. This code path is quite twisted, and + * someone should clean it up. + * + * copy_thread_tls stashes the function pointer in RBX and the + * parameter to be passed in RBP. The called function is permitted + * to call do_execve and thereby jump to user mode. */ - jnz int_ret_from_sys_call + movq RBP(%rsp), %rdi + call *RBX(%rsp) + movl $0, RAX(%rsp) /* - * We came from kernel_thread - * nb: we depend on RESTORE_EXTRA_REGS above + * Fall through as though we're exiting a syscall. This makes a + * twisted sort of sense if we just called do_execve. */ - movq %rbp, %rdi - call *%rbx - movl $0, RAX(%rsp) - RESTORE_EXTRA_REGS - jmp int_ret_from_sys_call + +1: + movq %rsp, %rdi + call syscall_return_slowpath /* returns with IRQs disabled */ + TRACE_IRQS_ON /* user mode is traced as IRQS on */ + SWAPGS + jmp restore_regs_and_iret END(ret_from_fork) /* diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index ff1c6d61f332..847f2f0c31e5 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -19,12 +19,21 @@ .section .entry.text, "ax" /* - * 32-bit SYSENTER instruction entry. + * 32-bit SYSENTER entry. * - * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs. - * IF and VM in rflags are cleared (IOW: interrupts are off). + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * on 64-bit kernels running on Intel CPUs. + * + * The SYSENTER instruction, in principle, should *only* occur in the + * vDSO. In practice, a small number of Android devices were shipped + * with a copy of Bionic that inlined a SYSENTER instruction. This + * never happened in any of Google's Bionic versions -- it only happened + * in a narrow range of Intel-provided versions. + * + * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs. + * IF and VM in RFLAGS are cleared (IOW: interrupts are off). * SYSENTER does not save anything on the stack, - * and does not save old rip (!!!) and rflags. + * and does not save old RIP (!!!), RSP, or RFLAGS. * * Arguments: * eax system call number @@ -35,10 +44,6 @@ * edi arg5 * ebp user stack * 0(%ebp) arg6 - * - * This is purely a fast path. For anything complicated we use the int 0x80 - * path below. We set up a complete hardware stack frame to share code - * with the int 0x80 path. */ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ @@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat) */ pushfq /* pt_regs->flags (except IF = 0) */ orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ - ASM_CLAC /* Clear AC after saving FLAGS */ - pushq $__USER32_CS /* pt_regs->cs */ xorq %r8,%r8 pushq %r8 /* pt_regs->ip = 0 (placeholder) */ @@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat) cld /* - * Sysenter doesn't filter flags, so we need to clear NT + * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether - * NT was set instead of doing an unconditional popfq. + * either was set instead of doing an unconditional popfq. * This needs to happen before enabling interrupts so that * we don't get preempted with NT set. * + * If TF is set, we will single-step all the way to here -- do_debug + * will ignore all the traps. (Yes, this is slow, but so is + * single-stepping in general. This allows us to avoid having + * a more complicated code to handle the case where a user program + * forces us to single-step through the SYSENTER entry code.) + * * NB.: .Lsysenter_fix_flags is a label with the code under it moved * out-of-line as an optimization: NT is unlikely to be set in the * majority of the cases and instead of polluting the I$ unnecessarily, * we're keeping that code behind a branch which will predict as * not-taken and therefore its instructions won't be fetched. */ - testl $X86_EFLAGS_NT, EFLAGS(%rsp) + testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp) jnz .Lsysenter_fix_flags .Lsysenter_flags_fixed: @@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat) pushq $X86_EFLAGS_FIXED popfq jmp .Lsysenter_flags_fixed +GLOBAL(__end_entry_SYSENTER_compat) ENDPROC(entry_SYSENTER_compat) /* - * 32-bit SYSCALL instruction entry. + * 32-bit SYSCALL entry. + * + * 32-bit system calls through the vDSO's __kernel_vsyscall enter here + * on 64-bit kernels running on AMD CPUs. + * + * The SYSCALL instruction, in principle, should *only* occur in the + * vDSO. In practice, it appears that this really is the case. + * As evidence: + * + * - The calling convention for SYSCALL has changed several times without + * anyone noticing. * - * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, - * then loads new ss, cs, and rip from previously programmed MSRs. - * rflags gets masked by a value from another MSR (so CLD and CLAC - * are not needed). SYSCALL does not save anything on the stack - * and does not change rsp. + * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything + * user task that did SYSCALL without immediately reloading SS + * would randomly crash. * - * Note: rflags saving+masking-with-MSR happens only in Long mode + * - Most programmers do not directly target AMD CPUs, and the 32-bit + * SYSCALL instruction does not exist on Intel CPUs. Even on AMD + * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels + * because the SYSCALL instruction in legacy/native 32-bit mode (as + * opposed to compat mode) is sufficiently poorly designed as to be + * essentially unusable. + * + * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves + * RFLAGS to R11, then loads new SS, CS, and RIP from previously + * programmed MSRs. RFLAGS gets masked by a value from another MSR + * (so CLD and CLAC are not needed). SYSCALL does not save anything on + * the stack and does not change RSP. + * + * Note: RFLAGS saving+masking-with-MSR happens only in Long mode * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it). - * Don't get confused: rflags saving+masking depends on Long Mode Active bit + * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). * @@ -236,7 +267,21 @@ sysret32_from_system_call: END(entry_SYSCALL_compat) /* - * Emulated IA32 system calls via int 0x80. + * 32-bit legacy system call entry. + * + * 32-bit x86 Linux system calls traditionally used the INT $0x80 + * instruction. INT $0x80 lands here. + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. * * Arguments: * eax system call number @@ -245,22 +290,14 @@ END(entry_SYSCALL_compat) * edx arg3 * esi arg4 * edi arg5 - * ebp arg6 (note: not saved in the stack frame, should not be touched) - * - * Notes: - * Uses the same stack frame as the x86-64 version. - * All registers except eax must be saved (but ptrace may violate that). - * Arguments are zero extended. For system calls that want sign extension and - * take long arguments a wrapper is needed. Most calls can just be called - * directly. - * Assumes it is only called from user space and entered with interrupts off. + * ebp arg6 */ - ENTRY(entry_INT80_compat) /* * Interrupts are off on entry. */ PARAVIRT_ADJUST_EXCEPTION_FRAME + ASM_CLAC /* Do this early to minimize exposure */ SWAPGS /* @@ -299,7 +336,7 @@ ENTRY(entry_INT80_compat) TRACE_IRQS_OFF movq %rsp, %rdi - call do_syscall_32_irqs_off + call do_int80_syscall_32 .Lsyscall_32_done: /* Go back to user mode. */ diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 9a6649857106..8f895ee13a1c 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -6,17 +6,11 @@ #include <asm/asm-offsets.h> #include <asm/syscall.h> -#ifdef CONFIG_IA32_EMULATION -#define SYM(sym, compat) compat -#else -#define SYM(sym, compat) sym -#endif - -#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; +#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; #include <asm/syscalls_32.h> #undef __SYSCALL_I386 -#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat), +#define __SYSCALL_I386(nr, sym, qual) [nr] = sym, extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 41283d22be7a..9dbc5abb6162 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -6,19 +6,14 @@ #include <asm/asm-offsets.h> #include <asm/syscall.h> -#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) +#define __SYSCALL_64_QUAL_(sym) sym +#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym -#ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat) -#else -# define __SYSCALL_X32(nr, sym, compat) /* nothing */ -#endif - -#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include <asm/syscalls_64.h> #undef __SYSCALL_64 -#define __SYSCALL_64(nr, sym, compat) [nr] = sym, +#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index dc1040a50bdc..2e5b565adacc 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -21,7 +21,7 @@ 12 common brk sys_brk 13 64 rt_sigaction sys_rt_sigaction 14 common rt_sigprocmask sys_rt_sigprocmask -15 64 rt_sigreturn stub_rt_sigreturn +15 64 rt_sigreturn sys_rt_sigreturn/ptregs 16 64 ioctl sys_ioctl 17 common pread64 sys_pread64 18 common pwrite64 sys_pwrite64 @@ -62,10 +62,10 @@ 53 common socketpair sys_socketpair 54 64 setsockopt sys_setsockopt 55 64 getsockopt sys_getsockopt -56 common clone stub_clone -57 common fork stub_fork -58 common vfork stub_vfork -59 64 execve stub_execve +56 common clone sys_clone/ptregs +57 common fork sys_fork/ptregs +58 common vfork sys_vfork/ptregs +59 64 execve sys_execve/ptregs 60 common exit sys_exit 61 common wait4 sys_wait4 62 common kill sys_kill @@ -178,7 +178,7 @@ 169 common reboot sys_reboot 170 common sethostname sys_sethostname 171 common setdomainname sys_setdomainname -172 common iopl sys_iopl +172 common iopl sys_iopl/ptregs 173 common ioperm sys_ioperm 174 64 create_module 175 common init_module sys_init_module @@ -328,7 +328,7 @@ 319 common memfd_create sys_memfd_create 320 common kexec_file_load sys_kexec_file_load 321 common bpf sys_bpf -322 64 execveat stub_execveat +322 64 execveat sys_execveat/ptregs 323 common userfaultfd sys_userfaultfd 324 common membarrier sys_membarrier 325 common mlock2 sys_mlock2 @@ -339,14 +339,14 @@ # for native 64-bit operation. # 512 x32 rt_sigaction compat_sys_rt_sigaction -513 x32 rt_sigreturn stub_x32_rt_sigreturn +513 x32 rt_sigreturn sys32_x32_rt_sigreturn 514 x32 ioctl compat_sys_ioctl 515 x32 readv compat_sys_readv 516 x32 writev compat_sys_writev 517 x32 recvfrom compat_sys_recvfrom 518 x32 sendmsg compat_sys_sendmsg 519 x32 recvmsg compat_sys_recvmsg -520 x32 execve stub_x32_execve +520 x32 execve compat_sys_execve/ptregs 521 x32 ptrace compat_sys_ptrace 522 x32 rt_sigpending compat_sys_rt_sigpending 523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait @@ -371,4 +371,4 @@ 542 x32 getsockopt compat_sys_getsockopt 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit -545 x32 execveat stub_x32_execveat +545 x32 execveat compat_sys_execveat/ptregs diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh index 0e7f8ec071e7..cd3d3015d7df 100644 --- a/arch/x86/entry/syscalls/syscalltbl.sh +++ b/arch/x86/entry/syscalls/syscalltbl.sh @@ -3,13 +3,63 @@ in="$1" out="$2" +syscall_macro() { + abi="$1" + nr="$2" + entry="$3" + + # Entry can be either just a function name or "function/qualifier" + real_entry="${entry%%/*}" + qualifier="${entry:${#real_entry}}" # Strip the function name + qualifier="${qualifier:1}" # Strip the slash, if any + + echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)" +} + +emit() { + abi="$1" + nr="$2" + entry="$3" + compat="$4" + + if [ "$abi" == "64" -a -n "$compat" ]; then + echo "a compat entry for a 64-bit syscall makes no sense" >&2 + exit 1 + fi + + if [ -z "$compat" ]; then + if [ -n "$entry" ]; then + syscall_macro "$abi" "$nr" "$entry" + fi + else + echo "#ifdef CONFIG_X86_32" + if [ -n "$entry" ]; then + syscall_macro "$abi" "$nr" "$entry" + fi + echo "#else" + syscall_macro "$abi" "$nr" "$compat" + echo "#endif" + fi +} + grep '^[0-9]' "$in" | sort -n | ( while read nr abi name entry compat; do abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` - if [ -n "$compat" ]; then - echo "__SYSCALL_${abi}($nr, $entry, $compat)" - elif [ -n "$entry" ]; then - echo "__SYSCALL_${abi}($nr, $entry, $entry)" + if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then + # COMMON is the same as 64, except that we don't expect X32 + # programs to use it. Our expectation has nothing to do with + # any generated code, so treat them the same. + emit 64 "$nr" "$entry" "$compat" + elif [ "$abi" == "X32" ]; then + # X32 is equivalent to 64 on an X32-compatible kernel. + echo "#ifdef CONFIG_X86_X32_ABI" + emit 64 "$nr" "$entry" "$compat" + echo "#endif" + elif [ "$abi" == "I386" ]; then + emit "$abi" "$nr" "$entry" "$compat" + else + echo "Unknown abi $abi" >&2 + exit 1 fi done ) > "$out" diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 0224987556ce..63a03bb91497 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, fprintf(outfile, "#include <asm/vdso.h>\n"); fprintf(outfile, "\n"); fprintf(outfile, - "static unsigned char raw_data[%lu] __page_aligned_data = {", + "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {", mapping_size); for (j = 0; j < stripped_len; j++) { if (j % 10 == 0) @@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, } fprintf(outfile, "\n};\n\n"); - fprintf(outfile, "static struct page *pages[%lu];\n\n", - mapping_size / 4096); - fprintf(outfile, "const struct vdso_image %s = {\n", name); fprintf(outfile, "\t.data = raw_data,\n"); fprintf(outfile, "\t.size = %lu,\n", mapping_size); - fprintf(outfile, "\t.text_mapping = {\n"); - fprintf(outfile, "\t\t.name = \"[vdso]\",\n"); - fprintf(outfile, "\t\t.pages = pages,\n"); - fprintf(outfile, "\t},\n"); if (alt_sec) { fprintf(outfile, "\t.alt = %lu,\n", (unsigned long)GET_LE(&alt_sec->sh_offset)); diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 08a317a9ae4b..7853b53959cd 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/mm_types.h> -#include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/vdso.h> diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 3a1d9297074b..0109ac6cb79c 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -3,7 +3,7 @@ */ #include <asm/dwarf2.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> /* diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index b8f69e264ac4..10f704584922 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -20,6 +20,7 @@ #include <asm/page.h> #include <asm/hpet.h> #include <asm/desc.h> +#include <asm/cpufeature.h> #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; @@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1; void __init init_vdso_image(const struct vdso_image *image) { - int i; - int npages = (image->size) / PAGE_SIZE; - BUG_ON(image->size % PAGE_SIZE != 0); - for (i = 0; i < npages; i++) - image->text_mapping.pages[i] = - virt_to_page(image->data + i*PAGE_SIZE); apply_alternatives((struct alt_instr *)(image->data + image->alt), (struct alt_instr *)(image->data + image->alt + @@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) #endif } +static int vdso_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + const struct vdso_image *image = vma->vm_mm->context.vdso_image; + + if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size) + return VM_FAULT_SIGBUS; + + vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT)); + get_page(vmf->page); + return 0; +} + +static const struct vm_special_mapping text_mapping = { + .name = "[vdso]", + .fault = vdso_fault, +}; + +static int vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + const struct vdso_image *image = vma->vm_mm->context.vdso_image; + long sym_offset; + int ret = -EFAULT; + + if (!image) + return VM_FAULT_SIGBUS; + + sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) + + image->sym_vvar_start; + + /* + * Sanity check: a symbol offset of zero means that the page + * does not exist for this vdso image, not that the page is at + * offset zero relative to the text mapping. This should be + * impossible here, because sym_offset should only be zero for + * the page past the end of the vvar mapping. + */ + if (sym_offset == 0) + return VM_FAULT_SIGBUS; + + if (sym_offset == image->sym_vvar_page) { + ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, + __pa_symbol(&__vvar_page) >> PAGE_SHIFT); + } else if (sym_offset == image->sym_hpet_page) { +#ifdef CONFIG_HPET_TIMER + if (hpet_address && vclock_was_used(VCLOCK_HPET)) { + ret = vm_insert_pfn_prot( + vma, + (unsigned long)vmf->virtual_address, + hpet_address >> PAGE_SHIFT, + pgprot_noncached(PAGE_READONLY)); + } +#endif + } else if (sym_offset == image->sym_pvclock_page) { + struct pvclock_vsyscall_time_info *pvti = + pvclock_pvti_cpu0_va(); + if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { + ret = vm_insert_pfn( + vma, + (unsigned long)vmf->virtual_address, + __pa(pvti) >> PAGE_SHIFT); + } + } + + if (ret == 0 || ret == -EBUSY) + return VM_FAULT_NOPAGE; + + return VM_FAULT_SIGBUS; +} + static int map_vdso(const struct vdso_image *image, bool calculate_addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long addr, text_start; int ret = 0; - static struct page *no_pages[] = {NULL}; - static struct vm_special_mapping vvar_mapping = { + static const struct vm_special_mapping vvar_mapping = { .name = "[vvar]", - .pages = no_pages, + .fault = vvar_fault, }; - struct pvclock_vsyscall_time_info *pvti; if (calculate_addr) { addr = vdso_addr(current->mm->start_stack, @@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) text_start = addr - image->sym_vvar_start; current->mm->context.vdso = (void __user *)text_start; + current->mm->context.vdso_image = image; /* * MAYWRITE to allow gdb to COW and set breakpoints @@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) image->size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &image->text_mapping); + &text_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); @@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) vma = _install_special_mapping(mm, addr, -image->sym_vvar_start, - VM_READ|VM_MAYREAD, + VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| + VM_PFNMAP, &vvar_mapping); if (IS_ERR(vma)) { @@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) goto up_fail; } - if (image->sym_vvar_page) - ret = remap_pfn_range(vma, - text_start + image->sym_vvar_page, - __pa_symbol(&__vvar_page) >> PAGE_SHIFT, - PAGE_SIZE, - PAGE_READONLY); - - if (ret) - goto up_fail; - -#ifdef CONFIG_HPET_TIMER - if (hpet_address && image->sym_hpet_page) { - ret = io_remap_pfn_range(vma, - text_start + image->sym_hpet_page, - hpet_address >> PAGE_SHIFT, - PAGE_SIZE, - pgprot_noncached(PAGE_READONLY)); - - if (ret) - goto up_fail; - } -#endif - - pvti = pvclock_pvti_cpu0_va(); - if (pvti && image->sym_pvclock_page) { - ret = remap_pfn_range(vma, - text_start + image->sym_pvclock_page, - __pa(pvti) >> PAGE_SHIFT, - PAGE_SIZE, - PAGE_READONLY); - - if (ret) - goto up_fail; - } - up_fail: if (ret) current->mm->context.vdso = NULL; @@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg) #ifdef CONFIG_NUMA node = cpu_to_node(cpu); #endif - if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) + if (static_cpu_has(X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); /* diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c index 51e330416995..0fb3a104ac62 100644 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c @@ -16,6 +16,8 @@ #include <asm/vgtod.h> #include <asm/vvar.h> +int vclocks_used __read_mostly; + DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); void update_vsyscall_tz(void) @@ -26,12 +28,17 @@ void update_vsyscall_tz(void) void update_vsyscall(struct timekeeper *tk) { + int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; + /* Mark the new vclock used. */ + BUILD_BUG_ON(VCLOCK_MAX >= 32); + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); + gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + vdata->vclock_mode = vclock_mode; vdata->cycle_last = tk->tkr_mono.cycle_last; vdata->mask = tk->tkr_mono.mask; vdata->mult = tk->tkr_mono.mult; diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile new file mode 100644 index 000000000000..fdfea1511cc0 --- /dev/null +++ b/arch/x86/events/Makefile @@ -0,0 +1,13 @@ +obj-y += core.o + +obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o +obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o +endif +obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/events/amd/core.c index 58610539b048..049ada8d4e9c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/events/amd/core.c @@ -5,7 +5,7 @@ #include <linux/slab.h> #include <asm/apicdef.h> -#include "perf_event.h" +#include "../perf_event.h" static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/events/amd/ibs.c index aa12f9509cfb..51087c29b2c2 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -14,7 +14,7 @@ #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" static u32 ibs_caps; diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/events/amd/iommu.c index 97242a9242bd..635e5eba0caf 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -16,8 +16,8 @@ #include <linux/cpumask.h> #include <linux/slab.h> -#include "perf_event.h" -#include "perf_event_amd_iommu.h" +#include "../perf_event.h" +#include "iommu.h" #define COUNTER_SHIFT 16 diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/events/amd/iommu.h index 845d173278e3..845d173278e3 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.h +++ b/arch/x86/events/amd/iommu.h diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/events/amd/uncore.c index 19a17363a21d..3db9569e658c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -323,6 +323,8 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) return 0; fail: + if (amd_uncore_nb) + *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; kfree(uncore_nb); return -ENOMEM; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/events/core.c index 7402c8182813..5e830d0c95c9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/events/core.c @@ -597,6 +597,19 @@ void x86_pmu_disable_all(void) } } +/* + * There may be PMI landing after enabled=0. The PMI hitting could be before or + * after disable_all. + * + * If PMI hits before disable_all, the PMU will be disabled in the NMI handler. + * It will not be re-enabled in the NMI handler again, because enabled=0. After + * handling the NMI, disable_all will be called, which will not change the + * state either. If PMI hits after disable_all, the PMU is already disabled + * before entering NMI handler. The NMI handler will not change the state + * either. + * + * So either situation is harmless. + */ static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/events/intel/bts.c index 2cad71d1b14c..b99dc9258c0f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/events/intel/bts.c @@ -26,7 +26,7 @@ #include <asm-generic/sizes.h> #include <asm/perf_event.h> -#include "perf_event.h" +#include "../perf_event.h" struct bts_ctx { struct perf_output_handle handle; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/events/intel/core.c index fed2ab1f1065..68fa55b4d42e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/events/intel/core.c @@ -18,7 +18,7 @@ #include <asm/hardirq.h> #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" /* * Intel PerfMon, used on Core and later. @@ -1502,7 +1502,15 @@ static __initconst const u64 knl_hw_cache_extra_regs }; /* - * Use from PMIs where the LBRs are already disabled. + * Used from PMIs where the LBRs are already disabled. + * + * This function could be called consecutively. It is required to remain in + * disabled state if called consecutively. + * + * During consecutive calls, the same disable value will be written to related + * registers, so the PMU state remains unchanged. hw.state in + * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive + * calls. */ static void __intel_pmu_disable_all(void) { @@ -1884,6 +1892,16 @@ again: if (__test_and_clear_bit(62, (unsigned long *)&status)) { handled++; x86_pmu.drain_pebs(regs); + /* + * There are cases where, even though, the PEBS ovfl bit is set + * in GLOBAL_OVF_STATUS, the PEBS events may also have their + * overflow bits set for their counters. We must clear them + * here because they have been processed as exact samples in + * the drain_pebs() routine. They must not be processed again + * in the for_each_bit_set() loop for regular samples below. + */ + status &= ~cpuc->pebs_enabled; + status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; } /* @@ -1929,7 +1947,10 @@ again: goto again; done: - __intel_pmu_enable_all(0, true); + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + if (cpuc->enabled) + __intel_pmu_enable_all(0, true); + /* * Only unmask the NMI after the overflow counters * have been reset. This avoids spurious NMIs on @@ -3396,6 +3417,7 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + intel_pmu_pebs_data_source_nhm(); x86_add_quirk(intel_nehalem_quirk); pr_cont("Nehalem events, "); @@ -3459,6 +3481,7 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + intel_pmu_pebs_data_source_nhm(); pr_cont("Westmere events, "); break; @@ -3581,7 +3604,7 @@ __init int intel_pmu_init(void) intel_pmu_lbr_init_hsw(); x86_pmu.event_constraints = intel_bdw_event_constraints; - x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; + x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints; x86_pmu.extra_regs = intel_snbep_extra_regs; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/events/intel/cqm.c index a316ca96f1b6..93cb412a5579 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -7,7 +7,7 @@ #include <linux/perf_event.h> #include <linux/slab.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" #define MSR_IA32_PQR_ASSOC 0x0c8f #define MSR_IA32_QM_CTR 0x0c8e @@ -1244,15 +1244,12 @@ static struct pmu intel_cqm_pmu = { static inline void cqm_pick_event_reader(int cpu) { - int phys_id = topology_physical_package_id(cpu); - int i; + int reader; - for_each_cpu(i, &cqm_cpumask) { - if (phys_id == topology_physical_package_id(i)) - return; /* already got reader for this socket */ - } - - cpumask_set_cpu(cpu, &cqm_cpumask); + /* First online cpu in package becomes the reader */ + reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu)); + if (reader >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cqm_cpumask); } static void intel_cqm_cpu_starting(unsigned int cpu) @@ -1270,24 +1267,17 @@ static void intel_cqm_cpu_starting(unsigned int cpu) static void intel_cqm_cpu_exit(unsigned int cpu) { - int phys_id = topology_physical_package_id(cpu); - int i; + int target; - /* - * Is @cpu a designated cqm reader? - */ + /* Is @cpu the current cqm reader for this package ? */ if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask)) return; - for_each_online_cpu(i) { - if (i == cpu) - continue; + /* Find another online reader in this package */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - if (phys_id == topology_physical_package_id(i)) { - cpumask_set_cpu(i, &cqm_cpumask); - break; - } - } + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &cqm_cpumask); } static int intel_cqm_cpu_notifier(struct notifier_block *nb, diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/events/intel/cstate.c index 75a38b5a2e26..7946c4231169 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,7 +89,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/events/intel/ds.c index 7c79261ed939..ce7211a07c0b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/events/intel/ds.c @@ -5,7 +5,7 @@ #include <asm/perf_event.h> #include <asm/insn.h> -#include "perf_event.h" +#include "../perf_event.h" /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 @@ -51,7 +51,8 @@ union intel_x86_pebs_dse { #define OP_LH (P(OP, LOAD) | P(LVL, HIT)) #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) -static const u64 pebs_data_source[] = { +/* Version for Sandy Bridge and later */ +static u64 pebs_data_source[] = { P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */ OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ @@ -70,6 +71,14 @@ static const u64 pebs_data_source[] = { OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ }; +/* Patch up minor differences in the bits */ +void __init intel_pmu_pebs_data_source_nhm(void) +{ + pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT); + pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); + pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; @@ -269,7 +278,7 @@ static int alloc_pebs_buffer(int cpu) if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node); + buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; @@ -286,7 +295,7 @@ static int alloc_pebs_buffer(int cpu) per_cpu(insn_buffer, cpu) = ibuffer; } - max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; + max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; ds->pebs_buffer_base = (u64)(unsigned long)buffer; ds->pebs_index = ds->pebs_buffer_base; @@ -722,6 +731,30 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_bdw_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + + struct event_constraint intel_skl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ @@ -1319,6 +1352,7 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; int format = x86_pmu.intel_cap.pebs_format; @@ -1327,6 +1361,14 @@ void __init intel_ds_init(void) case 0: pr_cont("PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); + /* + * Using >PAGE_SIZE buffers makes the WRMSR to + * PERF_GLOBAL_CTRL in intel_pmu_enable_all() + * mysteriously hang on Core2. + * + * As a workaround, we don't do this. + */ + x86_pmu.pebs_buffer_size = PAGE_SIZE; x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; break; diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/events/intel/knc.c index 5b0c232d1ee6..548d5f774b07 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/events/intel/knc.c @@ -5,7 +5,7 @@ #include <asm/hardirq.h> -#include "perf_event.h" +#include "../perf_event.h" static const u64 knc_perfmon_event_map[] = { @@ -263,7 +263,9 @@ again: goto again; done: - knc_pmu_enable_all(0); + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + if (cpuc->enabled) + knc_pmu_enable_all(0); return handled; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/events/intel/lbr.c index 288f22ddc6d8..6c3b7c1780c9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -5,7 +5,7 @@ #include <asm/msr.h> #include <asm/insn.h> -#include "perf_event.h" +#include "../perf_event.h" enum { LBR_FORMAT_32 = 0x00, diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/events/intel/p4.c index f2e56783af3d..0a5ede187d9c 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/events/intel/p4.c @@ -13,7 +13,7 @@ #include <asm/hardirq.h> #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" #define P4_CNTR_LIMIT 3 /* diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/events/intel/p6.c index 7c1a0c07b607..1f5c47ab4c65 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/events/intel/p6.c @@ -1,7 +1,7 @@ #include <linux/perf_event.h> #include <linux/types.h> -#include "perf_event.h" +#include "../perf_event.h" /* * Not sure about some of these diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/events/intel/pt.c index c0bbd1033b7c..6af7cf71d6b2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/events/intel/pt.c @@ -29,8 +29,8 @@ #include <asm/io.h> #include <asm/intel_pt.h> -#include "perf_event.h" -#include "intel_pt.h" +#include "../perf_event.h" +#include "pt.h" static DEFINE_PER_CPU(struct pt, pt_ctx); diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/events/intel/pt.h index 336878a5d205..336878a5d205 100644 --- a/arch/x86/kernel/cpu/intel_pt.h +++ b/arch/x86/events/intel/pt.h diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/events/intel/rapl.c index 24a351ad628d..b834a3f55a01 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -44,11 +44,14 @@ * the duration of the measurement. Tools may use a function such as * ldexp(raw_count, -32); */ + +#define pr_fmt(fmt) "RAPL PMU: " fmt + #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" /* * RAPL energy status counters @@ -107,7 +110,7 @@ static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ static struct kobj_attribute format_attr_##_var = \ __ATTR(_name, 0444, __rapl_##_var##_show, NULL) -#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ +#define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ static struct perf_pmu_events_attr event_attr_##v = { \ @@ -117,23 +120,33 @@ static struct perf_pmu_events_attr event_attr_##v = { \ }; struct rapl_pmu { - spinlock_t lock; - int n_active; /* number of active events */ - struct list_head active_list; - struct pmu *pmu; /* pointer to rapl_pmu_class */ - ktime_t timer_interval; /* in ktime_t unit */ - struct hrtimer hrtimer; + raw_spinlock_t lock; + int n_active; + int cpu; + struct list_head active_list; + struct pmu *pmu; + ktime_t timer_interval; + struct hrtimer hrtimer; }; -static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */ -static struct pmu rapl_pmu_class; +struct rapl_pmus { + struct pmu pmu; + unsigned int maxpkg; + struct rapl_pmu *pmus[]; +}; + + /* 1/2^hw_unit Joule */ +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; +static struct rapl_pmus *rapl_pmus; static cpumask_t rapl_cpu_mask; -static int rapl_cntr_mask; +static unsigned int rapl_cntr_mask; +static u64 rapl_timer_ms; -static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); -static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); +static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) +{ + return rapl_pmus->pmus[topology_logical_package_id(cpu)]; +} -static struct x86_pmu_quirk *rapl_quirks; static inline u64 rapl_read_counter(struct perf_event *event) { u64 raw; @@ -141,19 +154,10 @@ static inline u64 rapl_read_counter(struct perf_event *event) return raw; } -#define rapl_add_quirk(func_) \ -do { \ - static struct x86_pmu_quirk __quirk __initdata = { \ - .func = func_, \ - }; \ - __quirk.next = rapl_quirks; \ - rapl_quirks = &__quirk; \ -} while (0) - static inline u64 rapl_scale(u64 v, int cfg) { if (cfg > NR_RAPL_DOMAINS) { - pr_warn("invalid domain %d, failed to scale data\n", cfg); + pr_warn("Invalid domain %d, failed to scale data\n", cfg); return v; } /* @@ -206,27 +210,21 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) HRTIMER_MODE_REL_PINNED); } -static void rapl_stop_hrtimer(struct rapl_pmu *pmu) -{ - hrtimer_cancel(&pmu->hrtimer); -} - static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); struct perf_event *event; unsigned long flags; if (!pmu->n_active) return HRTIMER_NORESTART; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); - list_for_each_entry(event, &pmu->active_list, active_entry) { + list_for_each_entry(event, &pmu->active_list, active_entry) rapl_event_update(event); - } - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); hrtimer_forward_now(hrtimer, pmu->timer_interval); @@ -260,28 +258,28 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, static void rapl_pmu_event_start(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = event->pmu_private; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); __rapl_pmu_event_start(pmu, event); - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); } static void rapl_pmu_event_stop(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); /* mark event as deactivated and stopped */ if (!(hwc->state & PERF_HES_STOPPED)) { WARN_ON_ONCE(pmu->n_active <= 0); pmu->n_active--; if (pmu->n_active == 0) - rapl_stop_hrtimer(pmu); + hrtimer_cancel(&pmu->hrtimer); list_del(&event->active_entry); @@ -299,23 +297,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); } static int rapl_pmu_event_add(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (mode & PERF_EF_START) __rapl_pmu_event_start(pmu, event); - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); return 0; } @@ -329,15 +327,19 @@ static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; int bit, msr, ret = 0; + struct rapl_pmu *pmu; /* only look at RAPL events */ - if (event->attr.type != rapl_pmu_class.type) + if (event->attr.type != rapl_pmus->pmu.type) return -ENOENT; /* check only supported bits are set */ if (event->attr.config & ~RAPL_EVENT_MASK) return -EINVAL; + if (event->cpu < 0) + return -EINVAL; + /* * check event is known (determines counter) */ @@ -376,6 +378,9 @@ static int rapl_pmu_event_init(struct perf_event *event) return -EINVAL; /* must be done before validate_group */ + pmu = cpu_to_rapl_pmu(event->cpu); + event->cpu = pmu->cpu; + event->pmu_private = pmu; event->hw.event_base = msr; event->hw.config = cfg; event->hw.idx = bit; @@ -506,139 +511,62 @@ const struct attribute_group *rapl_attr_groups[] = { NULL, }; -static struct pmu rapl_pmu_class = { - .attr_groups = rapl_attr_groups, - .task_ctx_nr = perf_invalid_context, /* system-wide only */ - .event_init = rapl_pmu_event_init, - .add = rapl_pmu_event_add, /* must have */ - .del = rapl_pmu_event_del, /* must have */ - .start = rapl_pmu_event_start, - .stop = rapl_pmu_event_stop, - .read = rapl_pmu_event_read, -}; - static void rapl_cpu_exit(int cpu) { - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - int i, phys_id = topology_physical_package_id(cpu); - int target = -1; + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; - /* find a new cpu on same package */ - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (phys_id == topology_physical_package_id(i)) { - target = i; - break; - } - } - /* - * clear cpu from cpumask - * if was set in cpumask and still some cpu on package, - * then move to new cpu - */ - if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) - cpumask_set_cpu(target, &rapl_cpu_mask); + /* Check if exiting cpu is used for collecting rapl events */ + if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) + return; - WARN_ON(cpumask_empty(&rapl_cpu_mask)); - /* - * migrate events and context to new cpu - */ - if (target >= 0) - perf_pmu_migrate_context(pmu->pmu, cpu, target); + pmu->cpu = -1; + /* Find a new cpu to collect rapl events */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - /* cancel overflow polling timer for CPU */ - rapl_stop_hrtimer(pmu); + /* Migrate rapl events to the new target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &rapl_cpu_mask); + pmu->cpu = target; + perf_pmu_migrate_context(pmu->pmu, cpu, target); + } } static void rapl_cpu_init(int cpu) { - int i, phys_id = topology_physical_package_id(cpu); + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; - /* check if phys_is is already covered */ - for_each_cpu(i, &rapl_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) - return; - } - /* was not found, so add it */ - cpumask_set_cpu(cpu, &rapl_cpu_mask); -} - -static __init void rapl_hsw_server_quirk(void) -{ /* - * DRAM domain on HSW server has fixed energy unit which can be - * different than the unit from power unit MSR. - * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 - * of 2. Datasheet, September 2014, Reference Number: 330784-001 " + * Check if there is an online cpu in the package which collects rapl + * events already. */ - rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu)); + if (target < nr_cpu_ids) + return; + + cpumask_set_cpu(cpu, &rapl_cpu_mask); + pmu->cpu = cpu; } static int rapl_cpu_prepare(int cpu) { - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - int phys_id = topology_physical_package_id(cpu); - u64 ms; + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); if (pmu) return 0; - if (phys_id < 0) - return -1; - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); if (!pmu) - return -1; - spin_lock_init(&pmu->lock); + return -ENOMEM; + raw_spin_lock_init(&pmu->lock); INIT_LIST_HEAD(&pmu->active_list); - - pmu->pmu = &rapl_pmu_class; - - /* - * use reference of 200W for scaling the timeout - * to avoid missing counter overflows. - * 200W = 200 Joules/sec - * divide interval by 2 to avoid lockstep (2 * 100) - * if hw unit is 32, then we use 2 ms 1/200/2 - */ - if (rapl_hw_unit[0] < 32) - ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1)); - else - ms = 2; - - pmu->timer_interval = ms_to_ktime(ms); - + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + pmu->cpu = -1; rapl_hrtimer_init(pmu); - - /* set RAPL pmu for this cpu for now */ - per_cpu(rapl_pmu, cpu) = pmu; - per_cpu(rapl_pmu_to_free, cpu) = NULL; - - return 0; -} - -static void rapl_cpu_kfree(int cpu) -{ - struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); - - kfree(pmu); - - per_cpu(rapl_pmu_to_free, cpu) = NULL; -} - -static int rapl_cpu_dying(int cpu) -{ - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - - if (!pmu) - return 0; - - per_cpu(rapl_pmu, cpu) = NULL; - - per_cpu(rapl_pmu_to_free, cpu) = pmu; - + rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; return 0; } @@ -651,28 +579,20 @@ static int rapl_cpu_notifier(struct notifier_block *self, case CPU_UP_PREPARE: rapl_cpu_prepare(cpu); break; - case CPU_STARTING: - rapl_cpu_init(cpu); - break; - case CPU_UP_CANCELED: - case CPU_DYING: - rapl_cpu_dying(cpu); - break; + + case CPU_DOWN_FAILED: case CPU_ONLINE: - case CPU_DEAD: - rapl_cpu_kfree(cpu); + rapl_cpu_init(cpu); break; + case CPU_DOWN_PREPARE: rapl_cpu_exit(cpu); break; - default: - break; } - return NOTIFY_OK; } -static int rapl_check_hw_unit(void) +static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; int i; @@ -683,28 +603,107 @@ static int rapl_check_hw_unit(void) for (i = 0; i < NR_RAPL_DOMAINS; i++) rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + /* + * DRAM domain on HSW server and KNL has fixed energy unit which can be + * different than the unit from power unit MSR. See + * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 + * of 2. Datasheet, September 2014, Reference Number: 330784-001 " + */ + if (apply_quirk) + rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + + /* + * Calculate the timer rate: + * Use reference of 200W for scaling the timeout to avoid counter + * overflows. 200W = 200 Joules/sec + * Divide interval by 2 to avoid lockstep (2 * 100) + * if hw unit is 32, then we use 2 ms 1/200/2 + */ + rapl_timer_ms = 2; + if (rapl_hw_unit[0] < 32) { + rapl_timer_ms = (1000 / (2 * 100)); + rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1)); + } + return 0; +} + +static void __init rapl_advertise(void) +{ + int i; + + pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", + hweight32(rapl_cntr_mask), rapl_timer_ms); + + for (i = 0; i < NR_RAPL_DOMAINS; i++) { + if (rapl_cntr_mask & (1 << i)) { + pr_info("hw unit of domain %s 2^-%d Joules\n", + rapl_domain_names[i], rapl_hw_unit[i]); + } + } +} + +static int __init rapl_prepare_cpus(void) +{ + unsigned int cpu, pkg; + int ret; + + for_each_online_cpu(cpu) { + pkg = topology_logical_package_id(cpu); + if (rapl_pmus->pmus[pkg]) + continue; + + ret = rapl_cpu_prepare(cpu); + if (ret) + return ret; + rapl_cpu_init(cpu); + } + return 0; +} + +static void __init cleanup_rapl_pmus(void) +{ + int i; + + for (i = 0; i < rapl_pmus->maxpkg; i++) + kfree(rapl_pmus->pmus + i); + kfree(rapl_pmus); +} + +static int __init init_rapl_pmus(void) +{ + int maxpkg = topology_max_packages(); + size_t size; + + size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *); + rapl_pmus = kzalloc(size, GFP_KERNEL); + if (!rapl_pmus) + return -ENOMEM; + + rapl_pmus->maxpkg = maxpkg; + rapl_pmus->pmu.attr_groups = rapl_attr_groups; + rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; + rapl_pmus->pmu.event_init = rapl_pmu_event_init; + rapl_pmus->pmu.add = rapl_pmu_event_add; + rapl_pmus->pmu.del = rapl_pmu_event_del; + rapl_pmus->pmu.start = rapl_pmu_event_start; + rapl_pmus->pmu.stop = rapl_pmu_event_stop; + rapl_pmus->pmu.read = rapl_pmu_event_read; return 0; } -static const struct x86_cpu_id rapl_cpu_match[] = { +static const struct x86_cpu_id rapl_cpu_match[] __initconst = { [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, [1] = {}, }; static int __init rapl_pmu_init(void) { - struct rapl_pmu *pmu; - int cpu, ret; - struct x86_pmu_quirk *quirk; - int i; + bool apply_quirk = false; + int ret; - /* - * check for Intel processor family 6 - */ if (!x86_match_cpu(rapl_cpu_match)) - return 0; + return -ENODEV; - /* check supported CPU */ switch (boot_cpu_data.x86_model) { case 42: /* Sandy Bridge */ case 58: /* Ivy Bridge */ @@ -712,7 +711,7 @@ static int __init rapl_pmu_init(void) rapl_pmu_events_group.attrs = rapl_events_cln_attr; break; case 63: /* Haswell-Server */ - rapl_add_quirk(rapl_hsw_server_quirk); + apply_quirk = true; rapl_cntr_mask = RAPL_IDX_SRV; rapl_pmu_events_group.attrs = rapl_events_srv_attr; break; @@ -728,56 +727,41 @@ static int __init rapl_pmu_init(void) rapl_pmu_events_group.attrs = rapl_events_srv_attr; break; case 87: /* Knights Landing */ - rapl_add_quirk(rapl_hsw_server_quirk); + apply_quirk = true; rapl_cntr_mask = RAPL_IDX_KNL; rapl_pmu_events_group.attrs = rapl_events_knl_attr; - + break; default: - /* unsupported */ - return 0; + return -ENODEV; } - ret = rapl_check_hw_unit(); + + ret = rapl_check_hw_unit(apply_quirk); if (ret) return ret; - /* run cpu model quirks */ - for (quirk = rapl_quirks; quirk; quirk = quirk->next) - quirk->func(); - cpu_notifier_register_begin(); + ret = init_rapl_pmus(); + if (ret) + return ret; - for_each_online_cpu(cpu) { - ret = rapl_cpu_prepare(cpu); - if (ret) - goto out; - rapl_cpu_init(cpu); - } + cpu_notifier_register_begin(); - __perf_cpu_notifier(rapl_cpu_notifier); + ret = rapl_prepare_cpus(); + if (ret) + goto out; - ret = perf_pmu_register(&rapl_pmu_class, "power", -1); - if (WARN_ON(ret)) { - pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); - cpu_notifier_register_done(); - return -1; - } + ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); + if (ret) + goto out; - pmu = __this_cpu_read(rapl_pmu); + __perf_cpu_notifier(rapl_cpu_notifier); + cpu_notifier_register_done(); + rapl_advertise(); + return 0; - pr_info("RAPL PMU detected," - " API unit is 2^-32 Joules," - " %d fixed counters" - " %llu ms ovfl timer\n", - hweight32(rapl_cntr_mask), - ktime_to_ms(pmu->timer_interval)); - for (i = 0; i < NR_RAPL_DOMAINS; i++) { - if (rapl_cntr_mask & (1 << i)) { - pr_info("hw unit of domain %s 2^-%d Joules\n", - rapl_domain_names[i], rapl_hw_unit[i]); - } - } out: + pr_warn("Initialization failed (%d), disabled\n", ret); + cleanup_rapl_pmus(); cpu_notifier_register_done(); - - return 0; + return ret; } device_initcall(rapl_pmu_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/events/intel/uncore.c index 3bf41d413775..7012d18bb293 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,4 @@ -#include "perf_event_intel_uncore.h" +#include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; @@ -9,9 +9,9 @@ struct pci_driver *uncore_pci_driver; /* pci bus to socket mapping */ DEFINE_RAW_SPINLOCK(pci2phy_map_lock); struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); -struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +struct pci_extra_dev *uncore_extra_pci_dev; +static int max_packages; -static DEFINE_RAW_SPINLOCK(uncore_box_lock); /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; @@ -21,7 +21,7 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); -int uncore_pcibus_to_physid(struct pci_bus *bus) +static int uncore_pcibus_to_physid(struct pci_bus *bus) { struct pci2phy_map *map; int phys_id = -1; @@ -38,6 +38,16 @@ int uncore_pcibus_to_physid(struct pci_bus *bus) return phys_id; } +static void uncore_free_pcibus_map(void) +{ + struct pci2phy_map *map, *tmp; + + list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) { + list_del(&map->list); + kfree(map); + } +} + struct pci2phy_map *__find_pci2phy_map(int segment) { struct pci2phy_map *map, *alloc = NULL; @@ -82,43 +92,9 @@ ssize_t uncore_event_show(struct kobject *kobj, return sprintf(buf, "%s", event->config); } -struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) -{ - return container_of(event->pmu, struct intel_uncore_pmu, pmu); -} - struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - struct intel_uncore_box *box; - - box = *per_cpu_ptr(pmu->box, cpu); - if (box) - return box; - - raw_spin_lock(&uncore_box_lock); - /* Recheck in lock to handle races. */ - if (*per_cpu_ptr(pmu->box, cpu)) - goto out; - list_for_each_entry(box, &pmu->box_list, list) { - if (box->phys_id == topology_physical_package_id(cpu)) { - atomic_inc(&box->refcnt); - *per_cpu_ptr(pmu->box, cpu) = box; - break; - } - } -out: - raw_spin_unlock(&uncore_box_lock); - - return *per_cpu_ptr(pmu->box, cpu); -} - -struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) -{ - /* - * perf core schedules event on the basis of cpu, uncore events are - * collected by one of the cpus inside a physical package. - */ - return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); + return pmu->boxes[topology_logical_package_id(cpu)]; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -207,7 +183,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) return config; } -static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) +static void uncore_assign_hw_event(struct intel_uncore_box *box, + struct perf_event *event, int idx) { struct hw_perf_event *hwc = &event->hw; @@ -302,24 +279,25 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, + int node) { + int i, size, numshared = type->num_shared_regs ; struct intel_uncore_box *box; - int i, size; - size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); + size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg); box = kzalloc_node(size, GFP_KERNEL, node); if (!box) return NULL; - for (i = 0; i < type->num_shared_regs; i++) + for (i = 0; i < numshared; i++) raw_spin_lock_init(&box->shared_regs[i].lock); uncore_pmu_init_hrtimer(box); - atomic_set(&box->refcnt, 1); box->cpu = -1; - box->phys_id = -1; + box->pci_phys_id = -1; + box->pkgid = -1; /* set default hrtimer timeout */ box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; @@ -341,7 +319,8 @@ static bool is_uncore_event(struct perf_event *event) } static int -uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) +uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, + bool dogrp) { struct perf_event *event; int n, max_count; @@ -402,7 +381,8 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve return &type->unconstrainted; } -static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event) +static void uncore_put_event_constraint(struct intel_uncore_box *box, + struct perf_event *event) { if (box->pmu->type->ops->put_constraint) box->pmu->type->ops->put_constraint(box, event); @@ -582,7 +562,7 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags) if (event == box->event_list[i]) { uncore_put_event_constraint(box, event); - while (++i < box->n_events) + for (++i; i < box->n_events; i++) box->event_list[i - 1] = box->event_list[i]; --box->n_events; @@ -676,6 +656,7 @@ static int uncore_pmu_event_init(struct perf_event *event) if (!box || box->cpu < 0) return -EINVAL; event->cpu = box->cpu; + event->pmu_private = box; event->hw.idx = -1; event->hw.last_tag = ~0ULL; @@ -760,64 +741,110 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) } ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); + if (!ret) + pmu->registered = true; return ret; } +static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) +{ + if (!pmu->registered) + return; + perf_pmu_unregister(&pmu->pmu); + pmu->registered = false; +} + +static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) +{ + struct intel_uncore_pmu *pmu = type->pmus; + struct intel_uncore_box *box; + int i, pkg; + + if (pmu) { + pkg = topology_physical_package_id(cpu); + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box) + uncore_box_exit(box); + } + } +} + +static void __init uncore_exit_boxes(void *dummy) +{ + struct intel_uncore_type **types; + + for (types = uncore_msr_uncores; *types; types++) + __uncore_exit_boxes(*types++, smp_processor_id()); +} + +static void uncore_free_boxes(struct intel_uncore_pmu *pmu) +{ + int pkg; + + for (pkg = 0; pkg < max_packages; pkg++) + kfree(pmu->boxes[pkg]); + kfree(pmu->boxes); +} + static void __init uncore_type_exit(struct intel_uncore_type *type) { + struct intel_uncore_pmu *pmu = type->pmus; int i; - for (i = 0; i < type->num_boxes; i++) - free_percpu(type->pmus[i].box); - kfree(type->pmus); - type->pmus = NULL; + if (pmu) { + for (i = 0; i < type->num_boxes; i++, pmu++) { + uncore_pmu_unregister(pmu); + uncore_free_boxes(pmu); + } + kfree(type->pmus); + type->pmus = NULL; + } kfree(type->events_group); type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) { - int i; - for (i = 0; types[i]; i++) - uncore_type_exit(types[i]); + for (; *types; types++) + uncore_type_exit(*types); } -static int __init uncore_type_init(struct intel_uncore_type *type) +static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) { struct intel_uncore_pmu *pmus; struct attribute_group *attr_group; struct attribute **attrs; + size_t size; int i, j; pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL); if (!pmus) return -ENOMEM; - type->pmus = pmus; + size = max_packages * sizeof(struct intel_uncore_box *); + for (i = 0; i < type->num_boxes; i++) { + pmus[i].func_id = setid ? i : -1; + pmus[i].pmu_idx = i; + pmus[i].type = type; + pmus[i].boxes = kzalloc(size, GFP_KERNEL); + if (!pmus[i].boxes) + return -ENOMEM; + } + + type->pmus = pmus; type->unconstrainted = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 0, type->num_counters, 0, 0); - for (i = 0; i < type->num_boxes; i++) { - pmus[i].func_id = -1; - pmus[i].pmu_idx = i; - pmus[i].type = type; - INIT_LIST_HEAD(&pmus[i].box_list); - pmus[i].box = alloc_percpu(struct intel_uncore_box *); - if (!pmus[i].box) - goto fail; - } - if (type->event_descs) { - i = 0; - while (type->event_descs[i].attr.attr.name) - i++; + for (i = 0; type->event_descs[i].attr.attr.name; i++); attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) + sizeof(*attr_group), GFP_KERNEL); if (!attr_group) - goto fail; + return -ENOMEM; attrs = (struct attribute **)(attr_group + 1); attr_group->name = "events"; @@ -831,25 +858,19 @@ static int __init uncore_type_init(struct intel_uncore_type *type) type->pmu_group = &uncore_pmu_attr_group; return 0; -fail: - uncore_type_exit(type); - return -ENOMEM; } -static int __init uncore_types_init(struct intel_uncore_type **types) +static int __init +uncore_types_init(struct intel_uncore_type **types, bool setid) { - int i, ret; + int ret; - for (i = 0; types[i]; i++) { - ret = uncore_type_init(types[i]); + for (; *types; types++) { + ret = uncore_type_init(*types, setid); if (ret) - goto fail; + return ret; } return 0; -fail: - while (--i >= 0) - uncore_type_exit(types[i]); - return ret; } /* @@ -857,28 +878,28 @@ fail: */ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - struct intel_uncore_type *type; - int phys_id; - bool first_box = false; + int phys_id, pkg, ret; phys_id = uncore_pcibus_to_physid(pdev->bus); if (phys_id < 0) return -ENODEV; + pkg = topology_phys_to_logical_pkg(phys_id); + if (WARN_ON_ONCE(pkg < 0)) + return -EINVAL; + if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { int idx = UNCORE_PCI_DEV_IDX(id->driver_data); - uncore_extra_pci_dev[phys_id][idx] = pdev; + + uncore_extra_pci_dev[pkg].dev[idx] = pdev; pci_set_drvdata(pdev, NULL); return 0; } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; - box = uncore_alloc_box(type, NUMA_NO_NODE); - if (!box) - return -ENOMEM; - /* * for performance monitoring unit with multiple boxes, * each box has a different function id. @@ -890,44 +911,60 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id * some device types. Hence PCI device idx would be 0 for all devices. * So increment pmu pointer to point to an unused array element. */ - if (boot_cpu_data.x86_model == 87) + if (boot_cpu_data.x86_model == 87) { while (pmu->func_id >= 0) pmu++; + } + + if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) + return -EINVAL; + + box = uncore_alloc_box(type, NUMA_NO_NODE); + if (!box) + return -ENOMEM; + if (pmu->func_id < 0) pmu->func_id = pdev->devfn; else WARN_ON_ONCE(pmu->func_id != pdev->devfn); - box->phys_id = phys_id; + atomic_inc(&box->refcnt); + box->pci_phys_id = phys_id; + box->pkgid = pkg; box->pci_dev = pdev; box->pmu = pmu; uncore_box_init(box); pci_set_drvdata(pdev, box); - raw_spin_lock(&uncore_box_lock); - if (list_empty(&pmu->box_list)) - first_box = true; - list_add_tail(&box->list, &pmu->box_list); - raw_spin_unlock(&uncore_box_lock); + pmu->boxes[pkg] = box; + if (atomic_inc_return(&pmu->activeboxes) > 1) + return 0; - if (first_box) - uncore_pmu_register(pmu); - return 0; + /* First active box registers the pmu */ + ret = uncore_pmu_register(pmu); + if (ret) { + pci_set_drvdata(pdev, NULL); + pmu->boxes[pkg] = NULL; + uncore_box_exit(box); + kfree(box); + } + return ret; } static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu; - int i, cpu, phys_id; - bool last_box = false; + int i, phys_id, pkg; phys_id = uncore_pcibus_to_physid(pdev->bus); + pkg = topology_phys_to_logical_pkg(phys_id); + box = pci_get_drvdata(pdev); if (!box) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { - if (uncore_extra_pci_dev[phys_id][i] == pdev) { - uncore_extra_pci_dev[phys_id][i] = NULL; + if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { + uncore_extra_pci_dev[pkg].dev[i] = NULL; break; } } @@ -936,33 +973,20 @@ static void uncore_pci_remove(struct pci_dev *pdev) } pmu = box->pmu; - if (WARN_ON_ONCE(phys_id != box->phys_id)) + if (WARN_ON_ONCE(phys_id != box->pci_phys_id)) return; pci_set_drvdata(pdev, NULL); - - raw_spin_lock(&uncore_box_lock); - list_del(&box->list); - if (list_empty(&pmu->box_list)) - last_box = true; - raw_spin_unlock(&uncore_box_lock); - - for_each_possible_cpu(cpu) { - if (*per_cpu_ptr(pmu->box, cpu) == box) { - *per_cpu_ptr(pmu->box, cpu) = NULL; - atomic_dec(&box->refcnt); - } - } - - WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); + pmu->boxes[pkg] = NULL; + if (atomic_dec_return(&pmu->activeboxes) == 0) + uncore_pmu_unregister(pmu); + uncore_box_exit(box); kfree(box); - - if (last_box) - perf_pmu_unregister(&pmu->pmu); } static int __init uncore_pci_init(void) { + size_t size; int ret; switch (boot_cpu_data.x86_model) { @@ -999,25 +1023,40 @@ static int __init uncore_pci_init(void) ret = skl_uncore_pci_init(); break; default: - return 0; + return -ENODEV; } if (ret) return ret; - ret = uncore_types_init(uncore_pci_uncores); + size = max_packages * sizeof(struct pci_extra_dev); + uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); + if (!uncore_extra_pci_dev) { + ret = -ENOMEM; + goto err; + } + + ret = uncore_types_init(uncore_pci_uncores, false); if (ret) - return ret; + goto errtype; uncore_pci_driver->probe = uncore_pci_probe; uncore_pci_driver->remove = uncore_pci_remove; ret = pci_register_driver(uncore_pci_driver); - if (ret == 0) - pcidrv_registered = true; - else - uncore_types_exit(uncore_pci_uncores); + if (ret) + goto errtype; + + pcidrv_registered = true; + return 0; +errtype: + uncore_types_exit(uncore_pci_uncores); + kfree(uncore_extra_pci_dev); + uncore_extra_pci_dev = NULL; + uncore_free_pcibus_map(); +err: + uncore_pci_uncores = empty_uncore; return ret; } @@ -1027,173 +1066,139 @@ static void __init uncore_pci_exit(void) pcidrv_registered = false; pci_unregister_driver(uncore_pci_driver); uncore_types_exit(uncore_pci_uncores); - } -} - -/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */ -static LIST_HEAD(boxes_to_free); - -static void uncore_kfree_boxes(void) -{ - struct intel_uncore_box *box; - - while (!list_empty(&boxes_to_free)) { - box = list_entry(boxes_to_free.next, - struct intel_uncore_box, list); - list_del(&box->list); - kfree(box); + kfree(uncore_extra_pci_dev); + uncore_free_pcibus_map(); } } static void uncore_cpu_dying(int cpu) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - box = *per_cpu_ptr(pmu->box, cpu); - *per_cpu_ptr(pmu->box, cpu) = NULL; - if (box && atomic_dec_and_test(&box->refcnt)) - list_add(&box->list, &boxes_to_free); + int i, pkg; + + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); } } } -static int uncore_cpu_starting(int cpu) +static void uncore_cpu_starting(int cpu, bool init) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box, *exist; - int i, j, k, phys_id; - - phys_id = topology_physical_package_id(cpu); - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - box = *per_cpu_ptr(pmu->box, cpu); - /* called by uncore_cpu_init? */ - if (box && box->phys_id >= 0) { - uncore_box_init(box); - continue; - } + struct intel_uncore_box *box; + int i, pkg, ncpus = 1; - for_each_online_cpu(k) { - exist = *per_cpu_ptr(pmu->box, k); - if (exist && exist->phys_id == phys_id) { - atomic_inc(&exist->refcnt); - *per_cpu_ptr(pmu->box, cpu) = exist; - if (box) { - list_add(&box->list, - &boxes_to_free); - box = NULL; - } - break; - } - } + if (init) { + /* + * On init we get the number of online cpus in the package + * and set refcount for all of them. + */ + ncpus = cpumask_weight(topology_core_cpumask(cpu)); + } - if (box) { - box->phys_id = phys_id; + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box) + continue; + /* The first cpu on a package activates the box */ + if (atomic_add_return(ncpus, &box->refcnt) == ncpus) uncore_box_init(box); - } } } - return 0; } -static int uncore_cpu_prepare(int cpu, int phys_id) +static int uncore_cpu_prepare(int cpu) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - if (pmu->func_id < 0) - pmu->func_id = j; - + int i, pkg; + + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + if (pmu->boxes[pkg]) + continue; + /* First cpu of a package allocates the box */ box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) return -ENOMEM; - box->pmu = pmu; - box->phys_id = phys_id; - *per_cpu_ptr(pmu->box, cpu) = box; + box->pkgid = pkg; + pmu->boxes[pkg] = box; } } return 0; } -static void -uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu) +static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, + int new_cpu) { - struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = type->pmus; struct intel_uncore_box *box; - int i, j; + int i, pkg; - for (i = 0; uncores[i]; i++) { - type = uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - if (old_cpu < 0) - box = uncore_pmu_to_box(pmu, new_cpu); - else - box = uncore_pmu_to_box(pmu, old_cpu); - if (!box) - continue; - - if (old_cpu < 0) { - WARN_ON_ONCE(box->cpu != -1); - box->cpu = new_cpu; - continue; - } + pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu); + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box) + continue; - WARN_ON_ONCE(box->cpu != old_cpu); - if (new_cpu >= 0) { - uncore_pmu_cancel_hrtimer(box); - perf_pmu_migrate_context(&pmu->pmu, - old_cpu, new_cpu); - box->cpu = new_cpu; - } else { - box->cpu = -1; - } + if (old_cpu < 0) { + WARN_ON_ONCE(box->cpu != -1); + box->cpu = new_cpu; + continue; } + + WARN_ON_ONCE(box->cpu != old_cpu); + box->cpu = -1; + if (new_cpu < 0) + continue; + + uncore_pmu_cancel_hrtimer(box); + perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); + box->cpu = new_cpu; } } +static void uncore_change_context(struct intel_uncore_type **uncores, + int old_cpu, int new_cpu) +{ + for (; *uncores; uncores++) + uncore_change_type_ctx(*uncores, old_cpu, new_cpu); +} + static void uncore_event_exit_cpu(int cpu) { - int i, phys_id, target; + int target; - /* if exiting cpu is used for collecting uncore events */ + /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) return; - /* find a new cpu to collect uncore events */ - phys_id = topology_physical_package_id(cpu); - target = -1; - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (phys_id == topology_physical_package_id(i)) { - target = i; - break; - } - } + /* Find a new cpu to collect uncore events */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - /* migrate uncore events to the new cpu */ - if (target >= 0) + /* Migrate uncore events to the new target */ + if (target < nr_cpu_ids) cpumask_set_cpu(target, &uncore_cpu_mask); + else + target = -1; uncore_change_context(uncore_msr_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); @@ -1201,13 +1206,15 @@ static void uncore_event_exit_cpu(int cpu) static void uncore_event_init_cpu(int cpu) { - int i, phys_id; + int target; - phys_id = topology_physical_package_id(cpu); - for_each_cpu(i, &uncore_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) - return; - } + /* + * Check if there is an online cpu in the package + * which collects uncore events already. + */ + target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu)); + if (target < nr_cpu_ids) + return; cpumask_set_cpu(cpu, &uncore_cpu_mask); @@ -1220,39 +1227,25 @@ static int uncore_cpu_notifier(struct notifier_block *self, { unsigned int cpu = (long)hcpu; - /* allocate/free data structure for uncore box */ switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - uncore_cpu_prepare(cpu, -1); - break; + return notifier_from_errno(uncore_cpu_prepare(cpu)); + case CPU_STARTING: - uncore_cpu_starting(cpu); + uncore_cpu_starting(cpu, false); + case CPU_DOWN_FAILED: + uncore_event_init_cpu(cpu); break; + case CPU_UP_CANCELED: case CPU_DYING: uncore_cpu_dying(cpu); break; - case CPU_ONLINE: - case CPU_DEAD: - uncore_kfree_boxes(); - break; - default: - break; - } - /* select the cpu that collects uncore events */ - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_FAILED: - case CPU_STARTING: - uncore_event_init_cpu(cpu); - break; case CPU_DOWN_PREPARE: uncore_event_exit_cpu(cpu); break; - default: - break; } - return NOTIFY_OK; } @@ -1265,9 +1258,29 @@ static struct notifier_block uncore_cpu_nb = { .priority = CPU_PRI_PERF + 1, }; -static void __init uncore_cpu_setup(void *dummy) +static int __init type_pmu_register(struct intel_uncore_type *type) { - uncore_cpu_starting(smp_processor_id()); + int i, ret; + + for (i = 0; i < type->num_boxes; i++) { + ret = uncore_pmu_register(&type->pmus[i]); + if (ret) + return ret; + } + return 0; +} + +static int __init uncore_msr_pmus_register(void) +{ + struct intel_uncore_type **types = uncore_msr_uncores; + int ret; + + for (; *types; types++) { + ret = type_pmu_register(*types); + if (ret) + return ret; + } + return 0; } static int __init uncore_cpu_init(void) @@ -1311,71 +1324,61 @@ static int __init uncore_cpu_init(void) knl_uncore_cpu_init(); break; default: - return 0; + return -ENODEV; } - ret = uncore_types_init(uncore_msr_uncores); + ret = uncore_types_init(uncore_msr_uncores, true); if (ret) - return ret; + goto err; + ret = uncore_msr_pmus_register(); + if (ret) + goto err; return 0; +err: + uncore_types_exit(uncore_msr_uncores); + uncore_msr_uncores = empty_uncore; + return ret; } -static int __init uncore_pmus_register(void) +static void __init uncore_cpu_setup(void *dummy) { - struct intel_uncore_pmu *pmu; - struct intel_uncore_type *type; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - uncore_pmu_register(pmu); - } - } - - return 0; + uncore_cpu_starting(smp_processor_id(), true); } -static void __init uncore_cpumask_init(void) -{ - int cpu; - - /* - * ony invoke once from msr or pci init code - */ - if (!cpumask_empty(&uncore_cpu_mask)) - return; +/* Lazy to avoid allocation of a few bytes for the normal case */ +static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC); - cpu_notifier_register_begin(); +static int __init uncore_cpumask_init(bool msr) +{ + unsigned int cpu; for_each_online_cpu(cpu) { - int i, phys_id = topology_physical_package_id(cpu); + unsigned int pkg = topology_logical_package_id(cpu); + int ret; - for_each_cpu(i, &uncore_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) { - phys_id = -1; - break; - } - } - if (phys_id < 0) + if (test_and_set_bit(pkg, packages)) continue; - - uncore_cpu_prepare(cpu, phys_id); + /* + * The first online cpu of each package allocates and takes + * the refcounts for all other online cpus in that package. + * If msrs are not enabled no allocation is required. + */ + if (msr) { + ret = uncore_cpu_prepare(cpu); + if (ret) + return ret; + } uncore_event_init_cpu(cpu); + smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1); } - on_each_cpu(uncore_cpu_setup, NULL, 1); - __register_cpu_notifier(&uncore_cpu_nb); - - cpu_notifier_register_done(); + return 0; } - static int __init intel_uncore_init(void) { - int ret; + int pret, cret, ret; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; @@ -1383,19 +1386,27 @@ static int __init intel_uncore_init(void) if (cpu_has_hypervisor) return -ENODEV; - ret = uncore_pci_init(); - if (ret) - goto fail; - ret = uncore_cpu_init(); - if (ret) { - uncore_pci_exit(); - goto fail; - } - uncore_cpumask_init(); + max_packages = topology_max_packages(); + + pret = uncore_pci_init(); + cret = uncore_cpu_init(); - uncore_pmus_register(); + if (cret && pret) + return -ENODEV; + + cpu_notifier_register_begin(); + ret = uncore_cpumask_init(!cret); + if (ret) + goto err; + cpu_notifier_register_done(); return 0; -fail: + +err: + /* Undo box->init_box() */ + on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); + uncore_types_exit(uncore_msr_uncores); + uncore_pci_exit(); + cpu_notifier_register_done(); return ret; } device_initcall(intel_uncore_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/events/intel/uncore.h index a7086b862156..79766b9a3580 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -1,8 +1,10 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/pci.h> +#include <asm/apicdef.h> + #include <linux/perf_event.h> -#include "perf_event.h" +#include "../perf_event.h" #define UNCORE_PMU_NAME_LEN 32 #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) @@ -19,11 +21,12 @@ #define UNCORE_EXTRA_PCI_DEV 0xff #define UNCORE_EXTRA_PCI_DEV_MAX 3 -/* support up to 8 sockets */ -#define UNCORE_SOCKET_MAX 8 - #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) +struct pci_extra_dev { + struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX]; +}; + struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; @@ -61,6 +64,7 @@ struct intel_uncore_type { struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); + void (*exit_box)(struct intel_uncore_box *); void (*disable_box)(struct intel_uncore_box *); void (*enable_box)(struct intel_uncore_box *); void (*disable_event)(struct intel_uncore_box *, struct perf_event *); @@ -73,13 +77,14 @@ struct intel_uncore_ops { }; struct intel_uncore_pmu { - struct pmu pmu; - char name[UNCORE_PMU_NAME_LEN]; - int pmu_idx; - int func_id; - struct intel_uncore_type *type; - struct intel_uncore_box ** __percpu box; - struct list_head box_list; + struct pmu pmu; + char name[UNCORE_PMU_NAME_LEN]; + int pmu_idx; + int func_id; + bool registered; + atomic_t activeboxes; + struct intel_uncore_type *type; + struct intel_uncore_box **boxes; }; struct intel_uncore_extra_reg { @@ -89,7 +94,8 @@ struct intel_uncore_extra_reg { }; struct intel_uncore_box { - int phys_id; + int pci_phys_id; + int pkgid; int n_active; /* number of active events */ int n_events; int cpu; /* cpu to collect events */ @@ -123,7 +129,6 @@ struct pci2phy_map { int pbus_to_physid[256]; }; -int uncore_pcibus_to_physid(struct pci_bus *bus); struct pci2phy_map *__find_pci2phy_map(int segment); ssize_t uncore_event_show(struct kobject *kobj, @@ -305,14 +310,30 @@ static inline void uncore_box_init(struct intel_uncore_box *box) } } +static inline void uncore_box_exit(struct intel_uncore_box *box) +{ + if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->exit_box) + box->pmu->type->ops->exit_box(box); + } +} + static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { - return (box->phys_id < 0); + return (box->pkgid < 0); +} + +static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct intel_uncore_pmu, pmu); +} + +static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +{ + return event->pmu_private; } -struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event); struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); -struct intel_uncore_box *uncore_event_to_box(struct perf_event *event); u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); @@ -328,7 +349,7 @@ extern struct intel_uncore_type **uncore_pci_uncores; extern struct pci_driver *uncore_pci_driver; extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; -extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; /* perf_event_intel_uncore_snb.c */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 2749965afed0..cda569332005 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1,5 +1,5 @@ /* Nehalem-EX/Westmere-EX uncore support */ -#include "perf_event_intel_uncore.h" +#include "uncore.h" /* NHM-EX event control */ #define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff @@ -201,6 +201,11 @@ static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); } +static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0); +} + static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) { unsigned msr = uncore_msr_box_ctl(box); @@ -250,6 +255,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p #define NHMEX_UNCORE_OPS_COMMON_INIT() \ .init_box = nhmex_uncore_msr_init_box, \ + .exit_box = nhmex_uncore_msr_exit_box, \ .disable_box = nhmex_uncore_msr_disable_box, \ .enable_box = nhmex_uncore_msr_enable_box, \ .disable_event = nhmex_uncore_msr_disable_event, \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 2bd030ddd0db..96531d2b843f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,5 +1,5 @@ /* Nehalem/SandBridge/Haswell uncore support */ -#include "perf_event_intel_uncore.h" +#include "uncore.h" /* Uncore IMC PCI IDs */ #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 @@ -95,6 +95,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static void snb_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0); +} + static struct uncore_event_desc snb_uncore_events[] = { INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), { /* end: all zeroes */ }, @@ -116,6 +122,7 @@ static struct attribute_group snb_uncore_format_group = { static struct intel_uncore_ops snb_uncore_msr_ops = { .init_box = snb_uncore_msr_init_box, + .exit_box = snb_uncore_msr_exit_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = snb_uncore_msr_enable_event, .read_counter = uncore_msr_read_counter, @@ -231,6 +238,11 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box) box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; } +static void snb_uncore_imc_exit_box(struct intel_uncore_box *box) +{ + iounmap(box->io_addr); +} + static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) {} @@ -301,6 +313,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event) return -EINVAL; event->cpu = box->cpu; + event->pmu_private = box; event->hw.idx = -1; event->hw.last_tag = ~0ULL; @@ -458,6 +471,7 @@ static struct pmu snb_uncore_imc_pmu = { static struct intel_uncore_ops snb_uncore_imc_ops = { .init_box = snb_uncore_imc_init_box, + .exit_box = snb_uncore_imc_exit_box, .enable_box = snb_uncore_imc_enable_box, .disable_box = snb_uncore_imc_disable_box, .disable_event = snb_uncore_imc_disable_event, diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 33acb884ccf1..93f6bd9bf761 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,5 @@ /* SandyBridge-EP/IvyTown uncore support */ -#include "perf_event_intel_uncore.h" - +#include "uncore.h" /* SNB-EP Box level control */ #define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) @@ -987,7 +986,9 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve if (reg1->idx != EXTRA_REG_NONE) { int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx]; + int pkg = topology_phys_to_logical_pkg(box->pci_phys_id); + struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx]; + if (filter_pdev) { pci_write_config_dword(filter_pdev, reg1->reg, (u32)reg1->config); @@ -2521,14 +2522,16 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { void hswep_uncore_cpu_init(void) { + int pkg = topology_phys_to_logical_pkg(0); + if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; /* Detect 6-8 core systems with only two SBOXes */ - if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) { + if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { u32 capid4; - pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3], + pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3], 0x94, &capid4); if (((capid4 >> 6) & 0x3) == 0) hswep_uncore_sbox.num_boxes = 2; @@ -2875,11 +2878,13 @@ static struct intel_uncore_type bdx_uncore_sbox = { .format_group = &hswep_uncore_sbox_format_group, }; +#define BDX_MSR_UNCORE_SBOX 3 + static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, - &bdx_uncore_sbox, &hswep_uncore_pcu, + &bdx_uncore_sbox, NULL, }; @@ -2888,6 +2893,10 @@ void bdx_uncore_cpu_init(void) if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; + + /* BDX-DE doesn't have SBOX */ + if (boot_cpu_data.x86_model == 86) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; } static struct intel_uncore_type bdx_uncore_ha = { diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/events/msr.c index ec863b9a9f78..ec863b9a9f78 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/events/msr.c diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/events/perf_event.h index 3ea127f05d0d..ba6ef18528c9 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -586,6 +586,7 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1; int pebs_record_size; + int pebs_buffer_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); @@ -860,6 +861,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[]; extern struct event_constraint intel_hsw_pebs_event_constraints[]; +extern struct event_constraint intel_bdw_pebs_event_constraints[]; + extern struct event_constraint intel_skl_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); @@ -904,6 +907,8 @@ void intel_pmu_lbr_init_skl(void); void intel_pmu_lbr_init_knl(void); +void intel_pmu_pebs_data_source_nhm(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 7bfc85bbb8ff..99afb665a004 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -152,12 +152,6 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection" /* - * This must be included *after* the definition of ALTERNATIVE due to - * <asm/arch_hweight.h> - */ -#include <asm/cpufeature.h> - -/* * Alternative instructions for different CPU types or capabilities. * * This allows to use optimized instructions even on generic binary diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 3c56ef1ae068..5e828da2e18f 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -27,15 +27,23 @@ struct amd_l3_cache { }; struct threshold_block { - unsigned int block; - unsigned int bank; - unsigned int cpu; - u32 address; - u16 interrupt_enable; - bool interrupt_capable; - u16 threshold_limit; - struct kobject kobj; - struct list_head miscj; + unsigned int block; /* Number within bank */ + unsigned int bank; /* MCA bank the block belongs to */ + unsigned int cpu; /* CPU which controls MCA bank */ + u32 address; /* MSR address for the block */ + u16 interrupt_enable; /* Enable/Disable APIC interrupt */ + bool interrupt_capable; /* Bank can generate an interrupt. */ + + u16 threshold_limit; /* + * Value upon which threshold + * interrupt is generated. + */ + + struct kobject kobj; /* sysfs object */ + struct list_head miscj; /* + * List of threshold blocks + * within a bank. + */ }; struct threshold_bank { diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c80f6b6f3da2..0899cfc8dfe8 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -6,7 +6,6 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <asm/processor.h> #include <asm/apicdef.h> #include <linux/atomic.h> #include <asm/fixmap.h> diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 259a7c1ef709..02e799fa43d1 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_HWEIGHT_H #define _ASM_X86_HWEIGHT_H +#include <asm/cpufeatures.h> + #ifdef CONFIG_64BIT /* popcnt %edi, %eax -- redundant REX prefix for alignment */ #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 189679aba703..f5063b6659eb 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -44,19 +44,22 @@ /* Exception table entry */ #ifdef __ASSEMBLY__ -# define _ASM_EXTABLE(from,to) \ +# define _ASM_EXTABLE_HANDLE(from, to, handler) \ .pushsection "__ex_table","a" ; \ - .balign 8 ; \ + .balign 4 ; \ .long (from) - . ; \ .long (to) - . ; \ + .long (handler) - . ; \ .popsection -# define _ASM_EXTABLE_EX(from,to) \ - .pushsection "__ex_table","a" ; \ - .balign 8 ; \ - .long (from) - . ; \ - .long (to) - . + 0x7ffffff0 ; \ - .popsection +# define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + +# define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) + +# define _ASM_EXTABLE_EX(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ @@ -89,19 +92,24 @@ .endm #else -# define _ASM_EXTABLE(from,to) \ +# define _EXPAND_EXTABLE_HANDLE(x) #x +# define _ASM_EXTABLE_HANDLE(from, to, handler) \ " .pushsection \"__ex_table\",\"a\"\n" \ - " .balign 8\n" \ + " .balign 4\n" \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ + " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ " .popsection\n" -# define _ASM_EXTABLE_EX(from,to) \ - " .pushsection \"__ex_table\",\"a\"\n" \ - " .balign 8\n" \ - " .long (" #from ") - .\n" \ - " .long (" #to ") - . + 0x7ffffff0\n" \ - " .popsection\n" +# define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + +# define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) + +# define _ASM_EXTABLE_EX(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) + /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index a584e1c50918..bfb28caf97b1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -6,18 +6,17 @@ /* * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking + * And yes, this might be required on UP too when we're talking * to devices. */ #ifdef CONFIG_X86_32 -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") +#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") +#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \ + X86_FEATURE_XMM2) ::: "memory", "cc") #else #define mb() asm volatile("mfence":::"memory") #define rmb() asm volatile("lfence":::"memory") diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index cfe3b954d5e4..7766d1cf096e 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __set_bit(long nr, volatile unsigned long *addr) +static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr) * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. */ -static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } -static inline void __clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } @@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr) * No memory barrier is required here, because x86 cannot reorder stores past * older loads. Same principle as spin_unlock. */ -static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); __clear_bit(nr, addr); @@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(long nr, volatile unsigned long *addr) +static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); } @@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(long nr, volatile unsigned long *addr) +static __always_inline void change_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "xorb %1,%0" @@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); } @@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); } @@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) { int oldbit; @@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); } @@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -static inline int variable_test_bit(long nr, volatile const unsigned long *addr) +static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) { int oldbit; @@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); * * Undefined if no bit exists, so code should check against 0 first. */ -static inline unsigned long __ffs(unsigned long word) +static __always_inline unsigned long __ffs(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) @@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word) * * Undefined if no zero exists, so code should check against ~0UL first. */ -static inline unsigned long ffz(unsigned long word) +static __always_inline unsigned long ffz(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) @@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word) * * Undefined if no set bit exists, so code should check against 0 first. */ -static inline unsigned long __fls(unsigned long word) +static __always_inline unsigned long __fls(unsigned long word) { asm("bsr %1,%0" : "=r" (word) @@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word) * set bit if value is nonzero. The first (least significant) bit * is at position 1. */ -static inline int ffs(int x) +static __always_inline int ffs(int x) { int r; @@ -434,7 +434,7 @@ static inline int ffs(int x) * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ -static inline int fls(int x) +static __always_inline int fls(int x) { int r; diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index e63aa38e85fb..61518cf79437 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -91,16 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size); #define mmio_flush_range(addr, size) clflush_cache_range(addr, size) -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); extern const int rodata_test_data; extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); -#else -static inline void set_kernel_text_rw(void) { } -static inline void set_kernel_text_ro(void) { } -#endif #ifdef CONFIG_DEBUG_RODATA_TEST int rodata_test(void); diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index eda81dc0f4ae..d194266acb28 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -3,10 +3,11 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#define VCLOCK_NONE 0 /* No vDSO clock available. */ -#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ -#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ -#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ +#define VCLOCK_NONE 0 /* No vDSO clock available. */ +#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ +#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ +#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ +#define VCLOCK_MAX 3 struct arch_clocksource_data { int vclock_mode; diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index ad19841eddfe..9733361fed6f 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -2,6 +2,7 @@ #define ASM_X86_CMPXCHG_H #include <linux/compiler.h> +#include <asm/cpufeatures.h> #include <asm/alternative.h> /* Provides LOCK_PREFIX */ /* diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7ad8c9464297..68e4e8258b84 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -1,288 +1,7 @@ -/* - * Defines x86 CPU feature bits - */ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#include <asm/required-features.h> -#endif - -#ifndef _ASM_X86_DISABLED_FEATURES_H -#include <asm/disabled-features.h> -#endif - -#define NCAPINTS 16 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ - -/* - * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. - */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ - /* (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ -#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ -#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ -#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ -#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ - -/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ -/* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ -#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ - -/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ - -/* Other features, Linux-defined mapping, word 3 */ -/* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -/* cpu types for specific tunings: */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ -/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ -/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ -#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ -#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ -#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ - -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ -#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ -#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ -#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ - -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ -#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ - -/* - * Auxiliary flags: Linux defined - For features scattered in various - * CPUID levels like 0x6, 0xA etc, word 7. - * - * Reuse free bits when adding new feature flags! - */ - -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ - -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ - -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ - -/* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ - -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ - - -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ - -/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ - -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ - -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ - -/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ - -/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ - -/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ -#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ -#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ -#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ -#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ - -/* - * BUG word(s) - */ -#define X86_BUG(x) (NCAPINTS*32 + (x)) - -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#include <asm/processor.h> #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; * is not relevant. */ #define cpu_feature_enabled(bit) \ - (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \ - cpu_has(&boot_cpu_data, bit)) + (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) @@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) /* - * Do not add any more of those clumsy macros - use static_cpu_has_safe() for + * Do not add any more of those clumsy macros - use static_cpu_has() for * fast paths and boot_cpu_has() otherwise! */ -#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS) -extern void warn_pre_alternatives(void); -extern bool __static_cpu_has_safe(u16 bit); - +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). - * These are only valid after alternatives have run, but will statically - * patch the target code for additional performance. + * These will statically patch the target code for additional + * performance. */ -static __always_inline __pure bool __static_cpu_has(u16 bit) -{ -#ifdef CC_HAVE_ASM_GOTO - -#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS - - /* - * Catch too early usage of this before alternatives - * have run. - */ - asm_volatile_goto("1: jmp %l[t_warn]\n" - "2:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" - " .long 0\n" /* no replacement */ - " .word %P0\n" /* 1: do replace */ - " .byte 2b - 1b\n" /* source len */ - " .byte 0\n" /* replacement len */ - " .byte 0\n" /* pad len */ - ".previous\n" - /* skipping size check since replacement size = 0 */ - : : "i" (X86_FEATURE_ALWAYS) : : t_warn); - -#endif - - asm_volatile_goto("1: jmp %l[t_no]\n" - "2:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" - " .long 0\n" /* no replacement */ - " .word %P0\n" /* feature bit */ - " .byte 2b - 1b\n" /* source len */ - " .byte 0\n" /* replacement len */ - " .byte 0\n" /* pad len */ - ".previous\n" - /* skipping size check since replacement size = 0 */ - : : "i" (bit) : : t_no); - return true; - t_no: - return false; - -#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS - t_warn: - warn_pre_alternatives(); - return false; -#endif - -#else /* CC_HAVE_ASM_GOTO */ - - u8 flag; - /* Open-coded due to __stringify() in ALTERNATIVE() */ - asm volatile("1: movb $0,%0\n" - "2:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" - " .long 3f - .\n" - " .word %P1\n" /* feature bit */ - " .byte 2b - 1b\n" /* source len */ - " .byte 4f - 3f\n" /* replacement len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .discard,\"aw\",@progbits\n" - " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "3: movb $1,%0\n" - "4:\n" - ".previous\n" - : "=qm" (flag) : "i" (bit)); - return flag; - -#endif /* CC_HAVE_ASM_GOTO */ -} - -#define static_cpu_has(bit) \ -( \ - __builtin_constant_p(boot_cpu_has(bit)) ? \ - boot_cpu_has(bit) : \ - __builtin_constant_p(bit) ? \ - __static_cpu_has(bit) : \ - boot_cpu_has(bit) \ -) - -static __always_inline __pure bool _static_cpu_has_safe(u16 bit) +static __always_inline __pure bool _static_cpu_has(u16 bit) { -#ifdef CC_HAVE_ASM_GOTO - asm_volatile_goto("1: jmp %l[t_dynamic]\n" + asm_volatile_goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" @@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) " .byte 0\n" /* repl len */ " .byte 0\n" /* pad len */ ".previous\n" - : : "i" (bit), "i" (X86_FEATURE_ALWAYS) - : : t_dynamic, t_no); + ".section .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum],%[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".previous\n" + : : "i" (bit), "i" (X86_FEATURE_ALWAYS), + [bitnum] "i" (1 << (bit & 7)), + [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + : : t_yes, t_no); + t_yes: return true; t_no: return false; - t_dynamic: - return __static_cpu_has_safe(bit); -#else - u8 flag; - /* Open-coded due to __stringify() in ALTERNATIVE() */ - asm volatile("1: movb $2,%0\n" - "2:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 3f - .\n" /* repl offset */ - " .word %P2\n" /* always replace */ - " .byte 2b - 1b\n" /* source len */ - " .byte 4f - 3f\n" /* replacement len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .discard,\"aw\",@progbits\n" - " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "3: movb $0,%0\n" - "4:\n" - ".previous\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 5f - .\n" /* repl offset */ - " .word %P1\n" /* feature bit */ - " .byte 4b - 3b\n" /* src len */ - " .byte 6f - 5f\n" /* repl len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .discard,\"aw\",@progbits\n" - " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "5: movb $1,%0\n" - "6:\n" - ".previous\n" - : "=qm" (flag) - : "i" (bit), "i" (X86_FEATURE_ALWAYS)); - return (flag == 2 ? __static_cpu_has_safe(bit) : flag); -#endif /* CC_HAVE_ASM_GOTO */ } -#define static_cpu_has_safe(bit) \ +#define static_cpu_has(bit) \ ( \ __builtin_constant_p(boot_cpu_has(bit)) ? \ boot_cpu_has(bit) : \ - _static_cpu_has_safe(bit) \ + _static_cpu_has(bit) \ ) #else /* - * gcc 3.x is too stupid to do the static test; fall back to dynamic. + * Fall back to dynamic for gcc versions which don't support asm goto. Should be + * a minority now anyway. */ #define static_cpu_has(bit) boot_cpu_has(bit) -#define static_cpu_has_safe(bit) boot_cpu_has(bit) #endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) @@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) #define static_cpu_has_bug(bit) static_cpu_has((bit)) -#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit)) #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) #define MAX_CPU_FEATURES (NCAPINTS * 32) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h new file mode 100644 index 000000000000..074b7604bd51 --- /dev/null +++ b/arch/x86/include/asm/cpufeatures.h @@ -0,0 +1,300 @@ +#ifndef _ASM_X86_CPUFEATURES_H +#define _ASM_X86_CPUFEATURES_H + +#ifndef _ASM_X86_REQUIRED_FEATURES_H +#include <asm/required-features.h> +#endif + +#ifndef _ASM_X86_DISABLED_FEATURES_H +#include <asm/disabled-features.h> +#endif + +/* + * Defines x86 CPU feature bits + */ +#define NCAPINTS 16 /* N 32-bit words worth of info */ +#define NBUGINTS 1 /* N 32-bit bug flags */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ + /* (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ +/* cpu types for specific tunings: */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ +#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ +/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ +/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ +#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ + +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ + +/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ +#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ + +/* + * Auxiliary flags: Linux defined - For features scattered in various + * CPUID levels like 0x6, 0xA etc, word 7. + * + * Reuse free bits when adding new feature flags! + */ + +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ + +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ + +/* Virtualization flags: Linux defined, word 8 */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ + +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ + + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ + +/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ + +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ + +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ + +/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ +#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ + +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ + +/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ + +/* + * BUG word(s) + */ +#define X86_BUG(x) (NCAPINTS*32 + (x)) + +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ + +#ifdef CONFIG_X86_32 +/* + * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional + * to avoid confusion. + */ +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#endif + +#endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 278441f39856..eb5deb42484d 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -98,4 +98,27 @@ struct desc_ptr { #endif /* !__ASSEMBLY__ */ +/* Access rights as returned by LAR */ +#define AR_TYPE_RODATA (0 * (1 << 9)) +#define AR_TYPE_RWDATA (1 * (1 << 9)) +#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9)) +#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9)) +#define AR_TYPE_XOCODE (4 * (1 << 9)) +#define AR_TYPE_XRCODE (5 * (1 << 9)) +#define AR_TYPE_XOCODE_CONF (6 * (1 << 9)) +#define AR_TYPE_XRCODE_CONF (7 * (1 << 9)) +#define AR_TYPE_MASK (7 * (1 << 9)) + +#define AR_DPL0 (0 * (1 << 13)) +#define AR_DPL3 (3 * (1 << 13)) +#define AR_DPL_MASK (3 * (1 << 13)) + +#define AR_A (1 << 8) /* "Accessed" */ +#define AR_S (1 << 12) /* If clear, "System" segment */ +#define AR_P (1 << 15) /* "Present" */ +#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */ +#define AR_L (1 << 21) /* "Long mode" for code segments */ +#define AR_DB (1 << 22) /* D/B, effect depends on type */ +#define AR_G (1 << 23) /* "Granularity" (limit in pages) */ + #endif /* _ASM_X86_DESC_DEFS_H */ diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 535192f6bfad..3c69fed215c5 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len) /* Use early IO mappings for DMI because it's initialized early */ #define dmi_early_remap early_ioremap #define dmi_early_unmap early_iounmap -#define dmi_remap ioremap +#define dmi_remap ioremap_cache #define dmi_unmap iounmap #endif /* _ASM_X86_DMI_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6d7d0e52ed5a..8554f960e21b 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve); extern int fixmaps_set; extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; +#define kmap_prot PAGE_KERNEL extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0fd440df63f1..a2124343edf5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -17,6 +17,7 @@ #include <asm/user.h> #include <asm/fpu/api.h> #include <asm/fpu/xstate.h> +#include <asm/cpufeature.h> /* * High level FPU state handling functions: @@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void); */ static __always_inline __pure bool use_eager_fpu(void) { - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); + return static_cpu_has(X86_FEATURE_EAGER_FPU); } static __always_inline __pure bool use_xsaveopt(void) { - return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); + return static_cpu_has(X86_FEATURE_XSAVEOPT); } static __always_inline __pure bool use_xsave(void) { - return static_cpu_has_safe(X86_FEATURE_XSAVE); + return static_cpu_has(X86_FEATURE_XSAVE); } static __always_inline __pure bool use_fxsr(void) { - return static_cpu_has_safe(X86_FEATURE_FXSR); + return static_cpu_has(X86_FEATURE_FXSR); } /* @@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) WARN_ON(system_state != SYSTEM_BOOTING); - if (static_cpu_has_safe(X86_FEATURE_XSAVES)) + if (static_cpu_has(X86_FEATURE_XSAVES)) XSTATE_OP(XSAVES, xstate, lmask, hmask, err); else XSTATE_OP(XSAVE, xstate, lmask, hmask, err); @@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) WARN_ON(system_state != SYSTEM_BOOTING); - if (static_cpu_has_safe(X86_FEATURE_XSAVES)) + if (static_cpu_has(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); @@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) * pending. Clear the x87 state here by setting it to fixed values. * "m" is a random variable that should be in L1. */ - if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { asm volatile( "fnclex\n\t" "emms\n\t" @@ -589,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) * If the task has used the math, pre-load the FPU on xsave processors * or if the past 5 consecutive context-switches used math. */ - fpu.preload = new_fpu->fpstate_active && + fpu.preload = static_cpu_has(X86_FEATURE_FPU) && + new_fpu->fpstate_active && (use_eager_fpu() || new_fpu->counter > 5); if (old_fpu->fpregs_active) { diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index af30fdeb140d..f23cd8c80b1c 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -20,16 +20,15 @@ /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE) - -/* Supported features which require eager state saving */ -#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM) +/* Supported features which require eager state saving */ +#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) + /* All currently supported features */ #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 793179cf8e21..6e4d170726b7 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -1,23 +1,44 @@ -#ifdef __ASSEMBLY__ +#ifndef _ASM_X86_FRAME_H +#define _ASM_X86_FRAME_H #include <asm/asm.h> -/* The annotation hides the frame from the unwinder and makes it look - like a ordinary ebp save/restore. This avoids some special cases for - frame pointer later */ +/* + * These are stack frame creation macros. They should be used by every + * callable non-leaf asm function to make kernel stack traces more reliable. + */ + #ifdef CONFIG_FRAME_POINTER - .macro FRAME - __ASM_SIZE(push,) %__ASM_REG(bp) - __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) - .endm - .macro ENDFRAME - __ASM_SIZE(pop,) %__ASM_REG(bp) - .endm -#else - .macro FRAME - .endm - .macro ENDFRAME - .endm -#endif - -#endif /* __ASSEMBLY__ */ + +#ifdef __ASSEMBLY__ + +.macro FRAME_BEGIN + push %_ASM_BP + _ASM_MOV %_ASM_SP, %_ASM_BP +.endm + +.macro FRAME_END + pop %_ASM_BP +.endm + +#else /* !__ASSEMBLY__ */ + +#define FRAME_BEGIN \ + "push %" _ASM_BP "\n" \ + _ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n" + +#define FRAME_END "pop %" _ASM_BP "\n" + +#endif /* __ASSEMBLY__ */ + +#define FRAME_OFFSET __ASM_SEL(4, 8) + +#else /* !CONFIG_FRAME_POINTER */ + +#define FRAME_BEGIN +#define FRAME_END +#define FRAME_OFFSET 0 + +#endif /* CONFIG_FRAME_POINTER */ + +#endif /* _ASM_X86_FRAME_H */ diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h index cd2ce4068441..ebea2c9d2cdc 100644 --- a/arch/x86/include/asm/imr.h +++ b/arch/x86/include/asm/imr.h @@ -53,7 +53,7 @@ #define IMR_MASK (IMR_ALIGN - 1) int imr_add_range(phys_addr_t base, size_t size, - unsigned int rmask, unsigned int wmask, bool lock); + unsigned int rmask, unsigned int wmask); int imr_remove_range(phys_addr_t base, size_t size); diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index cfc9a0d2d07c..a4fe16e42b7b 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -57,67 +57,13 @@ static inline void __xapic_wait_icr_idle(void) cpu_relax(); } -static inline void -__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) -{ - /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. - */ - __xapic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector, dest); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - native_apic_mem_write(APIC_ICR, cfg); -} +void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); /* * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -static inline void - __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) -{ - unsigned long cfg; - - /* - * Wait for idle. - */ - if (unlikely(vector == NMI_VECTOR)) - safe_apic_wait_icr_idle(); - else - __xapic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - native_apic_mem_write(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector, dest); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - native_apic_mem_write(APIC_ICR, cfg); -} +void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); extern void default_send_IPI_single(int cpu, int vector); extern void default_send_IPI_single_phys(int cpu, int vector); diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 78162f8e248b..d0afb05c84fc 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -1,7 +1,7 @@ #ifndef _ASM_IRQ_WORK_H #define _ASM_IRQ_WORK_H -#include <asm/processor.h> +#include <asm/cpufeature.h> static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c1adf33fdd0d..bc62e7cbf1b1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -#ifdef CONFIG_DEBUG_RODATA #define KVM_HYPERCALL \ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) -#else -/* On AMD processors, vmcall will generate a trap that we will - * then rewrite to the appropriate instruction. - */ -#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" -#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index 19c099afa861..e795f5274217 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -41,7 +41,7 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } #else -#error Live patching support is disabled; check CONFIG_LIVEPATCH +#error Include linux/livepatch.h, not asm/livepatch.h #endif #endif /* _ASM_X86_LIVEPATCH_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 2ea4527e462f..92b6f651fa4f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -40,8 +40,20 @@ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ /* AMD-specific bits */ -#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */ +#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ +#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ + +/* + * McaX field if set indicates a given bank supports MCA extensions: + * - Deferred error interrupt type is specifiable by bank. + * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers, + * But should not be used to determine MSR numbers. + * - TCC bit is present in MCx_STATUS. + */ +#define MCI_CONFIG_MCAX 0x1 +#define MCI_IPID_MCATYPE 0xFFFF0000 +#define MCI_IPID_HWID 0xFFF /* * Note that the full MCACOD field of IA32_MCi_STATUS MSR is @@ -91,6 +103,16 @@ #define MCE_LOG_LEN 32 #define MCE_LOG_SIGNATURE "MACHINECHECK" +/* AMD Scalable MCA */ +#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 +#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 +#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 +#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a +#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) + /* * This structure contains all data related to the MCE log. Also * carries a signature to make it easier to find from external @@ -113,6 +135,7 @@ struct mca_config { bool ignore_ce; bool disabled; bool ser; + bool recovery; bool bios_cmci_threshold; u8 banks; s8 bootlog; @@ -287,4 +310,49 @@ struct cper_sec_mem_err; extern void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err); +/* + * Enumerate new IP types and HWID values in AMD processors which support + * Scalable MCA. + */ +#ifdef CONFIG_X86_MCE_AMD +enum amd_ip_types { + SMCA_F17H_CORE = 0, /* Core errors */ + SMCA_DF, /* Data Fabric */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ + SMCA_SMU, /* System Management Unit */ + N_AMD_IP_TYPES +}; + +struct amd_hwid { + const char *name; + unsigned int hwid; +}; + +extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES]; + +enum amd_core_mca_blocks { + SMCA_LS = 0, /* Load Store */ + SMCA_IF, /* Instruction Fetch */ + SMCA_L2_CACHE, /* L2 cache */ + SMCA_DE, /* Decoder unit */ + RES, /* Reserved */ + SMCA_EX, /* Execution unit */ + SMCA_FP, /* Floating Point */ + SMCA_L3_CACHE, /* L3 cache */ + N_CORE_MCA_BLOCKS +}; + +extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS]; + +enum amd_df_mca_blocks { + SMCA_CS = 0, /* Coherent Slave */ + SMCA_PIE, /* Power management, Interrupts, etc */ + N_DF_BLOCKS +}; + +extern const char * const amd_df_mcablock_names[N_DF_BLOCKS]; +#endif + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 1e1b07a5a738..9d3a96c4da78 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -3,6 +3,7 @@ #include <asm/cpu.h> #include <linux/earlycpio.h> +#include <linux/initrd.h> #define native_rdmsr(msr, val1, val2) \ do { \ @@ -143,4 +144,29 @@ static inline void reload_early_microcode(void) { } static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } #endif + +static inline unsigned long get_initrd_start(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + return initrd_start; +#else + return 0; +#endif +} + +static inline unsigned long get_initrd_start_addr(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD +#ifdef CONFIG_X86_32 + unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); + + return (unsigned long)__pa_nodebug(*initrd_start_p); +#else + return get_initrd_start(); +#endif +#else /* CONFIG_BLK_DEV_INITRD */ + return 0; +#endif +} + #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 8559b0102ea1..603417f8dd6c 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -40,7 +40,6 @@ struct extended_sigtable { #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) -#define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ (((struct microcode_intel *)mc)->hdr.datasize ? \ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 55234d5e7160..1ea0baef1175 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -19,7 +19,8 @@ typedef struct { #endif struct mutex lock; - void __user *vdso; + void __user *vdso; /* vdso base address */ + const struct vdso_image *vdso_image; /* vdso image in use */ atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */ } mm_context_t; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b05402ef3b84..984ab75bf621 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1,7 +1,12 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H -/* CPU model specific register (MSR) numbers */ +/* + * CPU model specific register (MSR) numbers. + * + * Do not add new entries to this file unless the definitions are shared + * between multiple compilation units. + */ /* x86-64 specific MSRs */ #define MSR_EFER 0xc0000080 /* extended feature register */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index c70689b5e5aa..0deeb2d26df7 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -3,6 +3,8 @@ #include <linux/sched.h> +#include <asm/cpufeature.h> + #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 46873fbd44e1..d08eacd298c2 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); +extern bool mp_should_keep_irq(struct device *dev); + struct pci_raw_ops { int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 7bcb861a04e5..5a2ed3ed2f26 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -165,6 +165,7 @@ struct x86_pmu_capability { #define GLOBAL_STATUS_ASIF BIT_ULL(60) #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) #define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) +#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) /* * IBS cpuid feature detection diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2d5a50cb61a2..983738ac014c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -13,7 +13,7 @@ struct vm86; #include <asm/types.h> #include <uapi/asm/sigcontext.h> #include <asm/current.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> @@ -24,7 +24,6 @@ struct vm86; #include <asm/fpu/types.h> #include <linux/personality.h> -#include <linux/cpumask.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/math64.h> @@ -129,6 +128,8 @@ struct cpuinfo_x86 { u16 booted_cores; /* Physical processor id: */ u16 phys_proc_id; + /* Logical processor id: */ + u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; /* Compute unit id */ @@ -298,10 +299,13 @@ struct tss_struct { */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; +#ifdef CONFIG_X86_32 /* - * Space for the temporary SYSENTER stack: + * Space for the temporary SYSENTER stack. */ + unsigned long SYSENTER_stack_canary; unsigned long SYSENTER_stack[64]; +#endif } ____cacheline_aligned; @@ -766,7 +770,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); * Return saved PC of a blocked thread. * What is this good for? it will be always the scheduler or ret_from_fork. */ -#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) +#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8)) #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index a4a77286cb1d..9b9b30b19441 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -7,12 +7,23 @@ void syscall_init(void); +#ifdef CONFIG_X86_64 void entry_SYSCALL_64(void); -void entry_SYSCALL_compat(void); +#endif + +#ifdef CONFIG_X86_32 void entry_INT80_32(void); -void entry_INT80_compat(void); void entry_SYSENTER_32(void); +void __begin_SYSENTER_singlestep_region(void); +void __end_SYSENTER_singlestep_region(void); +#endif + +#ifdef CONFIG_IA32_EMULATION void entry_SYSENTER_compat(void); +void __end_entry_SYSENTER_compat(void); +void entry_SYSCALL_compat(void); +void entry_INT80_compat(void); +#endif void x86_configure_nx(void); void x86_report_nx(void); diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 0a5242428659..13b6cdd0af57 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -7,7 +7,7 @@ extern char __brk_base[], __brk_limit[]; extern struct exception_table_entry __stop___ex_table[]; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; #endif diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 89db46752a8f..452c88b8ad06 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -13,7 +13,6 @@ X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc); int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask); diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index ba665ebd17bb..db333300bd4b 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -15,7 +15,7 @@ #include <linux/stringify.h> #include <asm/nops.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> /* "Raw" instruction opcodes */ #define __ASM_CLAC .byte 0x0f,0x01,0xca diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index dfcf0727623b..20a3de5cb3b0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -16,7 +16,6 @@ #endif #include <asm/thread_info.h> #include <asm/cpumask.h> -#include <asm/cpufeature.h> extern int smp_num_siblings; extern unsigned int num_processors; diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index ff8b9a17dc4b..90dbbd9666d4 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -78,6 +78,19 @@ int strcmp(const char *cs, const char *ct); #define memset(s, c, n) __memset(s, c, n) #endif +/** + * memcpy_mcsafe - copy memory with indication if a machine check happened + * + * @dst: destination address + * @src: source address + * @cnt: number of bytes to copy + * + * Low level memory copy function that catches machine checks + * + * Return 0 for success, -EFAULT for fail + */ +int memcpy_mcsafe(void *dst, const void *src, size_t cnt); + #endif /* __KERNEL__ */ #endif /* _ASM_X86_STRING_64_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index c7b551028740..82866697fcf1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -49,7 +49,7 @@ */ #ifndef __ASSEMBLY__ struct task_struct; -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <linux/atomic.h> struct thread_info { @@ -134,10 +134,13 @@ struct thread_info { #define _TIF_ADDR32 (1 << TIF_ADDR32) #define _TIF_X32 (1 << TIF_X32) -/* work to do in syscall_trace_enter() */ +/* + * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for + * enter_from_user_mode() + */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) /* work to do on any return to user space */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6df2029405a3..c24b4224d439 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -5,8 +5,57 @@ #include <linux/sched.h> #include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/special_insns.h> +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + struct { u64 d[2]; } desc = { { pcid, addr } }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + * + * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and + * invpcid (%rcx), %rax in long mode. + */ + asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" + : : "m" (desc), "a" (type), "c" (&desc) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -104,6 +153,15 @@ static inline void __native_flush_tlb_global(void) { unsigned long flags; + if (static_cpu_has(X86_FEATURE_INVPCID)) { + /* + * Using INVPCID is considerably faster than a pair of writes + * to CR4 sandwiched inside an IRQ flag save/restore. + */ + invpcid_flush_all(); + return; + } + /* * Read-modify-write to CR4 - protect it from preemption and * from interrupts. (Use the raw variant because this code can diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0fb46482dfde..7f991bd5031b 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -119,12 +119,23 @@ static inline void setup_node_to_cpumask_map(void) { } extern const struct cpumask *cpu_coregroup_mask(int cpu); +#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #ifdef ENABLE_TOPO_DEFINES #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) + +extern unsigned int __max_logical_packages; +#define topology_max_packages() (__max_logical_packages) +int topology_update_package_map(unsigned int apicid, unsigned int cpu); +extern int topology_phys_to_logical_pkg(unsigned int pkg); +#else +#define topology_max_packages() (1) +static inline int +topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } +static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 6d7c5479bcea..174c4212780a 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -29,6 +29,8 @@ static inline cycles_t get_cycles(void) return rdtsc(); } +extern struct system_counterval_t convert_art_to_tsc(cycle_t art); + extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a4a30e4b2d34..c0f27d7ea7ff 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -90,12 +90,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un likely(!__range_not_ok(addr, size, user_addr_max())) /* - * The exception table consists of pairs of addresses relative to the - * exception table enty itself: the first is the address of an - * instruction that is allowed to fault, and the second is the address - * at which the program should continue. No registers are modified, - * so it is entirely up to the continuation code to figure out what to - * do. + * The exception table consists of triples of addresses relative to the + * exception table entry itself. The first address is of an instruction + * that is allowed to fault, the second is the target at which the program + * should continue. The third is a handler function to deal with the fault + * caused by the instruction in the first field. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -104,13 +103,14 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un */ struct exception_table_entry { - int insn, fixup; + int insn, fixup, handler; }; /* This is not the generic standard exception_table_entry format */ #define ARCH_HAS_SORT_EXTABLE #define ARCH_HAS_SEARCH_EXTABLE -extern int fixup_exception(struct pt_regs *regs); +extern int fixup_exception(struct pt_regs *regs, int trapnr); +extern bool ex_has_fault_handler(unsigned long ip); extern int early_fixup_exception(unsigned long *ip); /* diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index f5dcb5204dcd..3fe0eac59462 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -48,20 +48,28 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) switch (n) { case 1: + __uaccess_begin(); __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret, 1); + __uaccess_end(); return ret; case 2: + __uaccess_begin(); __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret, 2); + __uaccess_end(); return ret; case 4: + __uaccess_begin(); __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); + __uaccess_end(); return ret; case 8: + __uaccess_begin(); __put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret, 8); + __uaccess_end(); return ret; } } @@ -103,13 +111,19 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) switch (n) { case 1: + __uaccess_begin(); __get_user_size(*(u8 *)to, from, 1, ret, 1); + __uaccess_end(); return ret; case 2: + __uaccess_begin(); __get_user_size(*(u16 *)to, from, 2, ret, 2); + __uaccess_end(); return ret; case 4: + __uaccess_begin(); __get_user_size(*(u32 *)to, from, 4, ret, 4); + __uaccess_end(); return ret; } } @@ -148,13 +162,19 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: + __uaccess_begin(); __get_user_size(*(u8 *)to, from, 1, ret, 1); + __uaccess_end(); return ret; case 2: + __uaccess_begin(); __get_user_size(*(u16 *)to, from, 2, ret, 2); + __uaccess_end(); return ret; case 4: + __uaccess_begin(); __get_user_size(*(u32 *)to, from, 4, ret, 4); + __uaccess_end(); return ret; } } @@ -170,13 +190,19 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, switch (n) { case 1: + __uaccess_begin(); __get_user_size(*(u8 *)to, from, 1, ret, 1); + __uaccess_end(); return ret; case 2: + __uaccess_begin(); __get_user_size(*(u16 *)to, from, 2, ret, 2); + __uaccess_end(); return ret; case 4: + __uaccess_begin(); __get_user_size(*(u32 *)to, from, 4, ret, 4); + __uaccess_end(); return ret; } } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index b89c34c4019b..307698688fa1 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -8,7 +8,7 @@ #include <linux/errno.h> #include <linux/lockdep.h> #include <asm/alternative.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/page.h> /* diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index deabaf9759b6..43dc55be524e 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -13,9 +13,6 @@ struct vdso_image { void *data; unsigned long size; /* Always a multiple of PAGE_SIZE */ - /* text_mapping.pages is big enough for data/size page pointers */ - struct vm_special_mapping text_mapping; - unsigned long alt, alt_len; long sym_vvar_start; /* Negative offset to the vvar area */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index f556c4843aa1..e728699db774 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -37,6 +37,12 @@ struct vsyscall_gtod_data { }; extern struct vsyscall_gtod_data vsyscall_gtod_data; +extern int vclocks_used; +static inline bool vclock_was_used(int vclock) +{ + return READ_ONCE(vclocks_used) & (1 << vclock); +} + static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) { unsigned ret; diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 968d57dd54c9..f320ee32d5a1 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -57,7 +57,7 @@ static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, { if (xen_pci_frontend && xen_pci_frontend->enable_msi) return xen_pci_frontend->enable_msi(dev, vectors); - return -ENODEV; + return -ENOSYS; } static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) { @@ -69,7 +69,7 @@ static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, { if (xen_pci_frontend && xen_pci_frontend->enable_msix) return xen_pci_frontend->enable_msix(dev, vectors, nvec); - return -ENODEV; + return -ENOSYS; } static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev) { diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index d485232f1e9f..62d4111c1c54 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -256,7 +256,7 @@ struct sigcontext_64 { __u16 cs; __u16 gs; __u16 fs; - __u16 __pad0; + __u16 ss; __u64 err; __u64 trapno; __u64 oldmask; @@ -341,9 +341,37 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; + + /* + * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), + * Linux saved and restored fs and gs in these slots. This + * was counterproductive, as fsbase and gsbase were never + * saved, so arch_prctl was presumably unreliable. + * + * These slots should never be reused without extreme caution: + * + * - Some DOSEMU versions stash fs and gs in these slots manually, + * thus overwriting anything the kernel expects to be preserved + * in these slots. + * + * - If these slots are ever needed for any other purpose, + * there is some risk that very old 64-bit binaries could get + * confused. I doubt that many such binaries still work, + * though, since the same patch in 2.5.64 also removed the + * 64-bit set_thread_area syscall, so it appears that there + * is no TLS API beyond modify_ldt that works in both pre- + * and post-2.5.64 kernels. + * + * If the kernel ever adds explicit fs, gs, fsbase, and gsbase + * save/restore, it will most likely need to be opt-in and use + * different context slots. + */ __u16 gs; __u16 fs; - __u16 __pad0; + union { + __u16 ss; /* If UC_SIGCONTEXT_SS */ + __u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */ + }; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h index b7c29c8017f2..e3d1ec90616e 100644 --- a/arch/x86/include/uapi/asm/ucontext.h +++ b/arch/x86/include/uapi/asm/ucontext.h @@ -1,11 +1,54 @@ #ifndef _ASM_X86_UCONTEXT_H #define _ASM_X86_UCONTEXT_H -#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state - * information in the memory layout pointed - * by the fpstate pointer in the ucontext's - * sigcontext struct (uc_mcontext). - */ +/* + * Indicates the presence of extended state information in the memory + * layout pointed by the fpstate pointer in the ucontext's sigcontext + * struct (uc_mcontext). + */ +#define UC_FP_XSTATE 0x1 + +#ifdef __x86_64__ +/* + * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on + * kernels that save SS in the sigcontext. All kernels that set + * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp + * regardless of SS (i.e. they implement espfix). + * + * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS + * when delivering a signal that came from 64-bit code. + * + * Sigreturn restores SS as follows: + * + * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || + * saved CS is not 64-bit) + * new SS = saved SS (will fail IRET and signal if invalid) + * else + * new SS = a flat 32-bit data segment + * + * This behavior serves three purposes: + * + * - Legacy programs that construct a 64-bit sigcontext from scratch + * with zero or garbage in the SS slot (e.g. old CRIU) and call + * sigreturn will still work. + * + * - Old DOSEMU versions sometimes catch a signal from a segmented + * context, delete the old SS segment (with modify_ldt), and change + * the saved CS to a 64-bit segment. These DOSEMU versions expect + * sigreturn to send them back to 64-bit mode without killing them, + * despite the fact that the SS selector when the signal was raised is + * no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel + * will fix up SS for these DOSEMU versions. + * + * - Old and new programs that catch a signal and return without + * modifying the saved context will end up in exactly the state they + * started in, even if they were running in a segmented context when + * the signal was raised.. Old kernels would lose track of the + * previous SS value. + */ +#define UC_SIGCONTEXT_SS 0x2 +#define UC_STRICT_RESTORE_SS 0x4 +#endif #include <asm-generic/ucontext.h> diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d1daead5fcdd..adb3eaf8fe2a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include <asm/cacheflush.h> #include <asm/realmode.h> +#include <linux/ftrace.h> #include "../../realmode/rm/wakeup.h" #include "sleep.h" @@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ + /* + * Pause/unpause graph tracing around do_suspend_lowlevel as it has + * inconsistent call/return info after it jumps to the wakeup vector. + */ + pause_graph_tracing(); do_suspend_lowlevel(); + unpause_graph_tracing(); return 0; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3b892bbdd902..d356987a04e9 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2078,6 +2078,20 @@ int generic_processor_info(int apicid, int version) cpu = cpumask_next_zero(-1, cpu_present_mask); /* + * This can happen on physical hotplug. The sanity check at boot time + * is done from native_smp_prepare_cpus() after num_possible_cpus() is + * established. + */ + if (topology_update_package_map(apicid, cpu) < 0) { + int thiscpu = max + disabled_cpus; + + pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n", + thiscpu, apicid); + disabled_cpus++; + return -ENOSPC; + } + + /* * Validate version */ if (version == 0x0) { diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 9968f30cca3e..76f89e2b245a 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -53,7 +53,7 @@ void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void _flat_send_IPI_mask(unsigned long mask, int vector) +static void _flat_send_IPI_mask(unsigned long mask, int vector) { unsigned long flags; diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c80c02c6ec49..ab5c2c685a3c 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) unsigned long value; unsigned int id = (x >> 24) & 0xff; - if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { rdmsrl(MSR_FAM10H_NODE_ID, value); id |= (value << 2) & 0xff00; } @@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) this_cpu_write(cpu_llc_id, node); /* Account for nodes per socket in multi-core-module processors */ - if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { rdmsrl(MSR_FAM10H_NODE_ID, val); nodes = ((val >> 3) & 7) + 1; } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index eb45fc9b6124..28bde88b0085 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -18,6 +18,66 @@ #include <asm/proto.h> #include <asm/ipi.h> +void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) +{ + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. + */ + __xapic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); +} + +/* + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). + */ +void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) +{ + unsigned long cfg; + + /* + * Wait for idle. + */ + if (unlikely(vector == NMI_VECTOR)) + safe_apic_wait_icr_idle(); + else + __xapic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + native_apic_mem_write(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); +} + void default_send_IPI_single_phys(int cpu, int vector) { unsigned long flags; diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 84a7524b202c..5c042466f274 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -59,7 +59,6 @@ void common(void) { #ifdef CONFIG_PARAVIRT BLANK(); - OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6ce39025f467..ecdc1d217dc0 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -7,7 +7,7 @@ #include <linux/lguest.h> #include "../../../drivers/lguest/lg.h" -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls[] = { #include <asm/syscalls_32.h> }; @@ -52,6 +52,11 @@ void foo(void) DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - offsetofend(struct tss_struct, SYSENTER_stack)); + /* Offset from cpu_tss to SYSENTER_stack */ + OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); + /* Size of SYSENTER_stack */ + DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f2edafb5f24e..d875f97d4e0b 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -4,17 +4,11 @@ #include <asm/ia32.h> -#define __SYSCALL_64(nr, sym, compat) [nr] = 1, -#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, -#ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_X32(nr, sym, compat) [nr] = 1, -#else -# define __SYSCALL_X32(nr, sym, compat) /* nothing */ -#endif +#define __SYSCALL_64(nr, sym, qual) [nr] = 1, static char syscalls_64[] = { #include <asm/syscalls_64.h> }; -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls_ia32[] = { #include <asm/syscalls_32.h> }; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 58031303e304..0d373d7affc8 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o - -ifdef CONFIG_PERF_EVENTS -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o -ifdef CONFIG_AMD_IOMMU -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o -endif -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o - -obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ - perf_event_intel_uncore_snb.o \ - perf_event_intel_uncore_snbep.o \ - perf_event_intel_uncore_nhmex.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o -endif - - obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o @@ -64,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ -cpufeature = $(src)/../../include/asm/cpufeature.h +cpufeature = $(src)/../../include/asm/cpufeatures.h targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index ce197bb7c129..1661d8ec9280 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,7 +1,7 @@ #include <linux/bitops.h> #include <linux/kernel.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/e820.h> #include <asm/mtrr.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7c3120f5177b..62590aa064c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s) } __setup("nompx", x86_mpx_setup); +static int __init x86_noinvpcid_setup(char *s) +{ + /* noinvpcid doesn't accept parameters */ + if (s) + return -EINVAL; + + /* do not emit a message if the feature is not present */ + if (!boot_cpu_has(X86_FEATURE_INVPCID)) + return 0; + + setup_clear_cpu_cap(X86_FEATURE_INVPCID); + pr_info("noinvpcid: INVPCID feature disabled\n"); + return 0; +} +early_param("noinvpcid", x86_noinvpcid_setup); + #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; @@ -801,6 +817,31 @@ static void detect_nopl(struct cpuinfo_x86 *c) #else set_cpu_cap(c, X86_FEATURE_NOPL); #endif + + /* + * ESPFIX is a strange bug. All real CPUs have it. Paravirt + * systems that run Linux at CPL > 0 may or may not have the + * issue, but, even if they have the issue, there's absolutely + * nothing we can do about it because we can't use the real IRET + * instruction. + * + * NB: For the time being, only 32-bit kernels support + * X86_BUG_ESPFIX as such. 64-bit kernels directly choose + * whether to apply espfix using paravirt hooks. If any + * non-paravirt system ever shows up that does *not* have the + * ESPFIX issue, we can change this. + */ +#ifdef CONFIG_X86_32 +#ifdef CONFIG_PARAVIRT + do { + extern void native_iret(void); + if (pv_cpu_ops.iret == native_iret) + set_cpu_bug(c, X86_BUG_ESPFIX); + } while (0); +#else + set_cpu_bug(c, X86_BUG_ESPFIX); +#endif +#endif } static void generic_identify(struct cpuinfo_x86 *c) @@ -975,6 +1016,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif + /* The boot/hotplug time assigment got cleared, restore it */ + c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); } /* @@ -1473,20 +1516,6 @@ void cpu_init(void) } #endif -#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS -void warn_pre_alternatives(void) -{ - WARN(1, "You're using static_cpu_has before alternatives have run!\n"); -} -EXPORT_SYMBOL_GPL(warn_pre_alternatives); -#endif - -inline bool __static_cpu_has_safe(u16 bit) -{ - return boot_cpu_has(bit); -} -EXPORT_SYMBOL_GPL(__static_cpu_has_safe); - static void bsp_resume(void) { if (this_cpu->c_bsp_resume) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 187bb583d0df..6adef9cac23e 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -8,6 +8,7 @@ #include <linux/timer.h> #include <asm/pci-direct.h> #include <asm/tsc.h> +#include <asm/cpufeature.h> #include "cpu.h" diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 05b9211ea0f7..1f7fdb91a818 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -8,7 +8,7 @@ #include <linux/module.h> #include <linux/uaccess.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/pgtable.h> #include <asm/msr.h> #include <asm/bugs.h> @@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) pr_info("Disabling PGE capability bit\n"); setup_clear_cpu_cap(X86_FEATURE_PGE); } + + if (c->cpuid_level >= 0x00000001) { + u32 eax, ebx, ecx, edx; + + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + /* + * If HTT (EDX[28]) is set EBX[16:23] contain the number of + * apicids which are reserved per package. Store the resulting + * shift value for the package management code. + */ + if (edx & (1U << 28)) + c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); + } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 6ed779efff26..de6626c18e42 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -14,7 +14,7 @@ #include <linux/sysfs.h> #include <linux/pci.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/amd_nb.h> #include <asm/smp.h> diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index afa9f0d487ea..fbb5e90557a5 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -1,5 +1,5 @@ #include <asm/cpu_device_id.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/slab.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 9c682c222071..5119766d9889 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/debugfs.h> #include <asm/mce.h> +#include <asm/uaccess.h> #include "mce-internal.h" @@ -29,7 +30,7 @@ * panic situations) */ -enum context { IN_KERNEL = 1, IN_USER = 2 }; +enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 }; enum ser { SER_REQUIRED = 1, NO_SER = 2 }; enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; @@ -48,6 +49,7 @@ static struct severity { #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } #define KERNEL .context = IN_KERNEL #define USER .context = IN_USER +#define KERNEL_RECOV .context = IN_KERNEL_RECOV #define SER .ser = SER_REQUIRED #define NOSER .ser = NO_SER #define EXCP .excp = EXCP_CONTEXT @@ -87,6 +89,10 @@ static struct severity { EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) ), MCESEV( + PANIC, "In kernel and no restart IP", + EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0) + ), + MCESEV( DEFERRED, "Deferred error", NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED) ), @@ -123,6 +129,11 @@ static struct severity { MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV) ), MCESEV( + AR, "Action required: data load in error recoverable area of kernel", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), + KERNEL_RECOV + ), + MCESEV( AR, "Action required: data load error in a user process", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), USER @@ -170,6 +181,9 @@ static struct severity { ) /* always matches. keep at end */ }; +#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \ + (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) + /* * If mcgstatus indicated that ip/cs on the stack were * no good, then "m->cs" will be zero and we will have @@ -183,7 +197,11 @@ static struct severity { */ static int error_context(struct mce *m) { - return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; + if ((m->cs & 3) == 3) + return IN_USER; + if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) + return IN_KERNEL_RECOV; + return IN_KERNEL; } /* diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a006f4cd792b..f0c921b03e42 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear) } } +static int do_memory_failure(struct mce *m) +{ + int flags = MF_ACTION_REQUIRED; + int ret; + + pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr); + if (!(m->mcgstatus & MCG_STATUS_RIPV)) + flags |= MF_MUST_KILL; + ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags); + if (ret) + pr_err("Memory error not recovered"); + return ret; +} + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. @@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); char *msg = "Unknown"; - u64 recover_paddr = ~0ull; - int flags = MF_ACTION_REQUIRED; int lmce = 0; /* If this CPU is offline, just bail out. */ @@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code) } /* - * At insane "tolerant" levels we take no action. Otherwise - * we only die if we have no other choice. For less serious - * issues we try to recover, or limit damage to the current - * process. + * If tolerant is at an insane level we drop requests to kill + * processes and continue even when there is no way out. */ - if (cfg->tolerant < 3) { - if (no_way_out) - mce_panic("Fatal machine check on current CPU", &m, msg); - if (worst == MCE_AR_SEVERITY) { - recover_paddr = m.addr; - if (!(m.mcgstatus & MCG_STATUS_RIPV)) - flags |= MF_MUST_KILL; - } else if (kill_it) { - force_sig(SIGBUS, current); - } - } + if (cfg->tolerant == 3) + kill_it = 0; + else if (no_way_out) + mce_panic("Fatal machine check on current CPU", &m, msg); if (worst > 0) mce_report_event(regs); @@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code) out: sync_core(); - if (recover_paddr == ~0ull) - goto done; + if (worst != MCE_AR_SEVERITY && !kill_it) + goto out_ist; - pr_err("Uncorrected hardware memory error in user-access at %llx", - recover_paddr); - /* - * We must call memory_failure() here even if the current process is - * doomed. We still need to mark the page as poisoned and alert any - * other users of the page. - */ - ist_begin_non_atomic(regs); - local_irq_enable(); - if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { - pr_err("Memory error not recovered"); - force_sig(SIGBUS, current); + /* Fault was in user mode and we need to take some action */ + if ((m.cs & 3) == 3) { + ist_begin_non_atomic(regs); + local_irq_enable(); + + if (kill_it || do_memory_failure(&m)) + force_sig(SIGBUS, current); + local_irq_disable(); + ist_end_non_atomic(); + } else { + if (!fixup_exception(regs, X86_TRAP_MC)) + mce_panic("Failed kernel mode recovery", &m, NULL); } - local_irq_disable(); - ist_end_non_atomic(); -done: + +out_ist: ist_exit(regs); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1576,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model == 45) quirk_no_way_out = quirk_sandybridge_ifu; + /* + * MCG_CAP.MCG_SER_P is necessary but not sufficient to know + * whether this processor will actually generate recoverable + * machine checks. Check to see if this is an E7 model Xeon. + * We can't do a model number check because E5 and E7 use the + * same model number. E5 doesn't support recovery, E7 does. + */ + if (mca_cfg.recovery || (mca_cfg.ser && + !strncmp(c->x86_model_id, + "Intel(R) Xeon(R) CPU E7-", 24))) + set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY); } if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; @@ -1617,10 +1630,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) case X86_VENDOR_AMD: { u32 ebx = cpuid_ebx(0x80000007); - mce_amd_feature_init(c); mce_flags.overflow_recov = !!(ebx & BIT(0)); mce_flags.succor = !!(ebx & BIT(1)); mce_flags.smca = !!(ebx & BIT(3)); + mce_amd_feature_init(c); break; } @@ -2028,6 +2041,8 @@ static int __init mcheck_enable(char *str) cfg->bootlog = (str[0] == 'b'); else if (!strcmp(str, "bios_cmci_threshold")) cfg->bios_cmci_threshold = true; + else if (!strcmp(str, "recovery")) + cfg->recovery = true; else if (isdigit(str[0])) { if (get_option(&str, &cfg->tolerant) == 2) get_option(&str, &(cfg->monarch_timeout)); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e99b15077e94..9d656fd436ef 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1,5 +1,5 @@ /* - * (c) 2005-2015 Advanced Micro Devices, Inc. + * (c) 2005-2016 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html @@ -28,7 +28,7 @@ #include <asm/msr.h> #include <asm/trace/irq_vectors.h> -#define NR_BLOCKS 9 +#define NR_BLOCKS 5 #define THRESHOLD_MAX 0xFFF #define INT_TYPE_APIC 0x00020000 #define MASK_VALID_HI 0x80000000 @@ -49,6 +49,19 @@ #define DEF_LVT_OFF 0x2 #define DEF_INT_TYPE_APIC 0x2 +/* Scalable MCA: */ + +/* Threshold LVT offset is at MSR0xC0000410[15:12] */ +#define SMCA_THR_LVT_OFF 0xF000 + +/* + * OS is required to set the MCAX bit to acknowledge that it is now using the + * new MSR ranges and new registers under each bank. It also means that the OS + * will configure deferred errors in the new MCx_CONFIG register. If the bit is + * not set, uncorrectable errors will cause a system panic. + */ +#define SMCA_MCAX_EN_OFF 0x1 + static const char * const th_names[] = { "load_store", "insn_fetch", @@ -58,6 +71,35 @@ static const char * const th_names[] = { "execution_unit", }; +/* Define HWID to IP type mappings for Scalable MCA */ +struct amd_hwid amd_hwids[] = { + [SMCA_F17H_CORE] = { "f17h_core", 0xB0 }, + [SMCA_DF] = { "data_fabric", 0x2E }, + [SMCA_UMC] = { "umc", 0x96 }, + [SMCA_PB] = { "param_block", 0x5 }, + [SMCA_PSP] = { "psp", 0xFF }, + [SMCA_SMU] = { "smu", 0x1 }, +}; +EXPORT_SYMBOL_GPL(amd_hwids); + +const char * const amd_core_mcablock_names[] = { + [SMCA_LS] = "load_store", + [SMCA_IF] = "insn_fetch", + [SMCA_L2_CACHE] = "l2_cache", + [SMCA_DE] = "decode_unit", + [RES] = "", + [SMCA_EX] = "execution_unit", + [SMCA_FP] = "floating_point", + [SMCA_L3_CACHE] = "l3_cache", +}; +EXPORT_SYMBOL_GPL(amd_core_mcablock_names); + +const char * const amd_df_mcablock_names[] = { + [SMCA_CS] = "coherent_slave", + [SMCA_PIE] = "pie", +}; +EXPORT_SYMBOL_GPL(amd_df_mcablock_names); + static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ @@ -84,6 +126,13 @@ struct thresh_restart { static inline bool is_shared_bank(int bank) { + /* + * Scalable MCA provides for only one core to have access to the MSRs of + * a shared bank. + */ + if (mce_flags.smca) + return false; + /* Bank 4 is for northbridge reporting and is thus shared */ return (bank == 4); } @@ -135,6 +184,14 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) } if (apic != msr) { + /* + * On SMCA CPUs, LVT offset is programmed at a different MSR, and + * the BIOS provides the value. The original field where LVT offset + * was set is reserved. Return early here: + */ + if (mce_flags.smca) + return 0; + pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, apic, b->bank, b->block, b->address, hi, lo); @@ -144,10 +201,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) return 1; }; -/* - * Called via smp_call_function_single(), must be called with correct - * cpu affinity. - */ +/* Reprogram MCx_MISC MSR behind this threshold bank. */ static void threshold_restart_bank(void *_tr) { struct thresh_restart *tr = _tr; @@ -247,27 +301,116 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } +static u32 get_block_address(u32 current_addr, u32 low, u32 high, + unsigned int bank, unsigned int block) +{ + u32 addr = 0, offset = 0; + + if (mce_flags.smca) { + if (!block) { + addr = MSR_AMD64_SMCA_MCx_MISC(bank); + } else { + /* + * For SMCA enabled processors, BLKPTR field of the + * first MISC register (MCx_MISC0) indicates presence of + * additional MISC register set (MISC1-4). + */ + u32 low, high; + + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + return addr; + + if (!(low & MCI_CONFIG_MCAX)) + return addr; + + if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && + (low & MASK_BLKPTR_LO)) + addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + } + return addr; + } + + /* Fall back to method we used for older processors: */ + switch (block) { + case 0: + addr = MSR_IA32_MCx_MISC(bank); + break; + case 1: + offset = ((low & MASK_BLKPTR_LO) >> 21); + if (offset) + addr = MCG_XBLK_ADDR + offset; + break; + default: + addr = ++current_addr; + } + return addr; +} + +static int +prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + int offset, u32 misc_high) +{ + unsigned int cpu = smp_processor_id(); + struct threshold_block b; + int new; + + if (!block) + per_cpu(bank_map, cpu) |= (1 << bank); + + memset(&b, 0, sizeof(b)); + b.cpu = cpu; + b.bank = bank; + b.block = block; + b.address = addr; + b.interrupt_capable = lvt_interrupt_supported(bank, misc_high); + + if (!b.interrupt_capable) + goto done; + + b.interrupt_enable = 1; + + if (mce_flags.smca) { + u32 smca_low, smca_high; + u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank); + + if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) { + smca_high |= SMCA_MCAX_EN_OFF; + wrmsr(smca_addr, smca_low, smca_high); + } + + /* Gather LVT offset for thresholding: */ + if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) + goto out; + + new = (smca_low & SMCA_THR_LVT_OFF) >> 12; + } else { + new = (misc_high & MASK_LVTOFF_HI) >> 20; + } + + offset = setup_APIC_mce_threshold(offset, new); + + if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) + mce_threshold_vector = amd_threshold_interrupt; + +done: + mce_threshold_block_init(&b, offset); + +out: + return offset; +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { - struct threshold_block b; - unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - int offset = -1, new; + int offset = -1; for (bank = 0; bank < mca_cfg.banks; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { - if (block == 0) - address = MSR_IA32_MCx_MISC(bank); - else if (block == 1) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - break; - - address += MCG_XBLK_ADDR; - } else - ++address; + address = get_block_address(address, low, high, bank, block); + if (!address) + break; if (rdmsr_safe(address, &low, &high)) break; @@ -279,29 +422,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) (high & MASK_LOCKED_HI)) continue; - if (!block) - per_cpu(bank_map, cpu) |= (1 << bank); - - memset(&b, 0, sizeof(b)); - b.cpu = cpu; - b.bank = bank; - b.block = block; - b.address = address; - b.interrupt_capable = lvt_interrupt_supported(bank, high); - - if (!b.interrupt_capable) - goto init; - - b.interrupt_enable = 1; - new = (high & MASK_LVTOFF_HI) >> 20; - offset = setup_APIC_mce_threshold(offset, new); - - if ((offset == new) && - (mce_threshold_vector != amd_threshold_interrupt)) - mce_threshold_vector = amd_threshold_interrupt; - -init: - mce_threshold_block_init(&b, offset); + offset = prepare_threshold_block(bank, block, address, offset, high); } } @@ -394,16 +515,9 @@ static void amd_threshold_interrupt(void) if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; for (block = 0; block < NR_BLOCKS; ++block) { - if (block == 0) { - address = MSR_IA32_MCx_MISC(bank); - } else if (block == 1) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - break; - address += MCG_XBLK_ADDR; - } else { - ++address; - } + address = get_block_address(address, low, high, bank, block); + if (!address) + break; if (rdmsr_safe(address, &low, &high)) break; @@ -623,16 +737,11 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (err) goto out_free; recurse: - if (!block) { - address = (low & MASK_BLKPTR_LO) >> 21; - if (!address) - return 0; - address += MCG_XBLK_ADDR; - } else { - ++address; - } + address = get_block_address(address, low, high, bank, ++block); + if (!address) + return 0; - err = allocate_threshold_blocks(cpu, bank, ++block, address); + err = allocate_threshold_blocks(cpu, bank, block, address); if (err) goto out_free; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 75d3aab5f7b2..8581963894c7 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -431,10 +431,6 @@ int __init save_microcode_in_initrd_amd(void) else container = cont_va; - if (ucode_new_rev) - pr_info("microcode: updated early to new patch_level=0x%08x\n", - ucode_new_rev); - eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); @@ -469,8 +465,7 @@ void reload_ucode_amd(void) if (mc && rev < mc->hdr.patch_id) { if (!__apply_microcode_amd(mc)) { ucode_new_rev = mc->hdr.patch_id; - pr_info("microcode: reload patch_level=0x%08x\n", - ucode_new_rev); + pr_info("reload patch_level=0x%08x\n", ucode_new_rev); } } } @@ -793,15 +788,13 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) return -EINVAL; } - patch->data = kzalloc(patch_size, GFP_KERNEL); + patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL); if (!patch->data) { pr_err("Patch data allocation failure.\n"); kfree(patch); return -EINVAL; } - /* All looks ok, copy patch... */ - memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); INIT_LIST_HEAD(&patch->plist); patch->patch_id = mc_hdr->patch_id; patch->equiv_cpu = proc_id; @@ -957,6 +950,10 @@ struct microcode_ops * __init init_amd_microcode(void) return NULL; } + if (ucode_new_rev) + pr_info_once("microcode updated early to new patch_level=0x%08x\n", + ucode_new_rev); + return µcode_amd_ops; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index faec7120c508..ac360bfbbdb6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -43,16 +43,8 @@ #define MICROCODE_VERSION "2.01" static struct microcode_ops *microcode_ops; - static bool dis_ucode_ldr; -static int __init disable_loader(char *str) -{ - dis_ucode_ldr = true; - return 1; -} -__setup("dis_ucode_ldr", disable_loader); - /* * Synchronization. * @@ -81,15 +73,16 @@ struct cpu_info_ctx { static bool __init check_loader_disabled_bsp(void) { + static const char *__dis_opt_str = "dis_ucode_ldr"; + #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); - const char *opt = "dis_ucode_ldr"; - const char *option = (const char *)__pa_nodebug(opt); + const char *option = (const char *)__pa_nodebug(__dis_opt_str); bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr); #else /* CONFIG_X86_64 */ const char *cmdline = boot_command_line; - const char *option = "dis_ucode_ldr"; + const char *option = __dis_opt_str; bool *res = &dis_ucode_ldr; #endif @@ -479,7 +472,7 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) enum ucode_state ustate; struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - if (uci && uci->valid) + if (uci->valid) return UCODE_OK; if (collect_cpu_info(cpu)) @@ -630,7 +623,7 @@ int __init microcode_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; int error; - if (paravirt_enabled() || dis_ucode_ldr) + if (dis_ucode_ldr) return -EINVAL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ee81c544ee0d..cbb3cf09b065 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -39,9 +39,15 @@ #include <asm/setup.h> #include <asm/msr.h> -static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +/* + * Temporary microcode blobs pointers storage. We note here the pointers to + * microcode blobs we've got from whatever storage (detached initrd, builtin). + * Later on, we put those into final storage mc_saved_data.mc_saved. + */ +static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT]; + static struct mc_saved_data { - unsigned int mc_saved_count; + unsigned int num_saved; struct microcode_intel **mc_saved; } mc_saved_data; @@ -78,53 +84,50 @@ load_microcode_early(struct microcode_intel **saved, } static inline void -copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd, - unsigned long off, int num_saved) +copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs, + unsigned long off, int num_saved) { int i; for (i = 0; i < num_saved; i++) - mc_saved[i] = (struct microcode_intel *)(initrd[i] + off); + mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off); } #ifdef CONFIG_X86_32 static void -microcode_phys(struct microcode_intel **mc_saved_tmp, - struct mc_saved_data *mc_saved_data) +microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs) { int i; struct microcode_intel ***mc_saved; - mc_saved = (struct microcode_intel ***) - __pa_nodebug(&mc_saved_data->mc_saved); - for (i = 0; i < mc_saved_data->mc_saved_count; i++) { + mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved); + + for (i = 0; i < mcs->num_saved; i++) { struct microcode_intel *p; - p = *(struct microcode_intel **) - __pa_nodebug(mc_saved_data->mc_saved + i); + p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i); mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); } } #endif static enum ucode_state -load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, - unsigned long initrd_start, struct ucode_cpu_info *uci) +load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, + unsigned long offset, struct ucode_cpu_info *uci) { struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int count = mc_saved_data->mc_saved_count; + unsigned int count = mcs->num_saved; - if (!mc_saved_data->mc_saved) { - copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count); + if (!mcs->mc_saved) { + copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count); return load_microcode_early(mc_saved_tmp, count, uci); } else { #ifdef CONFIG_X86_32 - microcode_phys(mc_saved_tmp, mc_saved_data); + microcode_phys(mc_saved_tmp, mcs); return load_microcode_early(mc_saved_tmp, count, uci); #else - return load_microcode_early(mc_saved_data->mc_saved, - count, uci); + return load_microcode_early(mcs->mc_saved, count, uci); #endif } } @@ -175,25 +178,25 @@ matching_model_microcode(struct microcode_header_intel *mc_header, } static int -save_microcode(struct mc_saved_data *mc_saved_data, +save_microcode(struct mc_saved_data *mcs, struct microcode_intel **mc_saved_src, - unsigned int mc_saved_count) + unsigned int num_saved) { int i, j; struct microcode_intel **saved_ptr; int ret; - if (!mc_saved_count) + if (!num_saved) return -EINVAL; /* * Copy new microcode data. */ - saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL); + saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL); if (!saved_ptr) return -ENOMEM; - for (i = 0; i < mc_saved_count; i++) { + for (i = 0; i < num_saved; i++) { struct microcode_header_intel *mc_hdr; struct microcode_intel *mc; unsigned long size; @@ -207,20 +210,18 @@ save_microcode(struct mc_saved_data *mc_saved_data, mc_hdr = &mc->hdr; size = get_totalsize(mc_hdr); - saved_ptr[i] = kmalloc(size, GFP_KERNEL); + saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL); if (!saved_ptr[i]) { ret = -ENOMEM; goto err; } - - memcpy(saved_ptr[i], mc, size); } /* * Point to newly saved microcode. */ - mc_saved_data->mc_saved = saved_ptr; - mc_saved_data->mc_saved_count = mc_saved_count; + mcs->mc_saved = saved_ptr; + mcs->num_saved = num_saved; return 0; @@ -284,22 +285,20 @@ static unsigned int _save_mc(struct microcode_intel **mc_saved, * BSP can stay in the platform. */ static enum ucode_state __init -get_matching_model_microcode(int cpu, unsigned long start, - void *data, size_t size, - struct mc_saved_data *mc_saved_data, - unsigned long *mc_saved_in_initrd, +get_matching_model_microcode(unsigned long start, void *data, size_t size, + struct mc_saved_data *mcs, unsigned long *mc_ptrs, struct ucode_cpu_info *uci) { - u8 *ucode_ptr = data; - unsigned int leftover = size; + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + struct microcode_header_intel *mc_header; + unsigned int num_saved = mcs->num_saved; enum ucode_state state = UCODE_OK; + unsigned int leftover = size; + u8 *ucode_ptr = data; unsigned int mc_size; - struct microcode_header_intel *mc_header; - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int mc_saved_count = mc_saved_data->mc_saved_count; int i; - while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { + while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) { if (leftover < sizeof(mc_header)) break; @@ -318,32 +317,31 @@ get_matching_model_microcode(int cpu, unsigned long start, * the platform, we need to find and save microcode patches * with the same family and model as the BSP. */ - if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != - UCODE_OK) { + if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) { ucode_ptr += mc_size; continue; } - mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count); + num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved); ucode_ptr += mc_size; } if (leftover) { state = UCODE_ERROR; - goto out; + return state; } - if (mc_saved_count == 0) { + if (!num_saved) { state = UCODE_NFOUND; - goto out; + return state; } - for (i = 0; i < mc_saved_count; i++) - mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; + for (i = 0; i < num_saved; i++) + mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start; + + mcs->num_saved = num_saved; - mc_saved_data->mc_saved_count = mc_saved_count; -out: return state; } @@ -373,7 +371,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); csig.pf = 1 << ((val[1] >> 18) & 7); } - native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + native_wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ sync_core(); @@ -396,11 +394,11 @@ static void show_saved_mc(void) unsigned int sig, pf, rev, total_size, data_size, date; struct ucode_cpu_info uci; - if (mc_saved_data.mc_saved_count == 0) { + if (!mc_saved_data.num_saved) { pr_debug("no microcode data saved.\n"); return; } - pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); + pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved); collect_cpu_info_early(&uci); @@ -409,7 +407,7 @@ static void show_saved_mc(void) rev = uci.cpu_sig.rev; pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); - for (i = 0; i < mc_saved_data.mc_saved_count; i++) { + for (i = 0; i < mc_saved_data.num_saved; i++) { struct microcode_header_intel *mc_saved_header; struct extended_sigtable *ext_header; int ext_sigcount; @@ -465,7 +463,7 @@ int save_mc_for_early(u8 *mc) { struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; unsigned int mc_saved_count_init; - unsigned int mc_saved_count; + unsigned int num_saved; struct microcode_intel **mc_saved; int ret = 0; int i; @@ -476,23 +474,23 @@ int save_mc_for_early(u8 *mc) */ mutex_lock(&x86_cpu_microcode_mutex); - mc_saved_count_init = mc_saved_data.mc_saved_count; - mc_saved_count = mc_saved_data.mc_saved_count; + mc_saved_count_init = mc_saved_data.num_saved; + num_saved = mc_saved_data.num_saved; mc_saved = mc_saved_data.mc_saved; - if (mc_saved && mc_saved_count) + if (mc_saved && num_saved) memcpy(mc_saved_tmp, mc_saved, - mc_saved_count * sizeof(struct microcode_intel *)); + num_saved * sizeof(struct microcode_intel *)); /* * Save the microcode patch mc in mc_save_tmp structure if it's a newer * version. */ - mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count); + num_saved = _save_mc(mc_saved_tmp, mc, num_saved); /* * Save the mc_save_tmp in global mc_saved_data. */ - ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); + ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved); if (ret) { pr_err("Cannot save microcode patch.\n"); goto out; @@ -536,7 +534,7 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp) static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; static __init enum ucode_state -scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, +scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, unsigned long start, unsigned long size, struct ucode_cpu_info *uci) { @@ -551,14 +549,18 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, cd.data = NULL; cd.size = 0; - cd = find_cpio_data(p, (void *)start, size, &offset); - if (!cd.data) { + /* try built-in microcode if no initrd */ + if (!size) { if (!load_builtin_intel_microcode(&cd)) return UCODE_ERROR; + } else { + cd = find_cpio_data(p, (void *)start, size, &offset); + if (!cd.data) + return UCODE_ERROR; } - return get_matching_model_microcode(0, start, cd.data, cd.size, - mc_saved_data, initrd, uci); + return get_matching_model_microcode(start, cd.data, cd.size, + mcs, mc_ptrs, uci); } /* @@ -567,14 +569,11 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, static void print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) { - int cpu = smp_processor_id(); - - pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", - cpu, - uci->cpu_sig.rev, - date & 0xffff, - date >> 24, - (date >> 16) & 0xff); + pr_info_once("microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", + uci->cpu_sig.rev, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); } #ifdef CONFIG_X86_32 @@ -603,19 +602,19 @@ void show_ucode_info_early(void) */ static void print_ucode(struct ucode_cpu_info *uci) { - struct microcode_intel *mc_intel; + struct microcode_intel *mc; int *delay_ucode_info_p; int *current_mc_date_p; - mc_intel = uci->mc; - if (mc_intel == NULL) + mc = uci->mc; + if (!mc) return; delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); current_mc_date_p = (int *)__pa_nodebug(¤t_mc_date); *delay_ucode_info_p = 1; - *current_mc_date_p = mc_intel->hdr.date; + *current_mc_date_p = mc->hdr.date; } #else @@ -630,37 +629,35 @@ static inline void flush_tlb_early(void) static inline void print_ucode(struct ucode_cpu_info *uci) { - struct microcode_intel *mc_intel; + struct microcode_intel *mc; - mc_intel = uci->mc; - if (mc_intel == NULL) + mc = uci->mc; + if (!mc) return; - print_ucode_info(uci, mc_intel->hdr.date); + print_ucode_info(uci, mc->hdr.date); } #endif static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) { - struct microcode_intel *mc_intel; + struct microcode_intel *mc; unsigned int val[2]; - mc_intel = uci->mc; - if (mc_intel == NULL) + mc = uci->mc; + if (!mc) return 0; /* write microcode via MSR 0x79 */ - native_wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) mc_intel->bits, - (unsigned long) mc_intel->bits >> 16 >> 16); - native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); + native_wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ sync_core(); /* get the current revision from MSR 0x8B */ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc_intel->hdr.rev) + if (val[1] != mc->hdr.rev) return -1; #ifdef CONFIG_X86_64 @@ -672,25 +669,26 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (early) print_ucode(uci); else - print_ucode_info(uci, mc_intel->hdr.date); + print_ucode_info(uci, mc->hdr.date); return 0; } /* * This function converts microcode patch offsets previously stored in - * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. + * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data. */ int __init save_microcode_in_initrd_intel(void) { - unsigned int count = mc_saved_data.mc_saved_count; + unsigned int count = mc_saved_data.num_saved; struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; int ret = 0; - if (count == 0) + if (!count) return ret; - copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count); + copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count); + ret = save_microcode(&mc_saved_data, mc_saved, count); if (ret) pr_err("Cannot save microcode patches from initrd.\n"); @@ -701,8 +699,7 @@ int __init save_microcode_in_initrd_intel(void) } static void __init -_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, - unsigned long *initrd, +_load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs, unsigned long start, unsigned long size) { struct ucode_cpu_info uci; @@ -710,11 +707,11 @@ _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, collect_cpu_info_early(&uci); - ret = scan_microcode(mc_saved_data, initrd, start, size, &uci); + ret = scan_microcode(mcs, mc_ptrs, start, size, &uci); if (ret != UCODE_OK) return; - ret = load_microcode(mc_saved_data, initrd, start, &uci); + ret = load_microcode(mcs, mc_ptrs, start, &uci); if (ret != UCODE_OK) return; @@ -728,53 +725,49 @@ void __init load_ucode_intel_bsp(void) struct boot_params *p; p = (struct boot_params *)__pa_nodebug(&boot_params); - start = p->hdr.ramdisk_image; size = p->hdr.ramdisk_size; - _load_ucode_intel_bsp( - (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), - (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), - start, size); + /* + * Set start only if we have an initrd image. We cannot use initrd_start + * because it is not set that early yet. + */ + start = (size ? p->hdr.ramdisk_image : 0); + + _load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data), + (unsigned long *)__pa_nodebug(&mc_tmp_ptrs), + start, size); #else - start = boot_params.hdr.ramdisk_image + PAGE_OFFSET; size = boot_params.hdr.ramdisk_size; + start = (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0); - _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size); + _load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size); #endif } void load_ucode_intel_ap(void) { - struct mc_saved_data *mc_saved_data_p; + unsigned long *mcs_tmp_p; + struct mc_saved_data *mcs_p; struct ucode_cpu_info uci; - unsigned long *mc_saved_in_initrd_p; - unsigned long initrd_start_addr; enum ucode_state ret; #ifdef CONFIG_X86_32 - unsigned long *initrd_start_p; - mc_saved_in_initrd_p = - (unsigned long *)__pa_nodebug(mc_saved_in_initrd); - mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); - initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); - initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); + mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs); + mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); #else - mc_saved_data_p = &mc_saved_data; - mc_saved_in_initrd_p = mc_saved_in_initrd; - initrd_start_addr = initrd_start; + mcs_tmp_p = mc_tmp_ptrs; + mcs_p = &mc_saved_data; #endif /* * If there is no valid ucode previously saved in memory, no need to * update ucode on this AP. */ - if (mc_saved_data_p->mc_saved_count == 0) + if (!mcs_p->num_saved) return; collect_cpu_info_early(&uci); - ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, - initrd_start_addr, &uci); - + ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci); if (ret != UCODE_OK) return; @@ -786,13 +779,13 @@ void reload_ucode_intel(void) struct ucode_cpu_info uci; enum ucode_state ret; - if (!mc_saved_data.mc_saved_count) + if (!mc_saved_data.num_saved) return; collect_cpu_info_early(&uci); ret = load_microcode_early(mc_saved_data.mc_saved, - mc_saved_data.mc_saved_count, &uci); + mc_saved_data.num_saved, &uci); if (ret != UCODE_OK) return; @@ -825,7 +818,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) * return 0 - no update found * return 1 - found update */ -static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) +static int get_matching_mc(struct microcode_intel *mc, int cpu) { struct cpu_signature cpu_sig; unsigned int csig, cpf, crev; @@ -836,39 +829,36 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) cpf = cpu_sig.pf; crev = cpu_sig.rev; - return has_newer_microcode(mc_intel, csig, cpf, crev); + return has_newer_microcode(mc, csig, cpf, crev); } static int apply_microcode_intel(int cpu) { - struct microcode_intel *mc_intel; + struct microcode_intel *mc; struct ucode_cpu_info *uci; + struct cpuinfo_x86 *c; unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - - uci = ucode_cpu_info + cpu; - mc_intel = uci->mc; /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); + if (WARN_ON(raw_smp_processor_id() != cpu)) + return -1; - if (mc_intel == NULL) + uci = ucode_cpu_info + cpu; + mc = uci->mc; + if (!mc) return 0; /* * Microcode on this CPU could be updated earlier. Only apply the - * microcode patch in mc_intel when it is newer than the one on this + * microcode patch in mc when it is newer than the one on this * CPU. */ - if (get_matching_mc(mc_intel, cpu) == 0) + if (!get_matching_mc(mc, cpu)) return 0; /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) mc_intel->bits, - (unsigned long) mc_intel->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); + wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); + wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ sync_core(); @@ -876,16 +866,19 @@ static int apply_microcode_intel(int cpu) /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc_intel->hdr.rev) { + if (val[1] != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", - cpu_num, mc_intel->hdr.rev); + cpu, mc->hdr.rev); return -1; } + pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", - cpu_num, val[1], - mc_intel->hdr.date & 0xffff, - mc_intel->hdr.date >> 24, - (mc_intel->hdr.date >> 16) & 0xff); + cpu, val[1], + mc->hdr.date & 0xffff, + mc->hdr.date >> 24, + (mc->hdr.date >> 16) & 0xff); + + c = &cpu_data(cpu); uci->cpu_sig.rev = val[1]; c->microcode = val[1]; diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c index b96896bcbdaf..2ce1a7dc45b7 100644 --- a/arch/x86/kernel/cpu/microcode/intel_lib.c +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c @@ -49,7 +49,7 @@ int microcode_sanity_check(void *mc, int print_err) unsigned long total_size, data_size, ext_table_size; struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; - int sum, orig_sum, ext_sigcount = 0, i; + u32 sum, orig_sum, ext_sigcount = 0, i; struct extended_signature *ext_sig; total_size = get_totalsize(mc_header); @@ -57,69 +57,85 @@ int microcode_sanity_check(void *mc, int print_err) if (data_size + MC_HEADER_SIZE > total_size) { if (print_err) - pr_err("error! Bad data size in microcode data file\n"); + pr_err("Error: bad microcode data file size.\n"); return -EINVAL; } if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { if (print_err) - pr_err("error! Unknown microcode update format\n"); + pr_err("Error: invalid/unknown microcode update format.\n"); return -EINVAL; } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); if (ext_table_size) { + u32 ext_table_sum = 0; + u32 *ext_tablep; + if ((ext_table_size < EXT_HEADER_SIZE) || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { if (print_err) - pr_err("error! Small exttable size in microcode data file\n"); + pr_err("Error: truncated extended signature table.\n"); return -EINVAL; } + ext_header = mc + MC_HEADER_SIZE + data_size; if (ext_table_size != exttable_size(ext_header)) { if (print_err) - pr_err("error! Bad exttable size in microcode data file\n"); + pr_err("Error: extended signature table size mismatch.\n"); return -EFAULT; } + ext_sigcount = ext_header->count; - } - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; + /* + * Check extended table checksum: the sum of all dwords that + * comprise a valid table must be 0. + */ + ext_tablep = (u32 *)ext_header; - i = ext_table_size / DWSIZE; + i = ext_table_size / sizeof(u32); while (i--) ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { if (print_err) - pr_warn("aborting, bad extended signature table checksum\n"); + pr_warn("Bad extended signature table checksum, aborting.\n"); return -EINVAL; } } - /* calculate the checksum */ + /* + * Calculate the checksum of update data and header. The checksum of + * valid update data and header including the extended signature table + * must be 0. + */ orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; + i = (MC_HEADER_SIZE + data_size) / sizeof(u32); while (i--) - orig_sum += ((int *)mc)[i]; + orig_sum += ((u32 *)mc)[i]; + if (orig_sum) { if (print_err) - pr_err("aborting, bad checksum\n"); + pr_err("Bad microcode data checksum, aborting.\n"); return -EINVAL; } + if (!ext_table_size) return 0; - /* check extended signature checksum */ + + /* + * Check extended signature checksum: 0 => valid. + */ for (i = 0; i < ext_sigcount; i++) { ext_sig = (void *)ext_header + EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + + sum = (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); if (sum) { if (print_err) - pr_err("aborting, bad checksum\n"); + pr_err("Bad extended signature checksum, aborting.\n"); return -EINVAL; } } diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 3f20710a5b23..6988c74409a8 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h +# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h # IN=$1 @@ -49,8 +49,8 @@ dump_array() trap 'rm "$OUT"' EXIT ( - echo "#ifndef _ASM_X86_CPUFEATURE_H" - echo "#include <asm/cpufeature.h>" + echo "#ifndef _ASM_X86_CPUFEATURES_H" + echo "#include <asm/cpufeatures.h>" echo "#endif" echo "" diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index ba80d68f683e..10f8d4796240 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -47,7 +47,7 @@ #include <linux/smp.h> #include <linux/syscore_ops.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/e820.h> #include <asm/mtrr.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index e3b4d1841175..34178564be2a 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,6 +1,6 @@ #include <linux/kernel.h> #include <linux/mm.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/msr.h> #include "cpu.h" diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 58f34319b29a..9ef978d69c22 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -57,10 +57,9 @@ struct crash_elf_data { struct kimage *image; /* * Total number of ram ranges we have after various adjustments for - * GART, crash reserved region etc. + * crash reserved region, etc. */ unsigned int max_nr_ranges; - unsigned long gart_start, gart_end; /* Pointer to elf header */ void *ehdr; @@ -201,17 +200,6 @@ static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) return 0; } -static int get_gart_ranges_callback(u64 start, u64 end, void *arg) -{ - struct crash_elf_data *ced = arg; - - ced->gart_start = start; - ced->gart_end = end; - - /* Not expecting more than 1 gart aperture */ - return 1; -} - /* Gather all the required information to prepare elf headers for ram regions */ static void fill_up_crash_elf_data(struct crash_elf_data *ced, @@ -226,22 +214,6 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->max_nr_ranges = nr_ranges; - /* - * We don't create ELF headers for GART aperture as an attempt - * to dump this memory in second kernel leads to hang/crash. - * If gart aperture is present, one needs to exclude that region - * and that could lead to need of extra phdr. - */ - walk_iomem_res("GART", IORESOURCE_MEM, 0, -1, - ced, get_gart_ranges_callback); - - /* - * If we have gart region, excluding that could potentially split - * a memory range, resulting in extra header. Account for that. - */ - if (ced->gart_end) - ced->max_nr_ranges++; - /* Exclusion of crash region could split memory ranges */ ced->max_nr_ranges++; @@ -350,13 +322,6 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced, return ret; } - /* Exclude GART region */ - if (ced->gart_end) { - ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end); - if (ret) - return ret; - } - return ret; } @@ -599,12 +564,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) /* Add ACPI tables */ cmd.type = E820_ACPI; flags = IORESOURCE_MEM | IORESOURCE_BUSY; - walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd, + walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd, memmap_entry_callback); /* Add ACPI Non-volatile Storage */ cmd.type = E820_NVS; - walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd, + walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, memmap_entry_callback); /* Add crashk_low_res region */ diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 569c1e4f96fe..621b501f8935 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -24,6 +24,7 @@ #include <asm/e820.h> #include <asm/proto.h> #include <asm/setup.h> +#include <asm/cpufeature.h> /* * The e820 map is the map that gets modified e.g. with command line parameters @@ -925,6 +926,41 @@ static const char *e820_type_to_string(int e820_type) } } +static unsigned long e820_type_to_iomem_type(int e820_type) +{ + switch (e820_type) { + case E820_RESERVED_KERN: + case E820_RAM: + return IORESOURCE_SYSTEM_RAM; + case E820_ACPI: + case E820_NVS: + case E820_UNUSABLE: + case E820_PRAM: + case E820_PMEM: + default: + return IORESOURCE_MEM; + } +} + +static unsigned long e820_type_to_iores_desc(int e820_type) +{ + switch (e820_type) { + case E820_ACPI: + return IORES_DESC_ACPI_TABLES; + case E820_NVS: + return IORES_DESC_ACPI_NV_STORAGE; + case E820_PMEM: + return IORES_DESC_PERSISTENT_MEMORY; + case E820_PRAM: + return IORES_DESC_PERSISTENT_MEMORY_LEGACY; + case E820_RESERVED_KERN: + case E820_RAM: + case E820_UNUSABLE: + default: + return IORES_DESC_NONE; + } +} + static bool do_mark_busy(u32 type, struct resource *res) { /* this is the legacy bios/dos rom-shadow + mmio region */ @@ -967,7 +1003,8 @@ void __init e820_reserve_resources(void) res->start = e820.map[i].addr; res->end = end; - res->flags = IORESOURCE_MEM; + res->flags = e820_type_to_iomem_type(e820.map[i].type); + res->desc = e820_type_to_iores_desc(e820.map[i].type); /* * don't register the region that could be conflicted with diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d25097c3fc1d..0b1b9abd4d5f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -114,6 +114,10 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); if (fpu->fpregs_active) { + /* + * Ignore return value -- we don't care if reg state + * is clobbered. + */ copy_fpregs_to_fpstate(fpu); } else { this_cpu_write(fpu_fpregs_owner_ctx, NULL); @@ -189,8 +193,12 @@ void fpu__save(struct fpu *fpu) preempt_disable(); if (fpu->fpregs_active) { - if (!copy_fpregs_to_fpstate(fpu)) - fpregs_deactivate(fpu); + if (!copy_fpregs_to_fpstate(fpu)) { + if (use_eager_fpu()) + copy_kernel_to_fpregs(&fpu->state); + else + fpregs_deactivate(fpu); + } } preempt_enable(); } @@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state) } EXPORT_SYMBOL_GPL(fpstate_init); -/* - * Copy the current task's FPU state to a new task's FPU context. - * - * In both the 'eager' and the 'lazy' case we save hardware registers - * directly to the destination buffer. - */ -static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) +int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { + dst_fpu->counter = 0; + dst_fpu->fpregs_active = 0; + dst_fpu->last_cpu = -1; + + if (!src_fpu->fpstate_active || !cpu_has_fpu) + return 0; + WARN_ON_FPU(src_fpu != ¤t->thread.fpu); /* @@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) /* * Save current FPU registers directly into the child * FPU context, without any memory-to-memory copying. - * - * If the FPU context got destroyed in the process (FNSAVE - * done on old CPUs) then copy it back into the source - * context and mark the current task for lazy restore. + * In lazy mode, if the FPU context isn't loaded into + * fpregs, CR0.TS will be set and do_device_not_available + * will load the FPU context. * * We have to do all this with preemption disabled, * mostly because of the FNSAVE case, because in that @@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); - fpregs_deactivate(src_fpu); + + if (use_eager_fpu()) + copy_kernel_to_fpregs(&src_fpu->state); + else + fpregs_deactivate(src_fpu); } preempt_enable(); -} - -int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) -{ - dst_fpu->counter = 0; - dst_fpu->fpregs_active = 0; - dst_fpu->last_cpu = -1; - - if (src_fpu->fpstate_active && cpu_has_fpu) - fpu_copy(dst_fpu, src_fpu); return 0; } @@ -409,8 +411,10 @@ static inline void copy_init_fpstate_to_fpregs(void) { if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, -1); - else + else if (static_cpu_has(X86_FEATURE_FXSR)) copy_kernel_to_fxregs(&init_fpstate.fxsave); + else + copy_kernel_to_fregs(&init_fpstate.fsave); } /* @@ -423,7 +427,7 @@ void fpu__clear(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ - if (!use_eager_fpu()) { + if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) { /* FPU state will be reallocated lazily at the first use. */ fpu__drop(fpu); } else { diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6d9f0a7ef4c8..54c86fffbf9f 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -78,13 +78,15 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) cr0 &= ~(X86_CR0_TS | X86_CR0_EM); write_cr0(cr0); - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); + if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); + } #ifndef CONFIG_MATH_EMULATION if (!cpu_has_fpu) { @@ -132,7 +134,7 @@ static void __init fpu__init_system_generic(void) * Set up the legacy init FPU context. (xstate init might overwrite this * with a more modern format, if the CPU supports it.) */ - fpstate_init_fxstate(&init_fpstate.fxsave); + fpstate_init(&init_fpstate); fpu__init_system_mxcsr(); } @@ -260,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void) * not only saved the restores along the way, but we also have the * FPU ready to be used for the original task. * - * 'eager' switching is used on modern CPUs, there we switch the FPU + * 'lazy' is deprecated because it's almost never a performance win + * and it's much more complicated than 'eager'. + * + * 'eager' switching is by default on all CPUs, there we switch the FPU * state during every context switch, regardless of whether the task * has used FPU instructions in that time slice or not. This is done * because modern FPU context saving instructions are able to optimize @@ -271,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) * to use 'eager' restores, if we detect that a task is using the FPU * frequently. See the fpu->counter logic in fpu/internal.h for that. ] */ -static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; +static enum { ENABLE, DISABLE } eagerfpu = ENABLE; /* * Find supported xfeatures based on cpu features and command-line input. @@ -300,12 +305,6 @@ u64 __init fpu__get_supported_xfeatures_mask(void) static void __init fpu__clear_eager_fpu_features(void) { setup_clear_cpu_cap(X86_FEATURE_MPX); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - setup_clear_cpu_cap(X86_FEATURE_AVX512F); - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); } /* @@ -348,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { - /* - * No need to check "eagerfpu=auto" again, since it is the - * initial default. - */ if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { eagerfpu = DISABLE; fpu__clear_eager_fpu_features(); - } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) { - eagerfpu = ENABLE; } if (cmdline_find_option_bool(boot_command_line, "no387")) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d425cda5ae6d..6e8354f5a593 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX512PF); setup_clear_cpu_cap(X86_FEATURE_AVX512ER); setup_clear_cpu_cap(X86_FEATURE_AVX512CD); + setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); + setup_clear_cpu_cap(X86_FEATURE_AVX512BW); + setup_clear_cpu_cap(X86_FEATURE_AVX512VL); setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 29408d6d6626..702547ce33c9 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end) static unsigned long text_ip_addr(unsigned long ip) { /* - * On x86_64, kernel text mappings are mapped read-only with - * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead - * of the kernel text mapping to modify the kernel text. + * On x86_64, kernel text mappings are mapped read-only, so we use + * the kernel identity mapping instead of the kernel text mapping + * to modify the kernel text. * * For 32bit kernels, these mappings are same and we can use * kernel identity mapping to modify code. @@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { } #endif /* Defined as markers to the end of the ftrace default trampolines */ -extern void ftrace_caller_end(void); extern void ftrace_regs_caller_end(void); -extern void ftrace_return(void); +extern void ftrace_epilogue(void); extern void ftrace_caller_op_ptr(void); extern void ftrace_regs_caller_op_ptr(void); @@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) op_offset = (unsigned long)ftrace_regs_caller_op_ptr; } else { start_offset = (unsigned long)ftrace_caller; - end_offset = (unsigned long)ftrace_caller_end; + end_offset = (unsigned long)ftrace_epilogue; op_offset = (unsigned long)ftrace_caller_op_ptr; } @@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* * Allocate enough size to store the ftrace_caller code, - * the jmp to ftrace_return, as well as the address of + * the jmp to ftrace_epilogue, as well as the address of * the ftrace_ops this trampoline is used for. */ trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *)); @@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = (unsigned long)trampoline + size; - /* The trampoline ends with a jmp to ftrace_return */ - jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return); + /* The trampoline ends with a jmp to ftrace_epilogue */ + jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue); memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE); /* diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2c0f3407bd1f..1f4422d5c8d0 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -40,13 +40,8 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); /* Wipe all early page tables except for the kernel symbol map */ static void __init reset_early_page_tables(void) { - unsigned long i; - - for (i = 0; i < PTRS_PER_PGD-1; i++) - early_level4_pgt[i].pgd = 0; - + memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1)); next_early_pgt = 0; - write_cr3(__pa_nodebug(early_level4_pgt)); } @@ -54,7 +49,6 @@ static void __init reset_early_page_tables(void) int __init early_make_pgtable(unsigned long address) { unsigned long physaddr = address - __PAGE_OFFSET; - unsigned long i; pgdval_t pgd, *pgd_p; pudval_t pud, *pud_p; pmdval_t pmd, *pmd_p; @@ -81,8 +75,7 @@ again: } pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; - for (i = 0; i < PTRS_PER_PUD; i++) - pud_p[i] = 0; + memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; } pud_p += pud_index(address); @@ -97,8 +90,7 @@ again: } pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; - for (i = 0; i < PTRS_PER_PMD; i++) - pmd_p[i] = 0; + memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; } pmd = (physaddr & PMD_MASK) + early_pmd_flags; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 6bc9ae24b6d2..54cdbd2003fe 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,7 +19,7 @@ #include <asm/setup.h> #include <asm/processor-flags.h> #include <asm/msr-index.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/percpu.h> #include <asm/nops.h> #include <asm/bootparam.h> @@ -389,6 +389,12 @@ default_entry: /* Make changes effective */ wrmsr + /* + * And make sure that all the mappings we set up have NX set from + * the beginning. + */ + orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4) + enable_paging: /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ffdc0e860390..22fbf9df61bb 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,7 +38,6 @@ #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) -L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) @@ -76,9 +75,7 @@ startup_64: subq $_text - __START_KERNEL_map, %rbp /* Is the address not 2M aligned? */ - movq %rbp, %rax - andl $~PMD_PAGE_MASK, %eax - testl %eax, %eax + testl $~PMD_PAGE_MASK, %ebp jnz bad_address /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index b8e6ff5cd5d0..be0ebbb6d1d1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -12,6 +12,7 @@ #include <linux/pm.h> #include <linux/io.h> +#include <asm/cpufeature.h> #include <asm/irqdomain.h> #include <asm/fixmap.h> #include <asm/hpet.h> diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 631a7087e332..2da6ee9ae69b 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; -#ifdef CONFIG_DEBUG_RODATA char opc[BREAK_INSTR_SIZE]; -#endif /* CONFIG_DEBUG_RODATA */ bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, @@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) return err; err = probe_kernel_write((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); -#ifdef CONFIG_DEBUG_RODATA if (!err) return err; /* @@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) return -EINVAL; bpt->type = BP_POKE_BREAKPOINT; -#endif /* CONFIG_DEBUG_RODATA */ + return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { -#ifdef CONFIG_DEBUG_RODATA int err; char opc[BREAK_INSTR_SIZE]; @@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) goto knl_write; return err; + knl_write: -#endif /* CONFIG_DEBUG_RODATA */ return probe_kernel_write((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 1deffe6cc873..0f05deeff5ce 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -988,7 +988,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * In case the user-specified fault handler returned * zero, try to fix up. */ - if (fixup_exception(regs)) + if (fixup_exception(regs, trapnr)) return 1; /* diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 87e1762e2bca..ed48a9f465f8 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -168,12 +168,14 @@ GLOBAL(ftrace_call) restore_mcount_regs /* - * The copied trampoline must call ftrace_return as it + * The copied trampoline must call ftrace_epilogue as it * still may need to call the function graph tracer. + * + * The code up to this label is copied into trampolines so + * think twice before adding any new code or changing the + * layout here. */ -GLOBAL(ftrace_caller_end) - -GLOBAL(ftrace_return) +GLOBAL(ftrace_epilogue) #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call) popfq /* - * As this jmp to ftrace_return can be a short jump + * As this jmp to ftrace_epilogue can be a short jump * it must not be copied into the trampoline. * The trampoline will add the code to jump * to the return. */ GLOBAL(ftrace_regs_caller_end) - jmp ftrace_return + jmp ftrace_epilogue END(ftrace_regs_caller) diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 64f9616f93f1..7f3550acde1b 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -40,7 +40,7 @@ #include <linux/uaccess.h> #include <linux/gfp.h> -#include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/msr.h> static struct class *msr_class; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 8a2cdd736fa4..04b132a767f1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -30,6 +30,7 @@ #include <asm/nmi.h> #include <asm/x86_init.h> #include <asm/reboot.h> +#include <asm/cache.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> @@ -69,7 +70,7 @@ struct nmi_stats { static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); -static int ignore_nmis; +static int ignore_nmis __read_mostly; int unknown_nmi_panic; /* diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 14415aff1813..92f70147a9a6 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -13,11 +13,11 @@ static int found(u64 start, u64 end, void *data) static __init int register_e820_pmem(void) { - char *pmem = "Persistent Memory (legacy)"; struct platform_device *pdev; int rc; - rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found); + rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY, + IORESOURCE_MEM, 0, -1, NULL, found); if (rc <= 0) return 0; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9f7c21c22477..2915d54e9dd5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { */ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif +#ifdef CONFIG_X86_32 + .SYSENTER_stack_canary = STACK_END_MAGIC, +#endif }; EXPORT_PER_CPU_SYMBOL(cpu_tss); @@ -418,9 +421,9 @@ static void mwait_idle(void) if (!current_set_polling_and_test()) { trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { - smp_mb(); /* quirk */ + mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); - smp_mb(); /* quirk */ + mb(); /* quirk */ } __monitor((void *)¤t_thread_info()->flags, 0, 0); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d3d80e6d42a2..aa52c1009475 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -152,21 +152,21 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index cb6282c3638f..548ddf7d6fd2 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -61,7 +61,38 @@ regs->seg = GET_SEG(seg) | 3; \ } while (0) -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +#ifdef CONFIG_X86_64 +/* + * If regs->ss will cause an IRET fault, change it. Otherwise leave it + * alone. Using this generally makes no sense unless + * user_64bit_mode(regs) would return true. + */ +static void force_valid_ss(struct pt_regs *regs) +{ + u32 ar; + asm volatile ("lar %[old_ss], %[ar]\n\t" + "jz 1f\n\t" /* If invalid: */ + "xorl %[ar], %[ar]\n\t" /* set ar = 0 */ + "1:" + : [ar] "=r" (ar) + : [old_ss] "rm" ((u16)regs->ss)); + + /* + * For a valid 64-bit user context, we need DPL 3, type + * read-write data or read-write exp-down data, and S and P + * set. We can't use VERW because VERW doesn't check the + * P bit. + */ + ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK; + if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) && + ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN)) + regs->ss = __USER_DS; +} +#endif + +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc, + unsigned long uc_flags) { unsigned long buf_val; void __user *buf; @@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ -#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); -#else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); -#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_X86_64 + /* + * Fix up SS if needed for the benefit of old DOSEMU and + * CRIU. + */ + if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && + user_64bit_mode(regs))) + force_valid_ss(regs); +#endif get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_ex(regs->cs, &sc->cs); put_user_ex(0, &sc->gs); put_user_ex(0, &sc->fs); + put_user_ex(regs->ss, &sc->ss); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, return 0; } #else /* !CONFIG_X86_32 */ +static unsigned long frame_uc_flags(struct pt_regs *regs) +{ + unsigned long flags; + + if (cpu_has_xsave) + flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; + else + flags = UC_SIGCONTEXT_SS; + + if (likely(user_64bit_mode(regs))) + flags |= UC_STRICT_RESTORE_SS; + + return flags; +} + static int __setup_rt_frame(int sig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { @@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, put_user_try { /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); save_altstack_ex(&frame->uc.uc_stack, regs->sp); @@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* Set up the CS register to run signal handlers in 64-bit mode, - even if the handler happens to be interrupting 32-bit code. */ + /* + * Set up the CS and SS registers to run signal handlers in + * 64-bit mode, even if the handler happens to be interrupting + * 32-bit or 16-bit code. + * + * SS is subtle. In 64-bit mode, we don't need any particular + * SS descriptor, but we do need SS to be valid. It's possible + * that the old SS is entirely bogus -- this can happen if the + * signal we're trying to deliver is #GP or #SS caused by a bad + * SS value. We also have a compatbility issue here: DOSEMU + * relies on the contents of the SS register indicating the + * SS value at the time of the signal, even though that code in + * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU + * avoids relying on sigreturn to restore SS; instead it uses + * a trampoline.) So we do our best: if the old SS was valid, + * we keep it. Otherwise we replace it. + */ regs->cs = __USER_CS; + if (unlikely(regs->ss != __USER_DS)) + force_valid_ss(regs); + return 0; } #endif /* CONFIG_X86_32 */ @@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, put_user_try { /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); @@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->sc)) + /* + * x86_32 has no uc_flags bits relevant to restore_sigcontext. + * Save a few cycles by skipping the __get_user. + */ + if (restore_sigcontext(regs, &frame->sc, 0)) goto badframe; return regs->ax; @@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; sigset_t set; + unsigned long uc_flags; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; + if (__get_user(uc_flags, &frame->uc.uc_flags)) + goto badframe; set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) @@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { -#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64) +#ifdef CONFIG_X86_64 + if (is_ia32_task()) + return __NR_ia32_restart_syscall; +#endif +#ifdef CONFIG_X86_X32_ABI + return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); +#else return __NR_restart_syscall; -#else /* !CONFIG_X86_32 && CONFIG_X86_64 */ - return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : - __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); -#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */ +#endif } /* @@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_x32 __user *frame; sigset_t set; + unsigned long uc_flags; frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); @@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; + if (__get_user(uc_flags, &frame->uc.uc_flags)) + goto badframe; set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 24d57f77b3c1..643dbdccf4bc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -97,6 +97,14 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* Logical package management. We might want to allocate that dynamically */ +static int *physical_to_logical_pkg __read_mostly; +static unsigned long *physical_package_map __read_mostly;; +static unsigned long *logical_package_map __read_mostly; +static unsigned int max_physical_pkg_id __read_mostly; +unsigned int __max_logical_packages __read_mostly; +EXPORT_SYMBOL(__max_logical_packages); + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -248,7 +256,98 @@ static void notrace start_secondary(void *unused) x86_cpuinit.setup_percpu_clockev(); wmb(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + +int topology_update_package_map(unsigned int apicid, unsigned int cpu) +{ + unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + /* Called from early boot ? */ + if (!physical_package_map) + return 0; + + if (pkg >= max_physical_pkg_id) + return -EINVAL; + + /* Set the logical package id */ + if (test_and_set_bit(pkg, physical_package_map)) + goto found; + + if (pkg < __max_logical_packages) { + set_bit(pkg, logical_package_map); + physical_to_logical_pkg[pkg] = pkg; + goto found; + } + new = find_first_zero_bit(logical_package_map, __max_logical_packages); + if (new >= __max_logical_packages) { + physical_to_logical_pkg[pkg] = -1; + pr_warn("APIC(%x) Package %u exceeds logical package map\n", + apicid, pkg); + return -ENOSPC; + } + set_bit(new, logical_package_map); + pr_info("APIC(%x) Converting physical %u to logical package %u\n", + apicid, pkg, new); + physical_to_logical_pkg[pkg] = new; + +found: + cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; + return 0; +} + +/** + * topology_phys_to_logical_pkg - Map a physical package id to a logical + * + * Returns logical package id or -1 if not found + */ +int topology_phys_to_logical_pkg(unsigned int phys_pkg) +{ + if (phys_pkg >= max_physical_pkg_id) + return -1; + return physical_to_logical_pkg[phys_pkg]; +} +EXPORT_SYMBOL(topology_phys_to_logical_pkg); + +static void __init smp_init_package_map(void) +{ + unsigned int ncpus, cpu; + size_t size; + + /* + * Today neither Intel nor AMD support heterogenous systems. That + * might change in the future.... + */ + ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings; + __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + + /* + * Possibly larger than what we need as the number of apic ids per + * package can be smaller than the actual used apic ids. + */ + max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); + size = max_physical_pkg_id * sizeof(unsigned int); + physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); + memset(physical_to_logical_pkg, 0xff, size); + size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); + physical_package_map = kzalloc(size, GFP_KERNEL); + size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long); + logical_package_map = kzalloc(size, GFP_KERNEL); + + pr_info("Max logical packages: %u\n", __max_logical_packages); + + for_each_present_cpu(cpu) { + unsigned int apicid = apic->cpu_present_to_apicid(cpu); + + if (apicid == BAD_APICID || !apic->apic_id_valid(apicid)) + continue; + if (!topology_update_package_map(apicid, cpu)) + continue; + pr_warn("CPU %u APICId %x disabled\n", cpu, apicid); + per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID; + set_cpu_possible(cpu, false); + set_cpu_present(cpu, false); + } } void __init smp_store_boot_cpu_info(void) @@ -258,6 +357,7 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; + smp_init_package_map(); } /* diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 3f92ce07e525..27538f183c3b 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -142,7 +142,6 @@ static int test_NX(void) * by the error message */ -#ifdef CONFIG_DEBUG_RODATA /* Test 3: Check if the .rodata section is executable */ if (rodata_test_data != 0xC3) { printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); @@ -151,7 +150,6 @@ static int test_NX(void) printk(KERN_ERR "test_nx: .rodata section is executable\n"); ret = -ENODEV; } -#endif #if 0 /* Test 4: Check if the .data section of a module is executable */ diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index 5ecbfe5099da..cb4a01b41e27 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c @@ -76,5 +76,5 @@ int rodata_test(void) } MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure"); +MODULE_DESCRIPTION("Testcase for marking rodata as read-only"); MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ade185a46b1d..06cbe25861f1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss; DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); -static inline void conditional_sti(struct pt_regs *regs) +static inline void cond_local_irq_enable(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } -static inline void preempt_conditional_sti(struct pt_regs *regs) -{ - preempt_count_inc(); - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void conditional_cli(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_disable(); -} - -static inline void preempt_conditional_cli(struct pt_regs *regs) +static inline void cond_local_irq_disable(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - preempt_count_dec(); } void ist_enter(struct pt_regs *regs) @@ -199,7 +185,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, } if (!user_mode(regs)) { - if (!fixup_exception(regs)) { + if (!fixup_exception(regs, trapnr)) { tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); @@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; -#ifdef CONFIG_X86_64 if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", @@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, print_vma_addr(" in ", regs->ip); pr_cont("\n"); } -#endif force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); } @@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { - conditional_sti(regs); + cond_local_irq_enable(regs); do_trap(trapnr, signr, str, regs, error_code, fill_trap_info(regs, signr, trapnr, &info)); } @@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) if (notify_die(DIE_TRAP, "bounds", regs, error_code, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) return; - conditional_sti(regs); + cond_local_irq_enable(regs); if (!user_mode(regs)) die("bounds", regs, error_code); @@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code) struct task_struct *tsk; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - conditional_sti(regs); + cond_local_irq_enable(regs); if (v8086_mode(regs)) { local_irq_enable(); @@ -453,7 +437,7 @@ do_general_protection(struct pt_regs *regs, long error_code) tsk = current; if (!user_mode(regs)) { - if (fixup_exception(regs)) + if (fixup_exception(regs, X86_TRAP_GP)) return; tsk->thread.error_code = error_code; @@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) * as we may switch to the interrupt stack. */ debug_stack_usage_inc(); - preempt_conditional_sti(regs); + preempt_disable(); + cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); exit: ist_exit(regs); @@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) NOKPROBE_SYMBOL(fixup_bad_iret); #endif +static bool is_sysenter_singlestep(struct pt_regs *regs) +{ + /* + * We don't try for precision here. If we're anywhere in the region of + * code that can be single-stepped in the SYSENTER entry path, then + * assume that this is a useless single-step trap due to SYSENTER + * being invoked with TF set. (We don't know in advance exactly + * which instructions will be hit because BTF could plausibly + * be set.) + */ +#ifdef CONFIG_X86_32 + return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) < + (unsigned long)__end_SYSENTER_singlestep_region - + (unsigned long)__begin_SYSENTER_singlestep_region; +#elif defined(CONFIG_IA32_EMULATION) + return (regs->ip - (unsigned long)entry_SYSENTER_compat) < + (unsigned long)__end_entry_SYSENTER_compat - + (unsigned long)entry_SYSENTER_compat; +#else + return false; +#endif +} + /* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. Therefore @@ -605,11 +614,42 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) ist_enter(regs); get_debugreg(dr6, 6); + /* + * The Intel SDM says: + * + * Certain debug exceptions may clear bits 0-3. The remaining + * contents of the DR6 register are never cleared by the + * processor. To avoid confusion in identifying debug + * exceptions, debug handlers should clear the register before + * returning to the interrupted task. + * + * Keep it simple: clear DR6 immediately. + */ + set_debugreg(0, 6); /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." Clear TIF_BLOCKSTEP to keep + * TIF_BLOCKSTEP in sync with the hardware BTF flag. + */ + clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); + + if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) && + is_sysenter_singlestep(regs))) { + dr6 &= ~DR_STEP; + if (!dr6) + goto exit; + /* + * else we might have gotten a single-step trap and hit a + * watchpoint at the same time, in which case we should fall + * through and handle the watchpoint. + */ + } + + /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. @@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if (!dr6 && user_mode(regs)) user_icebp = 1; - /* Catch kmemcheck conditions first of all! */ + /* Catch kmemcheck conditions! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) goto exit; - /* DR6 may or may not be cleared by the CPU */ - set_debugreg(0, 6); - - /* - * The processor cleared BTF, so don't mark that we need it set. - */ - clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); - /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; @@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_inc(); /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); + preempt_disable(); + cond_local_irq_enable(regs); if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); goto exit; } - /* - * Single-stepping through system calls: ignore any exceptions in - * kernel space, but re-enable TF when returning to user mode. - * - * We already checked v86 mode above, so we can check for kernel mode - * by just checking the CPL of CS. - */ - if ((dr6 & DR_STEP) && !user_mode(regs)) { + if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { + /* + * Historical junk that used to handle SYSENTER single-stepping. + * This should be unreachable now. If we survive for a while + * without anyone hitting this warning, we'll turn this into + * an oops. + */ tsk->thread.debugreg6 &= ~DR_STEP; set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; @@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); exit: +#if defined(CONFIG_X86_32) + /* + * This is the most likely code path that involves non-trivial use + * of the SYSENTER stack. Check that we haven't overrun it. + */ + WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, + "Overran or corrupted SYSENTER stack\n"); +#endif ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); @@ -696,10 +738,10 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) return; - conditional_sti(regs); + cond_local_irq_enable(regs); if (!user_mode(regs)) { - if (!fixup_exception(regs)) { + if (!fixup_exception(regs, trapnr)) { task->thread.error_code = error_code; task->thread.trap_nr = trapnr; die(str, regs, error_code); @@ -743,20 +785,19 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { - conditional_sti(regs); + cond_local_irq_enable(regs); } dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION - if (read_cr0() & X86_CR0_EM) { + if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) { struct math_emu_info info = { }; - conditional_sti(regs); + cond_local_irq_enable(regs); info.regs = regs; math_emulate(&info); @@ -765,7 +806,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif fpu__restore(¤t->thread.fpu); /* interrupts still off */ #ifdef CONFIG_X86_32 - conditional_sti(regs); + cond_local_irq_enable(regs); #endif } NOKPROBE_SYMBOL(do_device_not_available); @@ -868,7 +909,7 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); + set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index acec49b302d1..5e19d2587cc5 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -43,6 +43,11 @@ static DEFINE_STATIC_KEY_FALSE(__use_tsc); int tsc_clocksource_reliable; +static u32 art_to_tsc_numerator; +static u32 art_to_tsc_denominator; +static u64 art_to_tsc_offset; +struct clocksource *art_related_clocksource; + /* * Use a ring-buffer like data structure, where a writer advances the head by * writing a new data entry and a reader advances the tail when it observes a @@ -964,6 +969,37 @@ core_initcall(cpufreq_tsc); #endif /* CONFIG_CPU_FREQ */ +#define ART_CPUID_LEAF (0x15) +#define ART_MIN_DENOMINATOR (1) + + +/* + * If ART is present detect the numerator:denominator to convert to TSC + */ +static void detect_art(void) +{ + unsigned int unused[2]; + + if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) + return; + + cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, + &art_to_tsc_numerator, unused, unused+1); + + /* Don't enable ART in a VM, non-stop TSC required */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || + !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || + art_to_tsc_denominator < ART_MIN_DENOMINATOR) + return; + + if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset)) + return; + + /* Make this sticky over multiple CPU init calls */ + setup_force_cpu_cap(X86_FEATURE_ART); +} + + /* clocksource code */ static struct clocksource clocksource_tsc; @@ -1071,6 +1107,25 @@ int unsynchronized_tsc(void) return 0; } +/* + * Convert ART to TSC given numerator/denominator found in detect_art() + */ +struct system_counterval_t convert_art_to_tsc(cycle_t art) +{ + u64 tmp, res, rem; + + rem = do_div(art, art_to_tsc_denominator); + + res = art * art_to_tsc_numerator; + tmp = rem * art_to_tsc_numerator; + + do_div(tmp, art_to_tsc_denominator); + res += tmp + art_to_tsc_offset; + + return (struct system_counterval_t) {.cs = art_related_clocksource, + .cycles = res}; +} +EXPORT_SYMBOL(convert_art_to_tsc); static void tsc_refine_calibration_work(struct work_struct *work); static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); @@ -1142,6 +1197,8 @@ static void tsc_refine_calibration_work(struct work_struct *work) (unsigned long)tsc_khz % 1000); out: + if (boot_cpu_has(X86_FEATURE_ART)) + art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); } @@ -1235,6 +1292,8 @@ void __init tsc_init(void) mark_tsc_unstable("TSCs unsynchronized"); check_system_tsc_reliable(); + + detect_art(); } #ifdef CONFIG_SMP @@ -1246,14 +1305,14 @@ void __init tsc_init(void) */ unsigned long calibrate_delay_is_known(void) { - int i, cpu = smp_processor_id(); + int sibling, cpu = smp_processor_id(); if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) return 0; - for_each_online_cpu(i) - if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) - return cpu_data(i).loops_per_jiffy; + sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu); + if (sibling < nr_cpu_ids) + return cpu_data(sibling).loops_per_jiffy; return 0; } #endif diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 07efb35ee4bc..014ea59aa153 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -30,7 +30,7 @@ * appropriately. Either display a message or halt. */ -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/msr-index.h> verify_cpu: diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e574b8546518..3dce1ca0a653 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) /* make room for real-mode segments */ tsk->thread.sp0 += 16; - if (static_cpu_has_safe(X86_FEATURE_SEP)) + if (static_cpu_has(X86_FEATURE_SEP)) tsk->thread.sysenter_cs = 0; load_sp0(tss, &tsk->thread); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 74e4bf11f562..5af9958cbdb6 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -41,29 +41,28 @@ ENTRY(phys_startup_64) jiffies_64 = jiffies; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* - * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA - * we retain large page mappings for boundaries spanning kernel text, rodata - * and data sections. + * On 64-bit, align RODATA to 2MB so we retain large page mappings for + * boundaries spanning kernel text, rodata and data sections. * * However, kernel identity mappings will have different RWX permissions * to the pages mapping to text and to the pages padding (which are freed) the * text section. Hence kernel identity mappings will be broken to smaller * pages. For 64-bit, kernel text and kernel identity mappings are different, - * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, - * as well as retain 2MB large page mappings for kernel text. + * so we can enable protection checks as well as retain 2MB large page + * mappings for kernel text. */ -#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); +#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); -#define X64_ALIGN_DEBUG_RODATA_END \ +#define X64_ALIGN_RODATA_END \ . = ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; #else -#define X64_ALIGN_DEBUG_RODATA_BEGIN -#define X64_ALIGN_DEBUG_RODATA_END +#define X64_ALIGN_RODATA_BEGIN +#define X64_ALIGN_RODATA_END #endif @@ -112,13 +111,11 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 -#if defined(CONFIG_DEBUG_RODATA) /* .text should occupy whole number of pages */ . = ALIGN(PAGE_SIZE); -#endif - X64_ALIGN_DEBUG_RODATA_BEGIN + X64_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) - X64_ALIGN_DEBUG_RODATA_END + X64_ALIGN_RODATA_END /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { @@ -195,6 +192,17 @@ SECTIONS :init #endif + /* + * Section for code used exclusively before alternatives are run. All + * references to such code must be patched out by alternatives, normally + * by using X86_FEATURE_ALWAYS CPU feature bit. + * + * See static_cpu_has() for an example. + */ + .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) { + *(.altinstr_aux) + } + INIT_DATA_SECTION(16) .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a0695be19864..cd05942bc918 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -37,6 +37,8 @@ EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(_copy_to_user); +EXPORT_SYMBOL_GPL(memcpy_mcsafe); + EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1505587d06e9..b9b09fec173b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -650,10 +650,10 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, u16 sel; la = seg_base(ctxt, addr.seg) + addr.ea; - *linear = la; *max_size = 0; switch (mode) { case X86EMUL_MODE_PROT64: + *linear = la; if (is_noncanonical_address(la)) goto bad; @@ -662,6 +662,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; break; default: + *linear = la = (u32)la; usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, addr.seg); if (!usable) @@ -689,7 +690,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, if (size > *max_size) goto bad; } - la &= (u32)-1; break; } if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 36591faed13b..3a045f39ed81 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1195,7 +1195,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic) static void apic_timer_expired(struct kvm_lapic *apic) { struct kvm_vcpu *vcpu = apic->vcpu; - wait_queue_head_t *q = &vcpu->wq; + struct swait_queue_head *q = &vcpu->wq; struct kvm_timer *ktimer = &apic->lapic_timer; if (atomic_read(&apic->lapic_timer.pending)) @@ -1204,8 +1204,8 @@ static void apic_timer_expired(struct kvm_lapic *apic) atomic_inc(&apic->lapic_timer.pending); kvm_set_pending_timer(vcpu); - if (waitqueue_active(q)) - wake_up_interruptible(q); + if (swait_active(q)) + swake_up(q); if (apic_lvtt_tscdeadline(apic)) ktimer->expired_tscdeadline = ktimer->tscdeadline; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e1bb320dd5b2..ddb3291d49c9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3721,13 +3721,15 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { + bool uses_nx = context->nx || context->base_role.smep_andnot_wp; + /* * Passing "true" to the last argument is okay; it adds a check * on bit 8 of the SPTEs which KVM doesn't use anyway. */ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, - context->shadow_root_level, context->nx, + context->shadow_root_level, uses_nx, guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), true); } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6c9fed957cce..2ce4f05e81d3 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -249,7 +249,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, return ret; kvm_vcpu_mark_page_dirty(vcpu, table_gfn); - walker->ptes[level] = pte; + walker->ptes[level - 1] = pte; } return 0; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 34208bf57c35..1735ae9d684a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -596,6 +596,8 @@ struct vcpu_vmx { /* Support for PML */ #define PML_ENTITY_NUM 512 struct page *pml_pg; + + u64 current_tsc_ratio; }; enum segment_cache_field { @@ -1811,6 +1813,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, return; } break; + case MSR_IA32_PEBS_ENABLE: + /* PEBS needs a quiescent period after being disabled (to write + * a record). Disabling PEBS through VMX MSR swapping doesn't + * provide that period, so a CPU could write host's record into + * guest's memory. + */ + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } for (i = 0; i < m->nr; ++i) @@ -1848,26 +1857,31 @@ static void reload_tss(void) static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { - u64 guest_efer; - u64 ignore_bits; + u64 guest_efer = vmx->vcpu.arch.efer; + u64 ignore_bits = 0; - guest_efer = vmx->vcpu.arch.efer; + if (!enable_ept) { + /* + * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing + * host CPUID is more efficient than testing guest CPUID + * or CR4. Host SMEP is anyway a requirement for guest SMEP. + */ + if (boot_cpu_has(X86_FEATURE_SMEP)) + guest_efer |= EFER_NX; + else if (!(guest_efer & EFER_NX)) + ignore_bits |= EFER_NX; + } /* - * NX is emulated; LMA and LME handled by hardware; SCE meaningless - * outside long mode + * LMA and LME handled by hardware; SCE meaningless outside long mode. */ - ignore_bits = EFER_NX | EFER_SCE; + ignore_bits |= EFER_SCE; #ifdef CONFIG_X86_64 ignore_bits |= EFER_LMA | EFER_LME; /* SCE is meaningful only in long mode on Intel */ if (guest_efer & EFER_LMA) ignore_bits &= ~(u64)EFER_SCE; #endif - guest_efer &= ~ignore_bits; - guest_efer |= host_efer & ignore_bits; - vmx->guest_msrs[efer_offset].data = guest_efer; - vmx->guest_msrs[efer_offset].mask = ~ignore_bits; clear_atomic_switch_msr(vmx, MSR_EFER); @@ -1878,16 +1892,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) */ if (cpu_has_load_ia32_efer || (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { - guest_efer = vmx->vcpu.arch.efer; if (!(guest_efer & EFER_LMA)) guest_efer &= ~EFER_LME; if (guest_efer != host_efer) add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); return false; - } + } else { + guest_efer &= ~ignore_bits; + guest_efer |= host_efer & ignore_bits; - return true; + vmx->guest_msrs[efer_offset].data = guest_efer; + vmx->guest_msrs[efer_offset].mask = ~ignore_bits; + + return true; + } } static unsigned long segment_base(u16 selector) @@ -2127,14 +2146,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ - /* Setup TSC multiplier */ - if (cpu_has_vmx_tsc_scaling()) - vmcs_write64(TSC_MULTIPLIER, - vcpu->arch.tsc_scaling_ratio); - vmx->loaded_vmcs->cpu = cpu; } + /* Setup TSC multiplier */ + if (kvm_has_tsc_control && + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) { + vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio; + vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); + } + vmx_vcpu_pi_load(vcpu, cpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 429c3f5fc618..4838d35c9641 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6618,12 +6618,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * KVM_DEBUGREG_WONT_EXIT again. */ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { - int i; - WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); kvm_x86_ops->sync_dirty_debug_regs(vcpu); - for (i = 0; i < KVM_NR_DB_REGS; i++) - vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + kvm_update_dr0123(vcpu); + kvm_update_dr6(vcpu); + kvm_update_dr7(vcpu); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } /* diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a9033ae13369..fd57d3ae7e16 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1520,12 +1520,6 @@ __init void lguest_init(void) */ reserve_top_address(lguest_data.reserve_mem); - /* - * If we don't initialize the lock dependency checker now, it crashes - * atomic_notifier_chain_register, then paravirt_disable_iospace. - */ - lockdep_init(); - /* Hook in our special panic hypercall code. */ atomic_notifier_chain_register(&panic_notifier_list, &paniced); diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index a2fe51b00cce..65be7cfaf947 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,5 @@ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> /* diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 422db000d727..5cc78bf57232 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -21,12 +21,16 @@ static inline int myisspace(u8 c) * @option: option string to look for * * Returns the position of that @option (starts counting with 1) - * or 0 on not found. + * or 0 on not found. @option will only be found if it is found + * as an entire word in @cmdline. For instance, if @option="car" + * then a cmdline which contains "cart" will not match. */ -int cmdline_find_option_bool(const char *cmdline, const char *option) +static int +__cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, + const char *option) { char c; - int len, pos = 0, wstart = 0; + int pos = 0, wstart = 0; const char *opptr = NULL; enum { st_wordstart = 0, /* Start of word/after whitespace */ @@ -37,11 +41,11 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) if (!cmdline) return -1; /* No command line */ - len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE); - if (!len) - return 0; - - while (len--) { + /* + * This 'pos' check ensures we do not overrun + * a non-NULL-terminated 'cmdline' + */ + while (pos < max_cmdline_size) { c = *(char *)cmdline++; pos++; @@ -58,18 +62,35 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) /* fall through */ case st_wordcmp: - if (!*opptr) + if (!*opptr) { + /* + * We matched all the way to the end of the + * option we were looking for. If the + * command-line has a space _or_ ends, then + * we matched! + */ if (!c || myisspace(c)) return wstart; - else - state = st_wordskip; - else if (!c) + /* + * We hit the end of the option, but _not_ + * the end of a word on the cmdline. Not + * a match. + */ + } else if (!c) { + /* + * Hit the NULL terminator on the end of + * cmdline. + */ return 0; - else if (c != *opptr++) - state = st_wordskip; - else if (!len) /* last word and is matching */ - return wstart; - break; + } else if (c == *opptr++) { + /* + * We are currently matching, so continue + * to the next character on the cmdline. + */ + break; + } + state = st_wordskip; + /* fall through */ case st_wordskip: if (!c) @@ -82,3 +103,8 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) return 0; /* Buffer overrun */ } + +int cmdline_find_option_bool(const char *cmdline, const char *option) +{ + return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); +} diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 009f98216b7e..24ef1c2104d4 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -1,7 +1,7 @@ /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> /* diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 982ce34f4a9b..2b0ef26da0bd 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -10,7 +10,7 @@ #include <asm/current.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/smap.h> @@ -232,17 +232,31 @@ ENDPROC(copy_user_enhanced_fast_string) /* * copy_user_nocache - Uncached memory copy with exception handling - * This will force destination/source out of cache for more performance. + * This will force destination out of cache for more performance. + * + * Note: Cached memory copy is used when destination or size is not + * naturally aligned. That is: + * - Require 8-byte alignment when size is 8 bytes or larger. + * - Require 4-byte alignment when size is 4 bytes. */ ENTRY(__copy_user_nocache) ASM_STAC + + /* If size is less than 8 bytes, go to 4-byte copy */ cmpl $8,%edx - jb 20f /* less then 8 bytes, go to byte copy loop */ + jb .L_4b_nocache_copy_entry + + /* If destination is not 8-byte aligned, "cache" copy to align it */ ALIGN_DESTINATION + + /* Set 4x8-byte copy count and remainder */ movl %edx,%ecx andl $63,%edx shrl $6,%ecx - jz 17f + jz .L_8b_nocache_copy_entry /* jump if count is 0 */ + + /* Perform 4x8-byte nocache loop-copy */ +.L_4x8b_nocache_copy_loop: 1: movq (%rsi),%r8 2: movq 1*8(%rsi),%r9 3: movq 2*8(%rsi),%r10 @@ -262,60 +276,106 @@ ENTRY(__copy_user_nocache) leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi decl %ecx - jnz 1b -17: movl %edx,%ecx + jnz .L_4x8b_nocache_copy_loop + + /* Set 8-byte copy count and remainder */ +.L_8b_nocache_copy_entry: + movl %edx,%ecx andl $7,%edx shrl $3,%ecx - jz 20f -18: movq (%rsi),%r8 -19: movnti %r8,(%rdi) + jz .L_4b_nocache_copy_entry /* jump if count is 0 */ + + /* Perform 8-byte nocache loop-copy */ +.L_8b_nocache_copy_loop: +20: movq (%rsi),%r8 +21: movnti %r8,(%rdi) leaq 8(%rsi),%rsi leaq 8(%rdi),%rdi decl %ecx - jnz 18b -20: andl %edx,%edx - jz 23f + jnz .L_8b_nocache_copy_loop + + /* If no byte left, we're done */ +.L_4b_nocache_copy_entry: + andl %edx,%edx + jz .L_finish_copy + + /* If destination is not 4-byte aligned, go to byte copy: */ + movl %edi,%ecx + andl $3,%ecx + jnz .L_1b_cache_copy_entry + + /* Set 4-byte copy count (1 or 0) and remainder */ movl %edx,%ecx -21: movb (%rsi),%al -22: movb %al,(%rdi) + andl $3,%edx + shrl $2,%ecx + jz .L_1b_cache_copy_entry /* jump if count is 0 */ + + /* Perform 4-byte nocache copy: */ +30: movl (%rsi),%r8d +31: movnti %r8d,(%rdi) + leaq 4(%rsi),%rsi + leaq 4(%rdi),%rdi + + /* If no bytes left, we're done: */ + andl %edx,%edx + jz .L_finish_copy + + /* Perform byte "cache" loop-copy for the remainder */ +.L_1b_cache_copy_entry: + movl %edx,%ecx +.L_1b_cache_copy_loop: +40: movb (%rsi),%al +41: movb %al,(%rdi) incq %rsi incq %rdi decl %ecx - jnz 21b -23: xorl %eax,%eax + jnz .L_1b_cache_copy_loop + + /* Finished copying; fence the prior stores */ +.L_finish_copy: + xorl %eax,%eax ASM_CLAC sfence ret .section .fixup,"ax" -30: shll $6,%ecx +.L_fixup_4x8b_copy: + shll $6,%ecx addl %ecx,%edx - jmp 60f -40: lea (%rdx,%rcx,8),%rdx - jmp 60f -50: movl %ecx,%edx -60: sfence + jmp .L_fixup_handle_tail +.L_fixup_8b_copy: + lea (%rdx,%rcx,8),%rdx + jmp .L_fixup_handle_tail +.L_fixup_4b_copy: + lea (%rdx,%rcx,4),%rdx + jmp .L_fixup_handle_tail +.L_fixup_1b_copy: + movl %ecx,%edx +.L_fixup_handle_tail: + sfence jmp copy_user_handle_tail .previous - _ASM_EXTABLE(1b,30b) - _ASM_EXTABLE(2b,30b) - _ASM_EXTABLE(3b,30b) - _ASM_EXTABLE(4b,30b) - _ASM_EXTABLE(5b,30b) - _ASM_EXTABLE(6b,30b) - _ASM_EXTABLE(7b,30b) - _ASM_EXTABLE(8b,30b) - _ASM_EXTABLE(9b,30b) - _ASM_EXTABLE(10b,30b) - _ASM_EXTABLE(11b,30b) - _ASM_EXTABLE(12b,30b) - _ASM_EXTABLE(13b,30b) - _ASM_EXTABLE(14b,30b) - _ASM_EXTABLE(15b,30b) - _ASM_EXTABLE(16b,30b) - _ASM_EXTABLE(18b,40b) - _ASM_EXTABLE(19b,40b) - _ASM_EXTABLE(21b,50b) - _ASM_EXTABLE(22b,50b) + _ASM_EXTABLE(1b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(2b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(3b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(4b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(5b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(6b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(7b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(8b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(9b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(10b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(11b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(12b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(13b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(14b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(15b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(16b,.L_fixup_4x8b_copy) + _ASM_EXTABLE(20b,.L_fixup_8b_copy) + _ASM_EXTABLE(21b,.L_fixup_8b_copy) + _ASM_EXTABLE(30b,.L_fixup_4b_copy) + _ASM_EXTABLE(31b,.L_fixup_4b_copy) + _ASM_EXTABLE(40b,.L_fixup_1b_copy) + _ASM_EXTABLE(41b,.L_fixup_1b_copy) ENDPROC(__copy_user_nocache) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index e912b2f6d36e..2f07c291dcc8 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -102,7 +102,7 @@ static void delay_mwaitx(unsigned long __loops) * Use cpu_tss as a cacheline-aligned, seldomly * accessed per-cpu variable as the monitor target. */ - __monitorx(this_cpu_ptr(&cpu_tss), 0, 0); + __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); /* * AMD, like Intel, supports the EAX hint and EAX=0xf diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 16698bba87de..2ec0b0abbfaa 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -1,7 +1,8 @@ /* Copyright 2002 Andi Kleen */ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/errno.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> /* @@ -177,3 +178,120 @@ ENTRY(memcpy_orig) .Lend: retq ENDPROC(memcpy_orig) + +#ifndef CONFIG_UML +/* + * memcpy_mcsafe - memory copy with machine check exception handling + * Note that we only catch machine checks when reading the source addresses. + * Writes to target are posted and don't generate machine checks. + */ +ENTRY(memcpy_mcsafe) + cmpl $8, %edx + /* Less than 8 bytes? Go to byte copy loop */ + jb .L_no_whole_words + + /* Check for bad alignment of source */ + testl $7, %esi + /* Already aligned */ + jz .L_8byte_aligned + + /* Copy one byte at a time until source is 8-byte aligned */ + movl %esi, %ecx + andl $7, %ecx + subl $8, %ecx + negl %ecx + subl %ecx, %edx +.L_copy_leading_bytes: + movb (%rsi), %al + movb %al, (%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz .L_copy_leading_bytes + +.L_8byte_aligned: + /* Figure out how many whole cache lines (64-bytes) to copy */ + movl %edx, %ecx + andl $63, %edx + shrl $6, %ecx + jz .L_no_whole_cache_lines + + /* Loop copying whole cache lines */ +.L_cache_w0: movq (%rsi), %r8 +.L_cache_w1: movq 1*8(%rsi), %r9 +.L_cache_w2: movq 2*8(%rsi), %r10 +.L_cache_w3: movq 3*8(%rsi), %r11 + movq %r8, (%rdi) + movq %r9, 1*8(%rdi) + movq %r10, 2*8(%rdi) + movq %r11, 3*8(%rdi) +.L_cache_w4: movq 4*8(%rsi), %r8 +.L_cache_w5: movq 5*8(%rsi), %r9 +.L_cache_w6: movq 6*8(%rsi), %r10 +.L_cache_w7: movq 7*8(%rsi), %r11 + movq %r8, 4*8(%rdi) + movq %r9, 5*8(%rdi) + movq %r10, 6*8(%rdi) + movq %r11, 7*8(%rdi) + leaq 64(%rsi), %rsi + leaq 64(%rdi), %rdi + decl %ecx + jnz .L_cache_w0 + + /* Are there any trailing 8-byte words? */ +.L_no_whole_cache_lines: + movl %edx, %ecx + andl $7, %edx + shrl $3, %ecx + jz .L_no_whole_words + + /* Copy trailing words */ +.L_copy_trailing_words: + movq (%rsi), %r8 + mov %r8, (%rdi) + leaq 8(%rsi), %rsi + leaq 8(%rdi), %rdi + decl %ecx + jnz .L_copy_trailing_words + + /* Any trailing bytes? */ +.L_no_whole_words: + andl %edx, %edx + jz .L_done_memcpy_trap + + /* Copy trailing bytes */ + movl %edx, %ecx +.L_copy_trailing_bytes: + movb (%rsi), %al + movb %al, (%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz .L_copy_trailing_bytes + + /* Copy successful. Return zero */ +.L_done_memcpy_trap: + xorq %rax, %rax + ret +ENDPROC(memcpy_mcsafe) + + .section .fixup, "ax" + /* Return -EFAULT for any failure */ +.L_memcpy_mcsafe_fail: + mov $-EFAULT, %rax + ret + + .previous + + _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) +#endif diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index ca2afdd6d98e..90ce01bee00c 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -6,7 +6,7 @@ * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> */ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> #undef memmove diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index f9962ad4e9c4..e1229ecd2a82 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -1,7 +1,7 @@ /* Copyright 2002 Andi Kleen, SuSE Labs */ #include <linux/linkage.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/alternative-asm.h> .weak memset diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 4a6f1d9b5106..99bfb192803f 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, #define pgd_none(a) pud_none(__pud(pgd_val(a))) #endif -#ifdef CONFIG_X86_64 static inline bool is_hypervisor_range(int idx) { +#ifdef CONFIG_X86_64 /* * ffff800000000000 - ffff87ffffffffff is reserved for * the hypervisor. */ - return paravirt_enabled() && - (idx >= pgd_index(__PAGE_OFFSET) - 16) && - (idx < pgd_index(__PAGE_OFFSET)); -} + return (idx >= pgd_index(__PAGE_OFFSET) - 16) && + (idx < pgd_index(__PAGE_OFFSET)); #else -static inline bool is_hypervisor_range(int idx) { return false; } + return false; #endif +} static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, bool checkwx) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 903ec1e9c326..9dd7e4b7fcde 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -3,6 +3,9 @@ #include <linux/sort.h> #include <asm/uaccess.h> +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *, int); + static inline unsigned long ex_insn_addr(const struct exception_table_entry *x) { @@ -13,11 +16,56 @@ ex_fixup_addr(const struct exception_table_entry *x) { return (unsigned long)&x->fixup + x->fixup; } +static inline ex_handler_t +ex_fixup_handler(const struct exception_table_entry *x) +{ + return (ex_handler_t)((unsigned long)&x->handler + x->handler); +} -int fixup_exception(struct pt_regs *regs) +bool ex_handler_default(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { - const struct exception_table_entry *fixup; - unsigned long new_ip; + regs->ip = ex_fixup_addr(fixup); + return true; +} +EXPORT_SYMBOL(ex_handler_default); + +bool ex_handler_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + regs->ip = ex_fixup_addr(fixup); + regs->ax = trapnr; + return true; +} +EXPORT_SYMBOL_GPL(ex_handler_fault); + +bool ex_handler_ext(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) +{ + /* Special hack for uaccess_err */ + current_thread_info()->uaccess_err = 1; + regs->ip = ex_fixup_addr(fixup); + return true; +} +EXPORT_SYMBOL(ex_handler_ext); + +bool ex_has_fault_handler(unsigned long ip) +{ + const struct exception_table_entry *e; + ex_handler_t handler; + + e = search_exception_tables(ip); + if (!e) + return false; + handler = ex_fixup_handler(e); + + return handler == ex_handler_fault; +} + +int fixup_exception(struct pt_regs *regs, int trapnr) +{ + const struct exception_table_entry *e; + ex_handler_t handler; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { @@ -33,42 +81,34 @@ int fixup_exception(struct pt_regs *regs) } #endif - fixup = search_exception_tables(regs->ip); - if (fixup) { - new_ip = ex_fixup_addr(fixup); - - if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) { - /* Special hack for uaccess_err */ - current_thread_info()->uaccess_err = 1; - new_ip -= 0x7ffffff0; - } - regs->ip = new_ip; - return 1; - } + e = search_exception_tables(regs->ip); + if (!e) + return 0; - return 0; + handler = ex_fixup_handler(e); + return handler(e, regs, trapnr); } /* Restricted version used during very early boot */ int __init early_fixup_exception(unsigned long *ip) { - const struct exception_table_entry *fixup; + const struct exception_table_entry *e; unsigned long new_ip; + ex_handler_t handler; - fixup = search_exception_tables(*ip); - if (fixup) { - new_ip = ex_fixup_addr(fixup); + e = search_exception_tables(*ip); + if (!e) + return 0; - if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) { - /* uaccess handling not supported during early boot */ - return 0; - } + new_ip = ex_fixup_addr(e); + handler = ex_fixup_handler(e); - *ip = new_ip; - return 1; - } + /* special handling not supported during early boot */ + if (handler != ex_handler_default) + return 0; - return 0; + *ip = new_ip; + return 1; } /* @@ -133,6 +173,8 @@ void sort_extable(struct exception_table_entry *start, i += 4; p->fixup += i; i += 4; + p->handler += i; + i += 4; } sort(start, finish - start, sizeof(struct exception_table_entry), @@ -145,6 +187,8 @@ void sort_extable(struct exception_table_entry *start, i += 4; p->fixup -= i; i += 4; + p->handler -= i; + i += 4; } } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index eef44d9a3f77..03898aea6e0f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -287,6 +287,9 @@ static noinline int vmalloc_fault(unsigned long address) if (!pmd_k) return -1; + if (pmd_huge(*pmd_k)) + return 0; + pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; @@ -360,8 +363,6 @@ void vmalloc_sync_all(void) * 64-bit: * * Handle a fault on the vmalloc area - * - * This assumes no large pages in there. */ static noinline int vmalloc_fault(unsigned long address) { @@ -403,17 +404,23 @@ static noinline int vmalloc_fault(unsigned long address) if (pud_none(*pud_ref)) return -1; - if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) + if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) BUG(); + if (pud_huge(*pud)) + return 0; + pmd = pmd_offset(pud, address); pmd_ref = pmd_offset(pud_ref, address); if (pmd_none(*pmd_ref)) return -1; - if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) + if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) BUG(); + if (pmd_huge(*pmd)) + return 0; + pte_ref = pte_offset_kernel(pmd_ref, address); if (!pte_present(*pte_ref)) return -1; @@ -656,7 +663,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, int sig; /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) { + if (fixup_exception(regs, X86_TRAP_PF)) { /* * Any interrupt that takes a fault gets the fixup. This makes * the below recursive fault logic only apply to a faults from diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 6d5eb5900372..d8a798d8bf50 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -102,7 +102,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } - page = pte_page(pte); if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { @@ -115,6 +114,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); get_page(page); put_dev_pagemap(pgmap); SetPageReferenced(page); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 42982b26e32b..740d7ac03a55 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -173,10 +173,10 @@ static __init int setup_hugepagesz(char *opt) } __setup("hugepagesz=", setup_hugepagesz); -#ifdef CONFIG_CMA +#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) static __init int gigantic_pages_init(void) { - /* With CMA we can allocate gigantic pages at runtime */ + /* With compaction or CMA we can allocate gigantic pages at runtime */ if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT)) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); return 0; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cb4ef3de61f9..bd7a9b9e2e14 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -388,7 +388,6 @@ repeat: } pte_t *kmap_pte; -pgprot_t kmap_prot; static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) { @@ -405,8 +404,6 @@ static void __init kmap_init(void) */ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; } #ifdef CONFIG_HIGHMEM @@ -871,7 +868,6 @@ static noinline int do_test_wp_bit(void) return flag; } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -960,5 +956,3 @@ void mark_rodata_ro(void) if (__supported_pte_mask & _PAGE_NX) debug_checkwx(); } -#endif - diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5488d21123bd..214afda97911 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -53,6 +53,7 @@ #include <asm/numa.h> #include <asm/cacheflush.h> #include <asm/init.h> +#include <asm/uv/uv.h> #include <asm/setup.h> #include "mm_internal.h" @@ -1074,7 +1075,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -1166,8 +1166,6 @@ void mark_rodata_ro(void) debug_checkwx(); } -#endif - int kern_addr_valid(unsigned long addr) { unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; @@ -1206,26 +1204,13 @@ int kern_addr_valid(unsigned long addr) static unsigned long probe_memory_block_size(void) { - /* start from 2g */ - unsigned long bz = 1UL<<31; - - if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) { - pr_info("Using 2GB memory block size for large-memory system\n"); - return 2UL * 1024 * 1024 * 1024; - } - - /* less than 64g installed */ - if ((max_pfn << PAGE_SHIFT) < (16UL << 32)) - return MIN_MEMORY_BLOCK_SIZE; + unsigned long bz = MIN_MEMORY_BLOCK_SIZE; - /* get the tail size */ - while (bz > MIN_MEMORY_BLOCK_SIZE) { - if (!((max_pfn << PAGE_SHIFT) & (bz - 1))) - break; - bz >>= 1; - } + /* if system is UV or has 64GB of RAM or more, use large blocks */ + if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30))) + bz = 2UL << 30; /* 2GB */ - printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20); + pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); return bz; } diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index d470cf219a2d..1b1110fa0057 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -120,11 +120,22 @@ void __init kasan_init(void) kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END); - memset(kasan_zero_page, 0, PAGE_SIZE); - load_cr3(init_level4_pgt); __flush_tlb_all(); - init_task.kasan_depth = 0; + /* + * kasan_zero_page has been used as early shadow memory, thus it may + * contain some garbage. Now we can clear and write protect it, since + * after the TLB flush no one should write to it. + */ + memset(kasan_zero_page, 0, PAGE_SIZE); + for (i = 0; i < PTRS_PER_PTE; i++) { + pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO); + set_pte(&kasan_zero_pte[i], pte); + } + /* Flush TLBs again to be sure that write protection applied. */ + __flush_tlb_all(); + + init_task.kasan_depth = 0; pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 637ab34ed632..ddb2244b06a1 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -33,7 +33,7 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; - unsigned long page; /* location of the fault page */ + unsigned long addr; /* the requested address */ pteval_t old_presence; /* page presence prior to arming */ bool armed; @@ -70,9 +70,16 @@ unsigned int kmmio_count; static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); -static struct list_head *kmmio_page_list(unsigned long page) +static struct list_head *kmmio_page_list(unsigned long addr) { - return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + + if (!pte) + return NULL; + addr &= page_level_mask(l); + + return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)]; } /* Accessed per-cpu */ @@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) } /* You must be holding RCU read lock. */ -static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr) { struct list_head *head; struct kmmio_fault_page *f; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); - page &= PAGE_MASK; - head = kmmio_page_list(page); + if (!pte) + return NULL; + addr &= page_level_mask(l); + head = kmmio_page_list(addr); list_for_each_entry_rcu(f, head, list) { - if (f->page == page) + if (f->addr == addr) return f; } return NULL; @@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) static int clear_page_presence(struct kmmio_fault_page *f, bool clear) { unsigned int level; - pte_t *pte = lookup_address(f->page, &level); + pte_t *pte = lookup_address(f->addr, &level); if (!pte) { - pr_err("no pte for page 0x%08lx\n", f->page); + pr_err("no pte for addr 0x%08lx\n", f->addr); return -1; } @@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one(f->page); + __flush_tlb_one(f->addr); return 0; } @@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) int ret; WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); + pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", + f->addr, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), - f->page); + WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), + f->addr); f->armed = true; return ret; } @@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { int ret = clear_page_presence(f, false); WARN_ONCE(ret < 0, - KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr); f->armed = false; } @@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) struct kmmio_context *ctx; struct kmmio_fault_page *faultpage; int ret = 0; /* default to fault not handled */ + unsigned long page_base = addr; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + if (!pte) + return -EINVAL; + page_base &= page_level_mask(l); /* * Preemption is now disabled to prevent process switch during @@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) preempt_disable(); rcu_read_lock(); - faultpage = get_kmmio_fault_page(addr); + faultpage = get_kmmio_fault_page(page_base); if (!faultpage) { /* * Either this page fault is not caused by kmmio, or @@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - if (addr == ctx->addr) { + if (page_base == ctx->addr) { /* * A second fault on the same page means some other * condition needs handling by do_page_fault(), the @@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx->active++; ctx->fpage = faultpage; - ctx->probe = get_kmmio_probe(addr); + ctx->probe = get_kmmio_probe(page_base); ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - ctx->addr = addr; + ctx->addr = page_base; if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); @@ -354,12 +371,11 @@ out: } /* You must be holding kmmio_lock. */ -static int add_kmmio_fault_page(unsigned long page) +static int add_kmmio_fault_page(unsigned long addr) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (f) { if (!f->count) arm_kmmio_fault_page(f); @@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page) return -1; f->count = 1; - f->page = page; + f->addr = addr; if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } - list_add_rcu(&f->list, kmmio_page_list(f->page)); + list_add_rcu(&f->list, kmmio_page_list(f->addr)); return 0; } /* You must be holding kmmio_lock. */ -static void release_kmmio_fault_page(unsigned long page, +static void release_kmmio_fault_page(unsigned long addr, struct kmmio_fault_page **release_list) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (!f) return; @@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p) int ret = 0; unsigned long size = 0; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); + unsigned int l; + pte_t *pte; spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { ret = -EEXIST; goto out; } + + pte = lookup_address(p->addr, &l); + if (!pte) { + ret = -EINVAL; + goto out; + } + kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) pr_err("Unable to set page fault.\n"); - size += PAGE_SIZE; + size += page_level_size(l); } out: spin_unlock_irqrestore(&kmmio_lock, flags); @@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p) const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; + unsigned int l; + pte_t *pte; + + pte = lookup_address(p->addr, &l); + if (!pte) + return; spin_lock_irqsave(&kmmio_lock, flags); while (size < size_lim) { release_kmmio_fault_page(p->addr + size, &release_list); - size += PAGE_SIZE; + size += page_level_size(l); } list_del_rcu(&p->list); kmmio_count--; diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 96bd1e2bffaf..d2dc0438d654 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -71,12 +71,12 @@ unsigned long arch_mmap_rnd(void) if (mmap_is_ia32()) #ifdef CONFIG_COMPAT - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); #else - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); #endif else - rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } @@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd) } /* - * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 - * does, but not when emulating X86_32 - */ -static unsigned long mmap_legacy_base(unsigned long rnd) -{ - if (mmap_is_ia32()) - return TASK_UNMAPPED_BASE; - else - return TASK_UNMAPPED_BASE + rnd; -} - -/* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: */ @@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (current->flags & PF_RANDOMIZE) random_factor = arch_mmap_rnd(); - mm->mmap_legacy_base = mmap_legacy_base(random_factor); + mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor; if (mmap_is_legacy()) { mm->mmap_base = mm->mmap_legacy_base; diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index cca5174f86fe..009679ae5065 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -123,7 +123,7 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, break; } - if (regno > nr_registers) { + if (regno >= nr_registers) { WARN_ONCE(1, "decoded an instruction with an invalid register"); return -EINVAL; } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index c3b3f653ed0c..f70c1ff46125 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) return true; } +/* + * Mark all currently memblock-reserved physical memory (which covers the + * kernel's own memory ranges) as hot-unswappable. + */ static void __init numa_clear_kernel_node_hotplug(void) { - int i, nid; - nodemask_t numa_kernel_nodes = NODE_MASK_NONE; - unsigned long start, end; - struct memblock_region *r; + nodemask_t reserved_nodemask = NODE_MASK_NONE; + struct memblock_region *mb_region; + int i; /* + * We have to do some preprocessing of memblock regions, to + * make them suitable for reservation. + * * At this time, all memory regions reserved by memblock are - * used by the kernel. Set the nid in memblock.reserved will - * mark out all the nodes the kernel resides in. + * used by the kernel, but those regions are not split up + * along node boundaries yet, and don't necessarily have their + * node ID set yet either. + * + * So iterate over all memory known to the x86 architecture, + * and use those ranges to set the nid in memblock.reserved. + * This will split up the memblock regions along node + * boundaries and will set the node IDs as well. */ for (i = 0; i < numa_meminfo.nr_blks; i++) { - struct numa_memblk *mb = &numa_meminfo.blk[i]; + struct numa_memblk *mb = numa_meminfo.blk + i; + int ret; - memblock_set_node(mb->start, mb->end - mb->start, - &memblock.reserved, mb->nid); + ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid); + WARN_ON_ONCE(ret); } /* - * Mark all kernel nodes. + * Now go over all reserved memblock regions, to construct a + * node mask of all kernel reserved memory areas. * - * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo - * may not include all the memblock.reserved memory ranges because - * trim_snb_memory() reserves specific pages for Sandy Bridge graphics. + * [ Note, when booting with mem=nn[kMG] or in a kdump kernel, + * numa_meminfo might not include all memblock.reserved + * memory ranges, because quirks such as trim_snb_memory() + * reserve specific pages for Sandy Bridge graphics. ] */ - for_each_memblock(reserved, r) - if (r->nid != MAX_NUMNODES) - node_set(r->nid, numa_kernel_nodes); + for_each_memblock(reserved, mb_region) { + if (mb_region->nid != MAX_NUMNODES) + node_set(mb_region->nid, reserved_nodemask); + } - /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ + /* + * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory + * belonging to the reserved node mask. + * + * Note that this will include memory regions that reside + * on nodes that contain kernel memory - entire nodes + * become hot-unpluggable: + */ for (i = 0; i < numa_meminfo.nr_blks; i++) { - nid = numa_meminfo.blk[i].nid; - if (!node_isset(nid, numa_kernel_nodes)) - continue; + struct numa_memblk *mb = numa_meminfo.blk + i; - start = numa_meminfo.blk[i].start; - end = numa_meminfo.blk[i].end; + if (!node_isset(mb->nid, reserved_nodemask)) + continue; - memblock_clear_hotplug(start, end - start); + memblock_clear_hotplug(mb->start, mb->end - mb->start); } } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2440814b0069..007ebe2d8157 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -283,7 +283,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, __pa_symbol(__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* * Once the kernel maps the text as RO (kernel_set_to_readonly is set), * kernel text mappings for the large page aligned text, rodata sections @@ -419,24 +419,30 @@ pmd_t *lookup_pmd_address(unsigned long address) phys_addr_t slow_virt_to_phys(void *__virt_addr) { unsigned long virt_addr = (unsigned long)__virt_addr; - unsigned long phys_addr, offset; + phys_addr_t phys_addr; + unsigned long offset; enum pg_level level; pte_t *pte; pte = lookup_address(virt_addr, &level); BUG_ON(!pte); + /* + * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t + * before being left-shifted PAGE_SHIFT bits -- this trick is to + * make 32-PAE kernel work correctly. + */ switch (level) { case PG_LEVEL_1G: - phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; offset = virt_addr & ~PUD_PAGE_MASK; break; case PG_LEVEL_2M: - phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; offset = virt_addr & ~PMD_PAGE_MASK; break; default: - phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; offset = virt_addr & ~PAGE_MASK; } @@ -1122,8 +1128,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, /* * Ignore all non primary paths. */ - if (!primary) + if (!primary) { + cpa->numpages = 1; return 0; + } /* * Ignore the NULL PTE for kernel identity mapping, as it is expected diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index fa14b04b9f64..faec01e7a17d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -943,7 +943,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, return -EINVAL; } - *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | cachemode2protval(pcm)); return 0; @@ -959,7 +959,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, /* Set prot based on lookup */ pcm = lookup_memtype(pfn_t_to_phys(pfn)); - *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | cachemode2protval(pcm)); return 0; diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 92e2eacb3321..8bea84724a7d 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -4,6 +4,7 @@ #include <asm/pgtable.h> #include <asm/proto.h> +#include <asm/cpufeature.h> static int disable_nx; @@ -31,9 +32,8 @@ early_param("noexec", noexec_setup); void x86_configure_nx(void) { - if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) - __supported_pte_mask |= _PAGE_NX; - else + /* If disable_nx is set, clear NX on all new mappings going forward. */ + if (disable_nx) __supported_pte_mask &= ~_PAGE_NX; } diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 50d86c0e9ba4..660a83c8287b 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -24,7 +24,6 @@ #include <asm/nmi.h> #include <asm/apic.h> #include <asm/processor.h> -#include <asm/cpufeature.h> #include "op_x86_model.h" #include "op_counter.h" diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2879efc73a96..d34b5118b4e8 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -711,28 +711,22 @@ int pcibios_add_device(struct pci_dev *dev) return 0; } -int pcibios_alloc_irq(struct pci_dev *dev) +int pcibios_enable_device(struct pci_dev *dev, int mask) { - /* - * If the PCI device was already claimed by core code and has - * MSI enabled, probing of the pcibios IRQ will overwrite - * dev->irq. So bail out if MSI is already enabled. - */ - if (pci_dev_msi_enabled(dev)) - return -EBUSY; + int err; - return pcibios_enable_irq(dev); -} + if ((err = pci_enable_resources(dev, mask)) < 0) + return err; -void pcibios_free_irq(struct pci_dev *dev) -{ - if (pcibios_disable_irq) - pcibios_disable_irq(dev); + if (!pci_dev_msi_enabled(dev)) + return pcibios_enable_irq(dev); + return 0; } -int pcibios_enable_device(struct pci_dev *dev, int mask) +void pcibios_disable_device (struct pci_dev *dev) { - return pci_enable_resources(dev, mask); + if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq) + pcibios_disable_irq(dev); } int pci_ext_cfg_avail(void) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 0d24e7c10145..8b93e634af84 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -215,7 +215,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) int polarity; int ret; - if (pci_has_managed_irq(dev)) + if (dev->irq_managed && dev->irq > 0) return 0; switch (intel_mid_identify_cpu()) { @@ -256,13 +256,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (pci_has_managed_irq(dev)) { + if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed && + dev->irq > 0) { mp_unmap_irq(dev->irq); dev->irq_managed = 0; - /* - * Don't reset dev->irq here, otherwise - * intel_mid_pci_irq_enable() will fail on next call. - */ } } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 32e70343e6fd..9bd115484745 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1202,7 +1202,7 @@ static int pirq_enable_irq(struct pci_dev *dev) struct pci_dev *temp_dev; int irq; - if (pci_has_managed_irq(dev)) + if (dev->irq_managed && dev->irq > 0) return 0; irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, @@ -1230,7 +1230,8 @@ static int pirq_enable_irq(struct pci_dev *dev) } dev = temp_dev; if (irq >= 0) { - pci_set_managed_irq(dev, irq); + dev->irq_managed = 1; + dev->irq = irq; dev_info(&dev->dev, "PCI->APIC IRQ transform: " "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); return 0; @@ -1256,10 +1257,24 @@ static int pirq_enable_irq(struct pci_dev *dev) return 0; } +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + static void pirq_disable_irq(struct pci_dev *dev) { - if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) { + if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) && + dev->irq_managed && dev->irq) { mp_unmap_irq(dev->irq); - pci_reset_managed_irq(dev); + dev->irq = 0; + dev->irq_managed = 0; } } diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index ff31ab464213..beac4dfdade6 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -196,7 +196,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 0; error: - dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); + if (ret == -ENOSYS) + dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); + else if (ret) + dev_err(&dev->dev, "Xen PCI frontend error: %d!\n", ret); free: kfree(v); return ret; diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 2d66db8f80f9..ed30e79347e8 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -131,6 +131,27 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) EXPORT_SYMBOL_GPL(efi_query_variable_store); /* + * Helper function for efi_reserve_boot_services() to figure out if we + * can free regions in efi_free_boot_services(). + * + * Use this function to ensure we do not free regions owned by somebody + * else. We must only reserve (and then free) regions: + * + * - Not within any part of the kernel + * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc) + */ +static bool can_free_region(u64 start, u64 size) +{ + if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) + return false; + + if (!e820_all_mapped(start, start+size, E820_RAM)) + return false; + + return true; +} + +/* * The UEFI specification makes it clear that the operating system is free to do * whatever it wants with boot services code after ExitBootServices() has been * called. Ignoring this recommendation a significant bunch of EFI implementations @@ -147,26 +168,50 @@ void __init efi_reserve_boot_services(void) efi_memory_desc_t *md = p; u64 start = md->phys_addr; u64 size = md->num_pages << EFI_PAGE_SHIFT; + bool already_reserved; if (md->type != EFI_BOOT_SERVICES_CODE && md->type != EFI_BOOT_SERVICES_DATA) continue; - /* Only reserve where possible: - * - Not within any already allocated areas - * - Not over any memory area (really needed, if above?) - * - Not within any part of the kernel - * - Not the bios reserved area - */ - if ((start + size > __pa_symbol(_text) - && start <= __pa_symbol(_end)) || - !e820_all_mapped(start, start+size, E820_RAM) || - memblock_is_region_reserved(start, size)) { - /* Could not reserve, skip it */ - md->num_pages = 0; - memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", - start, start+size-1); - } else + + already_reserved = memblock_is_region_reserved(start, size); + + /* + * Because the following memblock_reserve() is paired + * with free_bootmem_late() for this region in + * efi_free_boot_services(), we must be extremely + * careful not to reserve, and subsequently free, + * critical regions of memory (like the kernel image) or + * those regions that somebody else has already + * reserved. + * + * A good example of a critical region that must not be + * freed is page zero (first 4Kb of memory), which may + * contain boot services code/data but is marked + * E820_RESERVED by trim_bios_range(). + */ + if (!already_reserved) { memblock_reserve(start, size); + + /* + * If we are the first to reserve the region, no + * one else cares about it. We own it and can + * free it later. + */ + if (can_free_region(start, size)) + continue; + } + + /* + * We don't own the region. We must not free it. + * + * Setting this bit for a boot services region really + * doesn't make sense as far as the firmware is + * concerned, but it does provide us with a way to tag + * those regions that must not be paired with + * free_bootmem_late(). + */ + md->attribute |= EFI_MEMORY_RUNTIME; } } @@ -183,8 +228,8 @@ void __init efi_free_boot_services(void) md->type != EFI_BOOT_SERVICES_DATA) continue; - /* Could not reserve boot area */ - if (!size) + /* Do not free, someone else owns it: */ + if (md->attribute & EFI_MEMORY_RUNTIME) continue; free_bootmem_late(start, size); diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index 76b6632d3143..1865c196f136 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c @@ -21,7 +21,7 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/leds.h> #include <linux/platform_device.h> #include <linux/gpio.h> @@ -35,6 +35,11 @@ #define BIOS_SIGNATURE_COREBOOT 0x500 #define BIOS_REGION_SIZE 0x10000 +/* + * This driver is not modular, but to keep back compatibility + * with existing use cases, continuing with module_param is + * the easiest way forward. + */ static bool force = 0; module_param(force, bool, 0444); /* FIXME: Award bios is not automatically detected as Alix platform */ @@ -192,9 +197,4 @@ static int __init alix_init(void) return 0; } - -module_init(alix_init); - -MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>"); -MODULE_DESCRIPTION("PCEngines ALIX System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(alix_init); diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c index aa733fba2471..4fcdb91318a0 100644 --- a/arch/x86/platform/geode/geos.c +++ b/arch/x86/platform/geode/geos.c @@ -19,7 +19,6 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> #include <linux/leds.h> #include <linux/platform_device.h> #include <linux/gpio.h> @@ -120,9 +119,4 @@ static int __init geos_init(void) return 0; } - -module_init(geos_init); - -MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); -MODULE_DESCRIPTION("Traverse Technologies Geos System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(geos_init); diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 927e38c0089f..a2f6b982a729 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -20,7 +20,6 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/string.h> -#include <linux/module.h> #include <linux/leds.h> #include <linux/platform_device.h> #include <linux/gpio.h> @@ -146,9 +145,4 @@ static int __init net5501_init(void) return 0; } - -module_init(net5501_init); - -MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); -MODULE_DESCRIPTION("Soekris net5501 System Setup"); -MODULE_LICENSE("GPL"); +device_initcall(net5501_init); diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index 23381d2174ae..1eb47b6298c2 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c @@ -52,10 +52,7 @@ static unsigned long __init mfld_calibrate_tsc(void) /* mark tsc clocksource as reliable */ set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); - if (fast_calibrate) - return fast_calibrate; - - return 0; + return fast_calibrate; } static void __init penwell_arch_setup(void) diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c index aaca91753d32..bd1adc621781 100644 --- a/arch/x86/platform/intel-mid/mrfl.c +++ b/arch/x86/platform/intel-mid/mrfl.c @@ -81,10 +81,7 @@ static unsigned long __init tangier_calibrate_tsc(void) /* mark tsc clocksource as reliable */ set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); - if (fast_calibrate) - return fast_calibrate; - - return 0; + return fast_calibrate; } static void __init tangier_arch_setup(void) diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index c61b6c332e97..17d6d2296e4d 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -1,5 +1,5 @@ /** - * imr.c + * imr.c -- Intel Isolated Memory Region driver * * Copyright(c) 2013 Intel Corporation. * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> @@ -31,7 +31,6 @@ #include <linux/debugfs.h> #include <linux/init.h> #include <linux/mm.h> -#include <linux/module.h> #include <linux/types.h> struct imr_device { @@ -135,11 +134,9 @@ static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) * @idev: pointer to imr_device structure. * @imr_id: IMR entry to write. * @imr: IMR structure representing address and access masks. - * @lock: indicates if the IMR lock bit should be applied. * @return: 0 on success or error code passed from mbi_iosf on failure. */ -static int imr_write(struct imr_device *idev, u32 imr_id, - struct imr_regs *imr, bool lock) +static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) { unsigned long flags; u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; @@ -163,15 +160,6 @@ static int imr_write(struct imr_device *idev, u32 imr_id, if (ret) goto failed; - /* Lock bit must be set separately to addr_lo address bits. */ - if (lock) { - imr->addr_lo |= IMR_LOCK; - ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, - reg - IMR_NUM_REGS, imr->addr_lo); - if (ret) - goto failed; - } - local_irq_restore(flags); return 0; failed: @@ -270,17 +258,6 @@ static int imr_debugfs_register(struct imr_device *idev) } /** - * imr_debugfs_unregister - unregister debugfs hooks. - * - * @idev: pointer to imr_device structure. - * @return: - */ -static void imr_debugfs_unregister(struct imr_device *idev) -{ - debugfs_remove(idev->file); -} - -/** * imr_check_params - check passed address range IMR alignment and non-zero size * * @base: base address of intended IMR. @@ -334,11 +311,10 @@ static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr) * @size: physical size of region in bytes must be aligned to 1KiB. * @read_mask: read access mask. * @write_mask: write access mask. - * @lock: indicates whether or not to permanently lock this region. * @return: zero on success or negative value indicating error. */ int imr_add_range(phys_addr_t base, size_t size, - unsigned int rmask, unsigned int wmask, bool lock) + unsigned int rmask, unsigned int wmask) { phys_addr_t end; unsigned int i; @@ -411,7 +387,7 @@ int imr_add_range(phys_addr_t base, size_t size, imr.rmask = rmask; imr.wmask = wmask; - ret = imr_write(idev, reg, &imr, lock); + ret = imr_write(idev, reg, &imr); if (ret < 0) { /* * In the highly unlikely event iosf_mbi_write failed @@ -422,7 +398,7 @@ int imr_add_range(phys_addr_t base, size_t size, imr.addr_hi = 0; imr.rmask = IMR_READ_ACCESS_ALL; imr.wmask = IMR_WRITE_ACCESS_ALL; - imr_write(idev, reg, &imr, false); + imr_write(idev, reg, &imr); } failed: mutex_unlock(&idev->lock); @@ -518,7 +494,7 @@ static int __imr_remove_range(int reg, phys_addr_t base, size_t size) imr.rmask = IMR_READ_ACCESS_ALL; imr.wmask = IMR_WRITE_ACCESS_ALL; - ret = imr_write(idev, reg, &imr, false); + ret = imr_write(idev, reg, &imr); failed: mutex_unlock(&idev->lock); @@ -592,14 +568,14 @@ static void __init imr_fixup_memmap(struct imr_device *idev) end = (unsigned long)__end_rodata - 1; /* - * Setup a locked IMR around the physical extent of the kernel + * Setup an unlocked IMR around the physical extent of the kernel * from the beginning of the .text secton to the end of the * .rodata section as one physically contiguous block. * * We don't round up @size since it is already PAGE_SIZE aligned. * See vmlinux.lds.S for details. */ - ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true); + ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); if (ret < 0) { pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n", size / 1024, start, end); @@ -614,7 +590,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = { { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ {} }; -MODULE_DEVICE_TABLE(x86cpu, imr_ids); /** * imr_init - entry point for IMR driver. @@ -640,22 +615,4 @@ static int __init imr_init(void) imr_fixup_memmap(idev); return 0; } - -/** - * imr_exit - exit point for IMR code. - * - * Deregisters debugfs, leave IMR state as-is. - * - * return: - */ -static void __exit imr_exit(void) -{ - imr_debugfs_unregister(&imr_dev); -} - -module_init(imr_init); -module_exit(imr_exit); - -MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>"); -MODULE_DESCRIPTION("Intel Isolated Memory Region driver"); -MODULE_LICENSE("Dual BSD/GPL"); +device_initcall(imr_init); diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c index 278e4da4222f..f5bad40936ac 100644 --- a/arch/x86/platform/intel-quark/imr_selftest.c +++ b/arch/x86/platform/intel-quark/imr_selftest.c @@ -1,5 +1,5 @@ /** - * imr_selftest.c + * imr_selftest.c -- Intel Isolated Memory Region self-test driver * * Copyright(c) 2013 Intel Corporation. * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> @@ -15,7 +15,6 @@ #include <asm/imr.h> #include <linux/init.h> #include <linux/mm.h> -#include <linux/module.h> #include <linux/types.h> #define SELFTEST KBUILD_MODNAME ": " @@ -61,30 +60,30 @@ static void __init imr_self_test(void) int ret; /* Test zero zero. */ - ret = imr_add_range(0, 0, 0, 0, false); + ret = imr_add_range(0, 0, 0, 0); imr_self_test_result(ret < 0, "zero sized IMR\n"); /* Test exact overlap. */ - ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); + ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); /* Test overlap with base inside of existing. */ base += size - IMR_ALIGN; - ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); + ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); /* Test overlap with end inside of existing. */ base -= size + IMR_ALIGN * 2; - ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); + ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */ ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL, - IMR_WRITE_ACCESS_ALL, false); + IMR_WRITE_ACCESS_ALL); imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n"); /* Test that a 1 KiB IMR @ zero with CPU only will work. */ - ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false); + ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU); imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n"); if (ret >= 0) { ret = imr_remove_range(0, IMR_ALIGN); @@ -93,8 +92,7 @@ static void __init imr_self_test(void) /* Test 2 KiB works. */ size = IMR_ALIGN * 2; - ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, - IMR_WRITE_ACCESS_ALL, false); + ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL); imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n"); if (ret >= 0) { ret = imr_remove_range(0, size); @@ -106,7 +104,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = { { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ {} }; -MODULE_DEVICE_TABLE(x86cpu, imr_ids); /** * imr_self_test_init - entry point for IMR driver. @@ -125,13 +122,4 @@ static int __init imr_self_test_init(void) * * return: */ -static void __exit imr_self_test_exit(void) -{ -} - -module_init(imr_self_test_init); -module_exit(imr_self_test_exit); - -MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>"); -MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver"); -MODULE_LICENSE("Dual BSD/GPL"); +device_initcall(imr_self_test_init); diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 174781a404ff..00c319048d52 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -3,7 +3,7 @@ #include <asm/asm.h> #include <asm/segment.h> -#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> #include <asm/cmpxchg.h> #include <asm/nops.h> diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c index 8502ad30e61b..5adb6a2fd117 100644 --- a/arch/x86/um/os-Linux/task_size.c +++ b/arch/x86/um/os-Linux/task_size.c @@ -109,7 +109,7 @@ unsigned long os_get_top_address(void) exit(1); } - printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT); + printf("0x%lx\n", bottom << UM_KERN_PAGE_SHIFT); printf("Locating the top of the address space ... "); fflush(stdout); @@ -134,7 +134,7 @@ out: exit(1); } top <<= UM_KERN_PAGE_SHIFT; - printf("0x%x\n", top); + printf("0x%lx\n", top); return top; } diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 439c0994b696..bfce503dffae 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -25,11 +25,11 @@ #define old_mmap sys_old_mmap -#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; +#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; #include <asm/syscalls_32.h> #undef __SYSCALL_I386 -#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, +#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym, extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index b74ea6c2c0e7..f306413d3eb6 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -35,14 +35,11 @@ #define stub_execveat sys_execveat #define stub_rt_sigreturn sys_rt_sigreturn -#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) -#define __SYSCALL_X32(nr, sym, compat) /* Not supported */ - -#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; #include <asm/syscalls_64.h> #undef __SYSCALL_64 -#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym, +#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym, extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index ce7e3607a870..470564bbd08e 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -9,14 +9,12 @@ #include <asm/types.h> #ifdef __i386__ -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls[] = { #include <asm/syscalls_32.h> }; #else -#define __SYSCALL_64(nr, sym, compat) [nr] = 1, -#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, -#define __SYSCALL_X32(nr, sym, compat) /* Not supported */ +#define __SYSCALL_64(nr, sym, qual) [nr] = 1, static char syscalls[] = { #include <asm/syscalls_64.h> }; diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index d5644bbe8cba..9fd24846d094 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -14,26 +14,24 @@ int fb_is_primary_device(struct fb_info *info) { struct device *device = info->device; - struct pci_dev *pci_dev = NULL; struct pci_dev *default_device = vga_default_device(); - struct resource *res = NULL; + struct pci_dev *pci_dev; + struct resource *res; - if (device) - pci_dev = to_pci_dev(device); - - if (!pci_dev) + if (!device || !dev_is_pci(device)) return 0; + pci_dev = to_pci_dev(device); + if (default_device) { if (pci_dev == default_device) return 1; - else - return 0; + return 0; } - res = &pci_dev->resource[PCI_ROM_RESOURCE]; + res = pci_dev->resource + PCI_ROM_RESOURCE; - if (res && res->flags & IORESOURCE_ROM_SHADOW) + if (res->flags & IORESOURCE_ROM_SHADOW) return 1; return 0; diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 724a08740a04..9466354d3e49 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -11,7 +11,7 @@ #include "pmu.h" /* x86_pmu.handle_irq definition */ -#include "../kernel/cpu/perf_event.h" +#include "../events/perf_event.h" #define XENPMU_IRQ_PROCESSING 1 struct xenpmu { diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 3f4ebf0261f2..3c6d17fd423a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -112,7 +112,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu) xen_pvh_secondary_vcpu_init(cpu); #endif cpu_bringup(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } static void xen_smp_intr_free(unsigned int cpu) diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 4d02e38514f5..fc4ad21a5ed4 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -157,7 +157,7 @@ void secondary_start_kernel(void) complete(&cpu_running); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } static void mx_cpu_start(void *p) |