From 24633d901ea44fe99bc9a2d01a3881fa097b78b3 Mon Sep 17 00:00:00 2001 From: Vaibhav Shankar Date: Thu, 13 Aug 2020 19:22:34 -0700 Subject: perf/x86/intel/uncore: Add BW counters for GT, IA and IO breakdown Linux only has support to read total DDR reads and writes. Here we add support to enable bandwidth breakdown-GT, IA and IO. Breakdown of BW is important to debug and optimize memory access. This can also be used for telemetry and improving the system software.The offsets for GT, IA and IO are added and these free running counters can be accessed via MMIO space. The BW breakdown can be measured using the following cmd: perf stat -e uncore_imc/gt_requests/,uncore_imc/ia_requests/,uncore_imc/io_requests/ 30.57 MiB uncore_imc/gt_requests/ 1346.13 MiB uncore_imc/ia_requests/ 190.97 MiB uncore_imc/io_requests/ 5.984572733 seconds time elapsed BW/s = _requests/time elapsed Signed-off-by: Vaibhav Shankar Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200814022234.23605-1-vaibhav.shankar@intel.com --- arch/x86/events/intel/uncore_snb.c | 52 +++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index cb94ba86efd2..6a4ca27b2c9e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -390,6 +390,18 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(gt_requests, "event=0x03"), + INTEL_UNCORE_EVENT_DESC(gt_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(gt_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(ia_requests, "event=0x04"), + INTEL_UNCORE_EVENT_DESC(ia_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ia_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(io_requests, "event=0x05"), + INTEL_UNCORE_EVENT_DESC(io_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(io_requests.unit, "MiB"), + { /* end: all zeroes */ }, }; @@ -405,13 +417,35 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { #define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 #define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE +/* BW break down- legacy counters */ +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS 0x3 +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE 0x5040 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS 0x4 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE 0x5044 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS 0x5 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE 0x5048 + enum perf_snb_uncore_imc_freerunning_types { - SNB_PCI_UNCORE_IMC_DATA = 0, + SNB_PCI_UNCORE_IMC_DATA_READS = 0, + SNB_PCI_UNCORE_IMC_DATA_WRITES, + SNB_PCI_UNCORE_IMC_GT_REQUESTS, + SNB_PCI_UNCORE_IMC_IA_REQUESTS, + SNB_PCI_UNCORE_IMC_IO_REQUESTS, + SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, }; static struct freerunning_counters snb_uncore_imc_freerunning[] = { - [SNB_PCI_UNCORE_IMC_DATA] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IA_REQUESTS] = { SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IO_REQUESTS] = { SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, }; static struct attribute *snb_uncore_imc_formats_attr[] = { @@ -525,6 +559,18 @@ static int snb_uncore_imc_event_init(struct perf_event *event) base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; idx = UNCORE_PMC_IDX_FREERUNNING; break; + case SNB_UNCORE_PCI_IMC_GT_REQUESTS: + base = SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IA_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IO_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; default: return -EINVAL; } @@ -598,7 +644,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { static struct intel_uncore_type snb_uncore_imc = { .name = "imc", - .num_counters = 2, + .num_counters = 5, .num_boxes = 1, .num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, .mmio_map_size = SNB_UNCORE_PCI_IMC_MAP_SIZE, -- cgit v1.2.3 From 030a2c689fb46e1690f7ded8b194bab7678efb28 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 3 Aug 2020 13:56:00 +1000 Subject: powerpc: Fix P10 PVR revision in /proc/cpuinfo for SMT4 cores On POWER10 bit 12 in the PVR indicates if the core is SMT4 or SMT8. Bit 12 is set for SMT4. Without this patch, /proc/cpuinfo on a SMT4 DD1 POWER10 looks like this: cpu : POWER10, altivec supported revision : 17.0 (pvr 0080 1100) Fixes: a3ea40d5c736 ("powerpc: Add POWER10 architected mode") Cc: stable@vger.kernel.org # v5.8 Signed-off-by: Michael Neuling Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200803035600.1820371-1-mikey@neuling.org --- arch/powerpc/kernel/setup-common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b198b0ff25bc..808ec9fab605 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -311,6 +311,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) min = pvr & 0xFF; break; case 0x004e: /* POWER9 bits 12-15 give chip type */ + case 0x0080: /* POWER10 bit 12 gives SMT8/4 */ maj = (pvr >> 8) & 0x0F; min = pvr & 0xFF; break; -- cgit v1.2.3 From 781fa4811d95314c1965c0c3337c9ac36ef26093 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Fri, 7 Aug 2020 06:05:00 -0400 Subject: powerpc/perf: Add support for outputting extended regs in perf intr_regs Add support for perf extended register capability in powerpc. The capability flag PERF_PMU_CAP_EXTENDED_REGS, is used to indicate the PMU which support extended registers. The generic code define the mask of extended registers as 0 for non supported architectures. Patch adds extended regs support for power9 platform by exposing MMCR0, MMCR1 and MMCR2 registers. REG_RESERVED mask needs update to include extended regs. PERF_REG_EXTENDED_MASK, contains mask value of the supported registers, is defined at runtime in the kernel based on platform since the supported registers may differ from one processor version to another and hence the MASK value. With the patch: available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 nip msr orig_r3 ctr link xer ccr softe trap dar dsisr sier mmcra mmcr0 mmcr1 mmcr2 PERF_RECORD_SAMPLE(IP, 0x1): 4784/4784: 0 period: 1 addr: 0 ... intr regs: mask 0xffffffffffff ABI 64-bit .... r0 0xc00000000012b77c .... r1 0xc000003fe5e03930 .... r2 0xc000000001b0e000 .... r3 0xc000003fdcddf800 .... r4 0xc000003fc7880000 .... r5 0x9c422724be .... r6 0xc000003fe5e03908 .... r7 0xffffff63bddc8706 .... r8 0x9e4 .... r9 0x0 .... r10 0x1 .... r11 0x0 .... r12 0xc0000000001299c0 .... r13 0xc000003ffffc4800 .... r14 0x0 .... r15 0x7fffdd8b8b00 .... r16 0x0 .... r17 0x7fffdd8be6b8 .... r18 0x7e7076607730 .... r19 0x2f .... r20 0xc00000001fc26c68 .... r21 0xc0002041e4227e00 .... r22 0xc00000002018fb60 .... r23 0x1 .... r24 0xc000003ffec4d900 .... r25 0x80000000 .... r26 0x0 .... r27 0x1 .... r28 0x1 .... r29 0xc000000001be1260 .... r30 0x6008010 .... r31 0xc000003ffebb7218 .... nip 0xc00000000012b910 .... msr 0x9000000000009033 .... orig_r3 0xc00000000012b86c .... ctr 0xc0000000001299c0 .... link 0xc00000000012b77c .... xer 0x0 .... ccr 0x28002222 .... softe 0x1 .... trap 0xf00 .... dar 0x0 .... dsisr 0x80000000000 .... sier 0x0 .... mmcra 0x80000000000 .... mmcr0 0x82008090 .... mmcr1 0x1e000000 .... mmcr2 0x0 ... thread: perf:4784 Signed-off-by: Anju T Sudhakar Signed-off-by: Athira Rajeev Tested-by: Nageswara R Sastry Reviewed-by: Madhavan Srinivasan Reviewed-by: Kajol Jain Reviewed-and-tested-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1596794701-23530-2-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/asm/perf_event.h | 3 +++ arch/powerpc/include/asm/perf_event_server.h | 5 ++++ arch/powerpc/include/uapi/asm/perf_regs.h | 14 +++++++++++- arch/powerpc/perf/core-book3s.c | 1 + arch/powerpc/perf/perf_regs.c | 34 +++++++++++++++++++++++++--- arch/powerpc/perf/power9-pmu.c | 6 +++++ 6 files changed, 59 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 1e8b2e1ec1db..daec64d41b44 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -40,4 +40,7 @@ static inline bool is_sier_available(void) { return false; } /* To support perf_regs sier update */ extern bool is_sier_available(void); +/* To define perf extended regs mask value */ +extern u64 PERF_REG_EXTENDED_MASK; +#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK #endif diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 86c9eb064b22..f6acabb6c9be 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -62,6 +62,11 @@ struct power_pmu { int *blacklist_ev; /* BHRB entries in the PMU */ int bhrb_nr; + /* + * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if + * the pmu supports extended perf regs capability + */ + int capabilities; }; /* diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index f599064dd8dc..225c64c56813 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -48,6 +48,18 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_DSISR, PERF_REG_POWERPC_SIER, PERF_REG_POWERPC_MMCRA, - PERF_REG_POWERPC_MAX, + /* Extended registers */ + PERF_REG_POWERPC_MMCR0, + PERF_REG_POWERPC_MMCR1, + PERF_REG_POWERPC_MMCR2, + /* Max regs without the extended regs */ + PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; + +#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) + +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ +#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) + +#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 78fe34986594..00038650a007 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2323,6 +2323,7 @@ int register_power_pmu(struct power_pmu *pmu) pmu->name); power_pmu.attr_groups = ppmu->attr_groups; + power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS); #ifdef MSR_HV /* diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index a213a0aa5d25..9301e6852d53 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -13,9 +13,11 @@ #include #include +u64 PERF_REG_EXTENDED_MASK; + #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) -#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1)) +#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK)) static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]), @@ -69,10 +71,26 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr), }; +/* Function to return the extended register values */ +static u64 get_ext_regs_value(int idx) +{ + switch (idx) { + case PERF_REG_POWERPC_MMCR0: + return mfspr(SPRN_MMCR0); + case PERF_REG_POWERPC_MMCR1: + return mfspr(SPRN_MMCR1); + case PERF_REG_POWERPC_MMCR2: + return mfspr(SPRN_MMCR2); + default: return 0; + } +} + u64 perf_reg_value(struct pt_regs *regs, int idx) { - if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX)) - return 0; + u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + perf_reg_extended_max = PERF_REG_MAX_ISA_300; if (idx == PERF_REG_POWERPC_SIER && (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) || @@ -85,6 +103,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) IS_ENABLED(CONFIG_PPC32))) return 0; + if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max) + return get_ext_regs_value(idx); + + /* + * If the idx is referring to value beyond the + * supported registers, return 0 with a warning + */ + if (WARN_ON_ONCE(idx >= perf_reg_extended_max)) + return 0; + return regs_get_register(regs, pt_regs_offset[idx]); } diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 05dae38b969a..2a57e93a79dc 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -90,6 +90,8 @@ enum { #define POWER9_MMCRA_IFM3 0x00000000C0000000UL #define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL +extern u64 PERF_REG_EXTENDED_MASK; + /* Nasty Power9 specific hack */ #define PVR_POWER9_CUMULUS 0x00002000 @@ -434,6 +436,7 @@ static struct power_pmu power9_pmu = { .cache_events = &power9_cache_events, .attr_groups = power9_pmu_attr_groups, .bhrb_nr = 32, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS, }; int init_power9_pmu(void) @@ -457,6 +460,9 @@ int init_power9_pmu(void) } } + /* Set the PERF_REG_EXTENDED_MASK here */ + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300; + rc = register_power_pmu(&power9_pmu); if (rc) return rc; -- cgit v1.2.3 From d735599a069f6936c1392e07075c34a19bda949a Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 7 Aug 2020 06:05:01 -0400 Subject: powerpc/perf: Add extended regs support for power10 platform Include capability flag PERF_PMU_CAP_EXTENDED_REGS for power10 and expose MMCR3, SIER2, SIER3 registers as part of extended regs. Also introduce PERF_REG_PMU_MASK_31 to define extended mask value at runtime for power10. Suggested-by: Ryan Grimm Signed-off-by: Athira Rajeev Tested-by: Nageswara R Sastry Reviewed-by: Kajol Jain Reviewed-and-tested-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1596794701-23530-3-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/uapi/asm/perf_regs.h | 6 ++++++ arch/powerpc/perf/perf_regs.c | 12 +++++++++++- arch/powerpc/perf/power10-pmu.c | 6 ++++++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index 225c64c56813..bdf5f10f8b9f 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -52,6 +52,9 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_MMCR0, PERF_REG_POWERPC_MMCR1, PERF_REG_POWERPC_MMCR2, + PERF_REG_POWERPC_MMCR3, + PERF_REG_POWERPC_SIER2, + PERF_REG_POWERPC_SIER3, /* Max regs without the extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; @@ -60,6 +63,9 @@ enum perf_event_powerpc_regs { /* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ #define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ +#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) #define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) +#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 9301e6852d53..8e53f2fc3fe0 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -81,6 +81,14 @@ static u64 get_ext_regs_value(int idx) return mfspr(SPRN_MMCR1); case PERF_REG_POWERPC_MMCR2: return mfspr(SPRN_MMCR2); +#ifdef CONFIG_PPC64 + case PERF_REG_POWERPC_MMCR3: + return mfspr(SPRN_MMCR3); + case PERF_REG_POWERPC_SIER2: + return mfspr(SPRN_SIER2); + case PERF_REG_POWERPC_SIER3: + return mfspr(SPRN_SIER3); +#endif default: return 0; } } @@ -89,7 +97,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) { u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + perf_reg_extended_max = PERF_REG_MAX_ISA_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) perf_reg_extended_max = PERF_REG_MAX_ISA_300; if (idx == PERF_REG_POWERPC_SIER && diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index f7cff7f36a1c..83148656b524 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -87,6 +87,8 @@ #define POWER10_MMCRA_IFM3 0x00000000C0000000UL #define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL +extern u64 PERF_REG_EXTENDED_MASK; + /* Table of alternatives, sorted by column 0 */ static const unsigned int power10_event_alternatives[][MAX_ALT] = { { PM_RUN_CYC_ALT, PM_RUN_CYC }, @@ -397,6 +399,7 @@ static struct power_pmu power10_pmu = { .cache_events = &power10_cache_events, .attr_groups = power10_pmu_attr_groups, .bhrb_nr = 32, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS, }; int init_power10_pmu(void) @@ -408,6 +411,9 @@ int init_power10_pmu(void) strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) return -ENODEV; + /* Set the PERF_REG_EXTENDED_MASK here */ + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31; + rc = register_power_pmu(&power10_pmu); if (rc) return rc; -- cgit v1.2.3 From 327da008e65a25b8206b36b7fc0c9e4edbb36a58 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 17 Aug 2020 06:26:18 +0530 Subject: powerpc: Add POWER10 raw mode cputable entry Add a raw mode cputable entry for POWER10. Copies most of the fields from commit a3ea40d5c736 ("powerpc: Add POWER10 architected mode") except for oprofile_cpu_type, machine_check_early, pvr_mask and pvr_mask fields. On bare metal systems we use DT CPU features, which doesn't need a cputable entry. But in VMs we still rely on the raw cputable entry to set the correct values for the PMU related fields. Fixes: a3ea40d5c736 ("powerpc: Add POWER10 architected mode") Signed-off-by: Madhavan Srinivasan [mpe: Reorder vs cleanup patch and add Fixes tag] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200817005618.3305028-2-maddy@linux.ibm.com --- arch/powerpc/kernel/cputable.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 3d406a9626e8..93b986660664 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -75,6 +75,7 @@ extern void __restore_cpu_power10(void); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); +extern long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -542,6 +543,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power10", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .machine_check_early = __machine_check_early_realmode_p10, + .platform = "power10", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, -- cgit v1.2.3 From 388692e943a58f28aac0fe83e75f5994da9ff8a1 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 17 Aug 2020 06:26:17 +0530 Subject: powerpc/kernel: Cleanup machine check function declarations __machine_check_early_realmode_p*() are currently declared as extern in cputable.c and because of this when compiled with "C=1" (which enables semantic checker) produces these warnings. CHECK arch/powerpc/kernel/mce_power.c arch/powerpc/kernel/mce_power.c:709:6: warning: symbol '__machine_check_early_realmode_p7' was not declared. Should it be static? arch/powerpc/kernel/mce_power.c:717:6: warning: symbol '__machine_check_early_realmode_p8' was not declared. Should it be static? arch/powerpc/kernel/mce_power.c:722:6: warning: symbol '__machine_check_early_realmode_p9' was not declared. Should it be static? arch/powerpc/kernel/mce_power.c:740:6: warning: symbol '__machine_check_early_realmode_p10' was not declared. Should it be static? Patch here moves the declaration to asm/mce.h and includes the same in cputable.c Fixes: ae744f3432d3 ("powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power8") Fixes: 7b9f71f974a1 ("powerpc/64s: POWER9 machine check handler") Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200817005618.3305028-1-maddy@linux.ibm.com --- arch/powerpc/include/asm/cputable.h | 5 +++++ arch/powerpc/include/asm/mce.h | 7 +++++++ arch/powerpc/kernel/cputable.c | 4 ---- arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ---- 4 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index fdddb822d564..e005b4581023 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -9,6 +9,11 @@ #ifndef __ASSEMBLY__ +/* + * Added to include __machine_check_early_realmode_* functions + */ +#include + /* This structure can grow, it's real size is used by head.S code * via the mkdefs mechanism. */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index adf2cda67f9a..89aa8248a57d 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -210,6 +210,9 @@ struct mce_error_info { #define MCE_EVENT_RELEASE true #define MCE_EVENT_DONTRELEASE false +struct pt_regs; +struct notifier_block; + extern void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr, uint64_t phys_addr); @@ -225,5 +228,9 @@ int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); +long __machine_check_early_realmode_p7(struct pt_regs *regs); +long __machine_check_early_realmode_p8(struct pt_regs *regs); +long __machine_check_early_realmode_p9(struct pt_regs *regs); +long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 93b986660664..2aa89c6b2896 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -72,10 +72,6 @@ extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power9(void); extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power10(void); -extern long __machine_check_early_realmode_p7(struct pt_regs *regs); -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); -extern long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 6f8c0c6b937a..8dc46f38680b 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -64,10 +64,6 @@ struct dt_cpu_feature { * Set up the base CPU */ -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); -extern long __machine_check_early_realmode_p10(struct pt_regs *regs); - static int hv_mode; static struct { -- cgit v1.2.3 From e5ed6069b7d8a41e4de09e8edf783f6b1a83b48f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 21 Jul 2020 13:59:58 -0700 Subject: ARC: pgalloc.h: delete a duplicated word + other fixes Drop the repeated word "to". Change "Thay" to "That". Add a closing right parenthesis. Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index b747f2ec2928..6147db925248 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -18,10 +18,10 @@ * vineetg: April 2010 * -Switched pgtable_t from being struct page * to unsigned long * =Needed so that Page Table allocator (pte_alloc_one) is not forced to - * to deal with struct page. Thay way in future we can make it allocate + * deal with struct page. That way in future we can make it allocate * multiple PG Tbls in one Page Frame * =sweet side effect is avoiding calls to ugly page_address( ) from the - * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate + * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate) * * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 */ -- cgit v1.2.3 From feb92d7d3813456c11dce215b3421801a78a8986 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sun, 26 Jul 2020 21:51:59 -0700 Subject: ARC: perf: don't bail setup if pct irq missing in device-tree Current code inadventely bails if hardware supports sampling/overflow interrupts, but the irq is missing from device tree. | | # perf stat -e cycles,instructions,major-faults,minor-faults ../hackbench | Running with 10 groups 400 process | Time: 0.921 | | Performance counter stats for '../hackbench': | | cycles | instructions | 0 major-faults | 8679 minor-faults This need not be as we can still do simple counting based perf stat. This unborks perf on HSDK-4xD Cc: Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 661fd842ea97..79849f37e782 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -562,7 +562,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) { struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; - int i, has_interrupts; + int i, has_interrupts, irq; int counter_size; /* in bits */ union cc_name { @@ -637,13 +637,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) .attr_groups = arc_pmu->attr_groups, }; - if (has_interrupts) { - int irq = platform_get_irq(pdev, 0); - - if (irq < 0) { - pr_err("Cannot get IRQ number for the platform\n"); - return -ENODEV; - } + if (has_interrupts && (irq = platform_get_irq(pdev, 0) >= 0)) { arc_pmu->irq = irq; @@ -652,9 +646,9 @@ static int arc_pmu_device_probe(struct platform_device *pdev) this_cpu_ptr(&arc_pmu_cpu)); on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); - - } else + } else { arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + } /* * perf parser doesn't really like '-' symbol in events name, so let's -- cgit v1.2.3 From fe81d927b78c4f0557836661d32e41ebc957b024 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 9 Jul 2020 19:52:32 -0700 Subject: ARC: HSDK: wireup perf irq Newer version of HSDK aka HSDK-4xD (with dual issue HS48x4 CPU) wired up the perf interrupt, so enable that in DT. This is OK for old HSDK where this irq is ignored because pct irq is not wired up in hardware. Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 9acbeba832c0..5d64a5a940ee 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -88,6 +88,8 @@ arcpct: pct { compatible = "snps,archs-pct"; + interrupt-parent = <&cpu_intc>; + interrupts = <20>; }; /* TIMER0 with interrupt for clockevent */ -- cgit v1.2.3 From 433c1ca0d441ee0b88fdd83c84ee6d6d43080dcd Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sat, 8 Aug 2020 20:32:27 +0800 Subject: MIPS: Loongson64: Do not override watch and ejtag feature Do not override ejtag feature to 0 as Loongson 3A1000+ do have ejtag. For watch, as KVM emulated CPU doesn't have watch feature, we should not enable it unconditionally. Signed-off-by: Jiaxun Yang Reviewed-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index b6e9c99b85a5..eb181224eb4c 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -26,7 +26,6 @@ #define cpu_has_counter 1 #define cpu_has_dc_aliases (PAGE_SIZE < 0x4000) #define cpu_has_divec 0 -#define cpu_has_ejtag 0 #define cpu_has_inclusive_pcaches 1 #define cpu_has_llsc 1 #define cpu_has_mcheck 0 @@ -42,7 +41,6 @@ #define cpu_has_veic 0 #define cpu_has_vint 0 #define cpu_has_vtag_icache 0 -#define cpu_has_watch 1 #define cpu_has_wsbh 1 #define cpu_has_ic_fills_f_dc 1 #define cpu_hwrena_impl_bits 0xc0000000 -- cgit v1.2.3 From b76fee1bc56c31a9d2a49592810eba30cc06d61a Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 4 Aug 2020 13:01:26 +0200 Subject: s390/pci: ignore stale configuration request event A configuration request event may be stale, that is the event may reference a zdev which was already configured. This can happen when a hotplug happens during boot such that the device is discovered and configured in the initial clp_list_pci(), then after initialization we enable events and process the original configuration request which additionally still contains the old disabled function handle leading to a failure during device enablement and subsequent I/O lockout. Fix this by restoring the check that the device to be configured is in standby which was removed in commit f606b3ef47c9 ("s390/pci: adapt events for zbus"). This check does not need serialization as we only enable the events after zPCI has fully initialized, which includes the initial clp_list_pci(), rescan only does updates and events are serialized with respect to each other. Fixes: f606b3ef47c9 ("s390/pci: adapt events for zbus") Cc: # 5.8 Reported-by: Shalini Chellathurai Saroja Tested-by: Shalini Chellathurai Saroja Acked-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index fdebd286f402..4602068c7548 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -92,6 +92,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1); break; } + /* the configuration request may be stale */ + if (zdev->state != ZPCI_FN_STATE_STANDBY) + break; zdev->fh = ccdf->fh; zdev->state = ZPCI_FN_STATE_CONFIGURED; ret = zpci_enable_device(zdev); -- cgit v1.2.3 From 9eaba29c7985236e16468f4e6a49cc18cf01443e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 12 Aug 2020 18:55:41 +0200 Subject: s390/runtime_instrumentation: fix storage key handling The key member of the runtime instrumentation control block contains only the access key, not the complete storage key. Therefore the value must be shifted by four bits. Note: this is only relevant for debugging purposes in case somebody compiles a kernel with a default storage access key set to a value not equal to zero. Fixes: e4b8b3f33fca ("s390: add support for runtime instrumentation") Reported-by: Claudio Imbrenda Signed-off-by: Heiko Carstens --- arch/s390/kernel/runtime_instr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 125c7f6e8715..1788a5454b6f 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -57,7 +57,7 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb) cb->k = 1; cb->ps = 1; cb->pc = 1; - cb->key = PAGE_DEFAULT_KEY; + cb->key = PAGE_DEFAULT_KEY >> 4; cb->v = 1; } -- cgit v1.2.3 From fd78c59446b8d050ecf3e0897c5a486c7de7c595 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 12 Aug 2020 18:56:28 +0200 Subject: s390/ptrace: fix storage key handling The key member of the runtime instrumentation control block contains only the access key, not the complete storage key. Therefore the value must be shifted by four bits. Since existing user space does not necessarily query and set the access key correctly, just ignore the user space provided key and use the correct one. Note: this is only relevant for debugging purposes in case somebody compiles a kernel with a default storage access key set to a value not equal to zero. Fixes: 262832bc5acd ("s390/ptrace: add runtime instrumention register get/set") Reported-by: Claudio Imbrenda Signed-off-by: Heiko Carstens --- arch/s390/kernel/ptrace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 11d2f7d05f91..a76dd27fb2e8 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1268,7 +1268,6 @@ static bool is_ri_cb_valid(struct runtime_instr_cb *cb) cb->pc == 1 && cb->qc == 0 && cb->reserved2 == 0 && - cb->key == PAGE_DEFAULT_KEY && cb->reserved3 == 0 && cb->reserved4 == 0 && cb->reserved5 == 0 && @@ -1330,7 +1329,11 @@ static int s390_runtime_instr_set(struct task_struct *target, kfree(data); return -EINVAL; } - + /* + * Override access key in any case, since user space should + * not be able to set it, nor should it care about it. + */ + ri_cb.key = PAGE_DEFAULT_KEY >> 4; preempt_disable(); if (!target->thread.ri_cb) target->thread.ri_cb = data; -- cgit v1.2.3 From 3cddb79afc60bcdb5fd9dd7a1c64a8d03bdd460f Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 3 Aug 2020 09:33:29 +0200 Subject: s390/pci: fix zpci_bus_link_virtfn() We were missing the pci_dev_put() for candidate PFs. Furhtermore in discussion with upstream it turns out that somewhat counterintuitively some common code, in particular the vfio-pci driver, assumes that pdev->is_virtfn always implies that pdev->physfn is set, i.e. that VFs are always linked. While POWER does seem to set pdev->is_virtfn even for unlinked functions (see comments in arch/powerpc/kernel/eeh.c:eeh_debugfs_break_device()) for now just be safe and only set pdev->is_virtfn on linking. Also make sure that we only search for parent PFs if the zbus is multifunction and we thus know the devfn values supplied by firmware come from the RID. Fixes: e5794cf1a270 ("s390/pci: create links between PFs and VFs") Cc: # 5.8 Reviewed-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_bus.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 642a99384688..423af8b1da7b 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -132,13 +132,14 @@ static int zpci_bus_link_virtfn(struct pci_dev *pdev, { int rc; - virtfn->physfn = pci_dev_get(pdev); rc = pci_iov_sysfs_link(pdev, virtfn, vfid); - if (rc) { - pci_dev_put(pdev); - virtfn->physfn = NULL; + if (rc) return rc; - } + + virtfn->is_virtfn = 1; + virtfn->multifunction = 0; + virtfn->physfn = pci_dev_get(pdev); + return 0; } @@ -151,9 +152,9 @@ static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ int rc = 0; - virtfn->is_virtfn = 1; - virtfn->multifunction = 0; - WARN_ON(vfid < 0); + if (!zbus->multifunction) + return 0; + /* If the parent PF for the given VF is also configured in the * instance, it must be on the same zbus. * We can then identify the parent PF by checking what @@ -165,11 +166,17 @@ static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, zdev = zbus->function[i]; if (zdev && zdev->is_physfn) { pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (!pdev) + continue; cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); if (cand_devfn == virtfn->devfn) { rc = zpci_bus_link_virtfn(pdev, virtfn, vfid); + /* balance pci_get_slot() */ + pci_dev_put(pdev); break; } + /* balance pci_get_slot() */ + pci_dev_put(pdev); } } return rc; @@ -178,8 +185,6 @@ static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) { - virtfn->is_virtfn = 1; - virtfn->multifunction = 0; return 0; } #endif -- cgit v1.2.3 From 2f0230b2f2d5fd287a85583eefb5aed35b6fe510 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 3 Aug 2020 17:46:32 +0200 Subject: s390/pci: re-introduce zpci_remove_device() For fixing the PF to VF link removal we need to perform some action on every removal of a zdev from the common PCI subsystem. So in preparation re-introduce zpci_remove_device() and use that instead of directly calling the common code functions. This was actually still declared from earlier code but no longer implemented. Reviewed-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci.c | 19 ++++++++++++------- arch/s390/pci/pci_event.c | 4 ++-- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 3902c9f6f2d6..6e57ff885f8a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -672,6 +672,16 @@ int zpci_disable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_disable_device); +void zpci_remove_device(struct zpci_dev *zdev) +{ + struct zpci_bus *zbus = zdev->zbus; + struct pci_dev *pdev; + + pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (pdev) + pci_stop_and_remove_bus_device_locked(pdev); +} + int zpci_create_device(struct zpci_dev *zdev) { int rc; @@ -716,13 +726,8 @@ void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); - if (zdev->zbus->bus) { - struct pci_dev *pdev; - - pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); - if (pdev) - pci_stop_and_remove_bus_device_locked(pdev); - } + if (zdev->zbus->bus) + zpci_remove_device(zdev); switch (zdev->state) { case ZPCI_FN_STATE_ONLINE: diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 4602068c7548..9a3a291cad43 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -121,7 +121,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) if (!zdev) break; if (pdev) - pci_stop_and_remove_bus_device_locked(pdev); + zpci_remove_device(zdev); ret = zpci_disable_device(zdev); if (ret) @@ -140,7 +140,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) /* Give the driver a hint that the function is * already unusable. */ pdev->error_state = pci_channel_io_perm_failure; - pci_stop_and_remove_bus_device_locked(pdev); + zpci_remove_device(zdev); } zdev->state = ZPCI_FN_STATE_STANDBY; -- cgit v1.2.3 From b97bf44f99155e57088e16974afb1f2d7b5287aa Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 3 Aug 2020 17:58:10 +0200 Subject: s390/pci: fix PF/VF linking on hot plug Currently there are four places in which a PCI function is scanned and made available to drivers: 1. In pci_scan_root_bus() as part of the initial zbus creation. 2. In zpci_bus_add_devices() when registering a device in configured state on a zbus that has already been scanned. 3. When a function is already known to zPCI (in reserved/standby state) and configuration is triggered through firmware by PEC 0x301. 4. When a device is already known to zPCI (in standby/reserved state) and configuration is triggered from within Linux using enable_slot(). The PF/VF linking step and setting of pdev->is_virtfn introduced with commit e5794cf1a270 ("s390/pci: create links between PFs and VFs") was only triggered for the second case, which is where VFs created through sriov_numvfs usually land. However unlike some other platforms but like POWER VFs can be individually enabled/disabled through /sys/bus/pci/slots. Fix this by doing VF setup as part of pcibios_bus_add_device() which is called in all of the above cases. Finally to remove the PF/VF links call the common code pci_iov_remove_virtfn() function to remove linked VFs. This takes care of the necessary sysfs cleanup. Fixes: e5794cf1a270 ("s390/pci: create links between PFs and VFs") Cc: # 5.8: 2f0230b2f2d5: s390/pci: re-introduce zpci_remove_device() Cc: # 5.8 Acked-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci.c | 5 ++++- arch/s390/pci/pci_bus.c | 27 +++++++++++++++------------ arch/s390/pci/pci_bus.h | 13 +++++++++++++ 3 files changed, 32 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 6e57ff885f8a..4b62d6b55024 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -678,8 +678,11 @@ void zpci_remove_device(struct zpci_dev *zdev) struct pci_dev *pdev; pdev = pci_get_slot(zbus->bus, zdev->devfn); - if (pdev) + if (pdev) { + if (pdev->is_virtfn) + return zpci_remove_virtfn(pdev, zdev->vfn); pci_stop_and_remove_bus_device_locked(pdev); + } } int zpci_create_device(struct zpci_dev *zdev) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 423af8b1da7b..5967f3014156 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -189,6 +189,19 @@ static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, } #endif +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + + /* + * With pdev->no_vf_scan the common PCI probing code does not + * perform PF/VF linking. + */ + if (zdev->vfn) + zpci_bus_setup_virtfn(zdev->zbus, pdev, zdev->vfn); + +} + static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) { struct pci_bus *bus; @@ -219,20 +232,10 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) } pdev = pci_scan_single_device(bus, zdev->devfn); - if (pdev) { - if (!zdev->is_physfn) { - rc = zpci_bus_setup_virtfn(zbus, pdev, zdev->vfn); - if (rc) - goto failed_with_pdev; - } + if (pdev) pci_bus_add_device(pdev); - } - return 0; -failed_with_pdev: - pci_stop_and_remove_bus_device(pdev); - pci_dev_put(pdev); - return rc; + return 0; } static void zpci_bus_add_devices(struct zpci_bus *zbus) diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index 89be3c354b7b..4972433df458 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -29,3 +29,16 @@ static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus, return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn]; } + +#ifdef CONFIG_PCI_IOV +static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) +{ + + pci_lock_rescan_remove(); + /* Linux' vfid's start at 0 vfn at 1 */ + pci_iov_remove_virtfn(pdev->physfn, vfn - 1); + pci_unlock_rescan_remove(); +} +#else /* CONFIG_PCI_IOV */ +static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) {} +#endif /* CONFIG_PCI_IOV */ -- cgit v1.2.3 From 1e4e4bcaf70ec89f8b499c93a83d078c1e5c0ea6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 17 Aug 2020 16:03:01 +0530 Subject: powerpc/pkeys: Fix build error with PPC_MEM_KEYS disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IS_ENABLED() instead of #ifdef still requires variable declaration. In this specific case, default_uamor is declared in asm/pkeys.h which is only included if PPC_MEM_KEYS is enabled. arch/powerpc/mm/book3s64/hash_utils.c: In function ‘hash__early_init_mmu_secondary’: arch/powerpc/mm/book3s64/hash_utils.c:1119:21: error: ‘default_uamor’ undeclared (first use in this function) 1119 | mtspr(SPRN_UAMOR, default_uamor); | ^~~~~~~~~~~~~ Fixes: 6553fb799f60 ("powerpc/pkeys: Fix boot failures with Nemo board (A-EON AmigaOne X1000)") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200817103301.158836-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 1da9dbba9217..890a71c5293e 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1115,8 +1115,10 @@ void hash__early_init_mmu_secondary(void) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); - if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY)) +#ifdef CONFIG_PPC_MEM_KEYS + if (mmu_has_feature(MMU_FTR_PKEY)) mtspr(SPRN_UAMOR, default_uamor); +#endif } #endif /* CONFIG_SMP */ -- cgit v1.2.3 From fdc6edbb31fba76fd25d7bd016b675a92908d81e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 06:03:26 +0000 Subject: powerpc/fixmap: Fix the size of the early debug area Commit ("03fd42d458fb powerpc/fixmap: Fix FIX_EARLY_DEBUG_BASE when page size is 256k") reworked the setup of the early debug area and mistakenly replaced 128 * 1024 by SZ_128. Change to SZ_128K to restore the original 128 kbytes size of the area. Fixes: 03fd42d458fb ("powerpc/fixmap: Fix FIX_EARLY_DEBUG_BASE when page size is 256k") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/996184974d674ff984643778cf1cdd7fe58cc065.1597644194.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/fixmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 925cf89cbf4b..6bfc87915d5d 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -52,7 +52,7 @@ enum fixed_addresses { FIX_HOLE, /* reserve the top 128K for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, - FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128, PAGE_SIZE)/PAGE_SIZE)-1, + FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -- cgit v1.2.3 From bd05220c7be3356046861c317d9c287ca50445ba Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Tue, 11 Aug 2020 19:24:57 +0100 Subject: arch/ia64: Restore arch-specific pgd_offset_k implementation IA-64 is special and treats pgd_offset_k() differently to pgd_offset(), using different formulae to calculate the indices into the kernel and user PGDs. The index into the user PGDs takes into account the region number, but the index into the kernel (init_mm) PGD always assumes a predefined kernel region number. Commit 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") made IA-64 use a generic pgd_offset_k() which incorrectly used pgd_index() for kernel page tables. As a result, the index into the kernel PGD was going out of bounds and the kernel hung during early boot. Allow overrides of pgd_offset_k() and override it on IA-64 with the old implementation that will correctly index the kernel PGD. Fixes: 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") Reported-by: John Paul Adrian Glaubitz Signed-off-by: Jessica Clarke Tested-by: John Paul Adrian Glaubitz Acked-by: Tony Luck Signed-off-by: Mike Rapoport --- arch/ia64/include/asm/pgtable.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 10850897a91c..779b6972aa84 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -366,6 +366,15 @@ pgd_index (unsigned long address) } #define pgd_index pgd_index +/* + * In the kernel's mapped region we know everything is in region number 5, so + * as an optimisation its PGD already points to the area for that region. + * However, this also means that we cannot use pgd_index() and we must + * never add the region here. + */ +#define pgd_offset_k(addr) \ + (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) + /* Look up a pgd entry in the gate area. On IA-64, the gate-area resides in the kernel-mapped segment, hence we use pgd_offset_k() here. */ -- cgit v1.2.3 From 19cf4b7eefc3040192bccb10c322e4c831bb0eb0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 17 Aug 2020 13:53:04 -0400 Subject: KVM: x86: fix access code passed to gva_to_gpa The PK bit of the error code is computed dynamically in permission_fault and therefore need not be passed to gva_to_gpa: only the access bits (fetch, user, write) need to be passed down. Not doing so causes a splat in the pku test: WARNING: CPU: 25 PID: 5465 at arch/x86/kvm/mmu.h:197 paging64_walk_addr_generic+0x594/0x750 [kvm] Hardware name: Intel Corporation WilsonCity/WilsonCity, BIOS WLYDCRB1.SYS.0014.D62.2001092233 01/09/2020 RIP: 0010:paging64_walk_addr_generic+0x594/0x750 [kvm] Code: <0f> 0b e9 db fe ff ff 44 8b 43 04 4c 89 6c 24 30 8b 13 41 39 d0 89 RSP: 0018:ff53778fc623fb60 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ff53778fc623fbf0 RCX: 0000000000000007 RDX: 0000000000000001 RSI: 0000000000000002 RDI: ff4501efba818000 RBP: 0000000000000020 R08: 0000000000000005 R09: 00000000004000e7 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000007 R13: ff4501efba818388 R14: 10000000004000e7 R15: 0000000000000000 FS: 00007f2dcf31a700(0000) GS:ff4501f1c8040000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000001dea475005 CR4: 0000000000763ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: paging64_gva_to_gpa+0x3f/0xb0 [kvm] kvm_fixup_and_inject_pf_error+0x48/0xa0 [kvm] handle_exception_nmi+0x4fc/0x5b0 [kvm_intel] kvm_arch_vcpu_ioctl_run+0x911/0x1c10 [kvm] kvm_vcpu_ioctl+0x23e/0x5d0 [kvm] ksys_ioctl+0x92/0xb0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x3e/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace d17eb998aee991da ]--- Reported-by: Sean Christopherson Fixes: 897861479c064 ("KVM: x86: Add helper functions for illegal GPA checking and page fault injection") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2db369a64f29..fcfd79a02e85 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10743,9 +10743,11 @@ EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value); void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code) { struct x86_exception fault; + u32 access = error_code & + (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK); if (!(error_code & PFERR_PRESENT_MASK) || - vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, error_code, &fault) != UNMAPPED_GVA) { + vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) { /* * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page * tables probably do not match the TLB. Just proceed -- cgit v1.2.3 From 427890aff8558eb4326e723835e0eae0e6fe3102 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 17 Aug 2020 11:16:55 -0700 Subject: kvm: x86: Toggling CR4.SMAP does not load PDPTEs in PAE mode See the SDM, volume 3, section 4.4.1: If PAE paging would be in use following an execution of MOV to CR0 or MOV to CR4 (see Section 4.1.1) and the instruction is modifying any of CR0.CD, CR0.NW, CR0.PG, CR4.PAE, CR4.PGE, CR4.PSE, or CR4.SMEP; then the PDPTEs are loaded from the address in CR3. Fixes: 0be0226f07d14 ("KVM: MMU: fix SMAP virtualization") Cc: Xiao Guangrong Signed-off-by: Jim Mattson Reviewed-by: Peter Shier Reviewed-by: Oliver Upton Message-Id: <20200817181655.3716509-2-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fcfd79a02e85..ecc3470b279e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,7 +975,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + X86_CR4_SMEP | X86_CR4_PKE; if (kvm_valid_cr4(vcpu, cr4)) return 1; -- cgit v1.2.3 From cb957adb4ea422bd758568df5b2478ea3bb34f35 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 17 Aug 2020 11:16:54 -0700 Subject: kvm: x86: Toggling CR4.PKE does not load PDPTEs in PAE mode See the SDM, volume 3, section 4.4.1: If PAE paging would be in use following an execution of MOV to CR0 or MOV to CR4 (see Section 4.1.1) and the instruction is modifying any of CR0.CD, CR0.NW, CR0.PG, CR4.PAE, CR4.PGE, CR4.PSE, or CR4.SMEP; then the PDPTEs are loaded from the address in CR3. Fixes: b9baba8614890 ("KVM, pkeys: expose CPUID/CR4 to guest") Cc: Huaitong Han Signed-off-by: Jim Mattson Reviewed-by: Peter Shier Reviewed-by: Oliver Upton Message-Id: <20200817181655.3716509-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecc3470b279e..539ea1cd6020 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,7 +975,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | - X86_CR4_SMEP | X86_CR4_PKE; + X86_CR4_SMEP; if (kvm_valid_cr4(vcpu, cr4)) return 1; -- cgit v1.2.3 From 48d2f0407be7a36e8f20be37ec9121e021ef3964 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 5 Aug 2020 15:27:29 +0000 Subject: powerpc/kasan: Fix KASAN_SHADOW_START on BOOK3S_32 On BOOK3S_32, when we have modules and strict kernel RWX, modules are not in vmalloc space but in a dedicated segment that is below PAGE_OFFSET. So KASAN_SHADOW_START must take it into account. MODULES_VADDR can't be used because it is not defined yet in kasan.h Fixes: 6ca055322da8 ("powerpc/32s: Use dedicated segment for modules with STRICT_KERNEL_RWX") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6eddca2d5611fd57312a88eae31278c87a8fc99d.1596641224.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kasan.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index d635b96c7ea6..7355ed05e65e 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -15,11 +15,18 @@ #ifndef __ASSEMBLY__ #include +#include #define KASAN_SHADOW_SCALE_SHIFT 3 +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX) +#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) +#else +#define KASAN_KERN_START PAGE_OFFSET +#endif + #define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ - (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT)) + (KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) -- cgit v1.2.3 From 7bee31ad8e2f6c276f36993346ac70f4d4c80e45 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 5 Aug 2020 15:27:28 +0000 Subject: powerpc/32s: Fix is_module_segment() when MODULES_VADDR is defined When MODULES_VADDR is defined, is_module_segment() shall check the address against it instead of checking agains VMALLOC_START. Fixes: 6ca055322da8 ("powerpc/32s: Use dedicated segment for modules with STRICT_KERNEL_RWX") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/07884ed033c31e074747b7eb8eaa329d15db07ec.1596641219.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index c0162911f6cb..82ae9e06a773 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -191,10 +191,17 @@ static bool is_module_segment(unsigned long addr) { if (!IS_ENABLED(CONFIG_MODULES)) return false; +#ifdef MODULES_VADDR + if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) + return false; + if (addr >= ALIGN(MODULES_END, SZ_256M)) + return false; +#else if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) return false; if (addr >= ALIGN(VMALLOC_END, SZ_256M)) return false; +#endif return true; } -- cgit v1.2.3 From 801980f6497946048709b9b09771a1729551d705 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 11 Aug 2020 11:15:44 -0500 Subject: powerpc/pseries/hotplug-cpu: wait indefinitely for vCPU death For a power9 KVM guest with XIVE enabled, running a test loop where we hotplug 384 vcpus and then unplug them, the following traces can be seen (generally within a few loops) either from the unplugged vcpu: cpu 65 (hwid 65) Ready to die... Querying DEAD? cpu 66 (66) shows 2 list_del corruption. next->prev should be c00a000002470208, but was c00a000002470048 ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:56! Oops: Exception in kernel mode, sig: 5 [#1] LE SMP NR_CPUS=2048 NUMA pSeries Modules linked in: fuse nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 ... CPU: 66 PID: 0 Comm: swapper/66 Kdump: loaded Not tainted 4.18.0-221.el8.ppc64le #1 NIP: c0000000007ab50c LR: c0000000007ab508 CTR: 00000000000003ac REGS: c0000009e5a17840 TRAP: 0700 Not tainted (4.18.0-221.el8.ppc64le) MSR: 800000000282b033 CR: 28000842 XER: 20040000 ... NIP __list_del_entry_valid+0xac/0x100 LR __list_del_entry_valid+0xa8/0x100 Call Trace: __list_del_entry_valid+0xa8/0x100 (unreliable) free_pcppages_bulk+0x1f8/0x940 free_unref_page+0xd0/0x100 xive_spapr_cleanup_queue+0x148/0x1b0 xive_teardown_cpu+0x1bc/0x240 pseries_mach_cpu_die+0x78/0x2f0 cpu_die+0x48/0x70 arch_cpu_idle_dead+0x20/0x40 do_idle+0x2f4/0x4c0 cpu_startup_entry+0x38/0x40 start_secondary+0x7bc/0x8f0 start_secondary_prolog+0x10/0x14 or on the worker thread handling the unplug: pseries-hotplug-cpu: Attempting to remove CPU , drc index: 1000013a Querying DEAD? cpu 314 (314) shows 2 BUG: Bad page state in process kworker/u768:3 pfn:95de1 cpu 314 (hwid 314) Ready to die... page:c00a000002577840 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0x5ffffc00000000() raw: 005ffffc00000000 5deadbeef0000100 5deadbeef0000200 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffff7f 0000000000000000 page dumped because: nonzero mapcount Modules linked in: kvm xt_CHECKSUM ipt_MASQUERADE xt_conntrack ... CPU: 0 PID: 548 Comm: kworker/u768:3 Kdump: loaded Not tainted 4.18.0-224.el8.bz1856588.ppc64le #1 Workqueue: pseries hotplug workque pseries_hp_work_fn Call Trace: dump_stack+0xb0/0xf4 (unreliable) bad_page+0x12c/0x1b0 free_pcppages_bulk+0x5bc/0x940 page_alloc_cpu_dead+0x118/0x120 cpuhp_invoke_callback.constprop.5+0xb8/0x760 _cpu_down+0x188/0x340 cpu_down+0x5c/0xa0 cpu_subsys_offline+0x24/0x40 device_offline+0xf0/0x130 dlpar_offline_cpu+0x1c4/0x2a0 dlpar_cpu_remove+0xb8/0x190 dlpar_cpu_remove_by_index+0x12c/0x150 dlpar_cpu+0x94/0x800 pseries_hp_work_fn+0x128/0x1e0 process_one_work+0x304/0x5d0 worker_thread+0xcc/0x7a0 kthread+0x1ac/0x1c0 ret_from_kernel_thread+0x5c/0x80 The latter trace is due to the following sequence: page_alloc_cpu_dead drain_pages drain_pages_zone free_pcppages_bulk where drain_pages() in this case is called under the assumption that the unplugged cpu is no longer executing. To ensure that is the case, and early call is made to __cpu_die()->pseries_cpu_die(), which runs a loop that waits for the cpu to reach a halted state by polling its status via query-cpu-stopped-state RTAS calls. It only polls for 25 iterations before giving up, however, and in the trace above this results in the following being printed only .1 seconds after the hotplug worker thread begins processing the unplug request: pseries-hotplug-cpu: Attempting to remove CPU , drc index: 1000013a Querying DEAD? cpu 314 (314) shows 2 At that point the worker thread assumes the unplugged CPU is in some unknown/dead state and procedes with the cleanup, causing the race with the XIVE cleanup code executed by the unplugged CPU. Fix this by waiting indefinitely, but also making an effort to avoid spurious lockup messages by allowing for rescheduling after polling the CPU status and printing a warning if we wait for longer than 120s. Fixes: eac1e731b59ee ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Suggested-by: Michael Ellerman Signed-off-by: Michael Roth Tested-by: Greg Kurz Reviewed-by: Thiago Jung Bauermann Reviewed-by: Greg Kurz [mpe: Trim oopses in change log slightly for readability] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811161544.10513-1-mdroth@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index c6e0d8abf75e..7a974ed6b240 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -107,22 +107,28 @@ static int pseries_cpu_disable(void) */ static void pseries_cpu_die(unsigned int cpu) { - int tries; int cpu_status = 1; unsigned int pcpu = get_hard_smp_processor_id(cpu); + unsigned long timeout = jiffies + msecs_to_jiffies(120000); - for (tries = 0; tries < 25; tries++) { + while (true) { cpu_status = smp_query_cpu_stopped(pcpu); if (cpu_status == QCSS_STOPPED || cpu_status == QCSS_HARDWARE_ERROR) break; - cpu_relax(); + if (time_after(jiffies, timeout)) { + pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n", + cpu, pcpu); + timeout = jiffies + msecs_to_jiffies(120000); + } + + cond_resched(); } - if (cpu_status != 0) { - printk("Querying DEAD? cpu %i (%i) shows %i\n", - cpu, pcpu, cpu_status); + if (cpu_status == QCSS_HARDWARE_ERROR) { + pr_warn("CPU %i (hwid %i) reported error while dying\n", + cpu, pcpu); } /* Isolation and deallocation are definitely done by -- cgit v1.2.3 From 394b19d6cb58ae292c0e1ad6b893fed8ece477ce Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 4 Aug 2020 19:48:17 -0400 Subject: x86/boot/compressed: Use builtin mem functions for decompressor Since commits c041b5ad8640 ("x86, boot: Create a separate string.h file to provide standard string functions") fb4cac573ef6 ("x86, boot: Move memcmp() into string.h and string.c") the decompressor stub has been using the compiler's builtin memcpy, memset and memcmp functions, _except_ where it would likely have the largest impact, in the decompression code itself. Remove the #undef's of memcpy and memset in misc.c so that the decompressor code also uses the compiler builtins. The rationale given in the comment doesn't really apply: just because some functions use the out-of-line version is no reason to not use the builtin version in the rest. Replace the comment with an explanation of why memzero and memmove are being #define'd. Drop the suggestion to #undef in boot/string.h as well: the out-of-line versions are not really optimized versions, they're generic code that's good enough for the preboot environment. The compiler will likely generate better code for constant-size memcpy/memset/memcmp if it is allowed to. Most decompressors' performance is unchanged, with the exception of LZ4 and 64-bit ZSTD. Before After ARCH LZ4 73ms 10ms 32 LZ4 120ms 10ms 64 ZSTD 90ms 74ms 64 Measurements on QEMU on 2.2GHz Broadwell Xeon, using defconfig kernels. Decompressor code size has small differences, with the largest being that 64-bit ZSTD decreases just over 2k. The largest code size increase was on 64-bit XZ, of about 400 bytes. Signed-off-by: Arvind Sankar Suggested-by: Nick Terrell Tested-by: Nick Terrell Acked-by: Kees Cook Signed-off-by: Linus Torvalds --- arch/x86/boot/compressed/misc.c | 7 ++----- arch/x86/boot/string.h | 5 +---- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 39e592d0e0b4..e478e40fbe5a 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -30,12 +30,9 @@ #define STATIC static /* - * Use normal definitions of mem*() from string.c. There are already - * included header files which expect a definition of memset() and by - * the time we define memset macro, it is too late. + * Provide definitions of memzero and memmove as some of the decompressors will + * try to define their own functions if these are not defined as macros. */ -#undef memcpy -#undef memset #define memzero(s, n) memset((s), 0, (n)) #define memmove memmove diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 995f7b7ad512..a232da487cd2 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -11,10 +11,7 @@ void *memcpy(void *dst, const void *src, size_t len); void *memset(void *dst, int c, size_t len); int memcmp(const void *s1, const void *s2, size_t len); -/* - * Access builtin version by default. If one needs to use optimized version, - * do "undef memcpy" in .c file and link against right string.c - */ +/* Access builtin version by default. */ #define memcpy(d,s,l) __builtin_memcpy(d,s,l) #define memset(d,c,l) __builtin_memset(d,c,l) #define memcmp __builtin_memcmp -- cgit v1.2.3 From e17a7c0e0aebb956719ce2a8465f649859c2da7d Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 19 Aug 2020 15:07:41 +0200 Subject: powerpc/powernv/pci: Fix possible crash when releasing DMA resources Fix a typo introduced during recent code cleanup, which could lead to silently not freeing resources or an oops message (on PCI hotplug or CAPI reset). Only impacts ioda2, the code path for ioda1 is correct. Fixes: 01e12629af4e ("powerpc/powernv/pci: Add explicit tracking of the DMA setup state") Signed-off-by: Frederic Barrat Reviewed-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819130741.16769-1-fbarrat@linux.ibm.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c9c25fb0783c..023a4f987bb2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2705,7 +2705,7 @@ void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) struct iommu_table *tbl = pe->table_group.tables[0]; int64_t rc; - if (pe->dma_setup_done) + if (!pe->dma_setup_done) return; rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); -- cgit v1.2.3 From ee87e1557c42dc9c2da11c38e11b87c311569853 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 20 Aug 2020 06:30:47 +0200 Subject: Fix build error when CONFIG_ACPI is not set/enabled: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ../arch/x86/pci/xen.c: In function ‘pci_xen_init’: ../arch/x86/pci/xen.c:410:2: error: implicit declaration of function ‘acpi_noirq_set’; did you mean ‘acpi_irq_get’? [-Werror=implicit-function-declaration] acpi_noirq_set(); Fixes: 88e9ca161c13 ("xen/pci: Use acpi_noirq_set() helper to avoid #ifdef") Signed-off-by: Randy Dunlap Reviewed-by: Juergen Gross Cc: Andy Shevchenko Cc: Bjorn Helgaas Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xenproject.org Cc: linux-pci@vger.kernel.org Signed-off-by: Juergen Gross --- arch/x86/pci/xen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e3f1ca316068..db34fee93138 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -26,6 +26,7 @@ #include #include #include +#include #include static int xen_pcifront_enable_irq(struct pci_dev *dev) -- cgit v1.2.3 From c8502eb2d43b6b9b1dc382299a4d37031be63876 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 17 Jul 2020 15:45:26 -0400 Subject: efi/x86: Mark kernel rodata non-executable for mixed mode When remapping the kernel rodata section RO in the EFI pagetables, the protection flags that were used for the text section are being reused, but the rodata section should not be marked executable. Cc: Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200717194526.3452089-1-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- arch/x86/platform/efi/efi_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 413583f904a6..6af4da1149ba 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -259,6 +259,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; rodata = __pa(__start_rodata); pfn = rodata >> PAGE_SHIFT; + + pf = _PAGE_NX | _PAGE_ENC; if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { pr_err("Failed to map kernel rodata 1:1\n"); return 1; -- cgit v1.2.3 From 39ada88f9c862c1ff8929ff67e0d1199c7af73fe Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 13 Aug 2020 19:38:17 +0200 Subject: efi/x86: Move 32-bit code into efi_32.c Now that the old memmap code has been removed, some code that was left behind in arch/x86/platform/efi/efi.c is only used for 32-bit builds, which means it can live in efi_32.c as well. So move it over. Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 10 ------ arch/x86/platform/efi/efi.c | 69 ------------------------------------------ arch/x86/platform/efi/efi_32.c | 44 ++++++++++++++++++++++----- 3 files changed, 37 insertions(+), 86 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b9c2667ac46c..bc9758ef292e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -81,11 +81,8 @@ extern unsigned long efi_fw_vendor, efi_config_table; kernel_fpu_end(); \ }) - #define arch_efi_call_virt(p, f, args...) p->f(args) -#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) - #else /* !CONFIG_X86_32 */ #define EFI_LOADER_SIGNATURE "EL64" @@ -125,9 +122,6 @@ struct efi_scratch { kernel_fpu_end(); \ }) -extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, - u32 type, u64 attribute); - #ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present @@ -143,17 +137,13 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ extern struct efi_scratch efi_scratch; -extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); extern void __init efi_print_memmap(void); -extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init old_map_region(efi_memory_desc_t *md); -extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f6ea8f1a9d57..d37ebe6e70d7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -496,74 +495,6 @@ void __init efi_init(void) efi_print_memmap(); } -#if defined(CONFIG_X86_32) - -void __init efi_set_executable(efi_memory_desc_t *md, bool executable) -{ - u64 addr, npages; - - addr = md->virt_addr; - npages = md->num_pages; - - memrange_efi_to_native(&addr, &npages); - - if (executable) - set_memory_x(addr, npages); - else - set_memory_nx(addr, npages); -} - -void __init runtime_code_page_mkexec(void) -{ - efi_memory_desc_t *md; - - /* Make EFI runtime service code area executable */ - for_each_efi_memory_desc(md) { - if (md->type != EFI_RUNTIME_SERVICES_CODE) - continue; - - efi_set_executable(md, true); - } -} - -void __init efi_memory_uc(u64 addr, unsigned long size) -{ - unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; - u64 npages; - - npages = round_up(size, page_shift) / page_shift; - memrange_efi_to_native(&addr, &npages); - set_memory_uc(addr, npages); -} - -void __init old_map_region(efi_memory_desc_t *md) -{ - u64 start_pfn, end_pfn, end; - unsigned long size; - void *va; - - start_pfn = PFN_DOWN(md->phys_addr); - size = md->num_pages << PAGE_SHIFT; - end = md->phys_addr + size; - end_pfn = PFN_UP(end); - - if (pfn_range_is_mapped(start_pfn, end_pfn)) { - va = __va(md->phys_addr); - - if (!(md->attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)va, size); - } else - va = efi_ioremap(md->phys_addr, size, - md->type, md->attribute); - - md->virt_addr = (u64) (unsigned long) va; - if (!va) - pr_err("ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); -} - -#endif - /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 826ead67753d..e06a199423c0 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -29,9 +29,35 @@ #include #include #include +#include #include #include +void __init efi_map_region(efi_memory_desc_t *md) +{ + u64 start_pfn, end_pfn, end; + unsigned long size; + void *va; + + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); + + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); + + if (!(md->attribute & EFI_MEMORY_WB)) + set_memory_uc((unsigned long)va, md->num_pages); + } else { + va = ioremap_cache(md->phys_addr, size); + } + + md->virt_addr = (unsigned long)va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", md->phys_addr); +} + /* * To make EFI call EFI runtime service in physical addressing mode we need * prolog/epilog before/after the invocation to claim the EFI runtime service @@ -58,11 +84,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; } -void __init efi_map_region(efi_memory_desc_t *md) -{ - old_map_region(md); -} - void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} @@ -107,6 +128,15 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, void __init efi_runtime_update_mappings(void) { - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); + if (__supported_pte_mask & _PAGE_NX) { + efi_memory_desc_t *md; + + /* Make EFI runtime service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type != EFI_RUNTIME_SERVICES_CODE) + continue; + + set_memory_x(md->virt_addr, md->num_pages); + } + } } -- cgit v1.2.3 From 17899eaf88d689529b866371344c8f269ba79b5f Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 6 Aug 2020 08:46:32 -0400 Subject: powerpc/perf: Fix soft lockups due to missed interrupt accounting Performance monitor interrupt handler checks if any counter has overflown and calls record_and_restart() in core-book3s which invokes perf_event_overflow() to record the sample information. Apart from creating sample, perf_event_overflow() also does the interrupt and period checks via perf_event_account_interrupt(). Currently we record information only if the SIAR (Sampled Instruction Address Register) valid bit is set (using siar_valid() check) and hence the interrupt check. But it is possible that we do sampling for some events that are not generating valid SIAR, and hence there is no chance to disable the event if interrupts are more than max_samples_per_tick. This leads to soft lockup. Fix this by adding perf_event_account_interrupt() in the invalid SIAR code path for a sampling event. ie if SIAR is invalid, just do interrupt check and don't record the sample information. Reported-by: Alexey Kardashevskiy Signed-off-by: Athira Rajeev Tested-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1596717992-7321-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 00038650a007..93d20e1ed845 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2141,6 +2141,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); + } else if (period) { + /* Account for interrupt in case of invalid SIAR */ + if (perf_event_account_interrupt(event)) + power_pmu_stop(event, 0); } } -- cgit v1.2.3 From 90a9b102eddf6a3f987d15f4454e26a2532c1c98 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 20 Aug 2020 11:48:44 +0530 Subject: powerpc/pseries: Do not initiate shutdown when system is running on UPS As per PAPR we have to look for both EPOW sensor value and event modifier to identify the type of event and take appropriate action. In LoPAPR v1.1 section 10.2.2 includes table 136 "EPOW Action Codes": SYSTEM_SHUTDOWN 3 The system must be shut down. An EPOW-aware OS logs the EPOW error log information, then schedules the system to be shut down to begin after an OS defined delay internal (default is 10 minutes.) Then in section 10.3.2.2.8 there is table 146 "Platform Event Log Format, Version 6, EPOW Section", which includes the "EPOW Event Modifier": For EPOW sensor value = 3 0x01 = Normal system shutdown with no additional delay 0x02 = Loss of utility power, system is running on UPS/Battery 0x03 = Loss of system critical functions, system should be shutdown 0x04 = Ambient temperature too high All other values = reserved We have a user space tool (rtas_errd) on LPAR to monitor for EPOW_SHUTDOWN_ON_UPS. Once it gets an event it initiates shutdown after predefined time. It also starts monitoring for any new EPOW events. If it receives "Power restored" event before predefined time it will cancel the shutdown. Otherwise after predefined time it will shutdown the system. Commit 79872e35469b ("powerpc/pseries: All events of EPOW_SYSTEM_SHUTDOWN must initiate shutdown") changed our handling of the "on UPS/Battery" case, to immediately shutdown the system. This breaks existing setups that rely on the userspace tool to delay shutdown and let the system run on the UPS. Fixes: 79872e35469b ("powerpc/pseries: All events of EPOW_SYSTEM_SHUTDOWN must initiate shutdown") Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Vasant Hegde [mpe: Massage change log and add PAPR references] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200820061844.306460-1-hegdevasant@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/ras.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f3736fcd98fc..13c86a292c6d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier) case EPOW_SHUTDOWN_ON_UPS: pr_emerg("Loss of system power detected. System is running on" " UPS/battery. Check RTAS error log for details\n"); - orderly_poweroff(true); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: -- cgit v1.2.3 From cc7f3f72dc2ae2b383142896d79ca1e237ad7e8b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Aug 2020 18:12:48 +0530 Subject: RISC-V: Add mechanism to provide custom IPI operations We add mechanism to set custom IPI operations so that CLINT driver from drivers directory can provide custom IPI operations. Signed-off-by: Anup Patel Tested-by: Emil Renner Berhing Reviewed-by: Atish Patra Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/clint.h | 25 ------------------------ arch/riscv/include/asm/smp.h | 19 +++++++++++++++++++ arch/riscv/kernel/clint.c | 23 ++++++++++++++++++++-- arch/riscv/kernel/sbi.c | 14 ++++++++++++++ arch/riscv/kernel/smp.c | 43 +++++++++++++++++++++++------------------- arch/riscv/kernel/smpboot.c | 3 +-- 6 files changed, 79 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h index a279b17a6aad..adaba98a7d6c 100644 --- a/arch/riscv/include/asm/clint.h +++ b/arch/riscv/include/asm/clint.h @@ -6,34 +6,9 @@ #include #ifdef CONFIG_RISCV_M_MODE -extern u32 __iomem *clint_ipi_base; - void clint_init_boot_cpu(void); - -static inline void clint_send_ipi_single(unsigned long hartid) -{ - writel(1, clint_ipi_base + hartid); -} - -static inline void clint_send_ipi_mask(const struct cpumask *mask) -{ - int cpu; - - for_each_cpu(cpu, mask) - clint_send_ipi_single(cpuid_to_hartid_map(cpu)); -} - -static inline void clint_clear_ipi(unsigned long hartid) -{ - writel(0, clint_ipi_base + hartid); -} #else /* CONFIG_RISCV_M_MODE */ #define clint_init_boot_cpu() do { } while (0) - -/* stubs to for code is only reachable under IS_ENABLED(CONFIG_RISCV_M_MODE): */ -void clint_send_ipi_single(unsigned long hartid); -void clint_send_ipi_mask(const struct cpumask *hartid_mask); -void clint_clear_ipi(unsigned long hartid); #endif /* CONFIG_RISCV_M_MODE */ #endif /* _ASM_RISCV_CLINT_H */ diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 6dfd2a1446d5..df1f7c4cd433 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -15,6 +15,11 @@ struct seq_file; extern unsigned long boot_cpu_hartid; +struct riscv_ipi_ops { + void (*ipi_inject)(const struct cpumask *target); + void (*ipi_clear)(void); +}; + #ifdef CONFIG_SMP /* * Mapping between linux logical cpu index and hartid. @@ -40,6 +45,12 @@ void arch_send_call_function_single_ipi(int cpu); int riscv_hartid_to_cpuid(int hartid); void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); +/* Set custom IPI operations */ +void riscv_set_ipi_ops(struct riscv_ipi_ops *ops); + +/* Clear IPI for current CPU */ +void riscv_clear_ipi(void); + /* Secondary hart entry */ asmlinkage void smp_callin(void); @@ -81,6 +92,14 @@ static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in, cpumask_set_cpu(boot_cpu_hartid, out); } +static inline void riscv_set_ipi_ops(struct riscv_ipi_ops *ops) +{ +} + +static inline void riscv_clear_ipi(void) +{ +} + #endif /* CONFIG_SMP */ #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c index 3647980d14c3..a9845ee023e2 100644 --- a/arch/riscv/kernel/clint.c +++ b/arch/riscv/kernel/clint.c @@ -5,11 +5,11 @@ #include #include +#include #include #include #include #include -#include /* * This is the layout used by the SiFive clint, which is also shared by the qemu @@ -21,6 +21,24 @@ u32 __iomem *clint_ipi_base; +static void clint_send_ipi(const struct cpumask *target) +{ + unsigned int cpu; + + for_each_cpu(cpu, target) + writel(1, clint_ipi_base + cpuid_to_hartid_map(cpu)); +} + +static void clint_clear_ipi(void) +{ + writel(0, clint_ipi_base + cpuid_to_hartid_map(smp_processor_id())); +} + +static struct riscv_ipi_ops clint_ipi_ops = { + .ipi_inject = clint_send_ipi, + .ipi_clear = clint_clear_ipi, +}; + void clint_init_boot_cpu(void) { struct device_node *np; @@ -40,5 +58,6 @@ void clint_init_boot_cpu(void) riscv_time_cmp = base + CLINT_TIME_CMP_OFF; riscv_time_val = base + CLINT_TIME_VAL_OFF; - clint_clear_ipi(boot_cpu_hartid); + clint_clear_ipi(); + riscv_set_ipi_ops(&clint_ipi_ops); } diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f383ef5672b2..226ccce0f9e0 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -547,6 +547,18 @@ static inline long sbi_get_firmware_version(void) return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_VERSION); } +static void sbi_send_cpumask_ipi(const struct cpumask *target) +{ + struct cpumask hartid_mask; + + riscv_cpuid_to_hartid_mask(target, &hartid_mask); + + sbi_send_ipi(cpumask_bits(&hartid_mask)); +} + +static struct riscv_ipi_ops sbi_ipi_ops = { + .ipi_inject = sbi_send_cpumask_ipi +}; int __init sbi_init(void) { @@ -587,5 +599,7 @@ int __init sbi_init(void) __sbi_rfence = __sbi_rfence_v01; } + riscv_set_ipi_ops(&sbi_ipi_ops); + return 0; } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 554b0fb47060..cf9acb43604c 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -86,9 +86,25 @@ static void ipi_stop(void) wait_for_interrupt(); } +static struct riscv_ipi_ops *ipi_ops; + +void riscv_set_ipi_ops(struct riscv_ipi_ops *ops) +{ + ipi_ops = ops; +} +EXPORT_SYMBOL_GPL(riscv_set_ipi_ops); + +void riscv_clear_ipi(void) +{ + if (ipi_ops && ipi_ops->ipi_clear) + ipi_ops->ipi_clear(); + + csr_clear(CSR_IP, IE_SIE); +} +EXPORT_SYMBOL_GPL(riscv_clear_ipi); + static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op) { - struct cpumask hartid_mask; int cpu; smp_mb__before_atomic(); @@ -96,33 +112,22 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op) set_bit(op, &ipi_data[cpu].bits); smp_mb__after_atomic(); - riscv_cpuid_to_hartid_mask(mask, &hartid_mask); - if (IS_ENABLED(CONFIG_RISCV_SBI)) - sbi_send_ipi(cpumask_bits(&hartid_mask)); + if (ipi_ops && ipi_ops->ipi_inject) + ipi_ops->ipi_inject(mask); else - clint_send_ipi_mask(mask); + pr_warn("SMP: IPI inject method not available\n"); } static void send_ipi_single(int cpu, enum ipi_message_type op) { - int hartid = cpuid_to_hartid_map(cpu); - smp_mb__before_atomic(); set_bit(op, &ipi_data[cpu].bits); smp_mb__after_atomic(); - if (IS_ENABLED(CONFIG_RISCV_SBI)) - sbi_send_ipi(cpumask_bits(cpumask_of(hartid))); - else - clint_send_ipi_single(hartid); -} - -static inline void clear_ipi(void) -{ - if (IS_ENABLED(CONFIG_RISCV_SBI)) - csr_clear(CSR_IP, IE_SIE); + if (ipi_ops && ipi_ops->ipi_inject) + ipi_ops->ipi_inject(cpumask_of(cpu)); else - clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id())); + pr_warn("SMP: IPI inject method not available\n"); } #ifdef CONFIG_IRQ_WORK @@ -140,7 +145,7 @@ void handle_IPI(struct pt_regs *regs) irq_enter(); - clear_ipi(); + riscv_clear_ipi(); while (true) { unsigned long ops; diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 356825a57551..12033d71ba0c 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -147,8 +147,7 @@ asmlinkage __visible void smp_callin(void) struct mm_struct *mm = &init_mm; unsigned int curr_cpuid = smp_processor_id(); - if (!IS_ENABLED(CONFIG_RISCV_SBI)) - clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id())); + riscv_clear_ipi(); /* All kernel threads share the same mm context. */ mmgrab(mm); -- cgit v1.2.3 From 2bc3fc877aa9c4c8b80cc49f66dfcb7e4857a128 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Aug 2020 18:12:50 +0530 Subject: RISC-V: Remove CLINT related code from timer and arch Right now the RISC-V timer driver is convoluted to support: 1. Linux RISC-V S-mode (with MMU) where it will use TIME CSR for clocksource and SBI timer calls for clockevent device. 2. Linux RISC-V M-mode (without MMU) where it will use CLINT MMIO counter register for clocksource and CLINT MMIO compare register for clockevent device. We now have a separate CLINT timer driver which also provide CLINT based IPI operations so let's remove CLINT MMIO related code from arch/riscv directory and RISC-V timer driver. Signed-off-by: Anup Patel Tested-by: Emil Renner Berhing Acked-by: Daniel Lezcano Reviewed-by: Atish Patra Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/Kconfig.socs | 2 ++ arch/riscv/configs/nommu_virt_defconfig | 7 ++-- arch/riscv/include/asm/clint.h | 14 -------- arch/riscv/include/asm/timex.h | 28 ++++----------- arch/riscv/kernel/Makefile | 2 +- arch/riscv/kernel/clint.c | 63 --------------------------------- arch/riscv/kernel/setup.c | 2 -- arch/riscv/kernel/smp.c | 1 - arch/riscv/kernel/smpboot.c | 1 - 10 files changed, 13 insertions(+), 109 deletions(-) delete mode 100644 arch/riscv/include/asm/clint.h delete mode 100644 arch/riscv/kernel/clint.c (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7b5905529146..df18372861d8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -81,7 +81,7 @@ config RISCV select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI select RISCV_INTC - select RISCV_TIMER + select RISCV_TIMER if RISCV_SBI select SPARSEMEM_STATIC if 32BIT select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 6c88148f1b9b..8a55f6156661 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -12,6 +12,7 @@ config SOC_SIFIVE config SOC_VIRT bool "QEMU Virt Machine" + select CLINT_TIMER if RISCV_M_MODE select POWER_RESET select POWER_RESET_SYSCON select POWER_RESET_SYSCON_POWEROFF @@ -24,6 +25,7 @@ config SOC_VIRT config SOC_KENDRYTE bool "Kendryte K210 SoC" depends on !MMU + select CLINT_TIMER if RISCV_M_MODE select SERIAL_SIFIVE if TTY select SERIAL_SIFIVE_CONSOLE if TTY select SIFIVE_PLIC diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index f27596e9663e..e046a0babde4 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -26,6 +26,7 @@ CONFIG_EXPERT=y CONFIG_SLOB=y # CONFIG_SLAB_MERGE_DEFAULT is not set # CONFIG_MMU is not set +CONFIG_SOC_VIRT=y CONFIG_MAXPHYSMEM_2GB=y CONFIG_SMP=y CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0" @@ -49,7 +50,6 @@ CONFIG_VIRTIO_BLK=y # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_LDISC_AUTOLOAD is not set -# CONFIG_DEVMEM is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y @@ -57,16 +57,13 @@ CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set +# CONFIG_DEVMEM is not set # CONFIG_HWMON is not set -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set # CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_SIFIVE_PLIC=y -# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT2_FS=y # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h deleted file mode 100644 index adaba98a7d6c..000000000000 --- a/arch/riscv/include/asm/clint.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_RISCV_CLINT_H -#define _ASM_RISCV_CLINT_H 1 - -#include -#include - -#ifdef CONFIG_RISCV_M_MODE -void clint_init_boot_cpu(void); -#else /* CONFIG_RISCV_M_MODE */ -#define clint_init_boot_cpu() do { } while (0) -#endif /* CONFIG_RISCV_M_MODE */ - -#endif /* _ASM_RISCV_CLINT_H */ diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index bad2a7c2cda5..a3fb85d505d4 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -7,41 +7,27 @@ #define _ASM_RISCV_TIMEX_H #include -#include typedef unsigned long cycles_t; -extern u64 __iomem *riscv_time_val; -extern u64 __iomem *riscv_time_cmp; - -#ifdef CONFIG_64BIT -#define mmio_get_cycles() readq_relaxed(riscv_time_val) -#else -#define mmio_get_cycles() readl_relaxed(riscv_time_val) -#define mmio_get_cycles_hi() readl_relaxed(((u32 *)riscv_time_val) + 1) -#endif - static inline cycles_t get_cycles(void) { - if (IS_ENABLED(CONFIG_RISCV_SBI)) - return csr_read(CSR_TIME); - return mmio_get_cycles(); + return csr_read(CSR_TIME); } #define get_cycles get_cycles +static inline u32 get_cycles_hi(void) +{ + return csr_read(CSR_TIMEH); +} +#define get_cycles_hi get_cycles_hi + #ifdef CONFIG_64BIT static inline u64 get_cycles64(void) { return get_cycles(); } #else /* CONFIG_64BIT */ -static inline u32 get_cycles_hi(void) -{ - if (IS_ENABLED(CONFIG_RISCV_SBI)) - return csr_read(CSR_TIMEH); - return mmio_get_cycles_hi(); -} - static inline u64 get_cycles64(void) { u32 hi, lo; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index a5287ab9f7f2..dc93710f0b2f 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -31,7 +31,7 @@ obj-y += cacheinfo.o obj-y += patch.o obj-$(CONFIG_MMU) += vdso.o vdso/ -obj-$(CONFIG_RISCV_M_MODE) += clint.o traps_misaligned.o +obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c deleted file mode 100644 index a9845ee023e2..000000000000 --- a/arch/riscv/kernel/clint.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2019 Christoph Hellwig. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * This is the layout used by the SiFive clint, which is also shared by the qemu - * virt platform, and the Kendryte KD210 at least. - */ -#define CLINT_IPI_OFF 0 -#define CLINT_TIME_CMP_OFF 0x4000 -#define CLINT_TIME_VAL_OFF 0xbff8 - -u32 __iomem *clint_ipi_base; - -static void clint_send_ipi(const struct cpumask *target) -{ - unsigned int cpu; - - for_each_cpu(cpu, target) - writel(1, clint_ipi_base + cpuid_to_hartid_map(cpu)); -} - -static void clint_clear_ipi(void) -{ - writel(0, clint_ipi_base + cpuid_to_hartid_map(smp_processor_id())); -} - -static struct riscv_ipi_ops clint_ipi_ops = { - .ipi_inject = clint_send_ipi, - .ipi_clear = clint_clear_ipi, -}; - -void clint_init_boot_cpu(void) -{ - struct device_node *np; - void __iomem *base; - - np = of_find_compatible_node(NULL, NULL, "riscv,clint0"); - if (!np) { - panic("clint not found"); - return; - } - - base = of_iomap(np, 0); - if (!base) - panic("could not map CLINT"); - - clint_ipi_base = base + CLINT_IPI_OFF; - riscv_time_cmp = base + CLINT_TIME_CMP_OFF; - riscv_time_val = base + CLINT_TIME_VAL_OFF; - - clint_clear_ipi(); - riscv_set_ipi_ops(&clint_ipi_ops); -} diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f04373be54a6..2c6dd329312b 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -79,7 +78,6 @@ void __init setup_arch(char **cmdline_p) #else unflatten_device_tree(); #endif - clint_init_boot_cpu(); #ifdef CONFIG_SWIOTLB swiotlb_init(1); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index cf9acb43604c..ea028d9e0d24 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 12033d71ba0c..96167d55ed98 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From fc26f5bbf19459930b7290c87b65a9ae6a274650 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 15 Jul 2020 21:39:53 -0700 Subject: riscv: Add SiFive drivers to rv32_defconfig This adds SiFive drivers to rv32_defconfig, to keep in sync with the 64-bit config. This is useful when testing 32-bit kernel with QEMU 'sifive_u' 32-bit machine. Signed-off-by: Bin Meng Reviewed-by: Anup Patel Reviewed-by: Alistair Francis Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/rv32_defconfig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 3a55f0e00d6c..2c2cda6cc1c5 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -14,6 +14,7 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_SOC_SIFIVE=y CONFIG_SOC_VIRT=y CONFIG_ARCH_RV32I=y CONFIG_SMP=y @@ -62,6 +63,8 @@ CONFIG_HVC_RISCV_SBI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_SPI=y +CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y CONFIG_DRM=y @@ -77,6 +80,8 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y +CONFIG_MMC=y +CONFIG_MMC_SPI=y CONFIG_RTC_CLASS=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y -- cgit v1.2.3 From dbfc95f98f0158958d1f1e6bf06d74be38dbd821 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 19 Aug 2020 11:26:44 -0700 Subject: MIPS: mm: BMIPS5000 has inclusive physical caches When the BMIPS generic cpu-feature-overrides.h file was introduced, cpu_has_inclusive_caches/MIPS_CPU_INCLUSIVE_CACHES was not set for BMIPS5000 CPUs. Correct this when we have initialized the MIPS secondary cache successfully. Fixes: f337967d6d87 ("MIPS: BMIPS: Add cpu-feature-overrides.h") Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/mm/c-r4k.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index fc5a6d25f74f..0ef717093262 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1712,7 +1712,11 @@ static void setup_scache(void) printk("MIPS secondary cache %ldkB, %s, linesize %d bytes.\n", scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + + if (current_cpu_type() == CPU_BMIPS5000) + c->options |= MIPS_CPU_INCLUSIVE_CACHES; } + #else if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT)) panic("Dunno how to handle MIPS32 / MIPS64 second level cache"); -- cgit v1.2.3 From e14f633b66902615cf7faa5d032b45ab8b6fb158 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 19 Aug 2020 11:26:45 -0700 Subject: MIPS: BMIPS: Also call bmips_cpu_setup() for secondary cores The initialization done by bmips_cpu_setup() typically affects both threads of a given core, on 7435 which supports 2 cores and 2 threads, logical CPU number 2 and 3 would not run this initialization. Fixes: 738a3f79027b ("MIPS: BMIPS: Add early CPU initialization code") Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/smp-bmips.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 2f513506a3d5..1dbfb5aadffd 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -239,6 +239,8 @@ static int bmips_boot_secondary(int cpu, struct task_struct *idle) */ static void bmips_init_secondary(void) { + bmips_cpu_setup(); + switch (current_cpu_type()) { case CPU_BMIPS4350: case CPU_BMIPS4380: -- cgit v1.2.3 From d49f7d7376d0c0daf8680984a37bd07581ac7d38 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 31 Jul 2020 18:38:23 +0100 Subject: arm64: Move handling of erratum 1418040 into C code Instead of dealing with erratum 1418040 on each entry and exit, let's move the handling to __switch_to() instead, which has several advantages: - It can be applied when it matters (switching between 32 and 64 bit tasks). - It is written in C (yay!) - It can rely on static keys rather than alternatives Signed-off-by: Marc Zyngier Tested-by: Sai Prakash Ranjan Reviewed-by: Stephen Boyd Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200731173824.107480-2-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 21 --------------------- arch/arm64/kernel/process.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2646178c8329..55af8b504b65 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -170,19 +170,6 @@ alternative_cb_end stp x28, x29, [sp, #16 * 14] .if \el == 0 - .if \regsize == 32 - /* - * If we're returning from a 32-bit task on a system affected by - * 1418040 then re-enable userspace access to the virtual counter. - */ -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if ARM64_WORKAROUND_1418040 - mrs x0, cntkctl_el1 - orr x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x0 -alternative_else_nop_endif -#endif - .endif clear_gp_regs mrs x21, sp_el0 ldr_this_cpu tsk, __entry_task, x20 @@ -294,14 +281,6 @@ alternative_else_nop_endif tst x22, #PSR_MODE32_BIT // native task? b.eq 3f -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if ARM64_WORKAROUND_1418040 - mrs x0, cntkctl_el1 - bic x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x0 -alternative_else_nop_endif -#endif - #ifdef CONFIG_ARM64_ERRATUM_845719 alternative_if ARM64_WORKAROUND_845719 #ifdef CONFIG_PID_IN_CONTEXTIDR diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 84ec630b8ab5..b63ce4c54cfe 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -515,6 +515,39 @@ static void entry_task_switch(struct task_struct *next) __this_cpu_write(__entry_task, next); } +/* + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT. + * Assuming the virtual counter is enabled at the beginning of times: + * + * - disable access when switching from a 64bit task to a 32bit task + * - enable access when switching from a 32bit task to a 64bit task + */ +static void erratum_1418040_thread_switch(struct task_struct *prev, + struct task_struct *next) +{ + bool prev32, next32; + u64 val; + + if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && + cpus_have_const_cap(ARM64_WORKAROUND_1418040))) + return; + + prev32 = is_compat_thread(task_thread_info(prev)); + next32 = is_compat_thread(task_thread_info(next)); + + if (prev32 == next32) + return; + + val = read_sysreg(cntkctl_el1); + + if (!next32) + val |= ARCH_TIMER_USR_VCT_ACCESS_EN; + else + val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN; + + write_sysreg(val, cntkctl_el1); +} + /* * Thread switching. */ @@ -530,6 +563,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ssbs_thread_switch(next); + erratum_1418040_thread_switch(prev, next); /* * Complete any pending TLB or cache maintenance on this CPU in case -- cgit v1.2.3 From bf87bb0881d0f59181fe3bbcf29c609f36483ff8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 31 Jul 2020 18:38:24 +0100 Subject: arm64: Allow booting of late CPUs affected by erratum 1418040 As we can now switch from a system that isn't affected by 1418040 to a system that globally is affected, let's allow affected CPUs to come in at a later time. Signed-off-by: Marc Zyngier Tested-by: Sai Prakash Ranjan Reviewed-by: Stephen Boyd Reviewed-by: Suzuki K Poulose Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200731173824.107480-3-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6bd1d3ad037a..c332d49780dc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -910,6 +910,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM erratum 1418040", .capability = ARM64_WORKAROUND_1418040, ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), + .type = (ARM64_CPUCAP_SCOPE_LOCAL_CPU | + ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU), }, #endif #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT -- cgit v1.2.3 From abf532cceaca9c21a148498091f87de1b8ae9b49 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 3 Aug 2020 13:31:25 -0600 Subject: KVM: arm64: Print warning when cpu erratum can cause guests to deadlock If guests don't have certain CPU erratum workarounds implemented, then there is a possibility a guest can deadlock the system. IOW, only trusted guests should be used on systems with the erratum. This is the case for Cortex-A57 erratum 832075. Signed-off-by: Rob Herring Acked-by: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: Julien Thierry Cc: Suzuki K Poulose Cc: Will Deacon Cc: kvmarm@lists.cs.columbia.edu Link: https://lore.kernel.org/r/20200803193127.3012242-2-robh@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kvm/arm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 691d21e4c717..46dc3d75cf13 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1640,6 +1640,10 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)) + kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ + "Only trusted guests should be used on this system.\n"); + for_each_online_cpu(cpu) { smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); if (ret < 0) { -- cgit v1.2.3 From 541cebb51f3422d4f2c6cb95c1e5cc3dcc9e5021 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Aug 2020 07:15:25 +0000 Subject: powerpc/32s: Fix module loading failure when VMALLOC_END is over 0xf0000000 In is_module_segment(), when VMALLOC_END is over 0xf0000000, ALIGN(VMALLOC_END, SZ_256M) has value 0. In that case, addr >= ALIGN(VMALLOC_END, SZ_256M) is always true then is_module_segment() always returns false. Use (ALIGN(VMALLOC_END, SZ_256M) - 1) which will have value 0xffffffff and will be suitable for the comparison. Fixes: c49643319715 ("powerpc/32s: Only leave NX unset on segments used for modules") Reported-by: Andreas Schwab Signed-off-by: Christophe Leroy Tested-by: Andreas Schwab Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/09fc73fe9c7423c6b4cf93f93df9bb0ed8eefab5.1597994047.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 82ae9e06a773..d426eaf76bb0 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -194,12 +194,12 @@ static bool is_module_segment(unsigned long addr) #ifdef MODULES_VADDR if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; - if (addr >= ALIGN(MODULES_END, SZ_256M)) + if (addr > ALIGN(MODULES_END, SZ_256M) - 1) return false; #else if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) return false; - if (addr >= ALIGN(VMALLOC_END, SZ_256M)) + if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) return false; #endif return true; -- cgit v1.2.3 From 64ef8f2c4791940d7f3945507b6a45c20d959260 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 21 Aug 2020 13:36:10 +0530 Subject: powerpc/perf/hv-24x7: Move cpumask file to top folder of hv-24x7 driver Commit 792f73f747b8 ("powerpc/hv-24x7: Add sysfs files inside hv-24x7 device to show cpumask") added cpumask file as part of hv-24x7 driver inside the interface folder. The cpumask file is supposed to be in the top folder of the PMU driver in order to make hotplug work. This patch fixes that issue and creates new group 'cpumask_attr_group' to add cpumask file and make sure it added in top folder. command:# cat /sys/devices/hv_24x7/cpumask 0 Fixes: 792f73f747b8 ("powerpc/hv-24x7: Add sysfs files inside hv-24x7 device to show cpumask") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200821080610.123997-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-24x7.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index cdb7bfbd157e..6e7e820508df 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1128,6 +1128,15 @@ static struct bin_attribute *if_bin_attrs[] = { NULL, }; +static struct attribute *cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = cpumask_attrs, +}; + static struct attribute *if_attrs[] = { &dev_attr_catalog_len.attr, &dev_attr_catalog_version.attr, @@ -1135,7 +1144,6 @@ static struct attribute *if_attrs[] = { &dev_attr_sockets.attr, &dev_attr_chipspersocket.attr, &dev_attr_coresperchip.attr, - &dev_attr_cpumask.attr, NULL, }; @@ -1151,6 +1159,7 @@ static const struct attribute_group *attr_groups[] = { &event_desc_group, &event_long_desc_group, &if_group, + &cpumask_attr_group, NULL, }; -- cgit v1.2.3 From 6a3ea3e68b8a8a26c4aaac03432ed92269c9a14e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Aug 2020 06:52:29 -0400 Subject: x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM KVM has an optmization to avoid expensive MRS read/writes on VMENTER/EXIT. It caches the MSR values and restores them either when leaving the run loop, on preemption or when going out to user space. The affected MSRs are not required for kernel context operations. This changed with the recently introduced mechanism to handle FSGSBASE in the paranoid entry code which has to retrieve the kernel GSBASE value by accessing per CPU memory. The mechanism needs to retrieve the CPU number and uses either LSL or RDPID if the processor supports it. Unfortunately RDPID uses MSR_TSC_AUX which is in the list of cached and lazily restored MSRs, which means between the point where the guest value is written and the point of restore, MSR_TSC_AUX contains a random number. If an NMI or any other exception which uses the paranoid entry path happens in such a context, then RDPID returns the random guest MSR_TSC_AUX value. As a consequence this reads from the wrong memory location to retrieve the kernel GSBASE value. Kernel GS is used to for all regular this_cpu_*() operations. If the GSBASE in the exception handler points to the per CPU memory of a different CPU then this has the obvious consequences of data corruption and crashes. As the paranoid entry path is the only place which accesses MSR_TSX_AUX (via RDPID) and the fallback via LSL is not significantly slower, remove the RDPID alternative from the entry path and always use LSL. The alternative would be to write MSR_TSC_AUX on every VMENTER and VMEXIT which would be inflicting massive overhead on that code path. [ tglx: Rewrote changelog ] Fixes: eaad981291ee3 ("x86/entry/64: Introduce the FIND_PERCPU_BASE macro") Reported-by: Tom Lendacky Debugged-by: Tom Lendacky Suggested-by: Andy Lutomirski Suggested-by: Peter Zijlstra Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200821105229.18938-1-pbonzini@redhat.com --- arch/x86/entry/calling.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 98e4d8886f11..ae9b0d4615b3 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel is built with * Fetch the per-CPU GSBASE value for this processor and put it in @reg. * We normally use %gs for accessing per-CPU data, but we are setting up * %gs here and obviously can not use %gs itself to access per-CPU data. + * + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and + * may not restore the host's value until the CPU returns to userspace. + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives + * while running KVM's run loop. */ .macro GET_PERCPU_BASE reg:req - ALTERNATIVE \ - "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \ - "RDPID \reg", \ - X86_FEATURE_RDPID + LOAD_CPU_AND_NODE_SEG_LIMIT \reg andq $VDSO_CPUNODE_MASK, \reg movq __per_cpu_offset(, \reg, 8), \reg .endm -- cgit v1.2.3 From 8d75785a814241587802655cc33e384230744f0c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 17 Aug 2020 18:49:50 -0700 Subject: ARM64: vdso32: Install vdso32 from vdso_install Add the 32-bit vdso Makefile to the vdso_install rule so that 'make vdso_install' installs the 32-bit compat vdso when it is compiled. Fixes: a7f71a2c8903 ("arm64: compat: Add vDSO") Signed-off-by: Stephen Boyd Reviewed-by: Vincenzo Frascino Acked-by: Will Deacon Cc: Vincenzo Frascino Link: https://lore.kernel.org/r/20200818014950.42492-1-swboyd@chromium.org Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 1 + arch/arm64/kernel/vdso32/Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 55bc8546d9c7..b45f0124cc16 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -165,6 +165,7 @@ zinstall install: PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@ # We use MRPROPER_FILES and CLEAN_FILES now archclean: diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 5139a5f19256..d6adb4677c25 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -208,7 +208,7 @@ quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ # Install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ +quiet_cmd_vdso_install = INSTALL32 $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so vdso.so: $(obj)/vdso.so.dbg -- cgit v1.2.3 From fdfe7cbd58806522e799e2a50a15aee7f2cbb7b6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 11 Aug 2020 11:27:24 +0100 Subject: KVM: Pass MMU notifier range flags to kvm_unmap_hva_range() The 'flags' field of 'struct mmu_notifier_range' is used to indicate whether invalidate_range_{start,end}() are permitted to block. In the case of kvm_mmu_notifier_invalidate_range_start(), this field is not forwarded on to the architecture-specific implementation of kvm_unmap_hva_range() and therefore the backend cannot sensibly decide whether or not to block. Add an extra 'flags' parameter to kvm_unmap_hva_range() so that architectures are aware as to whether or not they are permitted to block. Cc: Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: James Morse Signed-off-by: Will Deacon Message-Id: <20200811102725.7121-2-will@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/mmu.c | 2 +- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/mmu.c | 3 ++- arch/powerpc/include/asm/kvm_host.h | 3 ++- arch/powerpc/kvm/book3s.c | 3 ++- arch/powerpc/kvm/e500_mmu_host.c | 3 ++- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/mmu/mmu.c | 3 ++- 9 files changed, 15 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 65568b23868a..e52c927aade5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -473,7 +473,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0121ef2c7c8d..dc351802ff18 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2213,7 +2213,7 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat } int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, unsigned flags) { if (!kvm->arch.mmu.pgd) return 0; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index d35eaed1668f..825d337a505a 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -969,7 +969,7 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 87fa8d8a1031..28c366d307e7 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -486,7 +486,8 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, return 1; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e020d269416d..10ded83414de 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -58,7 +58,8 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER extern int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, + unsigned flags); extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 41fedec69ac3..49db50d1db04 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -834,7 +834,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change); } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index d6c1069e9954..ed0c9c43d0cf 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -734,7 +734,8 @@ static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) return 0; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { /* kvm_unmap_hva flushes everything anyways */ kvm_unmap_hva(kvm, start); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5ab3af7275d8..5303dbc5c9bc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1596,7 +1596,8 @@ asmlinkage void kvm_spurious_fault(void); _ASM_EXTABLE(666b, 667b) #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4e03841f053d..a5d0207e7189 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1916,7 +1916,8 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); } -- cgit v1.2.3 From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 11 Aug 2020 11:27:25 +0100 Subject: KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is not set When an MMU notifier call results in unmapping a range that spans multiple PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary, since this avoids running into RCU stalls during VM teardown. Unfortunately, if the VM is destroyed as a result of OOM, then blocking is not permitted and the call to the scheduler triggers the following BUG(): | BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394 | in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper | INFO: lockdep is turned off. | CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1 | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 | Call trace: | dump_backtrace+0x0/0x284 | show_stack+0x1c/0x28 | dump_stack+0xf0/0x1a4 | ___might_sleep+0x2bc/0x2cc | unmap_stage2_range+0x160/0x1ac | kvm_unmap_hva_range+0x1a0/0x1c8 | kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8 | __mmu_notifier_invalidate_range_start+0x218/0x31c | mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0 | __oom_reap_task_mm+0x128/0x268 | oom_reap_task+0xac/0x298 | oom_reaper+0x178/0x17c | kthread+0x1e4/0x1fc | ret_from_fork+0x10/0x30 Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier flags. Cc: Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd") Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: James Morse Signed-off-by: Will Deacon Message-Id: <20200811102725.7121-3-will@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/mmu.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index dc351802ff18..ba00bcc0c884 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, * destroying the VM), otherwise another faulting VCPU may come in and mess * with things behind our backs. */ -static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) +static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size, + bool may_block) { struct kvm *kvm = mmu->kvm; pgd_t *pgd; @@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si * If the range is too large, release the kvm->mmu_lock * to prevent starvation and lockup detector warnings. */ - if (next != end) + if (may_block && next != end) cond_resched_lock(&kvm->mmu_lock); } while (pgd++, addr = next, addr != end); } +static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) +{ + __unmap_stage2_range(mmu, start, size, true); +} + static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { @@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm, static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { - unmap_stage2_range(&kvm->arch.mmu, gpa, size); + unsigned flags = *(unsigned *)data; + bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE; + + __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block); return 0; } @@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, return 0; trace_kvm_unmap_hva_range(start, end); - handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); + handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags); return 0; } -- cgit v1.2.3 From ed0ec1a81faba7572646ceb515ea8cbf9625bbac Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 19 Aug 2020 14:07:22 +0800 Subject: MIPS: Loongson64: Remove unnecessary inclusion of boot_param.h The couple of #includes are unused by now; remove to prevent namespace pollution. This fixes e.g. build of dm_thin, which has a VIRTUAL symbol that conflicted with the newly-introduced one in mach-loongson64/boot_param.h. Fixes: 39c1485c8baa ("MIPS: KVM: Add kvm guest support for Loongson-3") Signed-off-by: WANG Xuerui Reviewed-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mach-loongson64/irq.h | 2 -- arch/mips/include/asm/mach-loongson64/mmzone.h | 1 - 2 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index f5e362f79701..bf2480923154 100644 --- a/arch/mips/include/asm/mach-loongson64/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -2,8 +2,6 @@ #ifndef __ASM_MACH_LOONGSON64_IRQ_H_ #define __ASM_MACH_LOONGSON64_IRQ_H_ -#include - /* cpu core interrupt numbers */ #define NR_IRQS_LEGACY 16 #define NR_MIPS_CPU_IRQS 8 diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index 3a25dbd3b3e9..5eaca4fe3f92 100644 --- a/arch/mips/include/asm/mach-loongson64/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h @@ -9,7 +9,6 @@ #ifndef _ASM_MACH_LOONGSON64_MMZONE_H #define _ASM_MACH_LOONGSON64_MMZONE_H -#include #define NODE_ADDRSPACE_SHIFT 44 #define NODE0_ADDRSPACE_OFFSET 0x000000000000UL #define NODE1_ADDRSPACE_OFFSET 0x100000000000UL -- cgit v1.2.3 From 91dbd73a1739039fa7e9fe5c0169f2817a7f7670 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Thu, 20 Aug 2020 20:54:40 +0800 Subject: mips/oprofile: Fix fallthrough placement We want neither " include/linux/compiler_attributes.h:201:41: warning: statement will never be executed [-Wswitch-unreachable] 201 | # define fallthrough __attribute__((__fallthrough__)) | ^~~~~~~~~~~~~ " nor " include/linux/compiler_attributes.h:201:41: warning: attribute 'fallthrough' not preceding a case label or default label 201 | # define fallthrough __attribute__((__fallthrough__)) | ^~~~~~~~~~~~~ " It's not worth adding one more macro. Let's simply place the fallthrough in between the expansions. Fixes: c9b029903466 ("MIPS: Use fallthrough for arch/mips") Cc: stable@vger.kernel.org Signed-off-by: He Zhe Reviewed-by: Gustavo A. R. Silva Signed-off-by: Thomas Bogendoerfer --- arch/mips/oprofile/op_model_mipsxx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index 1493c49ca47a..55d7b7fd18b6 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -245,7 +245,6 @@ static int mipsxx_perfcount_handler(void) switch (counters) { #define HANDLE_COUNTER(n) \ - fallthrough; \ case n + 1: \ control = r_c0_perfctrl ## n(); \ counter = r_c0_perfcntr ## n(); \ @@ -256,8 +255,11 @@ static int mipsxx_perfcount_handler(void) handled = IRQ_HANDLED; \ } HANDLE_COUNTER(3) + fallthrough; HANDLE_COUNTER(2) + fallthrough; HANDLE_COUNTER(1) + fallthrough; HANDLE_COUNTER(0) } -- cgit v1.2.3 From df561f6688fef775baa341a0f5d960becd248b11 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 23 Aug 2020 17:36:59 -0500 Subject: treewide: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva --- arch/alpha/kernel/module.c | 2 +- arch/alpha/kernel/signal.c | 2 +- arch/alpha/kernel/traps.c | 4 +- arch/arc/kernel/disasm.c | 2 +- arch/arc/kernel/signal.c | 2 +- arch/arc/kernel/unwind.c | 6 +-- arch/arm/kernel/hw_breakpoint.c | 10 ++--- arch/arm/kernel/signal.c | 2 +- arch/arm/mach-ep93xx/crunch.c | 2 +- arch/arm/mach-mmp/pm-mmp2.c | 8 ++-- arch/arm/mach-mmp/pm-pxa910.c | 10 ++--- arch/arm/mach-omap2/id.c | 8 ---- arch/arm/mach-omap2/omap_device.c | 2 +- arch/arm/mach-orion5x/dns323-setup.c | 2 +- arch/arm/mach-rpc/riscpc.c | 2 +- arch/arm/mach-tegra/reset.c | 2 +- arch/arm/mm/alignment.c | 4 +- arch/arm/mm/proc-v7-bugs.c | 2 +- arch/arm/plat-omap/dma.c | 6 +-- arch/arm/probes/decode.c | 2 +- arch/arm/probes/kprobes/core.c | 2 +- arch/arm64/kernel/acpi.c | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/cpuinfo.c | 2 +- arch/arm64/kernel/hw_breakpoint.c | 6 +-- arch/arm64/kernel/module.c | 8 ++-- arch/arm64/kernel/smp.c | 2 +- arch/arm64/kvm/handle_exit.c | 2 +- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 60 ++++++++++++++--------------- arch/arm64/kvm/hyp/vgic-v3-sr.c | 16 ++++---- arch/arm64/mm/context.c | 2 +- arch/c6x/kernel/signal.c | 4 +- arch/csky/kernel/signal.c | 2 +- arch/h8300/kernel/signal.c | 2 +- arch/hexagon/kernel/module.c | 2 +- arch/hexagon/kernel/signal.c | 2 +- arch/ia64/kernel/crash.c | 2 +- arch/ia64/kernel/module.c | 2 +- arch/ia64/kernel/perfmon.c | 2 +- arch/ia64/kernel/signal.c | 2 +- arch/ia64/kernel/unaligned.c | 6 +-- arch/ia64/kernel/unwind.c | 2 +- arch/m68k/atari/atakeyb.c | 2 +- arch/m68k/kernel/signal.c | 2 +- arch/m68k/mac/config.c | 2 +- arch/m68k/mac/via.c | 2 +- arch/m68k/mm/fault.c | 2 +- arch/microblaze/kernel/signal.c | 2 +- arch/mips/include/asm/unroll.h | 64 +++++++++++++++---------------- arch/nds32/kernel/fpu.c | 12 +++--- arch/nds32/kernel/signal.c | 4 +- arch/openrisc/kernel/signal.c | 2 +- arch/parisc/kernel/signal.c | 2 +- arch/parisc/kernel/traps.c | 11 +++--- arch/parisc/mm/fault.c | 4 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- arch/riscv/kernel/signal.c | 2 +- arch/riscv/net/bpf_jit_comp32.c | 4 +- arch/sh/drivers/platform_early.c | 2 +- arch/sh/kernel/disassemble.c | 4 +- arch/sh/kernel/kgdb.c | 2 +- arch/sh/kernel/signal_32.c | 2 +- arch/sparc/kernel/auxio_64.c | 1 - arch/sparc/kernel/central.c | 2 +- arch/sparc/kernel/kgdb_32.c | 2 +- arch/sparc/kernel/kgdb_64.c | 2 +- arch/sparc/kernel/pcr.c | 2 +- arch/sparc/kernel/prom_32.c | 2 +- arch/sparc/kernel/signal32.c | 4 +- arch/sparc/kernel/signal_32.c | 4 +- arch/sparc/kernel/signal_64.c | 4 +- arch/sparc/math-emu/math_32.c | 8 ++-- arch/sparc/net/bpf_jit_comp_32.c | 2 +- arch/um/kernel/signal.c | 2 +- arch/x86/boot/cmdline.c | 4 +- arch/x86/boot/compressed/kaslr.c | 2 +- arch/x86/events/intel/core.c | 6 +-- arch/x86/events/intel/lbr.c | 2 +- arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/apic/io_apic.c | 4 +- arch/x86/kernel/apic/probe_32.c | 2 +- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/mce/inject.c | 2 +- arch/x86/kernel/cpu/mce/intel.c | 2 +- arch/x86/kernel/cpu/mtrr/cyrix.c | 2 +- arch/x86/kernel/hw_breakpoint.c | 2 +- arch/x86/kernel/kgdb.c | 4 +- arch/x86/kernel/mpparse.c | 4 +- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/signal.c | 2 +- arch/x86/kernel/uprobes.c | 4 +- arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/hyperv.c | 2 +- arch/x86/kvm/irq_comm.c | 2 +- arch/x86/kvm/lapic.c | 6 +-- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 12 +++--- arch/x86/kvm/x86.c | 11 ++---- arch/x86/lib/cmdline.c | 8 ++-- arch/x86/lib/insn-eval.c | 6 +-- arch/x86/math-emu/errors.c | 2 +- arch/x86/math-emu/fpu_trig.c | 2 +- arch/x86/mm/ioremap.c | 2 +- arch/xtensa/kernel/signal.c | 2 +- 106 files changed, 236 insertions(+), 251 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index ac110ae8f978..5b60c248de9e 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -212,7 +212,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, STO_ALPHA_STD_GPLOAD) /* Omit the prologue. */ value += 8; - /* FALLTHRU */ + fallthrough; case R_ALPHA_BRADDR: value -= (u64)location + 4; if (value & 3) diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index a813020d2f11..15bc9d1e79f4 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -453,7 +453,7 @@ syscall_restart(unsigned long r0, unsigned long r19, regs->r0 = EINTR; break; } - /* fallthrough */ + fallthrough; case ERESTARTNOINTR: regs->r0 = r0; /* reset v0 and a3 and replay syscall */ regs->r19 = r19; diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 49754e07e04f..921d4b6e4d95 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -883,7 +883,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, case 0x26: /* sts */ fake_reg = s_reg_to_mem(alpha_read_fp_reg(reg)); - /* FALLTHRU */ + fallthrough; case 0x2c: /* stl */ __asm__ __volatile__( @@ -911,7 +911,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, case 0x27: /* stt */ fake_reg = alpha_read_fp_reg(reg); - /* FALLTHRU */ + fallthrough; case 0x2d: /* stq */ __asm__ __volatile__( diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c index d04837d91b40..03f8b1be0c3a 100644 --- a/arch/arc/kernel/disasm.c +++ b/arch/arc/kernel/disasm.c @@ -339,7 +339,7 @@ void __kprobes disasm_instr(unsigned long addr, struct disasm_state *state, case op_LDWX_S: /* LDWX_S c, [b, u6] */ state->x = 1; - /* intentional fall-through */ + fallthrough; case op_LDW_S: /* LDW_S c, [b, u6] */ state->zz = 2; diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 3d57ed0d8535..8222f8c54690 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -321,7 +321,7 @@ static void arc_restart_syscall(struct k_sigaction *ka, struct pt_regs *regs) regs->r0 = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: /* diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index f87758a6851b..74ad4256022e 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -572,7 +572,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end, #else BUILD_BUG_ON(sizeof(u32) != sizeof(value)); #endif - /* Fall through */ + fallthrough; case DW_EH_PE_native: if (end < (const void *)(ptr.pul + 1)) return 0; @@ -827,7 +827,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, case DW_CFA_def_cfa: state->cfa.reg = get_uleb128(&ptr.p8, end); unw_debug("cfa_def_cfa: r%lu ", state->cfa.reg); - /* fall through */ + fallthrough; case DW_CFA_def_cfa_offset: state->cfa.offs = get_uleb128(&ptr.p8, end); unw_debug("cfa_def_cfa_offset: 0x%lx ", @@ -835,7 +835,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, break; case DW_CFA_def_cfa_sf: state->cfa.reg = get_uleb128(&ptr.p8, end); - /* fall through */ + fallthrough; case DW_CFA_def_cfa_offset_sf: state->cfa.offs = get_sleb128(&ptr.p8, end) * state->dataAlign; diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 7fff88e61252..7a4853b1213a 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -547,7 +547,7 @@ static int arch_build_bp_info(struct perf_event *bp, if ((hw->ctrl.type != ARM_BREAKPOINT_EXECUTE) && max_watchpoint_len >= 8) break; - /* Else, fall through */ + fallthrough; default: return -EINVAL; } @@ -612,12 +612,12 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, /* Allow halfword watchpoints and breakpoints. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; - /* Else, fall through */ + fallthrough; case 3: /* Allow single byte watchpoint. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; - /* Else, fall through */ + fallthrough; default: ret = -EINVAL; goto out; @@ -884,7 +884,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, break; case ARM_ENTRY_ASYNC_WATCHPOINT: WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n"); - /* Fall through */ + fallthrough; case ARM_ENTRY_SYNC_WATCHPOINT: watchpoint_handler(addr, fsr, regs); break; @@ -933,7 +933,7 @@ static bool core_has_os_save_restore(void) ARM_DBG_READ(c1, c1, 4, oslsr); if (oslsr & ARM_OSLSR_OSLM0) return true; - /* Else, fall through */ + fallthrough; default: return false; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index c9dc912b83f0..c1892f733f20 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -596,7 +596,7 @@ static int do_signal(struct pt_regs *regs, int syscall) switch (retval) { case -ERESTART_RESTARTBLOCK: restart -= 2; - /* Fall through */ + fallthrough; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: diff --git a/arch/arm/mach-ep93xx/crunch.c b/arch/arm/mach-ep93xx/crunch.c index 1c05c5bf7e5c..757032d82f63 100644 --- a/arch/arm/mach-ep93xx/crunch.c +++ b/arch/arm/mach-ep93xx/crunch.c @@ -49,7 +49,7 @@ static int crunch_do(struct notifier_block *self, unsigned long cmd, void *t) * FALLTHROUGH: Ensure we don't try to overwrite our newly * initialised state information on the first fault. */ - /* Fall through */ + fallthrough; case THREAD_NOTIFY_EXIT: crunch_task_release(thread); diff --git a/arch/arm/mach-mmp/pm-mmp2.c b/arch/arm/mach-mmp/pm-mmp2.c index 2d86381e152d..7a6f74c32d42 100644 --- a/arch/arm/mach-mmp/pm-mmp2.c +++ b/arch/arm/mach-mmp/pm-mmp2.c @@ -123,19 +123,19 @@ void mmp2_pm_enter_lowpower_mode(int state) case POWER_MODE_SYS_SLEEP: apcr |= MPMU_PCR_PJ_SLPEN; /* set the SLPEN bit */ apcr |= MPMU_PCR_PJ_VCTCXOSD; /* set VCTCXOSD */ - /* fall through */ + fallthrough; case POWER_MODE_CHIP_SLEEP: apcr |= MPMU_PCR_PJ_SLPEN; - /* fall through */ + fallthrough; case POWER_MODE_APPS_SLEEP: apcr |= MPMU_PCR_PJ_APBSD; /* set APBSD */ - /* fall through */ + fallthrough; case POWER_MODE_APPS_IDLE: apcr |= MPMU_PCR_PJ_AXISD; /* set AXISDD bit */ apcr |= MPMU_PCR_PJ_DDRCORSD; /* set DDRCORSD bit */ idle_cfg |= APMU_PJ_IDLE_CFG_PJ_PWRDWN; /* PJ power down */ apcr |= MPMU_PCR_PJ_SPSD; - /* fall through */ + fallthrough; case POWER_MODE_CORE_EXTIDLE: idle_cfg |= APMU_PJ_IDLE_CFG_PJ_IDLE; /* set the IDLE bit */ idle_cfg &= ~APMU_PJ_IDLE_CFG_ISO_MODE_CNTRL_MASK; diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c index 69ebe18ff209..1d71d73c1862 100644 --- a/arch/arm/mach-mmp/pm-pxa910.c +++ b/arch/arm/mach-mmp/pm-pxa910.c @@ -145,23 +145,23 @@ void pxa910_pm_enter_lowpower_mode(int state) case POWER_MODE_UDR: /* only shutdown APB in UDR */ apcr |= MPMU_APCR_STBYEN | MPMU_APCR_APBSD; - /* fall through */ + fallthrough; case POWER_MODE_SYS_SLEEP: apcr |= MPMU_APCR_SLPEN; /* set the SLPEN bit */ apcr |= MPMU_APCR_VCTCXOSD; /* set VCTCXOSD */ - /* fall through */ + fallthrough; case POWER_MODE_APPS_SLEEP: apcr |= MPMU_APCR_DDRCORSD; /* set DDRCORSD */ - /* fall through */ + fallthrough; case POWER_MODE_APPS_IDLE: apcr |= MPMU_APCR_AXISD; /* set AXISDD bit */ - /* fall through */ + fallthrough; case POWER_MODE_CORE_EXTIDLE: idle_cfg |= APMU_MOH_IDLE_CFG_MOH_IDLE; idle_cfg |= APMU_MOH_IDLE_CFG_MOH_PWRDWN; idle_cfg |= APMU_MOH_IDLE_CFG_MOH_PWR_SW(3) | APMU_MOH_IDLE_CFG_MOH_L2_PWR_SW(3); - /* fall through */ + fallthrough; case POWER_MODE_CORE_INTIDLE: break; } diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 1d119b974f5f..59755b5a1ad7 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -396,7 +396,6 @@ void __init omap3xxx_check_revision(void) cpu_rev = "3.1"; break; case 7: - /* FALLTHROUGH */ default: /* Use the latest known revision as default */ omap_revision = OMAP3430_REV_ES3_1_2; @@ -416,7 +415,6 @@ void __init omap3xxx_check_revision(void) cpu_rev = "1.0"; break; case 1: - /* FALLTHROUGH */ default: omap_revision = AM35XX_REV_ES1_1; cpu_rev = "1.1"; @@ -435,7 +433,6 @@ void __init omap3xxx_check_revision(void) cpu_rev = "1.1"; break; case 2: - /* FALLTHROUGH */ default: omap_revision = OMAP3630_REV_ES1_2; cpu_rev = "1.2"; @@ -456,7 +453,6 @@ void __init omap3xxx_check_revision(void) cpu_rev = "2.0"; break; case 3: - /* FALLTHROUGH */ default: omap_revision = TI8168_REV_ES2_1; cpu_rev = "2.1"; @@ -473,7 +469,6 @@ void __init omap3xxx_check_revision(void) cpu_rev = "2.0"; break; case 2: - /* FALLTHROUGH */ default: omap_revision = AM335X_REV_ES2_1; cpu_rev = "2.1"; @@ -491,7 +486,6 @@ void __init omap3xxx_check_revision(void) cpu_rev = "1.1"; break; case 2: - /* FALLTHROUGH */ default: omap_revision = AM437X_REV_ES1_2; cpu_rev = "1.2"; @@ -502,7 +496,6 @@ void __init omap3xxx_check_revision(void) case 0xb968: switch (rev) { case 0: - /* FALLTHROUGH */ case 1: omap_revision = TI8148_REV_ES1_0; cpu_rev = "1.0"; @@ -512,7 +505,6 @@ void __init omap3xxx_check_revision(void) cpu_rev = "2.0"; break; case 3: - /* FALLTHROUGH */ default: omap_revision = TI8148_REV_ES2_1; cpu_rev = "2.1"; diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index 6b4548f3b57f..fc7bb2ca1672 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -240,7 +240,7 @@ static int _omap_device_notifier_call(struct notifier_block *nb, if (pdev->dev.of_node) omap_device_build_from_dt(pdev); omap_auxdata_legacy_init(dev); - /* fall through */ + fallthrough; default: od = to_omap_device(pdev); if (od) diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c index d13344b2ddcd..87cb47220e82 100644 --- a/arch/arm/mach-orion5x/dns323-setup.c +++ b/arch/arm/mach-orion5x/dns323-setup.c @@ -624,7 +624,7 @@ static void __init dns323_init(void) dns323ab_leds[0].active_low = 1; gpio_request(DNS323_GPIO_LED_POWER1, "Power Led Enable"); gpio_direction_output(DNS323_GPIO_LED_POWER1, 0); - /* Fall through */ + fallthrough; case DNS323_REV_B1: i2c_register_board_info(0, dns323ab_i2c_devices, ARRAY_SIZE(dns323ab_i2c_devices)); diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index ea2c84214bac..d23970bd638d 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -46,7 +46,7 @@ static int __init parse_tag_acorn(const struct tag *tag) switch (tag->u.acorn.vram_pages) { case 512: vram_size += PAGE_SIZE * 256; - /* Fall through - ??? */ + fallthrough; /* ??? */ case 256: vram_size += PAGE_SIZE * 256; default: diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 76a65df42d10..d5c805adf7a8 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -70,7 +70,7 @@ static void __init tegra_cpu_reset_handler_enable(void) switch (err) { case -ENOSYS: tegra_cpu_reset_handler_set(reset_address); - /* fall through */ + fallthrough; case 0: is_enabled = true; break; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index f4bfc1cac91a..ea81e89e7740 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -694,7 +694,7 @@ thumb2arm(u16 tinstr) return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] | (tinstr & 255); /* register_list */ } - /* Else, fall through - for illegal instruction case */ + fallthrough; /* for illegal instruction case */ default: return BAD_INSTR; @@ -750,7 +750,7 @@ do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs, case 0xe8e0: case 0xe9e0: poffset->un = (tinst2 & 0xff) << 2; - /* Fall through */ + fallthrough; case 0xe940: case 0xe9c0: diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index c0fbfca5da8b..114c05ab4dd9 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -71,7 +71,7 @@ static void cpu_v7_spectre_init(void) /* Other ARM CPUs require no workaround */ if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) break; - /* fallthrough */ + fallthrough; /* Cortex A57/A72 require firmware workaround */ case ARM_CPU_PART_CORTEX_A57: case ARM_CPU_PART_CORTEX_A72: { diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index b2e9e822426f..1eb59003bdec 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -309,14 +309,14 @@ void omap_set_dma_src_burst_mode(int lch, enum omap_dma_burst_mode burst_mode) * not supported by current hardware on OMAP1 * w |= (0x03 << 7); */ - /* fall through */ + fallthrough; case OMAP_DMA_DATA_BURST_16: if (dma_omap2plus()) { burst = 0x3; break; } /* OMAP1 don't support burst 16 */ - /* fall through */ + fallthrough; default: BUG(); } @@ -393,7 +393,7 @@ void omap_set_dma_dest_burst_mode(int lch, enum omap_dma_burst_mode burst_mode) break; } /* OMAP1 don't support burst 16 */ - /* fall through */ + fallthrough; default: printk(KERN_ERR "Invalid DMA burst mode\n"); BUG(); diff --git a/arch/arm/probes/decode.c b/arch/arm/probes/decode.c index fe81a9c21f2d..c84053a81358 100644 --- a/arch/arm/probes/decode.c +++ b/arch/arm/probes/decode.c @@ -307,7 +307,7 @@ static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify) case REG_TYPE_NOPCWB: if (!is_writeback(insn)) break; /* No writeback, so any register is OK */ - /* fall through... */ + fallthrough; case REG_TYPE_NOPC: case REG_TYPE_NOPCX: /* Reject PC (R15) */ diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 90b5bc723c83..feefa2055eba 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -280,7 +280,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) /* A nested probe was hit in FIQ, it is a BUG */ pr_warn("Unrecoverable kprobe detected.\n"); dump_kprobe(p); - /* fall through */ + fallthrough; default: /* impossible cases */ BUG(); diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 455966401102..a85174d05473 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -322,7 +322,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) */ if (memblock_is_map_memory(phys)) return (void __iomem *)__phys_to_virt(phys); - /* fall through */ + fallthrough; default: if (region->attribute & EFI_MEMORY_WB) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a389b999482e..6424584be01e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -686,7 +686,7 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, case FTR_HIGHER_OR_ZERO_SAFE: if (!cur || !new) break; - /* Fallthrough */ + fallthrough; case FTR_HIGHER_SAFE: ret = new > cur ? new : cur; break; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 393c6fb1f1cb..1886a02c3f50 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -327,7 +327,7 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) set_bit(ICACHEF_VPIPT, &__icache_flags); break; default: - /* Fallthrough */ + fallthrough; case ICACHE_POLICY_VIPT: /* Assume aliasing */ set_bit(ICACHEF_ALIASING, &__icache_flags); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index af234a1e08b7..712e97c03e54 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -257,7 +257,7 @@ static int hw_breakpoint_control(struct perf_event *bp, * level. */ enable_debug_monitors(dbg_el); - /* Fall through */ + fallthrough; case HW_BREAKPOINT_RESTORE: /* Setup the address register. */ write_wb_reg(val_reg, i, info->address); @@ -541,13 +541,13 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; - /* Fallthrough */ + fallthrough; case 3: /* Allow single byte watchpoint. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; - /* Fallthrough */ + fallthrough; default: return -EINVAL; } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 1cd1a4d0ed30..2a1ad95d9b2c 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -315,21 +315,21 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* MOVW instruction relocations. */ case R_AARCH64_MOVW_UABS_G0_NC: overflow_check = false; - /* Fall through */ + fallthrough; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; - /* Fall through */ + fallthrough; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; - /* Fall through */ + fallthrough; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, AARCH64_INSN_IMM_MOVKZ); @@ -397,7 +397,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; - /* Fall through */ + fallthrough; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_adrp(me, sechdrs, loc, val); if (ovf && ovf != -ERANGE) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 03957a1ae6c0..355ee9eed4dd 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -151,7 +151,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) break; } pr_crit("CPU%u: may not have shut down cleanly\n", cpu); - /* Fall through */ + fallthrough; case CPU_STUCK_IN_KERNEL: pr_crit("CPU%u: is stuck in kernel\n", cpu); if (status & CPU_STUCK_REASON_52_BIT_VA) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fe6c7d79309d..5d690d60ccad 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -128,7 +128,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) switch (ESR_ELx_EC(esr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; - /* fall through */ + fallthrough; case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_BREAKPT_LOW: case ESR_ELx_EC_BKPT32: diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 0297dc63988c..5e28ea6aa097 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -21,70 +21,70 @@ #define save_debug(ptr,reg,nr) \ switch (nr) { \ case 15: ptr[15] = read_debug(reg, 15); \ - /* Fall through */ \ + fallthrough; \ case 14: ptr[14] = read_debug(reg, 14); \ - /* Fall through */ \ + fallthrough; \ case 13: ptr[13] = read_debug(reg, 13); \ - /* Fall through */ \ + fallthrough; \ case 12: ptr[12] = read_debug(reg, 12); \ - /* Fall through */ \ + fallthrough; \ case 11: ptr[11] = read_debug(reg, 11); \ - /* Fall through */ \ + fallthrough; \ case 10: ptr[10] = read_debug(reg, 10); \ - /* Fall through */ \ + fallthrough; \ case 9: ptr[9] = read_debug(reg, 9); \ - /* Fall through */ \ + fallthrough; \ case 8: ptr[8] = read_debug(reg, 8); \ - /* Fall through */ \ + fallthrough; \ case 7: ptr[7] = read_debug(reg, 7); \ - /* Fall through */ \ + fallthrough; \ case 6: ptr[6] = read_debug(reg, 6); \ - /* Fall through */ \ + fallthrough; \ case 5: ptr[5] = read_debug(reg, 5); \ - /* Fall through */ \ + fallthrough; \ case 4: ptr[4] = read_debug(reg, 4); \ - /* Fall through */ \ + fallthrough; \ case 3: ptr[3] = read_debug(reg, 3); \ - /* Fall through */ \ + fallthrough; \ case 2: ptr[2] = read_debug(reg, 2); \ - /* Fall through */ \ + fallthrough; \ case 1: ptr[1] = read_debug(reg, 1); \ - /* Fall through */ \ + fallthrough; \ default: ptr[0] = read_debug(reg, 0); \ } #define restore_debug(ptr,reg,nr) \ switch (nr) { \ case 15: write_debug(ptr[15], reg, 15); \ - /* Fall through */ \ + fallthrough; \ case 14: write_debug(ptr[14], reg, 14); \ - /* Fall through */ \ + fallthrough; \ case 13: write_debug(ptr[13], reg, 13); \ - /* Fall through */ \ + fallthrough; \ case 12: write_debug(ptr[12], reg, 12); \ - /* Fall through */ \ + fallthrough; \ case 11: write_debug(ptr[11], reg, 11); \ - /* Fall through */ \ + fallthrough; \ case 10: write_debug(ptr[10], reg, 10); \ - /* Fall through */ \ + fallthrough; \ case 9: write_debug(ptr[9], reg, 9); \ - /* Fall through */ \ + fallthrough; \ case 8: write_debug(ptr[8], reg, 8); \ - /* Fall through */ \ + fallthrough; \ case 7: write_debug(ptr[7], reg, 7); \ - /* Fall through */ \ + fallthrough; \ case 6: write_debug(ptr[6], reg, 6); \ - /* Fall through */ \ + fallthrough; \ case 5: write_debug(ptr[5], reg, 5); \ - /* Fall through */ \ + fallthrough; \ case 4: write_debug(ptr[4], reg, 4); \ - /* Fall through */ \ + fallthrough; \ case 3: write_debug(ptr[3], reg, 3); \ - /* Fall through */ \ + fallthrough; \ case 2: write_debug(ptr[2], reg, 2); \ - /* Fall through */ \ + fallthrough; \ case 1: write_debug(ptr[1], reg, 1); \ - /* Fall through */ \ + fallthrough; \ default: write_debug(ptr[0], reg, 0); \ } diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 5a0073511efb..452f4cacd674 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -340,10 +340,10 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) case 7: cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); - /* Fall through */ + fallthrough; case 6: cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); - /* Fall through */ + fallthrough; default: cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); } @@ -352,10 +352,10 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) case 7: cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); - /* Fall through */ + fallthrough; case 6: cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); - /* Fall through */ + fallthrough; default: cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); } @@ -373,10 +373,10 @@ void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) case 7: __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); - /* Fall through */ + fallthrough; case 6: __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); - /* Fall through */ + fallthrough; default: __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); } @@ -385,10 +385,10 @@ void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) case 7: __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); - /* Fall through */ + fallthrough; case 6: __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); - /* Fall through */ + fallthrough; default: __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); } diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index a206655a39a5..9b11c096a042 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -45,7 +45,7 @@ static u32 get_cpu_asid_bits(void) default: pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n", smp_processor_id(), fld); - /* Fallthrough */ + fallthrough; case 0: asid = 8; break; diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c index e456652facce..d05c78eace1b 100644 --- a/arch/c6x/kernel/signal.c +++ b/arch/c6x/kernel/signal.c @@ -220,7 +220,7 @@ handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler) regs->a4 = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: do_restart: regs->a4 = regs->orig_a4; @@ -252,7 +252,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs, break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->a4 = regs->orig_a4; regs->pc -= 4; diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 9452d6570b7e..970895df75ec 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -194,7 +194,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->a0 = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->a0 = regs->orig_a0; regs->pc -= TRAP0_SIZE; diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 38d335488a54..69e68949787f 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -227,7 +227,7 @@ handle_restart(struct pt_regs *regs, struct k_sigaction *ka) regs->er0 = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: do_restart: regs->er0 = regs->orig_er0; diff --git a/arch/hexagon/kernel/module.c b/arch/hexagon/kernel/module.c index cf99fb79a124..cb3bf19b0640 100644 --- a/arch/hexagon/kernel/module.c +++ b/arch/hexagon/kernel/module.c @@ -120,7 +120,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } case R_HEXAGON_HI16: value = (value>>16) & 0xffff; - /* fallthrough */ + fallthrough; case R_HEXAGON_LO16: *location &= ~0x00c03fff; *location |= value & 0x3fff; diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index d48864c48e5a..94cc7ff52dce 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -155,7 +155,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->r00 = -EINTR; break; } - /* Fall through */ + fallthrough; case -ERESTARTNOINTR: regs->r06 = regs->syscall_nr; pt_set_elr(regs, pt_elr(regs) - 4); diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index bec762a9b418..fec70d662d0c 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -163,7 +163,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) case DIE_INIT_MONARCH_LEAVE: if (!kdump_freeze_monarch) break; - /* fall through */ + fallthrough; case DIE_INIT_SLAVE_LEAVE: case DIE_INIT_MONARCH_ENTER: case DIE_MCA_RENDZVOUS_LEAVE: diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 1a42ba885188..00a496cb346f 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -654,7 +654,7 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, } } else if (!is_internal(mod, val)) val = get_plt(mod, location, val, &ok); - /* FALL THROUGH */ + fallthrough; default: val -= bundle(location); break; diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 971f166873aa..0dc3611e7971 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -3472,7 +3472,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) break; case PFM_CTX_LOADED: if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; - /* fall through */ + fallthrough; case PFM_CTX_UNLOADED: case PFM_CTX_ZOMBIE: DPRINT(("invalid state=%d\n", state)); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index d07ed65c9c6e..e67b22fc3c60 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -374,7 +374,7 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) /* note: scr->pt.r10 is already -1 */ break; } - /*FALLTHRU*/ + fallthrough; case ERESTARTNOINTR: ia64_decrement_ip(&scr->pt); restart = 0; /* don't restart twice if handle_signal() fails... */ diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 2d4e65ba5c3e..6c1a8951dfbb 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -1431,7 +1431,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) if (u.insn.x) /* oops, really a semaphore op (cmpxchg, etc) */ goto failure; - /*FALLTHRU*/ + fallthrough; case LDS_IMM_OP: case LDSA_IMM_OP: case LDFS_OP: @@ -1459,7 +1459,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) if (u.insn.x) /* oops, really a semaphore op (cmpxchg, etc) */ goto failure; - /*FALLTHRU*/ + fallthrough; case LD_IMM_OP: case LDA_IMM_OP: case LDBIAS_IMM_OP: @@ -1475,7 +1475,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) if (u.insn.x) /* oops, really a semaphore op (cmpxchg, etc) */ goto failure; - /*FALLTHRU*/ + fallthrough; case ST_IMM_OP: case STREL_IMM_OP: ret = emulate_store_int(ifa, u.insn, regs); diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 7601fe0622d2..6bd64c35e691 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -324,7 +324,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char return 0; } } - /* fall through */ + fallthrough; case UNW_NAT_NONE: dummy_nat = 0; nat_addr = &dummy_nat; diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c index 37091898adb3..5e0e682f9c61 100644 --- a/arch/m68k/atari/atakeyb.c +++ b/arch/m68k/atari/atakeyb.c @@ -207,7 +207,7 @@ repeat: self_test_last_rcv = jiffies; break; } - /* FALL THROUGH */ + fallthrough; default: break_flag = scancode & BREAK_MASK; diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index fc034fd19798..a98fca977073 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -1067,7 +1067,7 @@ handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler) regs->d0 = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: do_restart: regs->d0 = regs->orig_d0; diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 5c9f3a2d6538..a621fcc1a576 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -1018,7 +1018,7 @@ int __init mac_platform_init(void) */ platform_device_register_simple("mac_scsi", 1, mac_scsi_duo_rsrc, ARRAY_SIZE(mac_scsi_duo_rsrc)); - /* fall through */ + fallthrough; case MAC_SCSI_OLD: /* Addresses from Developer Notes for Duo System, * PowerBook 180 & 160, 140 & 170, Macintosh IIsi diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 1f0fad2a98a0..ac77d73af19a 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -370,7 +370,7 @@ void via_nubus_irq_startup(int irq) /* Allow NuBus slots 9 through F. */ via2[vDirA] &= 0x80 | ~(1 << irq_idx); } - /* fall through */ + fallthrough; case MAC_VIA_IICI: via_irq_enable(irq); break; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 795f483b1050..ef46e77e97a5 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -118,7 +118,7 @@ good_area: pr_debug("do_page_fault: good_area\n"); switch (error_code & 3) { default: /* 3: write, present */ - /* fall through */ + fallthrough; case 2: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto acc_err; diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 65bf5fd8d473..4a96b59f0bee 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -249,7 +249,7 @@ handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler) regs->r3 = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: do_restart: /* offset of 4 bytes to re-execute trap (brki) instruction */ diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index 7dd4a80e05d6..6f4ac854b12d 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -28,38 +28,38 @@ BUILD_BUG_ON(!__builtin_constant_p(times)); \ \ switch (times) { \ - case 32: fn(__VA_ARGS__); /* fall through */ \ - case 31: fn(__VA_ARGS__); /* fall through */ \ - case 30: fn(__VA_ARGS__); /* fall through */ \ - case 29: fn(__VA_ARGS__); /* fall through */ \ - case 28: fn(__VA_ARGS__); /* fall through */ \ - case 27: fn(__VA_ARGS__); /* fall through */ \ - case 26: fn(__VA_ARGS__); /* fall through */ \ - case 25: fn(__VA_ARGS__); /* fall through */ \ - case 24: fn(__VA_ARGS__); /* fall through */ \ - case 23: fn(__VA_ARGS__); /* fall through */ \ - case 22: fn(__VA_ARGS__); /* fall through */ \ - case 21: fn(__VA_ARGS__); /* fall through */ \ - case 20: fn(__VA_ARGS__); /* fall through */ \ - case 19: fn(__VA_ARGS__); /* fall through */ \ - case 18: fn(__VA_ARGS__); /* fall through */ \ - case 17: fn(__VA_ARGS__); /* fall through */ \ - case 16: fn(__VA_ARGS__); /* fall through */ \ - case 15: fn(__VA_ARGS__); /* fall through */ \ - case 14: fn(__VA_ARGS__); /* fall through */ \ - case 13: fn(__VA_ARGS__); /* fall through */ \ - case 12: fn(__VA_ARGS__); /* fall through */ \ - case 11: fn(__VA_ARGS__); /* fall through */ \ - case 10: fn(__VA_ARGS__); /* fall through */ \ - case 9: fn(__VA_ARGS__); /* fall through */ \ - case 8: fn(__VA_ARGS__); /* fall through */ \ - case 7: fn(__VA_ARGS__); /* fall through */ \ - case 6: fn(__VA_ARGS__); /* fall through */ \ - case 5: fn(__VA_ARGS__); /* fall through */ \ - case 4: fn(__VA_ARGS__); /* fall through */ \ - case 3: fn(__VA_ARGS__); /* fall through */ \ - case 2: fn(__VA_ARGS__); /* fall through */ \ - case 1: fn(__VA_ARGS__); /* fall through */ \ + case 32: fn(__VA_ARGS__); fallthrough; \ + case 31: fn(__VA_ARGS__); fallthrough; \ + case 30: fn(__VA_ARGS__); fallthrough; \ + case 29: fn(__VA_ARGS__); fallthrough; \ + case 28: fn(__VA_ARGS__); fallthrough; \ + case 27: fn(__VA_ARGS__); fallthrough; \ + case 26: fn(__VA_ARGS__); fallthrough; \ + case 25: fn(__VA_ARGS__); fallthrough; \ + case 24: fn(__VA_ARGS__); fallthrough; \ + case 23: fn(__VA_ARGS__); fallthrough; \ + case 22: fn(__VA_ARGS__); fallthrough; \ + case 21: fn(__VA_ARGS__); fallthrough; \ + case 20: fn(__VA_ARGS__); fallthrough; \ + case 19: fn(__VA_ARGS__); fallthrough; \ + case 18: fn(__VA_ARGS__); fallthrough; \ + case 17: fn(__VA_ARGS__); fallthrough; \ + case 16: fn(__VA_ARGS__); fallthrough; \ + case 15: fn(__VA_ARGS__); fallthrough; \ + case 14: fn(__VA_ARGS__); fallthrough; \ + case 13: fn(__VA_ARGS__); fallthrough; \ + case 12: fn(__VA_ARGS__); fallthrough; \ + case 11: fn(__VA_ARGS__); fallthrough; \ + case 10: fn(__VA_ARGS__); fallthrough; \ + case 9: fn(__VA_ARGS__); fallthrough; \ + case 8: fn(__VA_ARGS__); fallthrough; \ + case 7: fn(__VA_ARGS__); fallthrough; \ + case 6: fn(__VA_ARGS__); fallthrough; \ + case 5: fn(__VA_ARGS__); fallthrough; \ + case 4: fn(__VA_ARGS__); fallthrough; \ + case 3: fn(__VA_ARGS__); fallthrough; \ + case 2: fn(__VA_ARGS__); fallthrough; \ + case 1: fn(__VA_ARGS__); fallthrough; \ case 0: break; \ \ default: \ diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c index 62bdafbc53f4..9edd7ed7d7bf 100644 --- a/arch/nds32/kernel/fpu.c +++ b/arch/nds32/kernel/fpu.c @@ -45,7 +45,7 @@ void save_fpu(struct task_struct *tsk) : /* no output */ : "r" (&tsk->thread.fpu) : "memory"); - /* fall through */ + fallthrough; case SP32_DP16_reg: asm volatile ("fsdi $fd15, [%0+0x78]\n\t" "fsdi $fd14, [%0+0x70]\n\t" @@ -58,7 +58,7 @@ void save_fpu(struct task_struct *tsk) : /* no output */ : "r" (&tsk->thread.fpu) : "memory"); - /* fall through */ + fallthrough; case SP16_DP8_reg: asm volatile ("fsdi $fd7, [%0+0x38]\n\t" "fsdi $fd6, [%0+0x30]\n\t" @@ -67,7 +67,7 @@ void save_fpu(struct task_struct *tsk) : /* no output */ : "r" (&tsk->thread.fpu) : "memory"); - /* fall through */ + fallthrough; case SP8_DP4_reg: asm volatile ("fsdi $fd3, [%1+0x18]\n\t" "fsdi $fd2, [%1+0x10]\n\t" @@ -108,7 +108,7 @@ void load_fpu(const struct fpu_struct *fpregs) "fldi $fd16, [%0+0x80]\n\t" : /* no output */ : "r" (fpregs)); - /* fall through */ + fallthrough; case SP32_DP16_reg: asm volatile ("fldi $fd15, [%0+0x78]\n\t" "fldi $fd14, [%0+0x70]\n\t" @@ -120,7 +120,7 @@ void load_fpu(const struct fpu_struct *fpregs) "fldi $fd8, [%0+0x40]\n\t" : /* no output */ : "r" (fpregs)); - /* fall through */ + fallthrough; case SP16_DP8_reg: asm volatile ("fldi $fd7, [%0+0x38]\n\t" "fldi $fd6, [%0+0x30]\n\t" @@ -128,7 +128,7 @@ void load_fpu(const struct fpu_struct *fpregs) "fldi $fd4, [%0+0x20]\n\t" : /* no output */ : "r" (fpregs)); - /* fall through */ + fallthrough; case SP8_DP4_reg: asm volatile ("fldi $fd3, [%1+0x18]\n\t" "fldi $fd2, [%1+0x10]\n\t" diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index 330b19fcd990..36e25a410bb0 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -316,7 +316,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->uregs[0] = -EINTR; break; } - /* Else, fall through */ + fallthrough; case -ERESTARTNOINTR: regs->uregs[0] = regs->orig_r0; regs->ipc -= 4; @@ -361,7 +361,7 @@ static void do_signal(struct pt_regs *regs) switch (regs->uregs[0]) { case -ERESTART_RESTARTBLOCK: regs->uregs[15] = __NR_restart_syscall; - /* Fall through */ + fallthrough; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 97804f21a40c..c779364f0cd0 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -244,7 +244,7 @@ int do_signal(struct pt_regs *regs, int syscall) switch (retval) { case -ERESTART_RESTARTBLOCK: restart = -2; - /* Fall through */ + fallthrough; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 5df5d4cd5d4c..3c037fc96038 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -502,7 +502,7 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) regs->gr[28] = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: check_syscallno_in_delay_branch(regs); break; diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 43875c289723..a52c7abf2ca4 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -437,7 +437,6 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o break; default: - /* Fall through */ break; } @@ -644,12 +643,12 @@ void notrace handle_interruption(int code, struct pt_regs *regs) case 15: /* Data TLB miss fault/Data page fault */ - /* Fall through */ + fallthrough; case 16: /* Non-access instruction TLB miss fault */ /* The instruction TLB entry needed for the target address of the FIC is absent, and hardware can't find it, so we get to cleanup */ - /* Fall through */ + fallthrough; case 17: /* Non-access data TLB miss fault/Non-access data page fault */ /* FIXME: @@ -673,7 +672,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) handle_unaligned(regs); return; } - /* Fall Through */ + fallthrough; case 26: /* PCXL: Data memory access rights trap */ fault_address = regs->ior; @@ -683,7 +682,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) case 19: /* Data memory break trap */ regs->gr[0] |= PSW_X; /* So we can single-step over the trap */ - /* fall thru */ + fallthrough; case 21: /* Page reference trap */ handle_gdb_break(regs, TRAP_HWBKPT); @@ -730,7 +729,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) } mmap_read_unlock(current->mm); } - /* Fall Through */ + fallthrough; case 27: /* Data memory protection ID trap */ if (code == 27 && !user_mode(regs) && diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 4bfe2da9fbe3..716960f5d92e 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -67,7 +67,7 @@ parisc_acctyp(unsigned long code, unsigned int inst) case 0x30000000: /* coproc2 */ if (bit22set(inst)) return VM_WRITE; - /* fall through */ + fallthrough; case 0x0: /* indexed/memory management */ if (bit22set(inst)) { @@ -370,7 +370,7 @@ bad_area: } /* probably address is outside of mapped file */ - /* fall through */ + fallthrough; case 17: /* NA data TLB miss / page fault */ case 18: /* Unaligned access - PCXS only */ signo = SIGBUS; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 16d09b36fe06..78d61f97371e 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -475,7 +475,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; - /* Fall through */ + fallthrough; cond_branch: /* same targets, can avoid doing the test :) */ if (filter[i].jt == filter[i].jf) { diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 17ba190e84a5..e996e08f1061 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -250,7 +250,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->a0 = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->a0 = regs->orig_a0; regs->epc -= 0x4; diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index bc5f2204693f..579575f9cdae 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -1020,7 +1020,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_zext64(dst, ctx); break; } - /* Fallthrough. */ + fallthrough; case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU | BPF_SUB | BPF_X: @@ -1079,7 +1079,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case 16: emit(rv_slli(lo(rd), lo(rd), 16), ctx); emit(rv_srli(lo(rd), lo(rd), 16), ctx); - /* Fallthrough. */ + fallthrough; case 32: if (!ctx->prog->aux->verifier_zext) emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); diff --git a/arch/sh/drivers/platform_early.c b/arch/sh/drivers/platform_early.c index f3dc3f25b3ff..143747c45206 100644 --- a/arch/sh/drivers/platform_early.c +++ b/arch/sh/drivers/platform_early.c @@ -246,7 +246,7 @@ static int __init sh_early_platform_driver_probe_id(char *class_str, case EARLY_PLATFORM_ID_ERROR: pr_warn("%s: unable to parse %s parameter\n", class_str, epdrv->pdrv->driver.name); - /* fall-through */ + fallthrough; case EARLY_PLATFORM_ID_UNSET: match = NULL; break; diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c index 08e1af63edd9..34e25a439c81 100644 --- a/arch/sh/kernel/disassemble.c +++ b/arch/sh/kernel/disassemble.c @@ -486,7 +486,7 @@ static void print_sh_insn(u32 memaddr, u16 insn) pr_cont("xd%d", rn & ~1); break; } - /* else, fall through */ + fallthrough; case D_REG_N: pr_cont("dr%d", rn); break; @@ -495,7 +495,7 @@ static void print_sh_insn(u32 memaddr, u16 insn) pr_cont("xd%d", rm & ~1); break; } - /* else, fall through */ + fallthrough; case D_REG_M: pr_cont("dr%d", rm); break; diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index 0d5f3c9d52f3..e4147efa9ec6 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -266,7 +266,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) linux_regs->pc = addr; - /* fallthrough */ + fallthrough; case 'D': case 'k': atomic_set(&kgdb_cpu_doing_single_step, -1); diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index a0fbb8427b39..4fe3f00137bc 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -418,7 +418,7 @@ handle_syscall_restart(unsigned long save_r0, struct pt_regs *regs, case -ERESTARTSYS: if (!(sa->sa_flags & SA_RESTART)) goto no_system_call_restart; - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->regs[0] = save_r0; regs->pc -= instruction_size(__raw_readw(regs->pc - 4)); diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c index 4843f48bfe85..774a82b0c649 100644 --- a/arch/sparc/kernel/auxio_64.c +++ b/arch/sparc/kernel/auxio_64.c @@ -87,7 +87,6 @@ void auxio_set_lte(int on) __auxio_sbus_set_lte(on); break; case AUXIO_TYPE_EBUS: - /* FALL-THROUGH */ default: break; } diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c index bfae98ab8638..23f8838dd96e 100644 --- a/arch/sparc/kernel/central.c +++ b/arch/sparc/kernel/central.c @@ -55,7 +55,7 @@ static int clock_board_calc_nslots(struct clock_board *p) else return 5; } - /* Fallthrough */ + fallthrough; default: return 4; } diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c index 7580775a14b9..58ad3f7de1fb 100644 --- a/arch/sparc/kernel/kgdb_32.c +++ b/arch/sparc/kernel/kgdb_32.c @@ -122,7 +122,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->pc = addr; linux_regs->npc = addr + 4; } - /* fall through */ + fallthrough; case 'D': case 'k': diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c index 5d6c2d287e85..177746ae2c81 100644 --- a/arch/sparc/kernel/kgdb_64.c +++ b/arch/sparc/kernel/kgdb_64.c @@ -148,7 +148,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->tpc = addr; linux_regs->tnpc = addr + 4; } - /* fall through */ + fallthrough; case 'D': case 'k': diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index c0886b400dad..2a12c86af956 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -359,7 +359,7 @@ int __init pcr_arch_init(void) * counter overflow interrupt so we can't make use of * their hardware currently. */ - /* fallthrough */ + fallthrough; default: err = -ENODEV; goto out_unregister; diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index da8902295c8c..3df960c137f7 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -224,7 +224,7 @@ void __init of_console_init(void) case PROMDEV_TTYB: skip = 1; - /* FALLTHRU */ + fallthrough; case PROMDEV_TTYA: type = "serial"; diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index e2c6f0abda00..e9695a06492f 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -646,7 +646,7 @@ static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs case ERESTARTSYS: if (!(sa->sa_flags & SA_RESTART)) goto no_system_call_restart; - /* fallthrough */ + fallthrough; case ERESTARTNOINTR: regs->u_regs[UREG_I0] = orig_i0; regs->tpc -= 4; @@ -686,7 +686,7 @@ void do_signal32(struct pt_regs * regs) regs->tpc -= 4; regs->tnpc -= 4; pt_regs_clear_syscall(regs); - /* fall through */ + fallthrough; case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index f1f8c8ebe641..d0e0025ee3ba 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -440,7 +440,7 @@ static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs, case ERESTARTSYS: if (!(sa->sa_flags & SA_RESTART)) goto no_system_call_restart; - /* fallthrough */ + fallthrough; case ERESTARTNOINTR: regs->u_regs[UREG_I0] = orig_i0; regs->pc -= 4; @@ -506,7 +506,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) regs->pc -= 4; regs->npc -= 4; pt_regs_clear_syscall(regs); - /* fall through */ + fallthrough; case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->pc -= 4; diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 6937339a272c..255264bcb46a 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -461,7 +461,7 @@ static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs, case ERESTARTSYS: if (!(sa->sa_flags & SA_RESTART)) goto no_system_call_restart; - /* fallthrough */ + fallthrough; case ERESTARTNOINTR: regs->u_regs[UREG_I0] = orig_i0; regs->tpc -= 4; @@ -532,7 +532,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) regs->tpc -= 4; regs->tnpc -= 4; pt_regs_clear_syscall(regs); - /* fall through */ + fallthrough; case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c index 72e560ef4a09..d5beec856146 100644 --- a/arch/sparc/math-emu/math_32.c +++ b/arch/sparc/math-emu/math_32.c @@ -359,7 +359,7 @@ static int do_one_mathemu(u32 insn, unsigned long *pfsr, unsigned long *fregs) *pfsr |= (6 << 14); return 0; /* simulate invalid_fp_register exception */ } - /* fall through */ + fallthrough; case 2: if (freg & 1) { /* doublewords must have bit 5 zeroed */ *pfsr |= (6 << 14); @@ -380,7 +380,7 @@ static int do_one_mathemu(u32 insn, unsigned long *pfsr, unsigned long *fregs) *pfsr |= (6 << 14); return 0; /* simulate invalid_fp_register exception */ } - /* fall through */ + fallthrough; case 2: if (freg & 1) { /* doublewords must have bit 5 zeroed */ *pfsr |= (6 << 14); @@ -408,13 +408,13 @@ static int do_one_mathemu(u32 insn, unsigned long *pfsr, unsigned long *fregs) *pfsr |= (6 << 14); return 0; /* simulate invalid_fp_register exception */ } - /* fall through */ + fallthrough; case 2: if (freg & 1) { /* doublewords must have bit 5 zeroed */ *pfsr |= (6 << 14); return 0; } - /* fall through */ + fallthrough; case 1: rd = (void *)&fregs[freg]; break; diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index c8eabb973b86..b1dbf2fa8c0a 100644 --- a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -491,7 +491,7 @@ void bpf_jit_compile(struct bpf_prog *fp) } else { emit_loadimm(K, r_A); } - /* Fallthrough */ + fallthrough; case BPF_RET | BPF_A: if (seen_or_pass0) { if (i != flen - 1) { diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 3d57c71c532e..88cd9b5c1b74 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -70,7 +70,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) PT_REGS_SYSCALL_RET(regs) = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: PT_REGS_RESTART_SYSCALL(regs); PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 4ff01176c1cc..21d56ae83cdf 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -54,7 +54,7 @@ int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *b /* else */ state = st_wordcmp; opptr = option; - /* fall through */ + fallthrough; case st_wordcmp: if (c == '=' && !*opptr) { @@ -129,7 +129,7 @@ int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option) state = st_wordcmp; opptr = option; wstart = pos; - /* fall through */ + fallthrough; case st_wordcmp: if (!*opptr) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 0048269180d5..dde7cb3724df 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -178,7 +178,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size, } *size = 0; } - /* Fall through */ + fallthrough; default: /* * If w/o offset, only size specified, memmap=nn[KMG] has the diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 50963472ee85..31e6887d24f1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4682,7 +4682,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - /* fall through */ + fallthrough; case INTEL_FAM6_CORE2_MEROM_L: case INTEL_FAM6_CORE2_PENRYN: @@ -5062,7 +5062,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SKYLAKE_X: pmem = true; - /* fall through */ + fallthrough; case INTEL_FAM6_SKYLAKE_L: case INTEL_FAM6_SKYLAKE: case INTEL_FAM6_KABYLAKE_L: @@ -5114,7 +5114,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ICELAKE_X: case INTEL_FAM6_ICELAKE_D: pmem = true; - /* fall through */ + fallthrough; case INTEL_FAM6_ICELAKE_L: case INTEL_FAM6_ICELAKE: case INTEL_FAM6_TIGERLAKE_L: diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 63f58bdf556c..8961653c5dd2 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1268,7 +1268,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort) ret = X86_BR_ZERO_CALL; break; } - /* fall through */ + fallthrough; case 0x9a: /* call far absolute */ ret = X86_BR_CALL; break; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c3daf0aaa0ee..cdaab30880b9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -239,7 +239,7 @@ void __init arch_init_ideal_nops(void) return; } - /* fall through */ + fallthrough; default: #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 21325a4a78b9..779a89e31c4c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -800,7 +800,7 @@ static int irq_polarity(int idx) return IOAPIC_POL_HIGH; case MP_IRQPOL_RESERVED: pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); - /* fall through */ + fallthrough; case MP_IRQPOL_ACTIVE_LOW: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_POL_LOW; @@ -848,7 +848,7 @@ static int irq_trigger(int idx) return IOAPIC_EDGE; case MP_IRQTRIG_RESERVED: pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); - /* fall through */ + fallthrough; case MP_IRQTRIG_LEVEL: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_LEVEL; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 7bda71def557..99ee61c9ba54 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -149,7 +149,7 @@ void __init default_setup_apic_routing(void) break; } /* P4 and above */ - /* fall through */ + fallthrough; case X86_VENDOR_HYGON: case X86_VENDOR_AMD: def_to_bigsmp = 1; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index c7503be92f35..57074cf3ad7c 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -248,7 +248,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, switch (leaf) { case 1: l1 = &l1i; - /* fall through */ + fallthrough; case 0: if (!l1->val) return; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 7843ab3fde09..3a44346f2276 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -199,7 +199,7 @@ static int raise_local(void) * calling irq_enter, but the necessary * machinery isn't exported currently. */ - /*FALL THROUGH*/ + fallthrough; case MCJ_CTX_PROCESS: raise_exception(m, NULL); break; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index d8f9230d2034..abe9fe0fb851 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -193,7 +193,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) if (!atomic_sub_return(1, &cmci_storm_on_cpus)) pr_notice("CMCI storm subsided: switching to interrupt mode\n"); - /* FALLTHROUGH */ + fallthrough; case CMCI_STORM_SUBSIDED: /* diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 72182809b333..ca670919b561 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -98,7 +98,7 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) case 7: if (size < 0x40) break; - /* Else, fall through */ + fallthrough; case 6: case 5: case 4: diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 8cdf29ffd95f..b98ff620ba77 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -349,7 +349,7 @@ static int arch_build_bp_info(struct perf_event *bp, hw->len = X86_BREAKPOINT_LEN_X; return 0; } - /* fall through */ + fallthrough; default: return -EINVAL; } diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 68acd30c6b87..c2f02f308ecf 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -450,7 +450,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) linux_regs->ip = addr; - /* fall through */ + fallthrough; case 'D': case 'k': /* clear the trace bit */ @@ -539,7 +539,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) * a system call which should be ignored */ return NOTIFY_DONE; - /* fall through */ + fallthrough; default: if (user_mode(regs)) return NOTIFY_DONE; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 411af4aa7b51..baa21090c9be 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -312,7 +312,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) case 2: if (i == 0 || i == 13) continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ + fallthrough; default: if (i == 2) continue; /* IRQ2 is never connected */ @@ -356,7 +356,7 @@ static void __init construct_ioapic_table(int mpc_default_type) default: pr_err("???\nUnknown standard configuration %d\n", mpc_default_type); - /* fall through */ + fallthrough; case 1: case 5: memcpy(bus.bustype, "ISA ", 6); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 5679aa3fdcb8..e7537c5440bb 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -204,7 +204,7 @@ static int set_segment_reg(struct task_struct *task, case offsetof(struct user_regs_struct, ss): if (unlikely(value == 0)) return -EIO; - /* Else, fall through */ + fallthrough; default: *pt_regs_access(task_pt_regs(task), offset) = value; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0ec7ced727fe..a515e2d230b7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -654,7 +654,7 @@ static void native_machine_emergency_restart(void) case BOOT_CF9_FORCE: port_cf9_safe = true; - /* Fall through */ + fallthrough; case BOOT_CF9_SAFE: if (port_cf9_safe) { diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d5fa494c2304..be0d7d4152ec 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -726,7 +726,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->ax = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->ax = regs->orig_ax; regs->ip -= 2; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 15e5aad8ac2c..3fdaa042823d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -735,7 +735,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * OPCODE1() of the "short" jmp which checks the same condition. */ opc1 = OPCODE2(insn) - 0x10; - /* fall through */ + fallthrough; default: if (!is_cond_jmp_opcode(opc1)) return -ENOSYS; @@ -892,7 +892,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, fix_ip_or_call = 0; break; } - /* fall through */ + fallthrough; default: riprel_analyze(auprobe, &insn); } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d0e2825ae617..5299ef5ff18d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3016,7 +3016,7 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt) case 0xa4: /* movsb */ case 0xa5: /* movsd/w */ *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1; - /* fall through */ + fallthrough; case 0xaa: /* stosb */ case 0xab: /* stosd/w */ *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 814d3aee5cef..1d330564eed8 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1779,7 +1779,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); if (ret != HV_STATUS_INVALID_PORT_ID) break; - /* fall through - maybe userspace knows this conn_id. */ + fallthrough; /* maybe userspace knows this conn_id */ case HVCALL_POST_MESSAGE: /* don't bother userspace if it has no way to handle it */ if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index c47d2acec529..4aa1c2e00e2a 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -285,7 +285,7 @@ int kvm_set_routing_entry(struct kvm *kvm, switch (ue->u.irqchip.irqchip) { case KVM_IRQCHIP_PIC_SLAVE: e->irqchip.pin += PIC_NUM_PINS / 2; - /* fall through */ + fallthrough; case KVM_IRQCHIP_PIC_MASTER: if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2) return -EINVAL; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5ccbee7165a2..35cca2e0c802 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1053,7 +1053,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, switch (delivery_mode) { case APIC_DM_LOWEST: vcpu->arch.apic_arb_prio++; - /* fall through */ + fallthrough; case APIC_DM_FIXED: if (unlikely(trig_mode && !level)) break; @@ -1341,7 +1341,7 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) break; case APIC_TASKPRI: report_tpr_access(apic, false); - /* fall thru */ + fallthrough; default: val = kvm_lapic_get_reg(apic, offset); break; @@ -2027,7 +2027,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVT0: apic_manage_nmi_watchdog(apic, val); - /* fall through */ + fallthrough; case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a5d0207e7189..43fdb0c12a5d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4422,7 +4422,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; - /* fall through */ + fallthrough; case PT64_ROOT_4LEVEL: rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 03dd7bac8034..0194336b64a4 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2668,7 +2668,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); - /* Fall through */ + fallthrough; default: return kvm_set_msr_common(vcpu, msr); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 46ba2e03a892..819c185adf09 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4654,7 +4654,7 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) vmcs_read32(VM_EXIT_INSTRUCTION_LEN); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) return false; - /* fall through */ + fallthrough; case DB_VECTOR: return !(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)); @@ -4827,7 +4827,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) } kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); - /* fall through */ + fallthrough; case BP_VECTOR: /* * Update instruction length as we may reinject #BP from @@ -5257,7 +5257,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) error_code = vmcs_read32(IDT_VECTORING_ERROR_CODE); } - /* fall through */ + fallthrough; case INTR_TYPE_SOFT_EXCEPTION: kvm_clear_exception_queue(vcpu); break; @@ -5610,7 +5610,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) * keeping track of global entries in shadow page tables. */ - /* fall-through */ + fallthrough; case INVPCID_TYPE_ALL_INCL_GLOBAL: kvm_mmu_unload(vcpu); return kvm_skip_emulated_instruction(vcpu); @@ -6578,7 +6578,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, break; case INTR_TYPE_SOFT_EXCEPTION: vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ + fallthrough; case INTR_TYPE_HARD_EXCEPTION: if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { u32 err = vmcs_read32(error_code_field); @@ -6588,7 +6588,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, break; case INTR_TYPE_SOFT_INTR: vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ + fallthrough; case INTR_TYPE_EXT_INTR: kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33945283fe07..d39d6cf1d473 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1116,14 +1116,12 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) vcpu->arch.eff_db[dr] = val; break; case 4: - /* fall through */ case 6: if (!kvm_dr6_valid(val)) return -1; /* #GP */ vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); break; case 5: - /* fall through */ default: /* 7 */ if (!kvm_dr7_valid(val)) return -1; /* #GP */ @@ -1154,12 +1152,10 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: - /* fall through */ case 6: *val = vcpu->arch.dr6; break; case 5: - /* fall through */ default: /* 7 */ *val = vcpu->arch.dr7; break; @@ -3051,7 +3047,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: - pr = true; /* fall through */ + pr = true; + fallthrough; case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: if (kvm_pmu_is_valid_msr(vcpu, msr)) @@ -4359,7 +4356,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_HYPERV_SYNIC2: if (cap->args[0]) return -EINVAL; - /* fall through */ + fallthrough; case KVM_CAP_HYPERV_SYNIC: if (!irqchip_in_kernel(vcpu->kvm)) @@ -8672,7 +8669,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - /* fall through */ + fallthrough; case KVM_MP_STATE_RUNNABLE: vcpu->arch.apf.halted = false; break; diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 4f1719e22d3c..b6da09339308 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -58,7 +58,7 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, state = st_wordcmp; opptr = option; wstart = pos; - /* fall through */ + fallthrough; case st_wordcmp: if (!*opptr) { @@ -89,7 +89,7 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, break; } state = st_wordskip; - /* fall through */ + fallthrough; case st_wordskip: if (!c) @@ -151,7 +151,7 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, state = st_wordcmp; opptr = option; - /* fall through */ + fallthrough; case st_wordcmp: if ((c == '=') && !*opptr) { @@ -172,7 +172,7 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, break; } state = st_wordskip; - /* fall through */ + fallthrough; case st_wordskip: if (myisspace(c)) diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 31600d851fd8..5e69603ff63f 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -179,7 +179,7 @@ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) if (insn->addr_bytes == 2) return -EINVAL; - /* fall through */ + fallthrough; case -EDOM: case offsetof(struct pt_regs, bx): @@ -362,7 +362,6 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) case INAT_SEG_REG_GS: return vm86regs->gs; case INAT_SEG_REG_IGNORE: - /* fall through */ default: return -EINVAL; } @@ -386,7 +385,6 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) */ return get_user_gs(regs); case INAT_SEG_REG_IGNORE: - /* fall through */ default: return -EINVAL; } @@ -786,7 +784,7 @@ int insn_get_code_seg_params(struct pt_regs *regs) */ return INSN_CODE_SEG_PARAMS(4, 8); case 3: /* Invalid setting. CS.L=1, CS.D=1 */ - /* fall through */ + fallthrough; default: return -EINVAL; } diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c index 73dc66d887f3..ec071cbb0804 100644 --- a/arch/x86/math-emu/errors.c +++ b/arch/x86/math-emu/errors.c @@ -186,7 +186,7 @@ void FPU_printall(void) case TAG_Special: /* Update tagi for the printk below */ tagi = FPU_Special(r); - /* fall through */ + fallthrough; case TAG_Valid: printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, getsign(r) ? '-' : '+', diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c index 127ea54122d7..4a9887851ad8 100644 --- a/arch/x86/math-emu/fpu_trig.c +++ b/arch/x86/math-emu/fpu_trig.c @@ -1352,7 +1352,7 @@ static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag) case TW_Denormal: if (denormal_operand() < 0) return; - /* fall through */ + fallthrough; case TAG_Zero: case TAG_Valid: setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr)); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 84d85dbd1dad..9e5ccc56f8e0 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -574,7 +574,7 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr, /* For SEV, these areas are encrypted */ if (sev_active()) break; - /* Fallthrough */ + fallthrough; case E820_TYPE_PRAM: return true; diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 76cee341507b..b3b17d6c50f0 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -448,7 +448,7 @@ static void do_signal(struct pt_regs *regs) regs->areg[2] = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->areg[2] = regs->syscall; regs->pc -= 3; -- cgit v1.2.3 From 12564485ed8caac3c18572793ec01330792c7191 Mon Sep 17 00:00:00 2001 From: Shawn Anastasio Date: Fri, 21 Aug 2020 13:55:56 -0500 Subject: Revert "powerpc/64s: Remove PROT_SAO support" This reverts commit 5c9fa16e8abd342ce04dc830c1ebb2a03abf6c05. Since PROT_SAO can still be useful for certain classes of software, reintroduce it. Concerns about guest migration for LPARs using SAO will be addressed next. Signed-off-by: Shawn Anastasio Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200821185558.35561-2-shawn@anastas.io --- arch/powerpc/include/asm/book3s/64/pgtable.h | 8 +++----- arch/powerpc/include/asm/cputable.h | 10 +++++----- arch/powerpc/include/asm/mman.h | 26 ++++++++++++++++++++++---- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 ++ arch/powerpc/include/uapi/asm/mman.h | 2 +- arch/powerpc/kernel/dt_cpu_ftrs.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 2 ++ 7 files changed, 36 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 6de56c3b33c4..495fc0ccb453 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -20,13 +20,9 @@ #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ - -#define _PAGE_CACHE_CTL 0x00030 /* Bits for the folowing cache modes */ - /* No bits set is normal cacheable memory */ - /* 0x00010 unused, is SAO bit on radix POWER9 */ +#define _PAGE_SAO 0x00010 /* Strong access order */ #define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ #define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */ - #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ /* @@ -828,6 +824,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, return hash__set_pte_at(mm, addr, ptep, pte, percpu); } +#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t prot) { diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index e005b4581023..32a15dc49e8c 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -196,7 +196,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000) #define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000) #define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000) -// Free LONG_ASM_CONST(0x0000000008000000) +#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000) #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000) #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000) #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000) @@ -441,7 +441,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_ASYM_SMT | \ + CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX ) @@ -450,7 +450,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ @@ -461,7 +461,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ @@ -479,7 +479,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 7c07728af300..4ba303ea27f5 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,20 +13,38 @@ #include #include -#ifdef CONFIG_PPC_MEM_KEYS static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { - return pkey_to_vmflag_bits(pkey); +#ifdef CONFIG_PPC_MEM_KEYS + return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey)); +#else + return ((prot & PROT_SAO) ? VM_SAO : 0); +#endif } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { - return __pgprot(vmflag_to_pte_pkey_bits(vm_flags)); +#ifdef CONFIG_PPC_MEM_KEYS + return (vm_flags & VM_SAO) ? + __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) : + __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags)); +#else + return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); +#endif } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) -#endif + +static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) +{ + if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) + return false; + if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) + return false; + return true; +} +#define arch_validate_prot arch_validate_prot #endif /* CONFIG_PPC64 */ #endif /* _ASM_POWERPC_MMAN_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 59ee9fa4ae09..6cb8aa357191 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -82,6 +82,8 @@ */ #include +#define _PAGE_SAO 0 + #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) /* diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index 3a700351feca..c0c737215b00 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -11,7 +11,7 @@ #include -#define PROT_SAO 0x10 /* Unsupported since v5.9 */ +#define PROT_SAO 0x10 /* Strong Access Ordering */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8dc46f38680b..f204ad79b6b5 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -653,7 +653,7 @@ static struct dt_cpu_feature_match __initdata {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL}, {"processor-utilization-of-resources-register", feat_enable_purr, 0}, {"no-execute", feat_enable, 0}, - /* strong-access-ordering is unused */ + {"strong-access-ordering", feat_enable, CPU_FTR_SAO}, {"cache-inhibited-large-page", feat_enable_large_ci, 0}, {"coprocessor-icswx", feat_enable, 0}, {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0}, diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 890a71c5293e..c663e7ba801f 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -232,6 +232,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) rflags |= HPTE_R_I; else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); else /* * Add memory coherence if cache inhibited is not set -- cgit v1.2.3 From 9b725a90a8f127802e19466d4e336e701bcea0d2 Mon Sep 17 00:00:00 2001 From: Shawn Anastasio Date: Fri, 21 Aug 2020 13:55:57 -0500 Subject: powerpc/64s: Disallow PROT_SAO in LPARs by default Since migration of guests using SAO to ISA 3.1 hosts may cause issues, disable PROT_SAO in LPARs by default and introduce a new Kconfig option PPC_PROT_SAO_LPAR to allow users to enable it if desired. Signed-off-by: Shawn Anastasio Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200821185558.35561-3-shawn@anastas.io --- arch/powerpc/Kconfig | 12 ++++++++++++ arch/powerpc/include/asm/mman.h | 9 +++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1f48bbfb3ce9..65bed1fdeaad 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -860,6 +860,18 @@ config PPC_SUBPAGE_PROT If unsure, say N here. +config PPC_PROT_SAO_LPAR + bool "Support PROT_SAO mappings in LPARs" + depends on PPC_BOOK3S_64 + help + This option adds support for PROT_SAO mappings from userspace + inside LPARs on supported CPUs. + + This may cause issues when performing guest migration from + a CPU that supports SAO to one that does not. + + If unsure, say N here. + config PPC_COPRO_BASE bool diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 4ba303ea27f5..7cb6d18f5cd6 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -40,8 +40,13 @@ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) { if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) return false; - if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) - return false; + if (prot & PROT_SAO) { + if (!cpu_has_feature(CPU_FTR_SAO)) + return false; + if (firmware_has_feature(FW_FEATURE_LPAR) && + !IS_ENABLED(CONFIG_PPC_PROT_SAO_LPAR)) + return false; + } return true; } #define arch_validate_prot arch_validate_prot -- cgit v1.2.3 From bf9282dc26e7fe2a0736edc568762f0f05d12416 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Aug 2020 12:22:17 +0200 Subject: cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic This allows moving the leave_mm() call into generic code before rcu_idle_enter(). Gets rid of more trace_*_rcuidle() users. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.369441600@infradead.org --- arch/x86/include/asm/mmu.h | 1 + arch/x86/mm/tlb.c | 13 ++----------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 0a301ad0b02f..9257667d13c5 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -59,5 +59,6 @@ typedef struct { } void leave_mm(int cpu); +#define leave_mm leave_mm #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 1a3569b43aa5..0951b47e64c1 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -555,21 +555,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, true); - /* - * NB: This gets called via leave_mm() in the idle path - * where RCU functions differently. Tracing normally - * uses RCU, so we need to use the _rcuidle variant. - * - * (There is no good reason for this. The idle code should - * be rearranged to call this before rcu_idle_enter().) - */ - trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ load_new_mm_cr3(next->pgd, new_asid, false); - /* See above wrt _rcuidle. */ - trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); } /* Make sure we write CR3 before loaded_mm. */ -- cgit v1.2.3 From 9864f5b5943ab0f1f835f21dc3f9f068d06f5b52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Aug 2020 12:27:10 +0200 Subject: cpuidle: Move trace_cpu_idle() into generic code Remove trace_cpu_idle() from the arch_cpu_idle() implementations and put it in the generic code, right before disabling RCU. Gets rid of more trace_*_rcuidle() users. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.428433395@infradead.org --- arch/arm/mach-omap2/pm34xx.c | 4 ---- arch/arm64/kernel/process.c | 2 -- arch/s390/kernel/idle.c | 3 +-- arch/x86/kernel/process.c | 4 ---- 4 files changed, 1 insertion(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 6df395fff971..f5dfddf492e2 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -298,11 +298,7 @@ static void omap3_pm_idle(void) if (omap_irq_pending()) return; - trace_cpu_idle_rcuidle(1, smp_processor_id()); - omap_sram_idle(); - - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_SUSPEND diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b63ce4c54cfe..f1804496b935 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -123,10 +123,8 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ - trace_cpu_idle_rcuidle(1, smp_processor_id()); cpu_do_idle(); local_irq_enable(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 88bb42ca5008..c73f50649e7e 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -33,14 +33,13 @@ void enabled_wait(void) PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); - trace_cpu_idle_rcuidle(1, smp_processor_id()); local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); local_irq_restore(flags); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); /* Account time spent with enabled wait psw loaded as idle time. */ + /* XXX seqcount has tracepoints that require RCU */ write_seqcount_begin(&idle->seqcount); idle_time = idle->clock_idle_exit - idle->clock_idle_enter; idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 994d8393f2f7..13ce616cc7af 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -684,9 +684,7 @@ void arch_cpu_idle(void) */ void __cpuidle default_idle(void) { - trace_cpu_idle_rcuidle(1, smp_processor_id()); safe_halt(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); @@ -792,7 +790,6 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) static __cpuidle void mwait_idle(void) { if (!current_set_polling_and_test()) { - trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); @@ -804,7 +801,6 @@ static __cpuidle void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); } -- cgit v1.2.3 From 7da93f379330f2be1122ca7f54ab1eb44ef9aa59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Aug 2020 19:28:07 +0200 Subject: x86/entry: Remove unused THUNKs Unused remnants Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.487040689@infradead.org --- arch/x86/entry/thunk_32.S | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index 3a07ce3ec70b..f1f96d4d8cd6 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -29,11 +29,6 @@ SYM_CODE_START_NOALIGN(\name) SYM_CODE_END(\name) .endm -#ifdef CONFIG_TRACE_IRQFLAGS - THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 - THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 -#endif - #ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace -- cgit v1.2.3 From 36206b588bc815e5f64e8da72d7ab79e00b76281 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Aug 2020 09:27:52 +0200 Subject: nds32: Implement arch_irqs_disabled() Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.604899379@infradead.org --- arch/nds32/include/asm/irqflags.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/nds32/include/asm/irqflags.h b/arch/nds32/include/asm/irqflags.h index fb45ec46bb1b..51ef800bb301 100644 --- a/arch/nds32/include/asm/irqflags.h +++ b/arch/nds32/include/asm/irqflags.h @@ -34,3 +34,8 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) { return !flags; } + +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} -- cgit v1.2.3 From 021c109330ebc1f54b546c63a078ea3c31356ecb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Aug 2020 10:40:49 +0200 Subject: arm64: Implement arch_irqs_disabled() Cc: Catalin Marinas Cc: Will Deacon Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Link: https://lkml.kernel.org/r/20200821085348.664425120@infradead.org --- arch/arm64/include/asm/irqflags.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index aa4b6521ef14..ff328e5bbb75 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -95,6 +95,11 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) return res; } +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + static inline unsigned long arch_local_irq_save(void) { unsigned long flags; -- cgit v1.2.3 From 99dc56feb7932020502d40107a712fa302b32082 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 22 Aug 2020 18:04:15 +0200 Subject: mips: Implement arch_irqs_disabled() Cc: Thomas Bogendoerfer Cc: Paul Burton Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200826101653.GE1362448@hirez.programming.kicks-ass.net --- arch/mips/include/asm/irqflags.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h index 47a8ffc0b413..f5b8300f4573 100644 --- a/arch/mips/include/asm/irqflags.h +++ b/arch/mips/include/asm/irqflags.h @@ -137,6 +137,11 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) return !(flags & 1); } +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + #endif /* #ifndef __ASSEMBLY__ */ /* -- cgit v1.2.3 From 044d0d6de9f50192f9697583504a382347ee95ca Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 23 Jul 2020 20:56:14 +1000 Subject: lockdep: Only trace IRQ edges Problem: raw_local_irq_save(); // software state on local_irq_save(); // software state off ... local_irq_restore(); // software state still off, because we don't enable IRQs raw_local_irq_restore(); // software state still off, *whoopsie* existing instances: - lock_acquire() raw_local_irq_save() __lock_acquire() arch_spin_lock(&graph_lock) pv_wait() := kvm_wait() (same or worse for Xen/HyperV) local_irq_save() - trace_clock_global() raw_local_irq_save() arch_spin_lock() pv_wait() := kvm_wait() local_irq_save() - apic_retrigger_irq() raw_local_irq_save() apic->send_IPI() := default_send_IPI_single_phys() local_irq_save() Possible solutions: A) make it work by enabling the tracing inside raw_*() B) make it work by keeping tracing disabled inside raw_*() C) call it broken and clean it up now Now, given that the only reason to use the raw_* variant is because you don't want tracing. Therefore A) seems like a weird option (although it can be done). C) is tempting, but OTOH it ends up converting a _lot_ of code to raw just because there is one raw user, this strips the validation/tracing off for all the other users. So we pick B) and declare any code that ends up doing: raw_local_irq_save() local_irq_save() lockdep_assert_irqs_disabled(); broken. AFAICT this problem has existed forever, the only reason it came up is because commit: 859d069ee1dd ("lockdep: Prepare for NMI IRQ state tracking") changed IRQ tracing vs lockdep recursion and the first instance is fairly common, the other cases hardly ever happen. Signed-off-by: Nicholas Piggin [rewrote changelog] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200723105615.1268126-1-npiggin@gmail.com --- arch/powerpc/include/asm/hw_irq.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3a0db7b0b46e..35060be09073 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void) #define powerpc_local_irq_pmu_save(flags) \ do { \ raw_local_irq_pmu_save(flags); \ - trace_hardirqs_off(); \ + if (!raw_irqs_disabled_flags(flags)) \ + trace_hardirqs_off(); \ } while(0) #define powerpc_local_irq_pmu_restore(flags) \ do { \ - if (raw_irqs_disabled_flags(flags)) { \ - raw_local_irq_pmu_restore(flags); \ - trace_hardirqs_off(); \ - } else { \ + if (!raw_irqs_disabled_flags(flags)) \ trace_hardirqs_on(); \ - raw_local_irq_pmu_restore(flags); \ - } \ + raw_local_irq_pmu_restore(flags); \ } while(0) #else #define powerpc_local_irq_pmu_save(flags) \ -- cgit v1.2.3 From 1196f12a2c960951d02262af25af0bb1775ebcc2 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 20 Aug 2020 09:48:23 +0200 Subject: s390: don't trace preemption in percpu macros Since commit a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables") the lockdep code itself uses percpu variables. This leads to recursions because the percpu macros are calling preempt_enable() which might call trace_preempt_on(). Signed-off-by: Sven Schnelle Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/percpu.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 50b4ce8cddfd..918f0ba4f4d2 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -29,7 +29,7 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ old__, new__, prev__; \ pcp_op_T__ *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ prev__ = *ptr__; \ do { \ @@ -37,7 +37,7 @@ new__ = old__ op (val); \ prev__ = cmpxchg(ptr__, old__, new__); \ } while (prev__ != old__); \ - preempt_enable(); \ + preempt_enable_notrace(); \ new__; \ }) @@ -68,7 +68,7 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ if (__builtin_constant_p(val__) && \ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ @@ -84,7 +84,7 @@ : [val__] "d" (val__) \ : "cc"); \ } \ - preempt_enable(); \ + preempt_enable_notrace(); \ } #define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int) @@ -95,14 +95,14 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ - preempt_enable(); \ + preempt_enable_notrace(); \ old__ + val__; \ }) @@ -114,14 +114,14 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ - preempt_enable(); \ + preempt_enable_notrace(); \ } #define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan") @@ -136,10 +136,10 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ ret__; \ pcp_op_T__ *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = cmpxchg(ptr__, oval, nval); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) @@ -152,10 +152,10 @@ ({ \ typeof(pcp) *ptr__; \ typeof(pcp) ret__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = xchg(ptr__, nval); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) @@ -171,11 +171,11 @@ typeof(pcp1) *p1__; \ typeof(pcp2) *p2__; \ int ret__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ p1__ = raw_cpu_ptr(&(pcp1)); \ p2__ = raw_cpu_ptr(&(pcp2)); \ ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) -- cgit v1.2.3 From bffc2f7aa96343f91931272d7a8a2d8d925e1ab2 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 21 Aug 2020 18:27:36 +0200 Subject: s390/vmem: fix vmem_add_range for 4-level paging The kernel currently crashes if 4-level paging is used. Add missing p4d_populate for just allocated pud entry. Fixes: 3e0d3e408e63 ("s390/vmem: consolidate vmem_add_range() and vmem_remove_range()") Reviewed-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/mm/vmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 1aed1a4dfc2d..eddf71c22875 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -402,6 +402,7 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); if (!pud) goto out; + p4d_populate(&init_mm, p4d, pud); } ret = modify_pud_table(p4d, addr, next, add, direct); if (ret) -- cgit v1.2.3 From 52d6b926aabc47643cd910c85edb262b7f44c168 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 26 Aug 2020 21:12:10 -0700 Subject: x86/hotplug: Silence APIC only after all interrupts are migrated There is a race when taking a CPU offline. Current code looks like this: native_cpu_disable() { ... apic_soft_disable(); /* * Any existing set bits for pending interrupt to * this CPU are preserved and will be sent via IPI * to another CPU by fixup_irqs(). */ cpu_disable_common(); { .... /* * Race window happens here. Once local APIC has been * disabled any new interrupts from the device to * the old CPU are lost */ fixup_irqs(); // Too late to capture anything in IRR. ... } } The fix is to disable the APIC *after* cpu_disable_common(). Testing was done with a USB NIC that provided a source of frequent interrupts. A script migrated interrupts to a specific CPU and then took that CPU offline. Fixes: 60dcaad5736f ("x86/hotplug: Silence APIC and NMI when CPU is dead") Reported-by: Evan Green Signed-off-by: Ashok Raj Signed-off-by: Thomas Gleixner Tested-by: Mathias Nyman Tested-by: Evan Green Reviewed-by: Evan Green Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/875zdarr4h.fsf@nanos.tec.linutronix.de/ Link: https://lore.kernel.org/r/1598501530-45821-1-git-send-email-ashok.raj@intel.com --- arch/x86/kernel/smpboot.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 27aa04a95702..f5ef689dd62a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1594,14 +1594,28 @@ int native_cpu_disable(void) if (ret) return ret; - /* - * Disable the local APIC. Otherwise IPI broadcasts will reach - * it. It still responds normally to INIT, NMI, SMI, and SIPI - * messages. - */ - apic_soft_disable(); cpu_disable_common(); + /* + * Disable the local APIC. Otherwise IPI broadcasts will reach + * it. It still responds normally to INIT, NMI, SMI, and SIPI + * messages. + * + * Disabling the APIC must happen after cpu_disable_common() + * which invokes fixup_irqs(). + * + * Disabling the APIC preserves already set bits in IRR, but + * an interrupt arriving after disabling the local APIC does not + * set the corresponding IRR bit. + * + * fixup_irqs() scans IRR for set bits so it can raise a not + * yet handled interrupt on the new destination CPU via an IPI + * but obviously it can't do so for IRR bits which are not set. + * IOW, interrupts arriving after disabling the local APIC will + * be lost. + */ + apic_soft_disable(); + return 0; } -- cgit v1.2.3 From e027fffff799cdd70400c5485b1a54f482255985 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Aug 2020 22:21:44 +0200 Subject: x86/irq: Unbreak interrupt affinity setting Several people reported that 5.8 broke the interrupt affinity setting mechanism. The consolidation of the entry code reused the regular exception entry code for device interrupts and changed the way how the vector number is conveyed from ptregs->orig_ax to a function argument. The low level entry uses the hardware error code slot to push the vector number onto the stack which is retrieved from there into a function argument and the slot on stack is set to -1. The reason for setting it to -1 is that the error code slot is at the position where pt_regs::orig_ax is. A positive value in pt_regs::orig_ax indicates that the entry came via a syscall. If it's not set to a negative value then a signal delivery on return to userspace would try to restart a syscall. But there are other places which rely on pt_regs::orig_ax being a valid indicator for syscall entry. But setting pt_regs::orig_ax to -1 has a nasty side effect vs. the interrupt affinity setting mechanism, which was overlooked when this change was made. Moving interrupts on x86 happens in several steps. A new vector on a different CPU is allocated and the relevant interrupt source is reprogrammed to that. But that's racy and there might be an interrupt already in flight to the old vector. So the old vector is preserved until the first interrupt arrives on the new vector and the new target CPU. Once that happens the old vector is cleaned up, but this cleanup still depends on the vector number being stored in pt_regs::orig_ax, which is now -1. That -1 makes the check for cleanup: pt_regs::orig_ax == new_vector always false. As a consequence the interrupt is moved once, but then it cannot be moved anymore because the cleanup of the old vector never happens. There would be several ways to convey the vector information to that place in the guts of the interrupt handling, but on deeper inspection it turned out that this check is pointless and a leftover from the old affinity model of X86 which supported multi-CPU affinities. Under this model it was possible that an interrupt had an old and a new vector on the same CPU, so the vector match was required. Under the new model the effective affinity of an interrupt is always a single CPU from the requested affinity mask. If the affinity mask changes then either the interrupt stays on the CPU and on the same vector when that CPU is still in the new affinity mask or it is moved to a different CPU, but it is never moved to a different vector on the same CPU. Ergo the cleanup check for the matching vector number is not required and can be removed which makes the dependency on pt_regs:orig_ax go away. The remaining check for new_cpu == smp_processsor_id() is completely sufficient. If it matches then the interrupt was successfully migrated and the cleanup can proceed. For paranoia sake add a warning into the vector assignment code to validate that the assumption of never moving to a different vector on the same CPU holds. Fixes: 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs") Reported-by: Alex bykov Reported-by: Avi Kivity Reported-by: Alexander Graf Signed-off-by: Thomas Gleixner Tested-by: Alexander Graf Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87wo1ltaxz.fsf@nanos.tec.linutronix.de --- arch/x86/kernel/apic/vector.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index dae32d948bf2..f8a56b5dc29f 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -161,6 +161,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->move_in_progress = true; apicd->prev_vector = apicd->vector; apicd->prev_cpu = apicd->cpu; + WARN_ON_ONCE(apicd->cpu == newcpu); } else { irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, managed); @@ -910,7 +911,7 @@ void send_cleanup_vector(struct irq_cfg *cfg) __send_cleanup_vector(apicd); } -static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) +void irq_complete_move(struct irq_cfg *cfg) { struct apic_chip_data *apicd; @@ -918,15 +919,16 @@ static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) if (likely(!apicd->move_in_progress)) return; - if (vector == apicd->vector && apicd->cpu == smp_processor_id()) + /* + * If the interrupt arrived on the new target CPU, cleanup the + * vector on the old target CPU. A vector check is not required + * because an interrupt can never move from one vector to another + * on the same CPU. + */ + if (apicd->cpu == smp_processor_id()) __send_cleanup_vector(apicd); } -void irq_complete_move(struct irq_cfg *cfg) -{ - __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); -} - /* * Called from fixup_irqs() with @desc->lock held and interrupts disabled. */ -- cgit v1.2.3 From e5fe56092e753c50093c60e757561984abff335e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Aug 2020 17:53:09 +1000 Subject: powerpc/64s: scv entry should set PPR Kernel entry sets PPR to HMT_MEDIUM by convention. The scv entry path missed this. Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200825075309.224184-1-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 33a42e42c56f..733e40eba4eb 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -113,6 +113,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ +BEGIN_FTR_SECTION + HMT_MEDIUM +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + /* * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which * would clobber syscall parameters. Also we always enter with IRQs -- cgit v1.2.3 From b91eb5182405b01a8aeb42e9b5207831767e97ee Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Aug 2020 19:34:24 +1000 Subject: powerpc/64s: Fix crash in load_fp_state() due to fpexc_mode The recent commit 01eb01877f33 ("powerpc/64s: Fix restore_math unnecessarily changing MSR") changed some of the handling of floating point/vector restore. In particular it caused current->thread.fpexc_mode to be copied into the current MSR (via msr_check_and_set()), rather than just into regs->msr (which is moved into MSR on return to userspace). This can lead to a crash in the kernel if we take a floating point exception when restoring FPSCR: Oops: Exception in kernel mode, sig: 8 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: CPU: 3 PID: 101213 Comm: ld64.so.2 Not tainted 5.9.0-rc1-00098-g18445bf405cb-dirty #9 NIP: c00000000000fbb4 LR: c00000000001a7ac CTR: c000000000183570 REGS: c0000016b7cfb3b0 TRAP: 0700 Not tainted (5.9.0-rc1-00098-g18445bf405cb-dirty) MSR: 900000000290b933 CR: 44002444 XER: 00000000 CFAR: c00000000001a7a8 IRQMASK: 1 GPR00: c00000000001ae40 c0000016b7cfb640 c0000000011b7f00 c000001542a0f740 GPR04: c000001542a0f720 c000001542a0eb00 0000000000000900 c000001542a0eb00 GPR08: 000000000000000a 0000000000002000 9000000000009033 0000000000000000 GPR12: 0000000000004000 c0000017ffffd900 0000000000000001 c000000000df5a58 GPR16: c000000000e19c18 c0000000010e1123 0000000000000001 c000000000e1a638 GPR20: 0000000000000000 c0000000044b1d00 0000000000000000 c000001542a0f2a0 GPR24: 00000016c7fe0000 c000001542a0f720 c000000001c93da0 c000000000fe5f28 GPR28: c000001542a0f720 0000000000800000 c0000016b7cfbe90 0000000002802900 NIP load_fp_state+0x4/0x214 LR restore_math+0x17c/0x1f0 Call Trace: 0xc0000016b7cfb680 (unreliable) __switch_to+0x330/0x460 __schedule+0x318/0x920 schedule+0x74/0x140 schedule_timeout+0x318/0x3f0 wait_for_completion+0xc8/0x210 call_usermodehelper_exec+0x234/0x280 do_coredump+0xedc/0x13c0 get_signal+0x1d4/0xbe0 do_notify_resume+0x1a0/0x490 interrupt_exit_user_prepare+0x1c4/0x230 interrupt_return+0x14/0x1c0 Instruction dump: ebe10168 e88101a0 7c8ff120 382101e0 e8010010 7c0803a6 4e800020 790605c4 782905c4 7c0008a8 7c0008a8 c8030200 48000088 c8030000 c8230010 Fix it by only loading the fpexc_mode value into regs->msr. Also add a comment to explain that although VSX is subject to the value of fpexc_mode, we don't have to handle that separately because we only allow VSX to be enabled if FP is also enabled. Fixes: 01eb01877f33 ("powerpc/64s: Fix restore_math unnecessarily changing MSR") Reported-by: Milton Miller Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Link: https://lore.kernel.org/r/20200825093424.3967813-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 016bd831908e..73a57043ee66 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -548,7 +548,7 @@ void notrace restore_math(struct pt_regs *regs) * are live for the user thread). */ if ((!(msr & MSR_FP)) && should_restore_fp()) - new_msr |= MSR_FP | current->thread.fpexc_mode; + new_msr |= MSR_FP; if ((!(msr & MSR_VEC)) && should_restore_altivec()) new_msr |= MSR_VEC; @@ -559,11 +559,17 @@ void notrace restore_math(struct pt_regs *regs) } if (new_msr) { + unsigned long fpexc_mode = 0; + msr_check_and_set(new_msr); - if (new_msr & MSR_FP) + if (new_msr & MSR_FP) { do_restore_fp(); + // This also covers VSX, because VSX implies FP + fpexc_mode = current->thread.fpexc_mode; + } + if (new_msr & MSR_VEC) do_restore_altivec(); @@ -572,7 +578,7 @@ void notrace restore_math(struct pt_regs *regs) msr_check_and_clear(new_msr); - regs->msr |= new_msr; + regs->msr |= new_msr | fpexc_mode; } } #endif -- cgit v1.2.3 From b460b512417ae9c8b51a3bdcc09020cd6c60ff69 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 2 Jun 2020 12:56:12 +1000 Subject: powerpc/perf: Fix crashes with generic_compat_pmu & BHRB The bhrb_filter_map ("The Branch History Rolling Buffer") callback is only defined in raw CPUs' power_pmu structs. The "architected" CPUs use generic_compat_pmu, which does not have this callback, and crashes occur if a user tries to enable branch stack for an event. This add a NULL pointer check for bhrb_filter_map() which behaves as if the callback returned an error. This does not add the same check for config_bhrb() as the only caller checks for cpuhw->bhrb_users which remains zero if bhrb_filter_map==0. Fixes: be80e758d0c2 ("powerpc/perf: Add generic compat mode pmu driver") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Alexey Kardashevskiy Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200602025612.62707-1-aik@ozlabs.ru --- arch/powerpc/perf/core-book3s.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 93d20e1ed845..08643cba1494 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1557,9 +1557,16 @@ nocheck: ret = 0; out: if (has_branch_stack(event)) { - power_pmu_bhrb_enable(event); - cpuhw->bhrb_filter = ppmu->bhrb_filter_map( - event->attr.branch_sample_type); + u64 bhrb_filter = -1; + + if (ppmu->bhrb_filter_map) + bhrb_filter = ppmu->bhrb_filter_map( + event->attr.branch_sample_type); + + if (bhrb_filter != -1) { + cpuhw->bhrb_filter = bhrb_filter; + power_pmu_bhrb_enable(event); + } } perf_pmu_enable(event->pmu); @@ -1881,7 +1888,6 @@ static int power_pmu_event_init(struct perf_event *event) int n; int err; struct cpu_hw_events *cpuhw; - u64 bhrb_filter; if (!ppmu) return -ENOENT; @@ -1987,7 +1993,10 @@ static int power_pmu_event_init(struct perf_event *event) err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { - bhrb_filter = ppmu->bhrb_filter_map( + u64 bhrb_filter = -1; + + if (ppmu->bhrb_filter_map) + bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); if (bhrb_filter == -1) { -- cgit v1.2.3 From 82715a0f332843d3a1830d7ebc9ac7c99a00c880 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 26 Aug 2020 02:40:29 -0400 Subject: powerpc/perf: Fix reading of MSR[HV/PR] bits in trace-imc IMC trace-mode uses MSR[HV/PR] bits to set the cpumode for the instruction pointer captured in each sample. The bits are fetched from the third double word of the trace record. Reading third double word from IMC trace record should use be64_to_cpu() along with READ_ONCE inorder to fetch correct MSR[HV/PR] bits. Patch addresses this change. Currently we are using PERF_RECORD_MISC_HYPERVISOR as cpumode if MSR HV is 1 and PR is 0 which means the address is from host counter. But using PERF_RECORD_MISC_HYPERVISOR for host counter data will fail to resolve the address -> symbol during "perf report" because perf tools side uses PERF_RECORD_MISC_KERNEL to represent the host counter data. Therefore, fix the trace imc sample data to use PERF_RECORD_MISC_KERNEL as cpumode for host kernel information. Fixes: 77ca3951cc37 ("powerpc/perf: Add kernel support for new MSR[HV PR] bits in trace-imc") Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1598424029-1662-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/imc-pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index a45d694a5d5d..62d0b54086f8 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1289,7 +1289,7 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem, header->misc = 0; if (cpu_has_feature(CPU_FTR_ARCH_31)) { - switch (IMC_TRACE_RECORD_VAL_HVPR(mem->val)) { + switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) { case 0:/* when MSR HV and PR not set in the trace-record */ header->misc |= PERF_RECORD_MISC_GUEST_KERNEL; break; @@ -1297,7 +1297,7 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem, header->misc |= PERF_RECORD_MISC_GUEST_USER; break; case 2: /* MSR HV is 1 and PR is 0 */ - header->misc |= PERF_RECORD_MISC_HYPERVISOR; + header->misc |= PERF_RECORD_MISC_KERNEL; break; case 3: /* MSR HV is 1 and PR is 1 */ header->misc |= PERF_RECORD_MISC_USER; -- cgit v1.2.3 From 16d83a540ca4e7f1ebb2b3756869b77451d31414 Mon Sep 17 00:00:00 2001 From: Pratik Rajesh Sampat Date: Wed, 26 Aug 2020 13:59:18 +0530 Subject: Revert "powerpc/powernv/idle: Replace CPU feature check with PVR check" cpuidle stop state implementation has minor optimizations for P10 where hardware preserves more SPR registers compared to P9. The current P9 driver works for P10, although does few extra save-restores. P9 driver can provide the required power management features like SMT thread folding and core level power savings on a P10 platform. Until the P10 stop driver is available, revert the commit which allows for only P9 systems to utilize cpuidle and blocks all idle stop states for P10. CPU idle states are enabled and tested on the P10 platform with this fix. This reverts commit 8747bf36f312356f8a295a0c39ff092d65ce75ae. Fixes: 8747bf36f312 ("powerpc/powernv/idle: Replace CPU feature check with PVR check") Signed-off-by: Pratik Rajesh Sampat Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200826082918.89306-1-psampat@linux.ibm.com --- arch/powerpc/platforms/powernv/idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 77513a80cef9..345ab062b21a 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -1223,7 +1223,7 @@ static void __init pnv_probe_idle_states(void) return; } - if (pvr_version_is(PVR_POWER9)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) pnv_power9_idle_init(); for (i = 0; i < nr_pnv_idle_states; i++) -- cgit v1.2.3 From e5c388b4b967037a0e00b60194b0dbcf94881a9b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 7 Aug 2020 21:29:28 -0700 Subject: ARC: show_regs: fix r12 printing and simplify when working on ARC64, spotted an issue in ARCv2 reg file printing. print_reg_file() assumes contiguous reg-file whereas in ARCv2 they are not: r12 comes before r0-r11 due to hardware auto-save. Apparently this issue has been present since v2 port submission. To avoid bolting hacks for this discontinuity while looping through pt_regs, just ditching the loop and print pt_regs directly. Signed-off-by: Vineet Gupta --- arch/arc/kernel/troubleshoot.c | 77 ++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 47 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 28e8bf04b253..a331bb5d8319 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -18,44 +18,37 @@ #define ARC_PATH_MAX 256 -/* - * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25) - * -Prints 3 regs per line and a CR. - * -To continue, callee regs right after scratch, special handling of CR - */ -static noinline void print_reg_file(long *reg_rev, int start_num) +static noinline void print_regs_scratch(struct pt_regs *regs) { - unsigned int i; - char buf[512]; - int n = 0, len = sizeof(buf); - - for (i = start_num; i < start_num + 13; i++) { - n += scnprintf(buf + n, len - n, "r%02u: 0x%08lx\t", - i, (unsigned long)*reg_rev); - - if (((i + 1) % 3) == 0) - n += scnprintf(buf + n, len - n, "\n"); - - /* because pt_regs has regs reversed: r12..r0, r25..r13 */ - if (is_isa_arcv2() && start_num == 0) - reg_rev++; - else - reg_rev--; - } - - if (start_num != 0) - n += scnprintf(buf + n, len - n, "\n\n"); + pr_cont("BTA: 0x%08lx\n SP: 0x%08lx FP: 0x%08lx BLK: %pS\n", + regs->bta, regs->sp, regs->fp, (void *)regs->blink); + pr_cont("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n", + regs->lp_start, regs->lp_end, regs->lp_count); - /* To continue printing callee regs on same line as scratch regs */ - if (start_num == 0) - pr_info("%s", buf); - else - pr_cont("%s\n", buf); + pr_info("r00: 0x%08lx\tr01: 0x%08lx\tr02: 0x%08lx\n" \ + "r03: 0x%08lx\tr04: 0x%08lx\tr05: 0x%08lx\n" \ + "r06: 0x%08lx\tr07: 0x%08lx\tr08: 0x%08lx\n" \ + "r09: 0x%08lx\tr10: 0x%08lx\tr11: 0x%08lx\n" \ + "r12: 0x%08lx\t", + regs->r0, regs->r1, regs->r2, + regs->r3, regs->r4, regs->r5, + regs->r6, regs->r7, regs->r8, + regs->r9, regs->r10, regs->r11, + regs->r12); } -static void show_callee_regs(struct callee_regs *cregs) +static void print_regs_callee(struct callee_regs *regs) { - print_reg_file(&(cregs->r13), 13); + pr_cont("r13: 0x%08lx\tr14: 0x%08lx\n" \ + "r15: 0x%08lx\tr16: 0x%08lx\tr17: 0x%08lx\n" \ + "r18: 0x%08lx\tr19: 0x%08lx\tr20: 0x%08lx\n" \ + "r21: 0x%08lx\tr22: 0x%08lx\tr23: 0x%08lx\n" \ + "r24: 0x%08lx\tr25: 0x%08lx\n", + regs->r13, regs->r14, + regs->r15, regs->r16, regs->r17, + regs->r18, regs->r19, regs->r20, + regs->r21, regs->r22, regs->r23, + regs->r24, regs->r25); } static void print_task_path_n_nm(struct task_struct *tsk) @@ -175,7 +168,7 @@ static void show_ecr_verbose(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { struct task_struct *tsk = current; - struct callee_regs *cregs; + struct callee_regs *cregs = (struct callee_regs *)tsk->thread.callee_reg; /* * generic code calls us with preemption disabled, but some calls @@ -204,25 +197,15 @@ void show_regs(struct pt_regs *regs) STS_BIT(regs, A2), STS_BIT(regs, A1), STS_BIT(regs, E2), STS_BIT(regs, E1)); #else - pr_cont(" [%2s%2s%2s%2s]", + pr_cont(" [%2s%2s%2s%2s] ", STS_BIT(regs, IE), (regs->status32 & STATUS_U_MASK) ? "U " : "K ", STS_BIT(regs, DE), STS_BIT(regs, AE)); #endif - pr_cont(" BTA: 0x%08lx\n SP: 0x%08lx FP: 0x%08lx BLK: %pS\n", - regs->bta, regs->sp, regs->fp, (void *)regs->blink); - pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n", - regs->lp_start, regs->lp_end, regs->lp_count); - - /* print regs->r0 thru regs->r12 - * Sequential printing was generating horrible code - */ - print_reg_file(&(regs->r0), 0); - /* If Callee regs were saved, display them too */ - cregs = (struct callee_regs *)current->thread.callee_reg; + print_regs_scratch(regs); if (cregs) - show_callee_regs(cregs); + print_regs_callee(cregs); preempt_disable(); } -- cgit v1.2.3 From 89d29997f103d08264b0685796b420d911658b96 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 24 Aug 2020 12:10:33 -0700 Subject: irqchip/eznps: Fix build error for !ARC700 builds eznps driver is supposed to be platform independent however it ends up including stuff from inside arch/arc headers leading to rand config build errors. The quick hack to fix this (proper fix is too much chrun for non active user-base) is to add following to nps platform agnostic header. - copy AUX_IENABLE from arch/arc header - move CTOP_AUX_IACK from arch/arc/plat-eznps/*/** Reported-by: kernel test robot Reported-by: Sebastian Andrzej Siewior Link: https://lkml.kernel.org/r/20200824095831.5lpkmkafelnvlpi2@linutronix.de Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/include/plat/ctop.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index a4a61531c7fb..77712c5ffe84 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -33,7 +33,6 @@ #define CTOP_AUX_DPC (CTOP_AUX_BASE + 0x02C) #define CTOP_AUX_LPC (CTOP_AUX_BASE + 0x030) #define CTOP_AUX_EFLAGS (CTOP_AUX_BASE + 0x080) -#define CTOP_AUX_IACK (CTOP_AUX_BASE + 0x088) #define CTOP_AUX_GPA1 (CTOP_AUX_BASE + 0x08C) #define CTOP_AUX_UDMC (CTOP_AUX_BASE + 0x300) -- cgit v1.2.3 From c165a08d2b2857c91c627039c4881f9d7ad1a3bd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 27 Aug 2020 15:44:32 -0500 Subject: arm64/cpuinfo: Remove unnecessary fallthrough annotation Fallthrough annotations for consecutive default and case labels are not necessary. Reported-by: Linus Torvalds Signed-off-by: Gustavo A. R. Silva --- arch/arm64/kernel/cpuinfo.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 1886a02c3f50..d0076c2159e6 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -327,7 +327,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) set_bit(ICACHEF_VPIPT, &__icache_flags); break; default: - fallthrough; case ICACHE_POLICY_VIPT: /* Assume aliasing */ set_bit(ICACHEF_ALIASING, &__icache_flags); -- cgit v1.2.3 From 4a133eb351ccc275683ad49305d0b04dde903733 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 27 Aug 2020 18:30:27 +0000 Subject: powerpc/32s: Disable VMAP stack which CONFIG_ADB_PMU low_sleep_handler() can't restore the context from virtual stack because the stack can hardly be accessed with MMU OFF. For now, disable VMAP stack when CONFIG_ADB_PMU is selected. Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK") Cc: stable@vger.kernel.org # v5.6+ Reported-by: Giuseppe Sacco Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ec96c15bfa1a7415ab604ee1c98cd45779c08be0.1598553015.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 87737ec86d39..1dc9d3c81872 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK + select HAVE_ARCH_VMAP_STACK if !ADB_PMU config PPC_BOOK3S_601 bool "PowerPC 601" -- cgit v1.2.3 From d6f6cbeee4e5ee6976792851e0461c19f1ede864 Mon Sep 17 00:00:00 2001 From: Wenbin Mei Date: Fri, 14 Aug 2020 09:43:45 +0800 Subject: arm64: dts: mt7622: add reset node for mmc device This commit adds reset node for mmc device. Cc: # v5.4+ Fixes: 966580ad236e ("mmc: mediatek: add support for MT7622 SoC") Signed-off-by: Wenbin Mei Tested-by: Frank Wunderlich Acked-by: Matthias Brugger Link: https://lore.kernel.org/r/20200814014346.6496-3-wenbin.mei@mediatek.com Signed-off-by: Ulf Hansson --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 1a39e0ef776b..5b9ec032ce8d 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -686,6 +686,8 @@ clocks = <&pericfg CLK_PERI_MSDC30_0_PD>, <&topckgen CLK_TOP_MSDC50_0_SEL>; clock-names = "source", "hclk"; + resets = <&pericfg MT7622_PERI_MSDC0_SW_RST>; + reset-names = "hrst"; status = "disabled"; }; -- cgit v1.2.3 From 679f71fa0db2d777f39c7a5af7f7c0689fc713fa Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Thu, 27 Aug 2020 10:20:58 -0700 Subject: arm64: tegra: Add missing timeout clock to Tegra210 SDMMC commit 742af7e7a0a1 ("arm64: tegra: Add Tegra210 support") Tegra210 uses separate SDMMC_LEGACY_TM clock for data timeout and this clock is not enabled currently which is not recommended. Tegra SDMMC advertises 12Mhz as timeout clock frequency in host capability register. So, this clock should be kept enabled by SDMMC driver. Fixes: 742af7e7a0a1 ("arm64: tegra: Add Tegra210 support") Cc: stable # 5.4 Tested-by: Jon Hunter Reviewed-by: Jon Hunter Signed-off-by: Sowjanya Komatineni Link: https://lore.kernel.org/r/1598548861-32373-5-git-send-email-skomatineni@nvidia.com Signed-off-by: Ulf Hansson --- arch/arm64/boot/dts/nvidia/tegra210.dtsi | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi index 829f786af133..8cca2166a446 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi @@ -1194,8 +1194,9 @@ compatible = "nvidia,tegra210-sdhci"; reg = <0x0 0x700b0000 0x0 0x200>; interrupts = ; - clocks = <&tegra_car TEGRA210_CLK_SDMMC1>; - clock-names = "sdhci"; + clocks = <&tegra_car TEGRA210_CLK_SDMMC1>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; resets = <&tegra_car 14>; reset-names = "sdhci"; pinctrl-names = "sdmmc-3v3", "sdmmc-1v8", @@ -1222,8 +1223,9 @@ compatible = "nvidia,tegra210-sdhci"; reg = <0x0 0x700b0200 0x0 0x200>; interrupts = ; - clocks = <&tegra_car TEGRA210_CLK_SDMMC2>; - clock-names = "sdhci"; + clocks = <&tegra_car TEGRA210_CLK_SDMMC2>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; resets = <&tegra_car 9>; reset-names = "sdhci"; pinctrl-names = "sdmmc-1v8-drv"; @@ -1239,8 +1241,9 @@ compatible = "nvidia,tegra210-sdhci"; reg = <0x0 0x700b0400 0x0 0x200>; interrupts = ; - clocks = <&tegra_car TEGRA210_CLK_SDMMC3>; - clock-names = "sdhci"; + clocks = <&tegra_car TEGRA210_CLK_SDMMC3>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; resets = <&tegra_car 69>; reset-names = "sdhci"; pinctrl-names = "sdmmc-3v3", "sdmmc-1v8", @@ -1262,8 +1265,9 @@ compatible = "nvidia,tegra210-sdhci"; reg = <0x0 0x700b0600 0x0 0x200>; interrupts = ; - clocks = <&tegra_car TEGRA210_CLK_SDMMC4>; - clock-names = "sdhci"; + clocks = <&tegra_car TEGRA210_CLK_SDMMC4>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; resets = <&tegra_car 15>; reset-names = "sdhci"; pinctrl-names = "sdmmc-3v3-drv", "sdmmc-1v8-drv"; -- cgit v1.2.3 From baba217d2c4446b6eef309d81d8776cb5c68cb55 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Thu, 27 Aug 2020 10:20:59 -0700 Subject: arm64: tegra: Add missing timeout clock to Tegra186 SDMMC nodes commit 39cb62cb8973 ("arm64: tegra: Add Tegra186 support") Tegra186 uses separate SDMMC_LEGACY_TM clock for data timeout and this clock is not enabled currently which is not recommended. Tegra186 SDMMC advertises 12Mhz as timeout clock frequency in host capability register and uses it by default. So, this clock should be kept enabled by the SDMMC driver. Fixes: 39cb62cb8973 ("arm64: tegra: Add Tegra186 support") Cc: stable # 5.4 Tested-by: Jon Hunter Reviewed-by: Jon Hunter Signed-off-by: Sowjanya Komatineni Link: https://lore.kernel.org/r/1598548861-32373-6-git-send-email-skomatineni@nvidia.com Signed-off-by: Ulf Hansson --- arch/arm64/boot/dts/nvidia/tegra186.dtsi | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index 34d249d85da7..8eb61dd9921e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -337,8 +337,9 @@ compatible = "nvidia,tegra186-sdhci"; reg = <0x0 0x03400000 0x0 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA186_CLK_SDMMC1>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA186_CLK_SDMMC1>, + <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA186_RESET_SDMMC1>; reset-names = "sdhci"; interconnects = <&mc TEGRA186_MEMORY_CLIENT_SDMMCRA &emc>, @@ -366,8 +367,9 @@ compatible = "nvidia,tegra186-sdhci"; reg = <0x0 0x03420000 0x0 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA186_CLK_SDMMC2>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA186_CLK_SDMMC2>, + <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA186_RESET_SDMMC2>; reset-names = "sdhci"; interconnects = <&mc TEGRA186_MEMORY_CLIENT_SDMMCRAA &emc>, @@ -390,8 +392,9 @@ compatible = "nvidia,tegra186-sdhci"; reg = <0x0 0x03440000 0x0 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA186_CLK_SDMMC3>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA186_CLK_SDMMC3>, + <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA186_RESET_SDMMC3>; reset-names = "sdhci"; interconnects = <&mc TEGRA186_MEMORY_CLIENT_SDMMCR &emc>, @@ -416,8 +419,9 @@ compatible = "nvidia,tegra186-sdhci"; reg = <0x0 0x03460000 0x0 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA186_CLK_SDMMC4>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA186_CLK_SDMMC4>, + <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; assigned-clocks = <&bpmp TEGRA186_CLK_SDMMC4>, <&bpmp TEGRA186_CLK_PLLC4_VCO>; assigned-clock-parents = <&bpmp TEGRA186_CLK_PLLC4_VCO>; -- cgit v1.2.3 From c956c0cd4f6f4aac4f095621b1c4e1c5ee1df877 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Thu, 27 Aug 2020 10:21:00 -0700 Subject: arm64: tegra: Add missing timeout clock to Tegra194 SDMMC nodes commit 5425fb15d8ee ("arm64: tegra: Add Tegra194 chip device tree") Tegra194 uses separate SDMMC_LEGACY_TM clock for data timeout and this clock is not enabled currently which is not recommended. Tegra194 SDMMC advertises 12Mhz as timeout clock frequency in host capability register. So, this clock should be kept enabled by SDMMC driver. Fixes: 5425fb15d8ee ("arm64: tegra: Add Tegra194 chip device tree") Cc: stable # 5.4 Tested-by: Jon Hunter Reviewed-by: Jon Hunter Signed-off-by: Sowjanya Komatineni Link: https://lore.kernel.org/r/1598548861-32373-7-git-send-email-skomatineni@nvidia.com Signed-off-by: Ulf Hansson --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 48160f48003a..ca5cb6aef5ee 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -460,8 +460,9 @@ compatible = "nvidia,tegra194-sdhci"; reg = <0x03400000 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA194_CLK_SDMMC1>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA194_CLK_SDMMC1>, + <&bpmp TEGRA194_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA194_RESET_SDMMC1>; reset-names = "sdhci"; interconnects = <&mc TEGRA194_MEMORY_CLIENT_SDMMCRA &emc>, @@ -485,8 +486,9 @@ compatible = "nvidia,tegra194-sdhci"; reg = <0x03440000 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA194_CLK_SDMMC3>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA194_CLK_SDMMC3>, + <&bpmp TEGRA194_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA194_RESET_SDMMC3>; reset-names = "sdhci"; interconnects = <&mc TEGRA194_MEMORY_CLIENT_SDMMCR &emc>, @@ -511,8 +513,9 @@ compatible = "nvidia,tegra194-sdhci"; reg = <0x03460000 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA194_CLK_SDMMC4>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA194_CLK_SDMMC4>, + <&bpmp TEGRA194_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; assigned-clocks = <&bpmp TEGRA194_CLK_SDMMC4>, <&bpmp TEGRA194_CLK_PLLC4>; assigned-clock-parents = -- cgit v1.2.3 From 1764c3edc66880778604f5053fe2dda7b3ddd2c1 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 27 Aug 2020 13:36:08 -0700 Subject: arm64: use a common .arch preamble for inline assembly Commit 7c78f67e9bd9 ("arm64: enable tlbi range instructions") breaks LLVM's integrated assembler, because -Wa,-march is only passed to external assemblers and therefore, the new instructions are not enabled when IAS is used. This change adds a common architecture version preamble, which can be used in inline assembly blocks that contain instructions that require a newer architecture version, and uses it to fix __TLBI_0 and __TLBI_1 with ARM64_TLB_RANGE. Fixes: 7c78f67e9bd9 ("arm64: enable tlbi range instructions") Signed-off-by: Sami Tolvanen Tested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1106 Link: https://lore.kernel.org/r/20200827203608.1225689-1-samitolvanen@google.com Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 11 ++++++++--- arch/arm64/include/asm/compiler.h | 6 ++++++ arch/arm64/include/asm/tlbflush.h | 6 ++++-- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b45f0124cc16..20ab5c9375a5 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -82,8 +82,8 @@ endif # compiler to generate them and consequently to break the single image contract # we pass it only to the assembler. This option is utilized only in case of non # integrated assemblers. -ifneq ($(CONFIG_AS_HAS_ARMV8_4), y) -branch-prot-flags-$(CONFIG_AS_HAS_PAC) += -Wa,-march=armv8.3-a +ifeq ($(CONFIG_AS_HAS_PAC), y) +asm-arch := armv8.3-a endif endif @@ -91,7 +91,12 @@ KBUILD_CFLAGS += $(branch-prot-flags-y) ifeq ($(CONFIG_AS_HAS_ARMV8_4), y) # make sure to pass the newest target architecture to -march. -KBUILD_CFLAGS += -Wa,-march=armv8.4-a +asm-arch := armv8.4-a +endif + +ifdef asm-arch +KBUILD_CFLAGS += -Wa,-march=$(asm-arch) \ + -DARM64_ASM_ARCH='"$(asm-arch)"' endif ifeq ($(CONFIG_SHADOW_CALL_STACK), y) diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index 51a7ce87cdfe..6fb2e6bcc392 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -2,6 +2,12 @@ #ifndef __ASM_COMPILER_H #define __ASM_COMPILER_H +#ifdef ARM64_ASM_ARCH +#define ARM64_ASM_PREAMBLE ".arch " ARM64_ASM_ARCH "\n" +#else +#define ARM64_ASM_PREAMBLE +#endif + /* * The EL0/EL1 pointer bits used by a pointer authentication code. * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply. diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index d493174415db..cc3f5a33ff9c 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -28,14 +28,16 @@ * not. The macros handles invoking the asm with or without the * register argument as appropriate. */ -#define __TLBI_0(op, arg) asm ("tlbi " #op "\n" \ +#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \ + "tlbi " #op "\n" \ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op, \ ARM64_WORKAROUND_REPEAT_TLBI, \ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : ) -#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \ +#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \ + "tlbi " #op ", %0\n" \ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op ", %0", \ ARM64_WORKAROUND_REPEAT_TLBI, \ -- cgit v1.2.3 From 5d28ba5f8a0cfa3a874fa96c33731b8fcd141b3a Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Thu, 27 Aug 2020 23:40:12 +0000 Subject: arm64: vdso32: make vdso32 install conditional vdso32 should only be installed if CONFIG_COMPAT_VDSO is enabled, since it's not even supposed to be compiled otherwise, and arm64 builds without a 32bit crosscompiler will fail. Fixes: 8d75785a8142 ("ARM64: vdso32: Install vdso32 from vdso_install") Signed-off-by: Frank van der Linden Cc: stable@vger.kernel.org [5.4+] Link: https://lore.kernel.org/r/20200827234012.19757-1-fllinden@amazon.com Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 20ab5c9375a5..130569f90c54 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -170,7 +170,8 @@ zinstall install: PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ - $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@ + $(if $(CONFIG_COMPAT_VDSO), \ + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@) # We use MRPROPER_FILES and CLEAN_FILES now archclean: -- cgit v1.2.3 From e9ee186bb735bfc17fa81dbc9aebf268aee5b41e Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 21 Aug 2020 15:07:05 +0100 Subject: KVM: arm64: Add kvm_extable for vaxorcism code KVM has a one instruction window where it will allow an SError exception to be consumed by the hypervisor without treating it as a hypervisor bug. This is used to consume asynchronous external abort that were caused by the guest. As we are about to add another location that survives unexpected exceptions, generalise this code to make it behave like the host's extable. KVM's version has to be mapped to EL2 to be accessible on nVHE systems. The SError vaxorcism code is a one instruction window, so has two entries in the extable. Because the KVM code is copied for VHE and nVHE, we end up with four entries, half of which correspond with code that isn't mapped. Signed-off-by: James Morse Reviewed-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_asm.h | 15 ++++++++++ arch/arm64/kernel/image-vars.h | 4 +++ arch/arm64/kernel/vmlinux.lds.S | 8 ++++++ arch/arm64/kvm/hyp/entry.S | 15 ++++++---- arch/arm64/kvm/hyp/hyp-entry.S | 51 ++++++++++++++++++++------------- arch/arm64/kvm/hyp/include/hyp/switch.h | 31 ++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/switch.c | 5 ++++ arch/arm64/kvm/hyp/vhe/switch.c | 5 ++++ 8 files changed, 108 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index fb1a922b31ba..63350f0e3453 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -193,6 +193,21 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] .endm +/* + * KVM extable for unexpected exceptions. + * In the same format _asm_extable, but output to a different section so that + * it can be mapped to EL2. The KVM version is not sorted. The caller must + * ensure: + * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented + * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. + */ +.macro _kvm_extable, from, to + .pushsection __kvm_ex_table, "a" + .align 3 + .long (\from - .), (\to - .) + .popsection +.endm + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 9e897c500237..8982b68289b7 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -103,6 +103,10 @@ KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); KVM_NVHE_ALIAS(gic_pmr_sync); #endif +/* EL2 exception handling */ +KVM_NVHE_ALIAS(__start___kvm_ex_table); +KVM_NVHE_ALIAS(__stop___kvm_ex_table); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index ec8e894684a7..7cba7623fcec 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -20,6 +20,13 @@ ENTRY(_text) jiffies = jiffies_64; + +#define HYPERVISOR_EXTABLE \ + . = ALIGN(SZ_8); \ + __start___kvm_ex_table = .; \ + *(__kvm_ex_table) \ + __stop___kvm_ex_table = .; + #define HYPERVISOR_TEXT \ /* \ * Align to 4 KB so that \ @@ -35,6 +42,7 @@ jiffies = jiffies_64; __hyp_idmap_text_end = .; \ __hyp_text_start = .; \ *(.hyp.text) \ + HYPERVISOR_EXTABLE \ __hyp_text_end = .; #define IDMAP_TEXT \ diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index ee32a7743389..76e7eaf4675e 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -196,20 +196,23 @@ alternative_endif // This is our single instruction exception window. A pending // SError is guaranteed to occur at the earliest when we unmask // it, and at the latest just after the ISB. - .global abort_guest_exit_start abort_guest_exit_start: isb - .global abort_guest_exit_end abort_guest_exit_end: msr daifset, #4 // Mask aborts + ret + + _kvm_extable abort_guest_exit_start, 9997f + _kvm_extable abort_guest_exit_end, 9997f +9997: + msr daifset, #4 // Mask aborts + mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) - // If the exception took place, restore the EL1 exception - // context so that we can report some information. - // Merge the exception code with the SError pending bit. - tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f + // restore the EL1 exception context so that we can report some + // information. Merge the exception code with the SError pending bit. msr elr_el2, x2 msr esr_el2, x3 msr spsr_el2, x4 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 689fccbc9de7..e47547e276d8 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -15,6 +15,30 @@ #include #include +.macro save_caller_saved_regs_vect + /* x0 and x1 were saved in the vector entry */ + stp x2, x3, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x16, x17, [sp, #-16]! +.endm + +.macro restore_caller_saved_regs_vect + ldp x16, x17, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + .text .macro do_el2_call @@ -157,27 +181,14 @@ el2_sync: el2_error: - ldp x0, x1, [sp], #16 + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + + bl kvm_unexpected_el2_exception + + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect - /* - * Only two possibilities: - * 1) Either we come from the exit path, having just unmasked - * PSTATE.A: change the return code to an EL2 fault, and - * carry on, as we're already in a sane state to handle it. - * 2) Or we come from anywhere else, and that's a bug: we panic. - * - * For (1), x0 contains the original return code and x1 doesn't - * contain anything meaningful at that stage. We can reuse them - * as temp registers. - * For (2), who cares? - */ - mrs x0, elr_el2 - adr x1, abort_guest_exit_start - cmp x0, x1 - adr x1, abort_guest_exit_end - ccmp x0, x1, #4, ne - b.ne __hyp_panic - mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) eret sb diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 426ef65601dd..6d8d4ed4287c 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,9 @@ extern const char __hyp_panic_string[]; +extern struct exception_table_entry __start___kvm_ex_table; +extern struct exception_table_entry __stop___kvm_ex_table; + /* Check whether the FP regs were dirtied while in the host-side run loop: */ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) { @@ -508,4 +512,31 @@ static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) #endif } +static inline void __kvm_unexpected_el2_exception(void) +{ + unsigned long addr, fixup; + struct kvm_cpu_context *host_ctxt; + struct exception_table_entry *entry, *end; + unsigned long elr_el2 = read_sysreg(elr_el2); + + entry = hyp_symbol_addr(__start___kvm_ex_table); + end = hyp_symbol_addr(__stop___kvm_ex_table); + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + + while (entry < end) { + addr = (unsigned long)&entry->insn + entry->insn; + fixup = (unsigned long)&entry->fixup + entry->fixup; + + if (addr != elr_el2) { + entry++; + continue; + } + + write_sysreg(fixup, elr_el2); + return; + } + + hyp_panic(host_ctxt); +} + #endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 341be2f2f312..0970442d2dbc 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -270,3 +270,8 @@ void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) read_sysreg(hpfar_el2), par, vcpu); unreachable(); } + +asmlinkage void kvm_unexpected_el2_exception(void) +{ + return __kvm_unexpected_el2_exception(); +} diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c52d714e0d75..c1da4f86ccac 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -217,3 +217,8 @@ void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) __hyp_call_panic(spsr, elr, par, host_ctxt); unreachable(); } + +asmlinkage void kvm_unexpected_el2_exception(void) +{ + return __kvm_unexpected_el2_exception(); +} -- cgit v1.2.3 From 88a84ccccb3966bcc3f309cdb76092a9892c0260 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 21 Aug 2020 15:07:06 +0100 Subject: KVM: arm64: Survive synchronous exceptions caused by AT instructions KVM doesn't expect any synchronous exceptions when executing, any such exception leads to a panic(). AT instructions access the guest page tables, and can cause a synchronous external abort to be taken. The arm-arm is unclear on what should happen if the guest has configured the hardware update of the access-flag, and a memory type in TCR_EL1 that does not support atomic operations. B2.2.6 "Possible implementation restrictions on using atomic instructions" from DDI0487F.a lists synchronous external abort as a possible behaviour of atomic instructions that target memory that isn't writeback cacheable, but the page table walker may behave differently. Make KVM robust to synchronous exceptions caused by AT instructions. Add a get_user() style helper for AT instructions that returns -EFAULT if an exception was generated. While KVM's version of the exception table mixes synchronous and asynchronous exceptions, only one of these can occur at each location. Re-enter the guest when the AT instructions take an exception on the assumption the guest will take the same exception. This isn't guaranteed to make forward progress, as the AT instructions may always walk the page tables, but guest execution may use the translation cached in the TLB. This isn't a problem, as since commit 5dcd0fdbb492 ("KVM: arm64: Defer guest entry when an asynchronous exception is pending"), KVM will return to the host to process IRQs allowing the rest of the system to keep running. Cc: stable@vger.kernel.org # Reviewed-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_asm.h | 28 ++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/hyp-entry.S | 14 ++++++++++---- arch/arm64/kvm/hyp/include/hyp/switch.h | 8 ++++---- 3 files changed, 42 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 63350f0e3453..6f98fbd0ac81 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -169,6 +169,34 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; *__hyp_this_cpu_ptr(sym); \ }) +#define __KVM_EXTABLE(from, to) \ + " .pushsection __kvm_ex_table, \"a\"\n" \ + " .align 3\n" \ + " .long (" #from " - .), (" #to " - .)\n" \ + " .popsection\n" + + +#define __kvm_at(at_op, addr) \ +( { \ + int __kvm_at_err = 0; \ + u64 spsr, elr; \ + asm volatile( \ + " mrs %1, spsr_el2\n" \ + " mrs %2, elr_el2\n" \ + "1: at "at_op", %3\n" \ + " isb\n" \ + " b 9f\n" \ + "2: msr spsr_el2, %1\n" \ + " msr elr_el2, %2\n" \ + " mov %w0, %4\n" \ + "9:\n" \ + __KVM_EXTABLE(1b, 2b) \ + : "+r" (__kvm_at_err), "=&r" (spsr), "=&r" (elr) \ + : "r" (addr), "i" (-EFAULT)); \ + __kvm_at_err; \ +} ) + + #else /* __ASSEMBLY__ */ .macro hyp_adr_this_cpu reg, sym, tmp diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index e47547e276d8..46b4dab933d0 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -167,13 +167,19 @@ el1_error: b __guest_exit el2_sync: - /* Check for illegal exception return, otherwise panic */ + /* Check for illegal exception return */ mrs x0, spsr_el2 + tbnz x0, #20, 1f - /* if this was something else, then panic! */ - tst x0, #PSR_IL_BIT - b.eq __hyp_panic + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + bl kvm_unexpected_el2_exception + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect + + eret +1: /* Let's attempt a recovery from the illegal exception return */ get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IL diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 6d8d4ed4287c..5b6b8fa00f0a 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -146,10 +146,10 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) * saved the guest context yet, and we may return early... */ par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg(par_el1); + else + tmp = SYS_PAR_EL1_F; /* back to the guest */ write_sysreg(par, par_el1); if (unlikely(tmp & SYS_PAR_EL1_F)) -- cgit v1.2.3 From 71a7f8cb1ca4ca7214a700b1243626759b6c11d4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 21 Aug 2020 15:07:07 +0100 Subject: KVM: arm64: Set HCR_EL2.PTW to prevent AT taking synchronous exception AT instructions do a translation table walk and return the result, or the fault in PAR_EL1. KVM uses these to find the IPA when the value is not provided by the CPU in HPFAR_EL1. If a translation table walk causes an external abort it is taken as an exception, even if it was due to an AT instruction. (DDI0487F.a's D5.2.11 "Synchronous faults generated by address translation instructions") While we previously made KVM resilient to exceptions taken due to AT instructions, the device access causes mismatched attributes, and may occur speculatively. Prevent this, by forbidding a walk through memory described as device at stage2. Now such AT instructions will report a stage2 fault. Such a fault will cause KVM to restart the guest. If the AT instructions always walk the page tables, but guest execution uses the translation cached in the TLB, the guest can't make forward progress until the TLB entry is evicted. This isn't a problem, as since commit 5dcd0fdbb492 ("KVM: arm64: Defer guest entry when an asynchronous exception is pending"), KVM will return to the host to process IRQs allowing the rest of the system to keep running. Cc: stable@vger.kernel.org # Reviewed-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_arm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 51c1d9918999..1da8e3dc4455 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -71,11 +71,12 @@ * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate + * PTW: Take a stage2 fault if a stage1 walk steps in device memory */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ - HCR_FMO | HCR_IMO) + HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) -- cgit v1.2.3 From 08134e79e0ab138db211287ef3a705b09ba45f50 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 28 Aug 2020 17:01:10 -0700 Subject: microblaze: fix min_low_pfn/max_low_pfn build errors Fix min_low_pfn/max_low_pfn build errors for arch/microblaze/: (e.g.) ERROR: "min_low_pfn" [drivers/rpmsg/virtio_rpmsg_bus.ko] undefined! ERROR: "min_low_pfn" [drivers/hwtracing/intel_th/intel_th_msu_sink.ko] undefined! ERROR: "min_low_pfn" [drivers/hwtracing/intel_th/intel_th_msu.ko] undefined! ERROR: "min_low_pfn" [drivers/mmc/core/mmc_core.ko] undefined! ERROR: "min_low_pfn" [drivers/md/dm-crypt.ko] undefined! ERROR: "min_low_pfn" [drivers/net/wireless/ath/ath6kl/ath6kl_sdio.ko] undefined! ERROR: "min_low_pfn" [crypto/tcrypt.ko] undefined! ERROR: "min_low_pfn" [crypto/asymmetric_keys/asym_tpm.ko] undefined! Mike had/has an alternate patch for Microblaze: https://lore.kernel.org/lkml/20200630111519.GA1951986@linux.ibm.com/ David suggested just exporting min_low_pfn & max_low_pfn in mm/memblock.c: https://lore.kernel.org/lkml/alpine.DEB.2.22.394.2006291911220.1118534@chino.kir.corp.google.com/ Reported-by: kernel test robot Signed-off-by: Randy Dunlap Acked-by: Michal Simek Acked-by: David Rientjes Cc: linux-mm@kvack.org Cc: Andrew Morton Cc: David Rientjes Cc: Mike Rapoport Cc: Michal Simek Cc: Michal Simek Signed-off-by: Mike Rapoport --- arch/microblaze/mm/init.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 0880a003573d..3344d4a1fe89 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -46,6 +46,9 @@ unsigned long memory_size; EXPORT_SYMBOL(memory_size); unsigned long lowmem_size; +EXPORT_SYMBOL(min_low_pfn); +EXPORT_SYMBOL(max_low_pfn); + #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; EXPORT_SYMBOL(kmap_pte); -- cgit v1.2.3 From a231995700c392c0807da95deea231b23fc51a3c Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 27 Aug 2020 16:03:08 +0800 Subject: MIPS: perf: Fix wrong check condition of Loongson event IDs According to the user's manual chapter 8.2.1 of Loongson 3A2000 CPU [1] and 3A3000 CPU [2], we should take some event IDs such as 274, 358, 359 and 360 as valid in the check condition, otherwise they are recognized as "not supported", fix it. [1] http://www.loongson.cn/uploadfile/cpu/3A2000/Loongson3A2000_user2.pdf [2] http://www.loongson.cn/uploadfile/cpu/3A3000/Loongson3A3000_3B3000user2.pdf Fixes: e9dfbaaeef1c ("MIPS: perf: Add hardware perf events support for new Loongson-3") Signed-off-by: Tiezhu Yang Acked-by: Huang Pei Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/perf_event_mipsxx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index efce5defcc5c..011eb6bbf81a 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1898,8 +1898,8 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) (base_id >= 64 && base_id < 90) || (base_id >= 128 && base_id < 164) || (base_id >= 192 && base_id < 200) || - (base_id >= 256 && base_id < 274) || - (base_id >= 320 && base_id < 358) || + (base_id >= 256 && base_id < 275) || + (base_id >= 320 && base_id < 361) || (base_id >= 384 && base_id < 574)) break; -- cgit v1.2.3 From 5f7b81c18366c38446f6eedab570b98dbdc07cff Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 28 Aug 2020 17:01:26 -0700 Subject: ia64: fix min_low_pfn/max_low_pfn build errors Fix min_low_pfn/max_low_pfn build errors for arch/ia64/: (e.g.) ERROR: "max_low_pfn" [drivers/rpmsg/virtio_rpmsg_bus.ko] undefined! ERROR: "min_low_pfn" [drivers/rpmsg/virtio_rpmsg_bus.ko] undefined! ERROR: "min_low_pfn" [drivers/hwtracing/intel_th/intel_th_msu.ko] undefined! ERROR: "max_low_pfn" [drivers/hwtracing/intel_th/intel_th_msu.ko] undefined! ERROR: "min_low_pfn" [drivers/crypto/cavium/nitrox/n5pf.ko] undefined! ERROR: "max_low_pfn" [drivers/crypto/cavium/nitrox/n5pf.ko] undefined! ERROR: "max_low_pfn" [drivers/md/dm-integrity.ko] undefined! ERROR: "min_low_pfn" [drivers/md/dm-integrity.ko] undefined! ERROR: "max_low_pfn" [crypto/tcrypt.ko] undefined! ERROR: "min_low_pfn" [crypto/tcrypt.ko] undefined! ERROR: "min_low_pfn" [security/keys/encrypted-keys/encrypted-keys.ko] undefined! ERROR: "max_low_pfn" [security/keys/encrypted-keys/encrypted-keys.ko] undefined! ERROR: "min_low_pfn" [arch/ia64/kernel/mca_recovery.ko] undefined! ERROR: "max_low_pfn" [arch/ia64/kernel/mca_recovery.ko] undefined! David suggested just exporting min_low_pfn & max_low_pfn in mm/memblock.c: https://lore.kernel.org/lkml/alpine.DEB.2.22.394.2006291911220.1118534@chino.kir.corp.google.com/ Reported-by: kernel test robot Signed-off-by: Randy Dunlap Acked-by: David Rientjes Acked-by: Tony Luck Cc: linux-mm@kvack.org Cc: Andrew Morton Cc: David Rientjes Cc: Mike Rapoport Cc: Tony Luck Cc: Fenghua Yu Cc: linux-ia64@vger.kernel.org Signed-off-by: Mike Rapoport --- arch/ia64/kernel/ia64_ksyms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index b49fe6f618ed..f8150ee74f29 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -3,7 +3,7 @@ * Architecture-specific kernel symbols */ -#ifdef CONFIG_VIRTUAL_MEM_MAP +#if defined(CONFIG_VIRTUAL_MEM_MAP) || defined(CONFIG_DISCONTIGMEM) #include #include #include -- cgit v1.2.3 From 4af22ded0ecf23adea1b26ea264c53f9f1cfc310 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 28 Aug 2020 19:39:02 +0300 Subject: arc: fix memory initialization for systems with two memory banks Rework of memory map initialization broke initialization of ARC systems with two memory banks. Before these changes, memblock was not aware of nodes configuration and the memory map was always allocated from the "lowmem" bank. After the addition of node information to memblock, the core mm attempts to allocate the memory map for the "highmem" bank from its node. The access to this memory using __va() fails because it can be only accessed using kmap. Anther problem that was uncovered is that {min,max}_high_pfn are calculated from u64 high_mem_start variable which prevents truncation to 32-bit physical address and the PFN values are above the node and zone boundaries. Use phys_addr_t type for high_mem_start and high_mem_size to ensure correspondence between PFNs and highmem zone boundaries and reserve the entire highmem bank until mem_init() to avoid accesses to it before highmem is enabled. To test this: 1. Enable HIGHMEM in ARC config 2. Enable 2 memory banks in haps_hs.dts (uncomment the 2nd bank) Fixes: 51930df5801e ("mm: free_area_init: allow defining max_zone_pfn in descending order") Cc: stable@vger.kernel.org [5.8] Signed-off-by: Mike Rapoport Signed-off-by: Vineet Gupta [vgupta: added instructions to test highmem] --- arch/arc/mm/init.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index f886ac69d8ad..3a35b82a718e 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -26,8 +26,8 @@ static unsigned long low_mem_sz; #ifdef CONFIG_HIGHMEM static unsigned long min_high_pfn, max_high_pfn; -static u64 high_mem_start; -static u64 high_mem_sz; +static phys_addr_t high_mem_start; +static phys_addr_t high_mem_sz; #endif #ifdef CONFIG_DISCONTIGMEM @@ -69,6 +69,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) high_mem_sz = size; in_use = 1; memblock_add_node(base, size, 1); + memblock_reserve(base, size); #endif } @@ -157,7 +158,7 @@ void __init setup_arch_memory(void) min_high_pfn = PFN_DOWN(high_mem_start); max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); - max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; + max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn; high_memory = (void *)(min_high_pfn << PAGE_SHIFT); kmap_init(); @@ -166,22 +167,26 @@ void __init setup_arch_memory(void) free_area_init(max_zone_pfn); } -/* - * mem_init - initializes memory - * - * Frees up bootmem - * Calculates and displays memory available/used - */ -void __init mem_init(void) +static void __init highmem_init(void) { #ifdef CONFIG_HIGHMEM unsigned long tmp; - reset_all_zones_managed_pages(); + memblock_free(high_mem_start, high_mem_sz); for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) free_highmem_page(pfn_to_page(tmp)); #endif +} +/* + * mem_init - initializes memory + * + * Frees up bootmem + * Calculates and displays memory available/used + */ +void __init mem_init(void) +{ memblock_free_all(); + highmem_init(); mem_init_print_info(NULL); } -- cgit v1.2.3 From 26907eb605fbc3ba9dbf888f21d9d8d04471271d Mon Sep 17 00:00:00 2001 From: Evgeniy Didin Date: Tue, 7 Jul 2020 18:38:58 +0300 Subject: ARC: [plat-hsdk]: Switch ethernet phy-mode to rgmii-id HSDK board has Micrel KSZ9031, recent commit bcf3440c6dd ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY") caused a breakdown of Ethernet. Using 'phy-mode = "rgmii"' is not correct because accodring RGMII specification it is necessary to have delay on RX (PHY to MAX) which is not generated in case of "rgmii". Using "rgmii-id" adds necessary delay and solves the issue. Also adding name of PHY placed on HSDK board. Signed-off-by: Evgeniy Didin Cc: Eugeniy Paltsev Cc: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 5d64a5a940ee..dcaa44e408ac 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -210,7 +210,7 @@ reg = <0x8000 0x2000>; interrupts = <10>; interrupt-names = "macirq"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; snps,pbl = <32>; snps,multicast-filter-bins = <256>; clocks = <&gmacclk>; @@ -228,7 +228,7 @@ #address-cells = <1>; #size-cells = <0>; compatible = "snps,dwmac-mdio"; - phy0: ethernet-phy@0 { + phy0: ethernet-phy@0 { /* Micrel KSZ9031 */ reg = <0>; }; }; -- cgit v1.2.3 From 60295d50958e21da1df7311fd3c6aced1b3f1f04 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 1 Sep 2020 10:52:29 +0100 Subject: arm64: Remove exporting cpu_logical_map symbol Commit eaecca9e7710 ("arm64: Fix __cpu_logical_map undefined issue") exported cpu_logical_map in order to fix tegra194-cpufreq module build failure. As this might potentially cause problem while supporting physical CPU hotplug, tegra194-cpufreq module was reworded to avoid use of cpu_logical_map() via the commit 93d0c1ab2328 ("cpufreq: replace cpu_logical_map() with read_cpuid_mpir()") Since cpu_logical_map was exported to fix the module build temporarily, let us remove the same before it gains any user again. Signed-off-by: Sudeep Holla Link: https://lore.kernel.org/r/20200901095229.56793-1-sudeep.holla@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 77c4c9bad1b8..53acbeca4f57 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -280,7 +280,6 @@ u64 cpu_logical_map(int cpu) { return __cpu_logical_map[cpu]; } -EXPORT_SYMBOL_GPL(cpu_logical_map); void __init __no_sanitize_address setup_arch(char **cmdline_p) { -- cgit v1.2.3 From e0328feda79d9681b3e3245e6e180295550c8ee9 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 1 Sep 2020 18:00:16 +0200 Subject: arm64/module: set trampoline section flags regardless of CONFIG_DYNAMIC_FTRACE In the arm64 module linker script, the section .text.ftrace_trampoline is specified unconditionally regardless of whether CONFIG_DYNAMIC_FTRACE is enabled (this is simply due to the limitation that module linker scripts are not preprocessed like the vmlinux one). Normally, for .plt and .text.ftrace_trampoline, the section flags present in the module binary wouldn't matter since module_frob_arch_sections() would assign them manually anyway. However, the arm64 module loader only sets the section flags for .text.ftrace_trampoline when CONFIG_DYNAMIC_FTRACE=y. That's only become problematic recently due to a recent change in binutils-2.35, where the .text.ftrace_trampoline section (along with the .plt section) is now marked writable and executable (WAX). We no longer allow writable and executable sections to be loaded due to commit 5c3a7db0c7ec ("module: Harden STRICT_MODULE_RWX"), so this is causing all modules linked with binutils-2.35 to be rejected under arm64. Drop the IS_ENABLED(CONFIG_DYNAMIC_FTRACE) check in module_frob_arch_sections() so that the section flags for .text.ftrace_trampoline get properly set to SHF_EXECINSTR|SHF_ALLOC, without SHF_WRITE. Signed-off-by: Jessica Yu Acked-by: Will Deacon Acked-by: Ard Biesheuvel Link: http://lore.kernel.org/r/20200831094651.GA16385@linux-8ccs Link: https://lore.kernel.org/r/20200901160016.3646-1-jeyu@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/module-plts.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 0ce3a28e3347..2e224435c024 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -305,8 +305,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt_shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt_shndx = i; - else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && - !strcmp(secstrings + sechdrs[i].sh_name, + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".text.ftrace_trampoline")) tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) -- cgit v1.2.3 From 114b9df419bf5db097b322ebb03fcf2f502f9380 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Tue, 25 Aug 2020 03:59:39 +0200 Subject: s390: fix GENERIC_LOCKBREAK dependency typo in Kconfig Commit fa686453053b ("sched/rt, s390: Use CONFIG_PREEMPTION") changed a bunch of uses of CONFIG_PREEMPT to _PREEMPTION. Except in the Kconfig it used two T's. That's the only place in the system where that spelling exists, so let's fix that. Fixes: fa686453053b ("sched/rt, s390: Use CONFIG_PREEMPTION") Cc: # 5.6 Signed-off-by: Eric Farman Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3d86e12e8e3c..b29fcc66ec39 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -30,7 +30,7 @@ config GENERIC_BUG_RELATIVE_POINTERS def_bool y config GENERIC_LOCKBREAK - def_bool y if PREEMPTTION + def_bool y if PREEMPTION config PGSTE def_bool y if KVM -- cgit v1.2.3 From 5c60ed283e1d87e161441bb273541a948ee96f6a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 1 Sep 2020 20:39:29 +0200 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 4 ++++ arch/s390/configs/defconfig | 3 +++ arch/s390/configs/zfcpdump_defconfig | 1 + 3 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0cf9a82326a8..7228aabe9da6 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -626,6 +626,7 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -807,6 +808,7 @@ CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 # CONFIG_RCU_TRACE is not set CONFIG_LATENCYTOP=y @@ -818,6 +820,7 @@ CONFIG_PREEMPT_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_S390_PTDUMP=y CONFIG_NOTIFIER_ERROR_INJECTION=m @@ -829,6 +832,7 @@ CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 5df9759e8ff6..fab03b7a6932 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -617,6 +617,7 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -763,6 +764,7 @@ CONFIG_PANIC_ON_OOPS=y CONFIG_TEST_LOCKUP=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y @@ -771,6 +773,7 @@ CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_S390_PTDUMP=y CONFIG_LKDTM=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 4091c50449cd..8f67c55625f9 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -74,5 +74,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set # CONFIG_RUNTIME_TESTING_MENU is not set -- cgit v1.2.3 From aef0148f3606117352053c015cb33734e9ee7397 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Wed, 2 Sep 2020 22:30:56 -0400 Subject: x86/cmdline: Disable jump tables for cmdline.c When CONFIG_RETPOLINE is disabled, Clang uses a jump table for the switch statement in cmdline_find_option (jump tables are disabled when CONFIG_RETPOLINE is enabled). This function is called very early in boot from sme_enable() if CONFIG_AMD_MEM_ENCRYPT is enabled. At this time, the kernel is still executing out of the identity mapping, but the jump table will contain virtual addresses. Fix this by disabling jump tables for cmdline.c when AMD_MEM_ENCRYPT is enabled. Signed-off-by: Arvind Sankar Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200903023056.3914690-1-nivedita@alum.mit.edu --- arch/x86/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index d46fff11f06f..aa067859a70b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -24,7 +24,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_cmdline.o = -pg endif -CFLAGS_cmdline.o := -fno-stack-protector +CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables endif inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk -- cgit v1.2.3 From 4819e15f740ec884a50bdc431d7f1e7638b6f7d9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2020 17:59:04 +0200 Subject: x86/mm/32: Bring back vmalloc faulting on x86_32 One can not simply remove vmalloc faulting on x86-32. Upstream commit: 7f0a002b5a21 ("x86/mm: remove vmalloc faulting") removed it on x86 alltogether because previously the arch_sync_kernel_mappings() interface was introduced. This interface added synchronization of vmalloc/ioremap page-table updates to all page-tables in the system at creation time and was thought to make vmalloc faulting obsolete. But that assumption was incredibly naive. It turned out that there is a race window between the time the vmalloc or ioremap code establishes a mapping and the time it synchronizes this change to other page-tables in the system. During this race window another CPU or thread can establish a vmalloc mapping which uses the same intermediate page-table entries (e.g. PMD or PUD) and does no synchronization in the end, because it found all necessary mappings already present in the kernel reference page-table. But when these intermediate page-table entries are not yet synchronized, the other CPU or thread will continue with a vmalloc address that is not yet mapped in the page-table it currently uses, causing an unhandled page fault and oops like below: BUG: unable to handle page fault for address: fe80c000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page *pde = 33183067 *pte = a8648163 Oops: 0002 [#1] SMP CPU: 1 PID: 13514 Comm: cve-2017-17053 Tainted: G ... Call Trace: ldt_dup_context+0x66/0x80 dup_mm+0x2b3/0x480 copy_process+0x133b/0x15c0 _do_fork+0x94/0x3e0 __ia32_sys_clone+0x67/0x80 __do_fast_syscall_32+0x3f/0x70 do_fast_syscall_32+0x29/0x60 do_SYSENTER_32+0x15/0x20 entry_SYSENTER_32+0x9f/0xf2 EIP: 0xb7eef549 So the arch_sync_kernel_mappings() interface is racy, but removing it would mean to re-introduce the vmalloc_sync_all() interface, which is even more awful. Keep arch_sync_kernel_mappings() in place and catch the race condition in the page-fault handler instead. Do a partial revert of above commit to get vmalloc faulting on x86-32 back in place. Fixes: 7f0a002b5a21 ("x86/mm: remove vmalloc faulting") Reported-by: Naresh Kamboju Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200902155904.17544-1-joro@8bytes.org --- arch/x86/mm/fault.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 35f1498e9832..6e3e8a124903 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } +/* + * Handle a fault on the vmalloc or module mapping area + * + * This is needed because there is a race condition between the time + * when the vmalloc mapping code updates the PMD to the point in time + * where it synchronizes this update with the other page-tables in the + * system. + * + * In this race window another thread/CPU can map an area on the same + * PMD, finds it already present and does not synchronize it with the + * rest of the system yet. As a result v[mz]alloc might return areas + * which are not mapped in every page-table in the system, causing an + * unhandled page-fault when they are accessed. + */ +static noinline int vmalloc_fault(unsigned long address) +{ + unsigned long pgd_paddr; + pmd_t *pmd_k; + pte_t *pte_k; + + /* Make sure we are in vmalloc area: */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + pgd_paddr = read_cr3_pa(); + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); + if (!pmd_k) + return -1; + + if (pmd_large(*pmd_k)) + return 0; + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + + return 0; +} +NOKPROBE_SYMBOL(vmalloc_fault); + void arch_sync_kernel_mappings(unsigned long start, unsigned long end) { unsigned long addr; @@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, */ WARN_ON_ONCE(hw_error_code & X86_PF_PK); +#ifdef CONFIG_X86_32 + /* + * We can fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * Before doing this on-demand faulting, ensure that the + * fault is not any of the following: + * 1. A fault on a PTE with a reserved bit set. + * 2. A fault caused by a user-mode access. (Do not demand- + * fault kernel memory due to user-mode accesses). + * 3. A fault caused by a page-level protection violation. + * (A demand fault would be on a non-present page which + * would have X86_PF_PROT==0). + * + * This is only needed to close a race condition on x86-32 in + * the vmalloc mapping/unmapping code. See the comment above + * vmalloc_fault() for details. On x86-64 the race does not + * exist as the vmalloc mappings don't need to be synchronized + * there. + */ + if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { + if (vmalloc_fault(address) >= 0) + return; + } +#endif + /* Was the fault spurious, caused by lazy TLB invalidation? */ if (spurious_kernel_fault(hw_error_code, address)) return; -- cgit v1.2.3 From bb06748207cfb1502d11b90325eba7f8c44c9f02 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 1 Sep 2020 14:53:09 +0800 Subject: MIPS: add missing MSACSR and upper MSA initialization In cc97ab235f3f ("MIPS: Simplify FP context initialization), init_fp_ctx just initialize the fp/msa context, and own_fp_inatomic just restore FCSR and 64bit FP regs from it, but miss MSACSR and upper MSA regs for MSA, so MSACSR and MSA upper regs's value from previous task on current cpu can leak into current task and cause unpredictable behavior when MSA context not initialized. Fixes: cc97ab235f3f ("MIPS: Simplify FP context initialization") Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/traps.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 38aa07ccdbcc..cf788591f091 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1287,6 +1287,18 @@ static int enable_restore_fp_context(int msa) err = own_fpu_inatomic(1); if (msa && !err) { enable_msa(); + /* + * with MSA enabled, userspace can see MSACSR + * and MSA regs, but the values in them are from + * other task before current task, restore them + * from saved fp/msa context + */ + write_msa_csr(current->thread.fpu.msacsr); + /* + * own_fpu_inatomic(1) just restore low 64bit, + * fix the high 64bit + */ + init_msa_upper(); set_thread_flag(TIF_USEDMSA); set_thread_flag(TIF_MSA_CTX_LIVE); } -- cgit v1.2.3 From baf5cb30fbd1c22f6aa03c081794c2ee0f5be4da Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 2 Sep 2020 23:32:14 +0200 Subject: MIPS: SNI: Fix SCSI interrupt On RM400(a20r) machines ISA and SCSI interrupts share the same interrupt line. Commit 49e6e07e3c80 ("MIPS: pass non-NULL dev_id on shared request_irq()") accidently dropped the IRQF_SHARED bit, which breaks registering SCSI interrupt. Put back IRQF_SHARED and add dev_id for ISA interrupt. Fixes: 49e6e07e3c80 ("MIPS: pass non-NULL dev_id on shared request_irq()") Signed-off-by: Thomas Bogendoerfer --- arch/mips/sni/a20r.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/sni/a20r.c b/arch/mips/sni/a20r.c index 0ecffb65fd6d..b09dc844985a 100644 --- a/arch/mips/sni/a20r.c +++ b/arch/mips/sni/a20r.c @@ -222,8 +222,8 @@ void __init sni_a20r_irq_init(void) irq_set_chip_and_handler(i, &a20r_irq_type, handle_level_irq); sni_hwint = a20r_hwint; change_c0_status(ST0_IM, IE_IRQ0); - if (request_irq(SNI_A20R_IRQ_BASE + 3, sni_isa_irq_handler, 0, "ISA", - NULL)) + if (request_irq(SNI_A20R_IRQ_BASE + 3, sni_isa_irq_handler, + IRQF_SHARED, "ISA", sni_isa_irq_handler)) pr_err("Failed to register ISA interrupt\n"); } -- cgit v1.2.3 From ccae0f36d500aef727f98acd8d0601e6b262a513 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 4 Sep 2020 14:10:47 +0800 Subject: x86, fakenuma: Fix invalid starting node ID Commit: cc9aec03e58f ("x86/numa_emulation: Introduce uniform split capability") uses "-1" as the starting node ID, which causes the strange kernel log as follows, when "numa=fake=32G" is added to the kernel command line: Faking node -1 at [mem 0x0000000000000000-0x0000000893ffffff] (35136MB) Faking node 0 at [mem 0x0000001840000000-0x000000203fffffff] (32768MB) Faking node 1 at [mem 0x0000000894000000-0x000000183fffffff] (64192MB) Faking node 2 at [mem 0x0000002040000000-0x000000283fffffff] (32768MB) Faking node 3 at [mem 0x0000002840000000-0x000000303fffffff] (32768MB) And finally the kernel crashes: BUG: Bad page state in process swapper pfn:00011 page:(____ptrval____) refcount:0 mapcount:1 mapping:(____ptrval____) index:0x55cd7e44b270 pfn:0x11 failed to read mapping contents, not a valid kernel address? flags: 0x5(locked|uptodate) raw: 0000000000000005 000055cd7e44af30 000055cd7e44af50 0000000100000006 raw: 000055cd7e44b270 000055cd7e44b290 0000000000000000 000055cd7e44b510 page dumped because: page still charged to cgroup page->mem_cgroup:000055cd7e44b510 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.9.0-rc2 #1 Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 Call Trace: dump_stack+0x57/0x80 bad_page.cold+0x63/0x94 __free_pages_ok+0x33f/0x360 memblock_free_all+0x127/0x195 mem_init+0x23/0x1f5 start_kernel+0x219/0x4f5 secondary_startup_64+0xb6/0xc0 Fix this bug via using 0 as the starting node ID. This restores the original behavior before cc9aec03e58f. [ mingo: Massaged the changelog. ] Fixes: cc9aec03e58f ("x86/numa_emulation: Introduce uniform split capability") Signed-off-by: "Huang, Ying" Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200904061047.612950-1-ying.huang@intel.com --- arch/x86/mm/numa_emulation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index c5174b4e318b..683cd12f4793 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -321,7 +321,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, u64 addr, u64 max_addr, u64 size) { return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size, - 0, NULL, NUMA_NO_NODE); + 0, NULL, 0); } static int __init setup_emu2phys_nid(int *dfl_phys_nid) -- cgit v1.2.3 From 2356bb4b8221d7dc8c7beb810418122ed90254c9 Mon Sep 17 00:00:00 2001 From: Vamshi K Sthambamkadi Date: Fri, 28 Aug 2020 17:02:46 +0530 Subject: tracing/kprobes, x86/ptrace: Fix regs argument order for i386 On i386, the order of parameters passed on regs is eax,edx,and ecx (as per regparm(3) calling conventions). Change the mapping in regs_get_kernel_argument(), so that arg1=ax arg2=dx, and arg3=cx. Running the selftests testcase kprobes_args_use.tc shows the result as passed. Fixes: 3c88ee194c28 ("x86: ptrace: Add function argument access API") Signed-off-by: Vamshi K Sthambamkadi Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Acked-by: Peter Zijlstra (Intel) Cc: Link: https://lkml.kernel.org/r/20200828113242.GA1424@cosmos --- arch/x86/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 40aa69d04862..d8324a236696 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -327,8 +327,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), -- cgit v1.2.3 From 662a0221893a3d58aa72719671844264306f6e4b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:50 +0200 Subject: x86/entry: Fix AC assertion The WARN added in commit 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks") unconditionally triggers on a IVB machine because it does not support SMAP. For !SMAP hardware the CLAC/STAC instructions are patched out and thus if userspace sets AC, it is still have set after entry. Fixes: 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Acked-by: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200902133200.666781610@infradead.org --- arch/x86/include/asm/entry-common.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index a8f9315b9eae..6fe54b2813c1 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -18,8 +18,16 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs) * state, not the interrupt state as imagined by Xen. */ unsigned long flags = native_save_fl(); - WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF | - X86_EFLAGS_NT)); + unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT; + + /* + * For !SMAP hardware we patch out CLAC on entry. + */ + if (boot_cpu_has(X86_FEATURE_SMAP) || + (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV))) + mask |= X86_EFLAGS_AC; + + WARN_ON_ONCE(flags & mask); /* We think we came from user mode. Make sure pt_regs agrees. */ WARN_ON_ONCE(!user_mode(regs)); -- cgit v1.2.3 From d5c678aed5eddb944b8e7ce451b107b39245962d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 2 Sep 2020 15:25:51 +0200 Subject: x86/debug: Allow a single level of #DB recursion Trying to clear DR7 around a #DB from usermode malfunctions if the tasks schedules when delivering SIGTRAP. Rather than trying to define a special no-recursion region, just allow a single level of recursion. The same mechanism is used for NMI, and it hasn't caused any problems yet. Fixes: 9f58fdde95c9 ("x86/db: Split out dr6/7 handling") Reported-by: Kyle Huey Debugged-by: Josh Poimboeuf Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/8b9bd05f187231df008d48cf818a6a311cbd5c98.1597882384.git.luto@kernel.org Link: https://lore.kernel.org/r/20200902133200.726584153@infradead.org --- arch/x86/kernel/traps.c | 65 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1f66d2d1e998..81a2fb711091 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } -static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) +static __always_inline unsigned long debug_read_clear_dr6(void) { - /* - * Disable breakpoints during exception handling; recursive exceptions - * are exceedingly 'fun'. - * - * Since this function is NOKPROBE, and that also applies to - * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a - * HW_BREAKPOINT_W on our stack) - * - * Entry text is excluded for HW_BP_X and cpu_entry_area, which - * includes the entry stack is excluded for everything. - */ - *dr7 = local_db_save(); + unsigned long dr6; /* * The Intel SDM says: @@ -755,15 +744,12 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) * * Keep it simple: clear DR6 immediately. */ - get_debugreg(*dr6, 6); + get_debugreg(dr6, 6); set_debugreg(0, 6); /* Filter out all the reserved bits which are preset to 1 */ - *dr6 &= ~DR6_RESERVED; -} + dr6 &= ~DR6_RESERVED; -static __always_inline void debug_exit(unsigned long dr7) -{ - local_db_restore(dr7); + return dr6; } /* @@ -863,6 +849,18 @@ out: static __always_inline void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) { + /* + * Disable breakpoints during exception handling; recursive exceptions + * are exceedingly 'fun'. + * + * Since this function is NOKPROBE, and that also applies to + * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a + * HW_BREAKPOINT_W on our stack) + * + * Entry text is excluded for HW_BP_X and cpu_entry_area, which + * includes the entry stack is excluded for everything. + */ + unsigned long dr7 = local_db_save(); bool irq_state = idtentry_enter_nmi(regs); instrumentation_begin(); @@ -883,6 +881,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, instrumentation_end(); idtentry_exit_nmi(regs, irq_state); + + local_db_restore(dr7); } static __always_inline void exc_debug_user(struct pt_regs *regs, @@ -894,6 +894,15 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, */ WARN_ON_ONCE(!user_mode(regs)); + /* + * NB: We can't easily clear DR7 here because + * idtentry_exit_to_usermode() can invoke ptrace, schedule, access + * user memory, etc. This means that a recursive #DB is possible. If + * this happens, that #DB will hit exc_debug_kernel() and clear DR7. + * Since we're not on the IST stack right now, everything will be + * fine. + */ + irqentry_enter_from_user_mode(regs); instrumentation_begin(); @@ -907,36 +916,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); - exc_debug_kernel(regs, dr6); - debug_exit(dr7); + exc_debug_kernel(regs, debug_read_clear_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); - exc_debug_user(regs, dr6); - debug_exit(dr7); + exc_debug_user(regs, debug_read_clear_dr6()); } #else /* 32 bit does not have separate entry points. */ DEFINE_IDTENTRY_RAW(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); + unsigned long dr6 = debug_read_clear_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); else exc_debug_kernel(regs, dr6); - - debug_exit(dr7); } #endif -- cgit v1.2.3 From 4facb95b7adaf77e2da73aafb9ba60996fe42a12 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Sep 2020 01:50:54 +0200 Subject: x86/entry: Unbreak 32bit fast syscall Andy reported that the syscall treacing for 32bit fast syscall fails: # ./tools/testing/selftests/x86/ptrace_syscall_32 ... [RUN] SYSEMU [FAIL] Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732) ... [RUN] SYSCALL [FAIL] Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732) The eason is that the conversion to generic entry code moved the retrieval of the sixth argument (EBP) after the point where the syscall entry work runs, i.e. ptrace, seccomp, audit... Unbreak it by providing a split up version of syscall_enter_from_user_mode(). - syscall_enter_from_user_mode_prepare() establishes state and enables interrupts - syscall_enter_from_user_mode_work() runs the entry work Replace the call to syscall_enter_from_user_mode() in the 32bit fast syscall C-entry with the split functions and stick the EBP retrieval between them. Fixes: 27d6b4d14f5c ("x86/entry: Use generic syscall entry function") Reported-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87k0xdjbtt.fsf@nanos.tec.linutronix.de --- arch/x86/entry/common.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 48512c7944e7..2f84c7ca74ea 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs) { - unsigned int nr = (unsigned int)regs->orig_ax; - if (IS_ENABLED(CONFIG_IA32_EMULATION)) current_thread_info()->status |= TS_COMPAT; - /* - * Subtlety here: if ptrace pokes something larger than 2^32-1 into - * orig_ax, the unsigned int return value truncates it. This may - * or may not be necessary, but it matches the old asm behavior. - */ - return (unsigned int)syscall_enter_from_user_mode(regs, nr); + + return (unsigned int)regs->orig_ax; } /* @@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { unsigned int nr = syscall_32_enter(regs); + /* + * Subtlety here: if ptrace pokes something larger than 2^32-1 into + * orig_ax, the unsigned int return value truncates it. This may + * or may not be necessary, but it matches the old asm behavior. + */ + nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); } static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { - unsigned int nr = syscall_32_enter(regs); + unsigned int nr = syscall_32_enter(regs); int res; + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + instrumentation_begin(); /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { @@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) return false; } + /* The case truncates any ptrace induced syscall nr > 2^32 -1 */ + nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr); + /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); -- cgit v1.2.3 From d3afc7f12987581eb0d1215b518d719fb9d762da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 25 Apr 2020 15:03:47 +0100 Subject: arm64: Allow IPIs to be handled as normal interrupts In order to deal with IPIs as normal interrupts, let's add a new way to register them with the architecture code. set_smp_ipi_range() takes a range of interrupts, and allows the arch code to request them as if the were normal interrupts. A standard handler is then called by the core IRQ code to deal with the IPI. This means that we don't need to call irq_enter/irq_exit, and that we don't need to deal with set_irq_regs either. So let's move the dispatcher into its own function, and leave handle_IPI() as a compatibility function. On the sending side, let's make use of ipi_send_mask, which already exists for this purpose. One of the major difference is that we end up, in some cases (such as when performing IRQ time accounting on the scheduler IPI), end up with nested irq_enter()/irq_exit() pairs. Other than the (relatively small) overhead, there should be no consequences to it (these pairs are designed to nest correctly, and the accounting shouldn't be off). Reviewed-by: Valentin Schneider Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/smp.h | 5 +++ arch/arm64/kernel/smp.c | 93 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 87 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d232837cbee..d0fdbe5fb32f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -106,6 +106,7 @@ config ARM64 select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP + select GENERIC_IRQ_IPI select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 0eadbf933e35..57c5db15f6b7 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -78,6 +78,11 @@ extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); extern void (*__smp_cross_call)(const struct cpumask *, unsigned int); +/* + * Register IPI interrupts with the arch SMP code + */ +extern void set_smp_ipi_range(int ipi_base, int nr_ipi); + /* * Called from the secondary holding pen, this is the secondary CPU entry point. */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 355ee9eed4dd..00c9db1b61b5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -75,6 +75,13 @@ enum ipi_msg_type { IPI_WAKEUP }; +static int ipi_irq_base __read_mostly; +static int nr_ipi __read_mostly = NR_IPI; +static struct irq_desc *ipi_desc[NR_IPI] __read_mostly; + +static void ipi_setup(int cpu); +static void ipi_teardown(int cpu); + #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_kill(unsigned int cpu); #else @@ -237,6 +244,8 @@ asmlinkage notrace void secondary_start_kernel(void) */ notify_cpu_starting(cpu); + ipi_setup(cpu); + store_cpu_topology(cpu); numa_add_cpu(cpu); @@ -302,6 +311,7 @@ int __cpu_disable(void) * and we must not schedule until we're ready to give up the cpu. */ set_cpu_online(cpu, false); + ipi_teardown(cpu); /* * OK - migrate IRQs away from this CPU @@ -890,10 +900,9 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) /* * Main handler for inter-processor interrupts */ -void handle_IPI(int ipinr, struct pt_regs *regs) +static void do_handle_IPI(int ipinr) { unsigned int cpu = smp_processor_id(); - struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { trace_ipi_entry_rcuidle(ipi_types[ipinr]); @@ -906,21 +915,16 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; case IPI_CALL_FUNC: - irq_enter(); generic_smp_call_function_interrupt(); - irq_exit(); break; case IPI_CPU_STOP: - irq_enter(); local_cpu_stop(); - irq_exit(); break; case IPI_CPU_CRASH_STOP: if (IS_ENABLED(CONFIG_KEXEC_CORE)) { - irq_enter(); - ipi_cpu_crash_stop(cpu, regs); + ipi_cpu_crash_stop(cpu, get_irq_regs()); unreachable(); } @@ -928,17 +932,13 @@ void handle_IPI(int ipinr, struct pt_regs *regs) #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST case IPI_TIMER: - irq_enter(); tick_receive_broadcast(); - irq_exit(); break; #endif #ifdef CONFIG_IRQ_WORK case IPI_IRQ_WORK: - irq_enter(); irq_work_run(); - irq_exit(); break; #endif @@ -957,9 +957,78 @@ void handle_IPI(int ipinr, struct pt_regs *regs) if ((unsigned)ipinr < NR_IPI) trace_ipi_exit_rcuidle(ipi_types[ipinr]); +} + +/* Legacy version, should go away once all irqchips have been converted */ +void handle_IPI(int ipinr, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + irq_enter(); + do_handle_IPI(ipinr); + irq_exit(); + set_irq_regs(old_regs); } +static irqreturn_t ipi_handler(int irq, void *data) +{ + do_handle_IPI(irq - ipi_irq_base); + return IRQ_HANDLED; +} + +static void ipi_send(const struct cpumask *target, unsigned int ipi) +{ + __ipi_send_mask(ipi_desc[ipi], target); +} + +static void ipi_setup(int cpu) +{ + int i; + + if (!ipi_irq_base) + return; + + for (i = 0; i < nr_ipi; i++) + enable_percpu_irq(ipi_irq_base + i, 0); +} + +static void ipi_teardown(int cpu) +{ + int i; + + if (!ipi_irq_base) + return; + + for (i = 0; i < nr_ipi; i++) + disable_percpu_irq(ipi_irq_base + i); +} + +void __init set_smp_ipi_range(int ipi_base, int n) +{ + int i; + + WARN_ON(n < NR_IPI); + nr_ipi = min(n, NR_IPI); + + for (i = 0; i < nr_ipi; i++) { + int err; + + err = request_percpu_irq(ipi_base + i, ipi_handler, + "IPI", &irq_stat); + WARN_ON(err); + + ipi_desc[i] = irq_to_desc(ipi_base + i); + irq_set_status_flags(ipi_base + i, IRQ_HIDDEN); + } + + ipi_irq_base = ipi_base; + __smp_cross_call = ipi_send; + + /* Setup the boot CPU immediately */ + ipi_setup(smp_processor_id()); +} + void smp_send_reschedule(int cpu) { smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); -- cgit v1.2.3 From 56afcd3dbd1995c526bfbd920cebde6158b22c4a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Jun 2020 20:38:41 +0100 Subject: ARM: Allow IPIs to be handled as normal interrupts In order to deal with IPIs as normal interrupts, let's add a new way to register them with the architecture code. set_smp_ipi_range() takes a range of interrupts, and allows the arch code to request them as if the were normal interrupts. A standard handler is then called by the core IRQ code to deal with the IPI. This means that we don't need to call irq_enter/irq_exit, and that we don't need to deal with set_irq_regs either. So let's move the dispatcher into its own function, and leave handle_IPI() as a compatibility function. On the sending side, let's make use of ipi_send_mask, which already exists for this purpose. One of the major difference is that we end up, in some cases (such as when performing IRQ time accounting on the scheduler IPI), end up with nested irq_enter()/irq_exit() pairs. Other than the (relatively small) overhead, there should be no consequences to it (these pairs are designed to nest correctly, and the accounting shouldn't be off). Reviewed-by: Valentin Schneider Signed-off-by: Marc Zyngier --- arch/arm/Kconfig | 1 + arch/arm/include/asm/smp.h | 5 +++ arch/arm/kernel/smp.c | 99 ++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 89 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e00d94b16658..e67ef15c800f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -49,6 +49,7 @@ config ARM select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_IRQ_IPI if SMP select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index a91f21e3c5b5..0e29730295ca 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -45,6 +45,11 @@ extern void smp_init_cpus(void); */ extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); +/* + * Register IPI interrupts with the arch SMP code + */ +extern void set_smp_ipi_range(int ipi_base, int nr_ipi); + /* * Called from platform specific assembly code, this is the * secondary CPU entry point. diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5d9da61eff62..f21f78483353 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -69,14 +69,22 @@ enum ipi_msg_type { * CPU_BACKTRACE is special and not included in NR_IPI * or tracable with trace_ipi_* */ - IPI_CPU_BACKTRACE, + IPI_CPU_BACKTRACE = NR_IPI, /* * SGI8-15 can be reserved by secure firmware, and thus may * not be usable by the kernel. Please keep the above limited * to at most 8 entries. */ + MAX_IPI }; +static int ipi_irq_base __read_mostly; +static int nr_ipi __read_mostly = NR_IPI; +static struct irq_desc *ipi_desc[MAX_IPI] __read_mostly; + +static void ipi_setup(int cpu); +static void ipi_teardown(int cpu); + static DECLARE_COMPLETION(cpu_running); static struct smp_operations smp_ops __ro_after_init; @@ -247,6 +255,7 @@ int __cpu_disable(void) * and we must not schedule until we're ready to give up the cpu. */ set_cpu_online(cpu, false); + ipi_teardown(cpu); /* * OK - migrate IRQs away from this CPU @@ -422,6 +431,8 @@ asmlinkage void secondary_start_kernel(void) notify_cpu_starting(cpu); + ipi_setup(cpu); + calibrate_delay(); smp_store_cpu_info(cpu); @@ -627,10 +638,9 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) handle_IPI(ipinr, regs); } -void handle_IPI(int ipinr, struct pt_regs *regs) +static void do_handle_IPI(int ipinr) { unsigned int cpu = smp_processor_id(); - struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { trace_ipi_entry_rcuidle(ipi_types[ipinr]); @@ -643,9 +653,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST case IPI_TIMER: - irq_enter(); tick_receive_broadcast(); - irq_exit(); break; #endif @@ -654,36 +662,26 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; case IPI_CALL_FUNC: - irq_enter(); generic_smp_call_function_interrupt(); - irq_exit(); break; case IPI_CPU_STOP: - irq_enter(); ipi_cpu_stop(cpu); - irq_exit(); break; #ifdef CONFIG_IRQ_WORK case IPI_IRQ_WORK: - irq_enter(); irq_work_run(); - irq_exit(); break; #endif case IPI_COMPLETION: - irq_enter(); ipi_complete(cpu); - irq_exit(); break; case IPI_CPU_BACKTRACE: printk_nmi_enter(); - irq_enter(); - nmi_cpu_backtrace(regs); - irq_exit(); + nmi_cpu_backtrace(get_irq_regs()); printk_nmi_exit(); break; @@ -695,9 +693,78 @@ void handle_IPI(int ipinr, struct pt_regs *regs) if ((unsigned)ipinr < NR_IPI) trace_ipi_exit_rcuidle(ipi_types[ipinr]); +} + +/* Legacy version, should go away once all irqchips have been converted */ +void handle_IPI(int ipinr, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + irq_enter(); + do_handle_IPI(ipinr); + irq_exit(); + set_irq_regs(old_regs); } +static irqreturn_t ipi_handler(int irq, void *data) +{ + do_handle_IPI(irq - ipi_irq_base); + return IRQ_HANDLED; +} + +static void ipi_send(const struct cpumask *target, unsigned int ipi) +{ + __ipi_send_mask(ipi_desc[ipi], target); +} + +static void ipi_setup(int cpu) +{ + int i; + + if (!ipi_irq_base) + return; + + for (i = 0; i < nr_ipi; i++) + enable_percpu_irq(ipi_irq_base + i, 0); +} + +static void ipi_teardown(int cpu) +{ + int i; + + if (!ipi_irq_base) + return; + + for (i = 0; i < nr_ipi; i++) + disable_percpu_irq(ipi_irq_base + i); +} + +void __init set_smp_ipi_range(int ipi_base, int n) +{ + int i; + + WARN_ON(n < MAX_IPI); + nr_ipi = min(n, MAX_IPI); + + for (i = 0; i < nr_ipi; i++) { + int err; + + err = request_percpu_irq(ipi_base + i, ipi_handler, + "IPI", &irq_stat); + WARN_ON(err); + + ipi_desc[i] = irq_to_desc(ipi_base + i); + irq_set_status_flags(ipi_base + i, IRQ_HIDDEN); + } + + ipi_irq_base = ipi_base; + set_smp_cross_call(ipi_send); + + /* Setup the boot CPU immediately */ + ipi_setup(smp_processor_id()); +} + void smp_send_reschedule(int cpu) { smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); -- cgit v1.2.3 From 336780590990efa69596884114cad3f517b6333b Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Sat, 12 Sep 2020 16:37:07 +0100 Subject: irqchip/gic-v3: Support pseudo-NMIs when SCR_EL3.FIQ == 0 The GIC's internal view of the priority mask register and the assigned interrupt priorities are based on whether GIC security is enabled and whether firmware routes Group 0 interrupts to EL3. At the moment, we support priority masking when ICC_PMR_EL1 and interrupt priorities are either both modified by the GIC, or both left unchanged. Trusted Firmware-A's default interrupt routing model allows Group 0 interrupts to be delivered to the non-secure world (SCR_EL3.FIQ == 0). Unfortunately, this is precisely the case that the GIC driver doesn't support: ICC_PMR_EL1 remains unchanged, but the GIC's view of interrupt priorities is different from the software programmed values. Support pseudo-NMIs when SCR_EL3.FIQ == 0 by using a different value to mask regular interrupts. All the other values remain the same. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200912153707.667731-3-alexandru.elisei@arm.com --- arch/arm64/include/asm/arch_gicv3.h | 8 +++++++- arch/arm64/include/asm/ptrace.h | 14 +++++++++++++- arch/arm64/kernel/image-vars.h | 2 ++ 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 6647ae4f0231..880b9054d75c 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -153,7 +153,7 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | + BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET)); BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); /* @@ -162,6 +162,12 @@ static inline void gic_pmr_mask_irqs(void) * are applied to IRQ priorities */ BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON); + /* + * Same situation as above, but now we make sure that we can mask + * regular interrupts. + */ + BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS | + GIC_PRIO_PSR_I_SET)); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 966ed30ed5f7..997cf8c8cd52 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -31,9 +31,21 @@ * interrupt disabling temporarily does not rely on IRQ priorities. */ #define GIC_PRIO_IRQON 0xe0 -#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define __GIC_PRIO_IRQOFF_NS 0xa0 #define GIC_PRIO_PSR_I_SET (1 << 4) +#define GIC_PRIO_IRQOFF \ + ({ \ + extern struct static_key_false gic_nonsecure_priorities;\ + u8 __prio = __GIC_PRIO_IRQOFF; \ + \ + if (static_branch_unlikely(&gic_nonsecure_priorities)) \ + __prio = __GIC_PRIO_IRQOFF_NS; \ + \ + __prio; \ + }) + /* Additional SPSR bits not exposed in the UABI */ #define PSR_MODE_THREAD_BIT (1 << 0) #define PSR_IL_BIT (1 << 20) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 8982b68289b7..98a5215c1598 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -101,6 +101,8 @@ KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); /* Static key checked in pmr_sync(). */ #ifdef CONFIG_ARM64_PSEUDO_NMI KVM_NVHE_ALIAS(gic_pmr_sync); +/* Static key checked in GIC_PRIO_IRQOFF. */ +KVM_NVHE_ALIAS(gic_nonsecure_priorities); #endif /* EL2 exception handling */ -- cgit v1.2.3 From 5cebfd2d47c214f69d918e3d34ad183c061eddb2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 9 May 2020 14:00:23 +0100 Subject: arm64: Kill __smp_cross_call and co The old IPI registration interface is now unused on arm64, so let's get rid of it. Reviewed-by: Valentin Schneider Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/irq_work.h | 4 +--- arch/arm64/include/asm/smp.h | 12 ------------ arch/arm64/kernel/smp.c | 38 +++++++------------------------------- 3 files changed, 8 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index 8a1ef1907760..a1020285ea75 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,11 +2,9 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -#include - static inline bool arch_irq_work_has_interrupt(void) { - return !!__smp_cross_call; + return true; } #endif /* __ASM_IRQ_WORK_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 57c5db15f6b7..c298ad02252e 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -60,24 +60,12 @@ struct seq_file; */ extern void show_ipi_list(struct seq_file *p, int prec); -/* - * Called from C code, this handles an IPI. - */ -extern void handle_IPI(int ipinr, struct pt_regs *regs); - /* * Discover the set of possible CPUs and determine their * SMP operations. */ extern void smp_init_cpus(void); -/* - * Provide a function to raise an IPI cross call on CPUs in callmap. - */ -extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); - -extern void (*__smp_cross_call)(const struct cpumask *, unsigned int); - /* * Register IPI interrupts with the arch SMP code */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 00c9db1b61b5..58fb155fb0ab 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -782,13 +782,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } -void (*__smp_cross_call)(const struct cpumask *, unsigned int); - -void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) -{ - __smp_cross_call = fn; -} - static const char *ipi_types[NR_IPI] __tracepoint_string = { #define S(x,s) [x] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), @@ -800,11 +793,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_WAKEUP, "CPU wake-up interrupts"), }; -static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) -{ - trace_ipi_raise(target, ipi_types[ipinr]); - __smp_cross_call(target, ipinr); -} +static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); void show_ipi_list(struct seq_file *p, int prec) { @@ -851,8 +840,7 @@ void arch_send_wakeup_ipi_mask(const struct cpumask *mask) #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { - if (__smp_cross_call) - smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); } #endif @@ -959,34 +947,23 @@ static void do_handle_IPI(int ipinr) trace_ipi_exit_rcuidle(ipi_types[ipinr]); } -/* Legacy version, should go away once all irqchips have been converted */ -void handle_IPI(int ipinr, struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - irq_enter(); - do_handle_IPI(ipinr); - irq_exit(); - - set_irq_regs(old_regs); -} - static irqreturn_t ipi_handler(int irq, void *data) { do_handle_IPI(irq - ipi_irq_base); return IRQ_HANDLED; } -static void ipi_send(const struct cpumask *target, unsigned int ipi) +static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { - __ipi_send_mask(ipi_desc[ipi], target); + trace_ipi_raise(target, ipi_types[ipinr]); + __ipi_send_mask(ipi_desc[ipinr], target); } static void ipi_setup(int cpu) { int i; - if (!ipi_irq_base) + if (WARN_ON_ONCE(!ipi_irq_base)) return; for (i = 0; i < nr_ipi; i++) @@ -997,7 +974,7 @@ static void ipi_teardown(int cpu) { int i; - if (!ipi_irq_base) + if (WARN_ON_ONCE(!ipi_irq_base)) return; for (i = 0; i < nr_ipi; i++) @@ -1023,7 +1000,6 @@ void __init set_smp_ipi_range(int ipi_base, int n) } ipi_irq_base = ipi_base; - __smp_cross_call = ipi_send; /* Setup the boot CPU immediately */ ipi_setup(smp_processor_id()); -- cgit v1.2.3 From a263881525310e10ecd46ae8e8531ac9e968b1b4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 20 Jun 2020 17:19:00 +0100 Subject: arm64: Remove custom IRQ stat accounting Let's switch the arm64 code to the core accounting, which already does everything we need. Reviewed-by: Valentin Schneider Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/hardirq.h | 9 --------- arch/arm64/include/asm/smp.h | 5 ----- arch/arm64/kernel/irq.c | 11 +---------- arch/arm64/kernel/smp.c | 30 ++++++++++++------------------ 4 files changed, 13 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 985493af704b..5ffa4bacdad3 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -13,21 +13,12 @@ #include #include -#define NR_IPI 7 - typedef struct { unsigned int __softirq_pending; - unsigned int ipi_irqs[NR_IPI]; } ____cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ -#define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ -#define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) - -u64 smp_irq_stat_cpu(unsigned int cpu); -#define arch_irq_stat_cpu smp_irq_stat_cpu - #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 struct nmi_ctx { diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index c298ad02252e..2e7f529ec5a6 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -55,11 +55,6 @@ static inline void set_cpu_logical_map(int cpu, u64 hwid) struct seq_file; -/* - * generate IPI list text - */ -extern void show_ipi_list(struct seq_file *p, int prec); - /* * Discover the set of possible CPUs and determine their * SMP operations. diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 04a327ccf84d..9cf2fb87584a 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -10,10 +10,10 @@ * Copyright (C) 2012 ARM Ltd. */ -#include #include #include #include +#include #include #include #include @@ -22,20 +22,11 @@ #include #include -unsigned long irq_err_count; - /* Only access this in an NMI enter/exit */ DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); -int arch_show_interrupts(struct seq_file *p, int prec) -{ - show_ipi_list(p, prec); - seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); - return 0; -} - #ifdef CONFIG_VMAP_STACK static void init_irq_stacks(void) { diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 58fb155fb0ab..b6bde2675ccc 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -72,7 +73,8 @@ enum ipi_msg_type { IPI_CPU_CRASH_STOP, IPI_TIMER, IPI_IRQ_WORK, - IPI_WAKEUP + IPI_WAKEUP, + NR_IPI }; static int ipi_irq_base __read_mostly; @@ -795,29 +797,23 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); -void show_ipi_list(struct seq_file *p, int prec) +unsigned long irq_err_count; + +int arch_show_interrupts(struct seq_file *p, int prec) { unsigned int cpu, i; for (i = 0; i < NR_IPI; i++) { + unsigned int irq = irq_desc_get_irq(ipi_desc[i]); seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", - __get_irq_stat(cpu, ipi_irqs[i])); + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_printf(p, " %s\n", ipi_types[i]); } -} - -u64 smp_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = 0; - int i; - for (i = 0; i < NR_IPI; i++) - sum += __get_irq_stat(cpu, ipi_irqs[i]); - - return sum; + seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); + return 0; } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -892,10 +888,8 @@ static void do_handle_IPI(int ipinr) { unsigned int cpu = smp_processor_id(); - if ((unsigned)ipinr < NR_IPI) { + if ((unsigned)ipinr < NR_IPI) trace_ipi_entry_rcuidle(ipi_types[ipinr]); - __inc_irq_stat(cpu, ipi_irqs[ipinr]); - } switch (ipinr) { case IPI_RESCHEDULE: @@ -992,7 +986,7 @@ void __init set_smp_ipi_range(int ipi_base, int n) int err; err = request_percpu_irq(ipi_base + i, ipi_handler, - "IPI", &irq_stat); + "IPI", &cpu_number); WARN_ON(err); ipi_desc[i] = irq_to_desc(ipi_base + i); -- cgit v1.2.3 From 8aa837cb7a032884c787b15de81f7d9de8af0869 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 22 Jun 2020 22:15:54 +0100 Subject: ARM: Kill __smp_cross_call and co The old IPI registration interface is now unused on arm, so let's get rid of it. Reviewed-by: Valentin Schneider Signed-off-by: Marc Zyngier --- arch/arm/include/asm/smp.h | 6 ------ arch/arm/kernel/smp.c | 26 +++++++------------------- 2 files changed, 7 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 0e29730295ca..0ca55a607d0a 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -39,12 +39,6 @@ void handle_IPI(int ipinr, struct pt_regs *regs); */ extern void smp_init_cpus(void); - -/* - * Provide a function to raise an IPI cross call on CPUs in callmap. - */ -extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); - /* * Register IPI interrupts with the arch SMP code */ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index f21f78483353..d51e64955a26 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -511,14 +511,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } -static void (*__smp_cross_call)(const struct cpumask *, unsigned int); - -void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) -{ - if (!__smp_cross_call) - __smp_cross_call = fn; -} - static const char *ipi_types[NR_IPI] __tracepoint_string = { #define S(x,s) [x] = s S(IPI_WAKEUP, "CPU wakeup interrupts"), @@ -530,11 +522,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_COMPLETION, "completion interrupts"), }; -static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) -{ - trace_ipi_raise_rcuidle(target, ipi_types[ipinr]); - __smp_cross_call(target, ipinr); -} +static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); void show_ipi_list(struct seq_file *p, int prec) { @@ -713,16 +701,17 @@ static irqreturn_t ipi_handler(int irq, void *data) return IRQ_HANDLED; } -static void ipi_send(const struct cpumask *target, unsigned int ipi) +static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { - __ipi_send_mask(ipi_desc[ipi], target); + trace_ipi_raise_rcuidle(target, ipi_types[ipinr]); + __ipi_send_mask(ipi_desc[ipinr], target); } static void ipi_setup(int cpu) { int i; - if (!ipi_irq_base) + if (WARN_ON_ONCE(!ipi_irq_base)) return; for (i = 0; i < nr_ipi; i++) @@ -733,7 +722,7 @@ static void ipi_teardown(int cpu) { int i; - if (!ipi_irq_base) + if (WARN_ON_ONCE(!ipi_irq_base)) return; for (i = 0; i < nr_ipi; i++) @@ -759,7 +748,6 @@ void __init set_smp_ipi_range(int ipi_base, int n) } ipi_irq_base = ipi_base; - set_smp_cross_call(ipi_send); /* Setup the boot CPU immediately */ ipi_setup(smp_processor_id()); @@ -872,7 +860,7 @@ core_initcall(register_cpufreq_notifier); static void raise_nmi(cpumask_t *mask) { - __smp_cross_call(mask, IPI_CPU_BACKTRACE); + __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask); } void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) -- cgit v1.2.3 From 5ebf353af22c89d18964bb3b877a95200dfe07b9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Jun 2020 21:15:00 +0100 Subject: ARM: Remove custom IRQ stat accounting Let's switch the arm code to the core accounting, which already does everything we need. Reviewed-by: Valentin Schneider Signed-off-by: Marc Zyngier --- arch/arm/include/asm/hardirq.h | 17 ----------------- arch/arm/kernel/irq.c | 1 - arch/arm/kernel/smp.c | 21 +++++---------------- 3 files changed, 5 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index 7a88f160b1fb..b95848ed2bc7 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -6,29 +6,12 @@ #include #include -/* number of IPIS _not_ including IPI_CPU_BACKTRACE */ -#define NR_IPI 7 - typedef struct { unsigned int __softirq_pending; -#ifdef CONFIG_SMP - unsigned int ipi_irqs[NR_IPI]; -#endif } ____cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ -#define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ -#define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) - -#ifdef CONFIG_SMP -u64 smp_irq_stat_cpu(unsigned int cpu); -#else -#define smp_irq_stat_cpu(cpu) 0 -#endif - -#define arch_irq_stat_cpu smp_irq_stat_cpu - #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index ee514034c0a1..698b6f636156 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -18,7 +18,6 @@ * IRQ's are in fact implemented a bit like signal handlers for the kernel. * Naturally it's not a 1:1 relation, but there are similarities. */ -#include #include #include #include diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index d51e64955a26..00327fa74b01 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -65,6 +66,7 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_IRQ_WORK, IPI_COMPLETION, + NR_IPI, /* * CPU_BACKTRACE is special and not included in NR_IPI * or tracable with trace_ipi_* @@ -529,27 +531,16 @@ void show_ipi_list(struct seq_file *p, int prec) unsigned int cpu, i; for (i = 0; i < NR_IPI; i++) { + unsigned int irq = irq_desc_get_irq(ipi_desc[i]); seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", - __get_irq_stat(cpu, ipi_irqs[i])); + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_printf(p, " %s\n", ipi_types[i]); } } -u64 smp_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = 0; - int i; - - for (i = 0; i < NR_IPI; i++) - sum += __get_irq_stat(cpu, ipi_irqs[i]); - - return sum; -} - void arch_send_call_function_ipi_mask(const struct cpumask *mask) { smp_cross_call(mask, IPI_CALL_FUNC); @@ -630,10 +621,8 @@ static void do_handle_IPI(int ipinr) { unsigned int cpu = smp_processor_id(); - if ((unsigned)ipinr < NR_IPI) { + if ((unsigned)ipinr < NR_IPI) trace_ipi_entry_rcuidle(ipi_types[ipinr]); - __inc_irq_stat(cpu, ipi_irqs[ipinr]); - } switch (ipinr) { case IPI_WAKEUP: -- cgit v1.2.3 From 9d9edb962e910552c9c008800ec907293a47852e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 18 Sep 2020 20:33:18 +0800 Subject: arm64: Fix -Wunused-function warning when !CONFIG_HOTPLUG_CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_HOTPLUG_CPU is n, gcc warns: arch/arm64/kernel/smp.c:967:13: warning: ‘ipi_teardown’ defined but not used [-Wunused-function] static void ipi_teardown(int cpu) ^~~~~~~~~~~~ Use #ifdef guard this. Signed-off-by: YueHaibing Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200918123318.23764-1-yuehaibing@huawei.com --- arch/arm64/kernel/smp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b6bde2675ccc..82e75fc2c903 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -82,9 +82,9 @@ static int nr_ipi __read_mostly = NR_IPI; static struct irq_desc *ipi_desc[NR_IPI] __read_mostly; static void ipi_setup(int cpu); -static void ipi_teardown(int cpu); #ifdef CONFIG_HOTPLUG_CPU +static void ipi_teardown(int cpu); static int op_cpu_kill(unsigned int cpu); #else static inline int op_cpu_kill(unsigned int cpu) @@ -964,6 +964,7 @@ static void ipi_setup(int cpu) enable_percpu_irq(ipi_irq_base + i, 0); } +#ifdef CONFIG_HOTPLUG_CPU static void ipi_teardown(int cpu) { int i; @@ -974,6 +975,7 @@ static void ipi_teardown(int cpu) for (i = 0; i < nr_ipi; i++) disable_percpu_irq(ipi_irq_base + i); } +#endif void __init set_smp_ipi_range(int ipi_base, int n) { -- cgit v1.2.3 From ac15a54e03d13686d2fc016a88311801b0734046 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Sep 2020 17:19:46 +0100 Subject: arm: Move ipi_teardown() to a CONFIG_HOTPLUG_CPU section ipi_teardown() is only used when CONFIG_HOTPLUG_CPU is enabled. Move the function to a location guarded by this config option. Signed-off-by: Marc Zyngier --- arch/arm/kernel/smp.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 00327fa74b01..8425da517984 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -85,7 +85,6 @@ static int nr_ipi __read_mostly = NR_IPI; static struct irq_desc *ipi_desc[MAX_IPI] __read_mostly; static void ipi_setup(int cpu); -static void ipi_teardown(int cpu); static DECLARE_COMPLETION(cpu_running); @@ -236,6 +235,17 @@ int platform_can_hotplug_cpu(unsigned int cpu) return cpu != 0; } +static void ipi_teardown(int cpu) +{ + int i; + + if (WARN_ON_ONCE(!ipi_irq_base)) + return; + + for (i = 0; i < nr_ipi; i++) + disable_percpu_irq(ipi_irq_base + i); +} + /* * __cpu_disable runs on the processor to be shutdown. */ @@ -707,17 +717,6 @@ static void ipi_setup(int cpu) enable_percpu_irq(ipi_irq_base + i, 0); } -static void ipi_teardown(int cpu) -{ - int i; - - if (WARN_ON_ONCE(!ipi_irq_base)) - return; - - for (i = 0; i < nr_ipi; i++) - disable_percpu_irq(ipi_irq_base + i); -} - void __init set_smp_ipi_range(int ipi_base, int n) { int i; -- cgit v1.2.3 From 220387048d859896ccc362c0ebf9bc1e0fa62eb9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 25 Sep 2020 16:22:00 +0100 Subject: ARM: Handle no IPI being registered in show_ipi_list() As SMP-on-UP is a valid configuration on 32bit ARM, do not assume that IPIs are populated in show_ipi_list(). Reported-by: Guillaume Tucker Reported-by: kernelci.org bot Tested-by: Guillaume Tucker Signed-off-by: Marc Zyngier --- arch/arm/kernel/smp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8425da517984..48099c6e1e4a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -541,7 +541,12 @@ void show_ipi_list(struct seq_file *p, int prec) unsigned int cpu, i; for (i = 0; i < NR_IPI; i++) { - unsigned int irq = irq_desc_get_irq(ipi_desc[i]); + unsigned int irq; + + if (!ipi_desc[i]) + continue; + + irq = irq_desc_get_irq(ipi_desc[i]); seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); for_each_online_cpu(cpu) -- cgit v1.2.3