From b3d52556794c0ab021667b54551685d812d500ca Mon Sep 17 00:00:00 2001
From: Chris Cole
Date: Fri, 23 Nov 2018 12:20:45 +0100
Subject: ARM: 8814/1: mm: improve/fix ARM v7_dma_inv_range() unaligned address handling

[ Upstream commit a1208f6a822ac29933e772ef1f637c5d67838da9 ]

This patch addresses possible memory corruption when
v7_dma_inv_range(start_address, end_address) address parameters are not
aligned to whole cache lines. This function issues "invalidate" cache
management operations to all cache lines from start_address (inclusive)
to end_address (exclusive). When start_address and/or end_address are
not aligned, the start and/or end cache lines are first issued a
"clean & invalidate" operation. The assumption is that this is done to
ensure that any dirty data addresses outside the address range (but part
of the first or last cache lines) are cleaned/flushed so that data is
not lost, which could happen if just an invalidate is issued.

The problem is that these first/last partial cache lines are issued
"clean & invalidate" and then "invalidate". This second "invalidate" is
not required and, worse, can cause "lost" writes to addresses outside
the address range but part of the cache line. If another component
writes to its part of the cache line between the "clean & invalidate"
and "invalidate" operations, the write can get lost. The fix is to
remove the extra "invalidate" operation when unaligned addresses are
used.

A kernel module is available that has a stress test to reproduce the
issue and a unit test of the updated v7_dma_inv_range(). It can be
downloaded from
http://ftp.sageembedded.com/outgoing/linux/cache-test-20181107.tgz.

v7_dma_inv_range() is called by dmac_[un]map_area(addr, len, direction)
when the direction is DMA_FROM_DEVICE. One can (I believe) successfully
argue that DMA from a device to main memory should use buffers aligned
to the cache line size, because the "clean & invalidate" might overwrite
data that the device just wrote using DMA. But if a driver does use
unaligned buffers, at least this fix will prevent memory corruption
outside the buffer.

Signed-off-by: Chris Cole
Signed-off-by: Russell King
Signed-off-by: Sasha Levin
---
 arch/arm/mm/cache-v7.S | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 215df435bfb9..2149b47a0c5a 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -360,14 +360,16 @@ v7_dma_inv_range:
 	ALT_UP(W(nop))
 #endif
 	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
+	addne	r0, r0, r2
 
 	tst	r1, r3
 	bic	r1, r1, r3
 	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
-1:
-	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
-	add	r0, r0, r2
 	cmp	r0, r1
+1:
+	mcrlo	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
+	addlo	r0, r0, r2
+	cmplo	r0, r1
 	blo	1b
 	dsb	st
 	ret	lr
--
cgit v1.2.3
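The practical takeaway from the patch above for driver authors is to keep
DMA_FROM_DEVICE buffers cache-line aligned so the partial-line
"clean & invalidate" never comes into play. A minimal sketch of such a
mapping, assuming a hypothetical driver with a device pointer dev and a
transfer length len (kmalloc() buffers are aligned to ARCH_DMA_MINALIGN
here, i.e. at least a full cache line):

	#include <linux/dma-mapping.h>
	#include <linux/slab.h>

	/*
	 * Sketch only: allocate and map a receive buffer for device-to-memory
	 * DMA.  Because kmalloc() memory is ARCH_DMA_MINALIGN aligned, neither
	 * end of the buffer shares a cache line with unrelated data, so the
	 * unaligned-address case fixed above is avoided altogether.
	 */
	static void *rx_alloc_and_map(struct device *dev, size_t len,
				      dma_addr_t *dma)
	{
		void *buf = kmalloc(len, GFP_KERNEL);

		if (!buf)
			return NULL;

		*dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
		if (dma_mapping_error(dev, *dma)) {
			kfree(buf);
			return NULL;
		}
		/* dma_unmap_single(dev, *dma, len, DMA_FROM_DEVICE) after the transfer */
		return buf;
	}
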
From 5cb9667104e8de4d0a7ae0a6e5647d7b9055cb94 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin
Date: Fri, 23 Nov 2018 12:25:21 +0100
Subject: ARM: 8815/1: V7M: align v7m_dma_inv_range() with v7 counterpart

[ Upstream commit 3d0358d0ba048c5afb1385787aaec8fa5ad78fcc ]

Chris has discovered and reported that v7_dma_inv_range() may corrupt
memory if the address range is not aligned to the cache line size.

Since the whole of cache-v7m.S was lifted from cache-v7.S, the same
observation applies to v7m_dma_inv_range(). So the fix simply mirrors
what has been done for v7, with a few M-class specifics.

Cc: Chris Cole
Signed-off-by: Vladimir Murzin
Signed-off-by: Russell King
Signed-off-by: Sasha Levin
---
 arch/arm/mm/cache-v7m.S | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S
index 788486e830d3..32aa2a2aa260 100644
--- a/arch/arm/mm/cache-v7m.S
+++ b/arch/arm/mm/cache-v7m.S
@@ -73,9 +73,11 @@
 /*
  * dcimvac: Invalidate data cache line by MVA to PoC
  */
-.macro dcimvac, rt, tmp
-	v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC
+.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+.macro dcimvac\c, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c
 .endm
+.endr
 
 /*
  * dccmvau: Clean data cache line by MVA to PoU
@@ -369,14 +371,16 @@ v7m_dma_inv_range:
 	tst	r0, r3
 	bic	r0, r0, r3
 	dccimvacne r0, r3
+	addne	r0, r0, r2
 	subne	r3, r2, #1	@ restore r3, corrupted by v7m's dccimvac
 	tst	r1, r3
 	bic	r1, r1, r3
 	dccimvacne r1, r3
-1:
-	dcimvac r0, r3
-	add	r0, r0, r2
 	cmp	r0, r1
+1:
+	dcimvaclo r0, r3
+	addlo	r0, r0, r2
+	cmplo	r0, r1
 	blo	1b
 	dsb	st
 	ret	lr
--
cgit v1.2.3

From 80eaec9b94bccbd752b41523e160b63a4711a7f2 Mon Sep 17 00:00:00 2001
From: Nathan Jones
Date: Tue, 4 Dec 2018 10:05:32 +0100
Subject: ARM: 8816/1: dma-mapping: fix potential uninitialized return

[ Upstream commit c2a3831df6dc164af66d8d86cf356a90c021b86f ]

While trying to use the dma_mmap_*() interface, it was noticed that this
interface returns strange values when passed an incorrect length.

If neither of the if() statements fires, the return value is
uninitialized. In the worst case it returns 0, which means the caller
will think the function succeeded.

Fixes: 1655cf8829d8 ("ARM: dma-mapping: Remove traces of NOMMU code")
Signed-off-by: Nathan Jones
Reviewed-by: Robin Murphy
Acked-by: Vladimir Murzin
Signed-off-by: Russell King
Signed-off-by: Sasha Levin
---
 arch/arm/mm/dma-mapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 66566472c153..1cb9c0f9b5d6 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -830,7 +830,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		 unsigned long attrs)
 {
-	int ret;
+	int ret = -ENXIO;
 	unsigned long nr_vma_pages = vma_pages(vma);
 	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
--
cgit v1.2.3
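For a sense of how the uninitialized return fixed above reaches a driver:
a typical mmap handler simply forwards to dma_mmap_coherent(). A minimal
sketch, assuming a hypothetical foo_priv structure holding the results of
an earlier dma_alloc_coherent():

	#include <linux/dma-mapping.h>
	#include <linux/fs.h>
	#include <linux/mm.h>

	struct foo_priv {			/* hypothetical driver state */
		struct device *dev;
		void *cpu_addr;			/* from dma_alloc_coherent() */
		dma_addr_t dma_addr;
		size_t size;
	};

	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
	{
		struct foo_priv *priv = file->private_data;

		/*
		 * If userspace asks for more pages than were allocated,
		 * neither of the if() statements in __arm_dma_mmap() fires;
		 * before the fix the caller saw whatever was on the stack
		 * (possibly 0), with the fix it reliably gets -ENXIO.
		 */
		return dma_mmap_coherent(priv->dev, vma, priv->cpu_addr,
					 priv->dma_addr, priv->size);
	}
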
From 4645c6ccbd7a6685f2c5629092e73662fc5ec370 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Wed, 13 Feb 2019 16:32:21 -0500
Subject: ARM: spectre-v2: per-CPU vtables to work around big.Little systems

Commit 383fb3ee8024d596f488d2dbaf45e572897acbdb upstream.

In big.Little systems, some CPUs require the Spectre workarounds in
paths such as the context switch, but other CPUs do not. In order to
handle these differences, we need per-CPU vtables.

We are unable to use the kernel's per-CPU variables to support this as
per-CPU is not initialised at times when we need access to the vtables,
so we have to use an array indexed by logical CPU number.

We use an array-of-pointers to avoid having function pointers in the
kernel's read/write .data section.

Reviewed-by: Julien Thierry
Signed-off-by: Russell King
Signed-off-by: David A. Long
Reviewed-by: Julien Thierry
Tested-by: Julien Thierry
Signed-off-by: Sasha Levin
---
 arch/arm/mm/proc-v7-bugs.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 5544b82a2e7a..9a07916af8dd 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void)
 	case ARM_CPU_PART_CORTEX_A17:
 	case ARM_CPU_PART_CORTEX_A73:
 	case ARM_CPU_PART_CORTEX_A75:
-		if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
-			goto bl_error;
 		per_cpu(harden_branch_predictor_fn, cpu) =
 			harden_branch_predictor_bpiall;
 		spectre_v2_method = "BPIALL";
@@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void)
 
 	case ARM_CPU_PART_CORTEX_A15:
 	case ARM_CPU_PART_BRAHMA_B15:
-		if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
-			goto bl_error;
 		per_cpu(harden_branch_predictor_fn, cpu) =
 			harden_branch_predictor_iciallu;
 		spectre_v2_method = "ICIALLU";
@@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void)
 					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
 			if ((int)res.a0 != 0)
 				break;
-			if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
-				goto bl_error;
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_hvc_arch_workaround_1;
-			processor.switch_mm = cpu_v7_hvc_switch_mm;
+			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
 			spectre_v2_method = "hypervisor";
 			break;
 
@@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void)
 					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
 			if ((int)res.a0 != 0)
 				break;
-			if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
-				goto bl_error;
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_smc_arch_workaround_1;
-			processor.switch_mm = cpu_v7_smc_switch_mm;
+			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
 			spectre_v2_method = "firmware";
 			break;
 
@@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void)
 	if (spectre_v2_method)
 		pr_info("CPU%u: Spectre v2: using %s workaround\n",
 			smp_processor_id(), spectre_v2_method);
-	return;
-
-bl_error:
-	pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
-	       cpu);
 }
 #else
 static void cpu_v7_spectre_init(void)
--
cgit v1.2.3
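The array-of-pointers idea described above, restated as a simplified C
sketch (illustrative only, not the kernel's actual 'struct processor'
plumbing): only a small pointer array stays writable, while the vtables it
points at can remain const and live in .rodata:

	#include <linux/threads.h>	/* NR_CPUS */

	/* Simplified stand-in for the real per-CPU processor vtable */
	struct proc_vtable {
		void (*switch_mm)(unsigned long pgd_phys, void *mm);
		void (*do_idle)(void);
	};

	/*
	 * One pointer per logical CPU.  Only this pointer array is writable;
	 * the tables it references are const, so no function pointers need
	 * to sit in the read/write .data section.
	 */
	static const struct proc_vtable *cpu_vtable[NR_CPUS];

	static void set_cpu_vtable(unsigned int cpu, const struct proc_vtable *vt)
	{
		cpu_vtable[cpu] = vt;	/* chosen while bringing the CPU up */
	}
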
From e7a8dabe92f1c59bb5f992e4d40bb88dbedd9baf Mon Sep 17 00:00:00 2001
From: Russell King
Date: Wed, 13 Feb 2019 16:32:22 -0500
Subject: ARM: ensure that processor vtables is not lost after boot

Commit 3a4d0c2172bcf15b7a3d9d498b2b355f9864286b upstream.

Marek Szyprowski reported problems with CPU hotplug in current kernels.
This was tracked down to the processor vtables being located in an init
section, and therefore discarded after kernel boot, despite being
required after boot to properly initialise the non-boot CPUs.

Arrange for these tables to end up in .rodata when required.

Reported-by: Marek Szyprowski
Tested-by: Krzysztof Kozlowski
Fixes: 383fb3ee8024 ("ARM: spectre-v2: per-CPU vtables to work around big.Little systems")
Signed-off-by: Russell King
Signed-off-by: David A. Long
Reviewed-by: Julien Thierry
Tested-by: Julien Thierry
Signed-off-by: Sasha Levin
---
 arch/arm/mm/proc-macros.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 81d0efb055c6..19516fbc2c55 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -274,6 +274,13 @@
 .endm
 
 .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
+/*
+ * If we are building for big.Little with branch predictor hardening,
+ * we need the processor function tables to remain available after boot.
+ */
+#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+	.section ".rodata"
+#endif
 	.type	\name\()_processor_functions, #object
 	.align 2
 ENTRY(\name\()_processor_functions)
@@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
 	.endif
 
 	.size	\name\()_processor_functions, . - \name\()_processor_functions
+#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+	.previous
+#endif
 .endm
 
 .macro define_cache_functions name:req
--
cgit v1.2.3

From bdd8fd8a2efe3be7c67d38568f65a3e88a93cb59 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Wed, 13 Feb 2019 16:32:23 -0500
Subject: ARM: fix the cockup in the previous patch

Commit d6951f582cc50ba0ad22ef46b599740966599b14 upstream.

The intention in the previous patch was to only place the processor
tables in the .rodata section if big.Little was being built and we
wanted the branch target hardening, but instead (due to the way it was
tested) it ended up always placing the tables into the .rodata section.

Although harmless, let's correct this anyway.

Fixes: 3a4d0c2172bc ("ARM: ensure that processor vtables is not lost after boot")
Signed-off-by: Russell King
Signed-off-by: David A. Long
Reviewed-by: Julien Thierry
Tested-by: Julien Thierry
Signed-off-by: Sasha Levin
---
 arch/arm/mm/proc-macros.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 19516fbc2c55..5461d589a1e2 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -278,7 +278,7 @@
  * If we are building for big.Little with branch predictor hardening,
  * we need the processor function tables to remain available after boot.
  */
-#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
 	.section ".rodata"
 #endif
 	.type	\name\()_processor_functions, #object
@@ -316,7 +316,7 @@ ENTRY(\name\()_processor_functions)
 	.endif
 
 	.size	\name\()_processor_functions, . - \name\()_processor_functions
-#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
 	.previous
 #endif
 .endm
--
cgit v1.2.3

From 84657a1ba9bd5c9f9c94e8d857ae57dcb5abbcce Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Wed, 6 Feb 2019 18:43:24 +0100
Subject: ARM: 8835/1: dma-mapping: Clear DMA ops on teardown

[ Upstream commit fc67e6f120a388b611d94cc40baf99a5cc56b283 ]

Installing the appropriate non-IOMMU DMA ops in arm_iommu_detach_device()
serves the case where IOMMU-aware drivers choose to control their own
mapping but still make DMA API calls. However, it also affects the case
when the arch code itself tears down the mapping upon driver unbinding,
where the ops now get left in place and can inhibit arch_setup_dma_ops()
on subsequent re-probe attempts.

Fix the latter case by making sure that arch_teardown_dma_ops() cleans
up whenever the ops were automatically installed by its counterpart.

Reported-by: Tobias Jakobi
Reported-by: Marek Szyprowski
Fixes: 1874619a7df4 "ARM: dma-mapping: Set proper DMA ops in arm_iommu_detach_device()"
Tested-by: Tobias Jakobi
Tested-by: Thierry Reding
Signed-off-by: Robin Murphy
Signed-off-by: Russell King
Signed-off-by: Sasha Levin
---
 arch/arm/mm/dma-mapping.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1cb9c0f9b5d6..8211cf45ece1 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2400,4 +2400,6 @@ void arch_teardown_dma_ops(struct device *dev)
 		return;
 
 	arm_teardown_iommu_dma_ops(dev);
+	/* Let arch_setup_dma_ops() start again from scratch upon re-probe */
+	set_dma_ops(dev, NULL);
 }
--
cgit v1.2.3

From d8945878ded6e5850b59e101bcc9fe4167ab7fb1 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin
Date: Fri, 25 Jan 2019 15:18:37 +0100
Subject: ARM: 8830/1: NOMMU: Toggle only bits in EXC_RETURN we are really care of

[ Upstream commit 72cd4064fccaae15ab84d40d4be23667402df4ed ]

ARMv8M introduces support for the Security extension to the M class;
among other things it affects exception handling, especially the
encoding of EXC_RETURN.

The following new bits have been added:

Bit [6]	Secure or Non-secure stack
Bit [5]	Default callee register stacking
Bit [0]	Exception Secure

which conflicts with the hard-coded value of EXC_RETURN. In fact, we
only care about a few bits:

Bit [3]	Mode (0 - Handler, 1 - Thread)
Bit [2]	Stack pointer selection (0 - Main, 1 - Process)

We can toggle only those bits and leave the other bits as they were on
exception entry. That is basically what this patch does: it saves
EXC_RETURN when we transition from Thread to Handler mode (the first
svc), so the saved value is later used instead of the hard-coded
EXC_RET_THREADMODE_PROCESSSTACK.

Signed-off-by: Vladimir Murzin
Signed-off-by: Russell King
Signed-off-by: Sasha Levin
---
 arch/arm/mm/proc-v7m.S | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 47a5acc64433..92e84181933a 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -139,6 +139,9 @@ __v7m_setup_cont:
 	cpsie	i
 	svc	#0
 1:	cpsid	i
+	ldr	r0, =exc_ret
+	orr	lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK
+	str	lr, [r0]
 	ldmia	sp, {r0-r3, r12}
 	str	r5, [r12, #11 * 4]	@ restore the original SVC vector entry
 	mov	lr, r6			@ restore LR
--
cgit v1.2.3
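A rough C restatement of the three instructions added in the hunk above
(bit positions are taken from the commit message; the macro names and the
helper are illustrative, not the kernel's definitions): keep EXC_RETURN
exactly as the hardware produced it on the first exception entry and only
force Thread mode and the Process stack.

	#include <linux/bits.h>
	#include <linux/types.h>

	#define EXC_RET_THREAD_MODE	BIT(3)	/* Bit [3]: 1 = Thread mode   */
	#define EXC_RET_PROCESS_STACK	BIT(2)	/* Bit [2]: 1 = Process stack */

	static u32 exc_ret;	/* mirrors the exc_ret variable the patch stores to */

	/*
	 * Save EXC_RETURN on the first svc, forcing only bits [3:2] and
	 * leaving the ARMv8-M Security-extension bits [6], [5] and [0]
	 * exactly as they were on exception entry.
	 */
	static void save_exc_return(u32 lr_on_entry)
	{
		exc_ret = lr_on_entry | EXC_RET_THREAD_MODE | EXC_RET_PROCESS_STACK;
	}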