summaryrefslogtreecommitdiff
path: root/arch/arm/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/mm')
-rw-r--r--arch/arm/mm/Kconfig123
-rw-r--r--arch/arm/mm/alignment.c6
-rw-r--r--arch/arm/mm/cache-fa.S3
-rw-r--r--arch/arm/mm/cache-l2x0.c10
-rw-r--r--arch/arm/mm/cache-tauros2.c83
-rw-r--r--arch/arm/mm/cache-v3.S3
-rw-r--r--arch/arm/mm/cache-v4.S3
-rw-r--r--arch/arm/mm/cache-v4wb.S3
-rw-r--r--arch/arm/mm/cache-v4wt.S3
-rw-r--r--arch/arm/mm/cache-v6.S3
-rw-r--r--arch/arm/mm/cache-v7.S51
-rw-r--r--arch/arm/mm/context.c7
-rw-r--r--arch/arm/mm/dma-mapping.c380
-rw-r--r--arch/arm/mm/fault-armv.c3
-rw-r--r--arch/arm/mm/fault.c1
-rw-r--r--arch/arm/mm/flush.c3
-rw-r--r--arch/arm/mm/init.c2
-rw-r--r--arch/arm/mm/ioremap.c15
-rw-r--r--arch/arm/mm/mm.h3
-rw-r--r--arch/arm/mm/mmu.c71
-rw-r--r--arch/arm/mm/proc-arm1020.S3
-rw-r--r--arch/arm/mm/proc-arm1020e.S3
-rw-r--r--arch/arm/mm/proc-arm1022.S3
-rw-r--r--arch/arm/mm/proc-arm1026.S3
-rw-r--r--arch/arm/mm/proc-arm920.S3
-rw-r--r--arch/arm/mm/proc-arm922.S3
-rw-r--r--arch/arm/mm/proc-arm925.S3
-rw-r--r--arch/arm/mm/proc-arm926.S3
-rw-r--r--arch/arm/mm/proc-arm940.S3
-rw-r--r--arch/arm/mm/proc-arm946.S3
-rw-r--r--arch/arm/mm/proc-feroceon.S4
-rw-r--r--arch/arm/mm/proc-macros.S1
-rw-r--r--arch/arm/mm/proc-mohawk.S3
-rw-r--r--arch/arm/mm/proc-v7.S2
-rw-r--r--arch/arm/mm/proc-xsc3.S3
-rw-r--r--arch/arm/mm/proc-xscale.S4
36 files changed, 636 insertions, 187 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 101b9681c08c..94186b6c685f 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -10,8 +10,8 @@ config CPU_ARM7TDMI
depends on !MMU
select CPU_32v4T
select CPU_ABRT_LV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4
+ select CPU_PABRT_LEGACY
help
A 32-bit RISC microprocessor based on the ARM7 processor core
which has no memory control unit and cache.
@@ -24,11 +24,11 @@ config CPU_ARM720T
bool "Support ARM720T processor" if ARCH_INTEGRATOR
select CPU_32v4T
select CPU_ABRT_LV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WT if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WT if MMU
help
A 32-bit RISC processor with 8kByte Cache, Write Buffer and
@@ -43,9 +43,9 @@ config CPU_ARM740T
depends on !MMU
select CPU_32v4T
select CPU_ABRT_LV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_V3 # although the core is v4t
select CPU_CP15_MPU
+ select CPU_PABRT_LEGACY
help
A 32-bit RISC processor with 8KB cache or 4KB variants,
write buffer and MPU(Protection Unit) built around
@@ -60,8 +60,8 @@ config CPU_ARM9TDMI
depends on !MMU
select CPU_32v4T
select CPU_ABRT_NOMMU
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4
+ select CPU_PABRT_LEGACY
help
A 32-bit RISC microprocessor based on the ARM9 processor core
which has no memory control unit and cache.
@@ -74,11 +74,11 @@ config CPU_ARM920T
bool "Support ARM920T processor" if ARCH_INTEGRATOR
select CPU_32v4T
select CPU_ABRT_EV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM920T is licensed to be produced by numerous vendors,
@@ -92,11 +92,11 @@ config CPU_ARM922T
bool "Support ARM922T processor" if ARCH_INTEGRATOR
select CPU_32v4T
select CPU_ABRT_EV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM922T is a version of the ARM920T, but with smaller
@@ -111,11 +111,11 @@ config CPU_ARM925T
bool "Support ARM925T processor" if ARCH_OMAP1
select CPU_32v4T
select CPU_ABRT_EV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM925T is a mix between the ARM920T and ARM926T, but with
@@ -130,10 +130,10 @@ config CPU_ARM926T
bool "Support ARM926T processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB
select CPU_32v5
select CPU_ABRT_EV5TJ
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
This is a variant of the ARM920. It has slightly different
@@ -148,11 +148,11 @@ config CPU_FA526
bool
select CPU_32v4
select CPU_ABRT_EV4
- select CPU_PABRT_LEGACY
- select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_CACHE_FA
+ select CPU_CACHE_VIVT
select CPU_COPY_FA if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_FA if MMU
help
The FA526 is a version of the ARMv4 compatible processor with
@@ -167,9 +167,9 @@ config CPU_ARM940T
depends on !MMU
select CPU_32v4T
select CPU_ABRT_NOMMU
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
select CPU_CP15_MPU
+ select CPU_PABRT_LEGACY
help
ARM940T is a member of the ARM9TDMI family of general-
purpose microprocessors with MPU and separate 4KB
@@ -185,9 +185,9 @@ config CPU_ARM946E
depends on !MMU
select CPU_32v5
select CPU_ABRT_NOMMU
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
select CPU_CP15_MPU
+ select CPU_PABRT_LEGACY
help
ARM946E-S is a member of the ARM9E-S family of high-
performance, 32-bit system-on-chip processor solutions.
@@ -201,11 +201,11 @@ config CPU_ARM1020
bool "Support ARM1020T (rev 0) processor" if ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM1020 is the 32K cached version of the ARM10 processor,
@@ -217,25 +217,25 @@ config CPU_ARM1020
# ARM1020E - needs validating
config CPU_ARM1020E
bool "Support ARM1020E processor" if ARCH_INTEGRATOR
+ depends on n
select CPU_32v5
select CPU_ABRT_EV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
- depends on n
# ARM1022E
config CPU_ARM1022
bool "Support ARM1022E processor" if ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV4T
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU # can probably do better
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM1022E is an implementation of the ARMv5TE architecture
@@ -250,10 +250,10 @@ config CPU_ARM1026
bool "Support ARM1026EJ-S processor" if ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU # can probably do better
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture
@@ -268,11 +268,11 @@ config CPU_SA110
select CPU_32v3 if ARCH_RPC
select CPU_32v4 if !ARCH_RPC
select CPU_ABRT_EV4
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4WB
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WB if MMU
help
The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and
@@ -288,10 +288,10 @@ config CPU_SA1100
bool
select CPU_32v4
select CPU_ABRT_EV4
- select CPU_PABRT_LEGACY
select CPU_CACHE_V4WB
select CPU_CACHE_VIVT
select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WB if MMU
# XScale
@@ -299,9 +299,9 @@ config CPU_XSCALE
bool
select CPU_32v5
select CPU_ABRT_EV5T
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
# XScale Core Version 3
@@ -309,9 +309,9 @@ config CPU_XSC3
bool
select CPU_32v5
select CPU_ABRT_EV5T
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
select IO_36
@@ -320,21 +320,21 @@ config CPU_MOHAWK
bool
select CPU_32v5
select CPU_ABRT_EV5T
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
+ select CPU_COPY_V4WB if MMU
select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
- select CPU_COPY_V4WB if MMU
# Feroceon
config CPU_FEROCEON
bool
select CPU_32v5
select CPU_ABRT_EV5T
- select CPU_PABRT_LEGACY
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_FEROCEON if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_FEROCEON if MMU
config CPU_FEROCEON_OLD_ID
@@ -349,20 +349,20 @@ config CPU_FEROCEON_OLD_ID
# Marvell PJ4
config CPU_PJ4
bool
- select CPU_V7
select ARM_THUMBEE
+ select CPU_V7
# ARMv6
config CPU_V6
bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX
select CPU_32v6
select CPU_ABRT_EV6
- select CPU_PABRT_V6
select CPU_CACHE_V6
select CPU_CACHE_VIPT
+ select CPU_COPY_V6 if MMU
select CPU_CP15_MMU
select CPU_HAS_ASID if MMU
- select CPU_COPY_V6 if MMU
+ select CPU_PABRT_V6
select CPU_TLB_V6 if MMU
# ARMv6k
@@ -371,12 +371,12 @@ config CPU_V6K
select CPU_32v6
select CPU_32v6K
select CPU_ABRT_EV6
- select CPU_PABRT_V6
select CPU_CACHE_V6
select CPU_CACHE_VIPT
+ select CPU_COPY_V6 if MMU
select CPU_CP15_MMU
select CPU_HAS_ASID if MMU
- select CPU_COPY_V6 if MMU
+ select CPU_PABRT_V6
select CPU_TLB_V6 if MMU
# ARMv7
@@ -385,44 +385,44 @@ config CPU_V7
select CPU_32v6K
select CPU_32v7
select CPU_ABRT_EV7
- select CPU_PABRT_V7
select CPU_CACHE_V7
select CPU_CACHE_VIPT
+ select CPU_COPY_V6 if MMU
select CPU_CP15_MMU
select CPU_HAS_ASID if MMU
- select CPU_COPY_V6 if MMU
+ select CPU_PABRT_V7
select CPU_TLB_V7 if MMU
# Figure out what processor architecture version we should be using.
# This defines the compiler instruction set which depends on the machine type.
config CPU_32v3
bool
- select TLS_REG_EMUL if SMP || !MMU
- select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
select CPU_USE_DOMAINS if MMU
+ select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
+ select TLS_REG_EMUL if SMP || !MMU
config CPU_32v4
bool
- select TLS_REG_EMUL if SMP || !MMU
- select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
select CPU_USE_DOMAINS if MMU
+ select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
+ select TLS_REG_EMUL if SMP || !MMU
config CPU_32v4T
bool
- select TLS_REG_EMUL if SMP || !MMU
- select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
select CPU_USE_DOMAINS if MMU
+ select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
+ select TLS_REG_EMUL if SMP || !MMU
config CPU_32v5
bool
- select TLS_REG_EMUL if SMP || !MMU
- select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
select CPU_USE_DOMAINS if MMU
+ select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
+ select TLS_REG_EMUL if SMP || !MMU
config CPU_32v6
bool
- select TLS_REG_EMUL if !CPU_32v6K && !MMU
select CPU_USE_DOMAINS if CPU_V6 && MMU
+ select TLS_REG_EMUL if !CPU_32v6K && !MMU
config CPU_32v6K
bool
@@ -624,11 +624,28 @@ config ARM_THUMBEE
Say Y here if you have a CPU with the ThumbEE extension and code to
make use of it. Say N for code that can run on CPUs without ThumbEE.
+config ARM_VIRT_EXT
+ bool "Native support for the ARM Virtualization Extensions"
+ depends on MMU && CPU_V7
+ help
+ Enable the kernel to make use of the ARM Virtualization
+ Extensions to install hypervisors without run-time firmware
+ assistance.
+
+ A compliant bootloader is required in order to make maximum
+ use of this feature. Refer to Documentation/arm/Booting for
+ details.
+
+ It is safe to enable this option even if the kernel may not be
+ booted in HYP mode, may not have support for the
+ virtualization extensions, or may be booted with a
+ non-compliant bootloader.
+
config SWP_EMULATE
bool "Emulate SWP/SWPB instructions"
depends on !CPU_USE_DOMAINS && CPU_V7
- select HAVE_PROC_CPU if PROC_FS
default y if SMP
+ select HAVE_PROC_CPU if PROC_FS
help
ARMv6 architecture deprecates use of the SWP/SWPB instructions.
ARMv7 multiprocessing extensions introduce the ability to disable
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 9107231aacc5..b9f60ebe3bc4 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -699,7 +699,6 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
unsigned long instr = *pinstr;
u16 tinst1 = (instr >> 16) & 0xffff;
u16 tinst2 = instr & 0xffff;
- poffset->un = 0;
switch (tinst1 & 0xffe0) {
/* A6.3.5 Load/Store multiple */
@@ -854,9 +853,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
break;
case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */
- if (thumb2_32b)
+ if (thumb2_32b) {
+ offset.un = 0;
handler = do_alignment_t32_to_handler(&instr, regs, &offset);
- else
+ } else
handler = do_alignment_ldmstm;
break;
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index 072016371093..e505befe51b5 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S
@@ -240,6 +240,9 @@ ENTRY(fa_dma_unmap_area)
mov pc, lr
ENDPROC(fa_dma_unmap_area)
+ .globl fa_flush_kern_cache_louis
+ .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all
+
__INITDATA
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 2a8e380501e8..8a97e6443c62 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -368,14 +368,18 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
/* l2x0 controller is disabled */
writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
- l2x0_saved_regs.aux_ctrl = aux;
-
l2x0_inv_all();
/* enable L2X0 */
writel_relaxed(1, l2x0_base + L2X0_CTRL);
}
+ /* Re-read it in case some bits are reserved. */
+ aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+
+ /* Save the value for resuming. */
+ l2x0_saved_regs.aux_ctrl = aux;
+
outer_cache.inv_range = l2x0_inv_range;
outer_cache.clean_range = l2x0_clean_range;
outer_cache.flush_range = l2x0_flush_range;
@@ -554,7 +558,7 @@ static const struct of_device_id l2x0_ids[] __initconst = {
int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
{
struct device_node *np;
- struct l2x0_of_data *data;
+ const struct l2x0_of_data *data;
struct resource res;
np = of_find_matching_node(NULL, l2x0_ids);
diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c
index 23a7643e9a87..1be0f4e5e6eb 100644
--- a/arch/arm/mm/cache-tauros2.c
+++ b/arch/arm/mm/cache-tauros2.c
@@ -15,8 +15,11 @@
*/
#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/cacheflush.h>
#include <asm/cp15.h>
+#include <asm/cputype.h>
#include <asm/hardware/cache-tauros2.h>
@@ -144,25 +147,8 @@ static inline void __init write_extra_features(u32 u)
__asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u));
}
-static void __init disable_l2_prefetch(void)
-{
- u32 u;
-
- /*
- * Read the CPU Extra Features register and verify that the
- * Disable L2 Prefetch bit is set.
- */
- u = read_extra_features();
- if (!(u & 0x01000000)) {
- printk(KERN_INFO "Tauros2: Disabling L2 prefetch.\n");
- write_extra_features(u | 0x01000000);
- }
-}
-
static inline int __init cpuid_scheme(void)
{
- extern int processor_id;
-
return !!((processor_id & 0x000f0000) == 0x000f0000);
}
@@ -189,12 +175,36 @@ static inline void __init write_actlr(u32 actlr)
__asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr));
}
-void __init tauros2_init(void)
+static void enable_extra_feature(unsigned int features)
+{
+ u32 u;
+
+ u = read_extra_features();
+
+ if (features & CACHE_TAUROS2_PREFETCH_ON)
+ u &= ~0x01000000;
+ else
+ u |= 0x01000000;
+ printk(KERN_INFO "Tauros2: %s L2 prefetch.\n",
+ (features & CACHE_TAUROS2_PREFETCH_ON)
+ ? "Enabling" : "Disabling");
+
+ if (features & CACHE_TAUROS2_LINEFILL_BURST8)
+ u |= 0x00100000;
+ else
+ u &= ~0x00100000;
+ printk(KERN_INFO "Tauros2: %s line fill burt8.\n",
+ (features & CACHE_TAUROS2_LINEFILL_BURST8)
+ ? "Enabling" : "Disabling");
+
+ write_extra_features(u);
+}
+
+static void __init tauros2_internal_init(unsigned int features)
{
- extern int processor_id;
- char *mode;
+ char *mode = NULL;
- disable_l2_prefetch();
+ enable_extra_feature(features);
#ifdef CONFIG_CPU_32v5
if ((processor_id & 0xff0f0000) == 0x56050000) {
@@ -286,3 +296,34 @@ void __init tauros2_init(void)
printk(KERN_INFO "Tauros2: L2 cache support initialised "
"in %s mode.\n", mode);
}
+
+#ifdef CONFIG_OF
+static const struct of_device_id tauros2_ids[] __initconst = {
+ { .compatible = "marvell,tauros2-cache"},
+ {}
+};
+#endif
+
+void __init tauros2_init(unsigned int features)
+{
+#ifdef CONFIG_OF
+ struct device_node *node;
+ int ret;
+ unsigned int f;
+
+ node = of_find_matching_node(NULL, tauros2_ids);
+ if (!node) {
+ pr_info("Not found marvell,tauros2-cache, disable it\n");
+ return;
+ }
+
+ ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f);
+ if (ret) {
+ pr_info("Not found marvell,tauros-cache-features property, "
+ "disable extra features\n");
+ features = 0;
+ } else
+ features = f;
+#endif
+ tauros2_internal_init(features);
+}
diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S
index 52e35f32eefb..8a3fadece8d3 100644
--- a/arch/arm/mm/cache-v3.S
+++ b/arch/arm/mm/cache-v3.S
@@ -128,6 +128,9 @@ ENTRY(v3_dma_map_area)
ENDPROC(v3_dma_unmap_area)
ENDPROC(v3_dma_map_area)
+ .globl v3_flush_kern_cache_louis
+ .equ v3_flush_kern_cache_louis, v3_flush_kern_cache_all
+
__INITDATA
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index 022135d2b7e4..43e5d77be677 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -140,6 +140,9 @@ ENTRY(v4_dma_map_area)
ENDPROC(v4_dma_unmap_area)
ENDPROC(v4_dma_map_area)
+ .globl v4_flush_kern_cache_louis
+ .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all
+
__INITDATA
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index 8f1eeae340c8..cd4945321407 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -251,6 +251,9 @@ ENTRY(v4wb_dma_unmap_area)
mov pc, lr
ENDPROC(v4wb_dma_unmap_area)
+ .globl v4wb_flush_kern_cache_louis
+ .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all
+
__INITDATA
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index b34a5f908a82..11e5e5838bc5 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -196,6 +196,9 @@ ENTRY(v4wt_dma_map_area)
ENDPROC(v4wt_dma_unmap_area)
ENDPROC(v4wt_dma_map_area)
+ .globl v4wt_flush_kern_cache_louis
+ .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all
+
__INITDATA
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 4b10760c56d6..d8fd4d4bd3d4 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -326,6 +326,9 @@ ENTRY(v6_dma_unmap_area)
mov pc, lr
ENDPROC(v6_dma_unmap_area)
+ .globl v6_flush_kern_cache_louis
+ .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all
+
__INITDATA
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 39e3fb3db801..cd956647c21a 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -33,6 +33,24 @@ ENTRY(v7_flush_icache_all)
mov pc, lr
ENDPROC(v7_flush_icache_all)
+ /*
+ * v7_flush_dcache_louis()
+ *
+ * Flush the D-cache up to the Level of Unification Inner Shareable
+ *
+ * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ */
+
+ENTRY(v7_flush_dcache_louis)
+ dmb @ ensure ordering with previous memory accesses
+ mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr
+ ands r3, r0, #0xe00000 @ extract LoUIS from clidr
+ mov r3, r3, lsr #20 @ r3 = LoUIS * 2
+ moveq pc, lr @ return if level == 0
+ mov r10, #0 @ r10 (starting level) = 0
+ b flush_levels @ start flushing cache levels
+ENDPROC(v7_flush_dcache_louis)
+
/*
* v7_flush_dcache_all()
*
@@ -49,7 +67,7 @@ ENTRY(v7_flush_dcache_all)
mov r3, r3, lsr #23 @ left align loc bit field
beq finished @ if loc is 0, then no need to clean
mov r10, #0 @ start clean at cache level 0
-loop1:
+flush_levels:
add r2, r10, r10, lsr #1 @ work out 3x current cache level
mov r1, r0, lsr r2 @ extract cache type bits from clidr
and r1, r1, #7 @ mask of the bits for current cache only
@@ -71,9 +89,9 @@ loop1:
clz r5, r4 @ find bit position of way size increment
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
-loop2:
+loop1:
mov r9, r4 @ create working copy of max way size
-loop3:
+loop2:
ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
THUMB( lsl r6, r9, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
@@ -82,13 +100,13 @@ loop3:
THUMB( orr r11, r11, r6 ) @ factor index number into r11
mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
subs r9, r9, #1 @ decrement the way
- bge loop3
- subs r7, r7, #1 @ decrement the index
bge loop2
+ subs r7, r7, #1 @ decrement the index
+ bge loop1
skip:
add r10, r10, #2 @ increment cache number
cmp r3, r10
- bgt loop1
+ bgt flush_levels
finished:
mov r10, #0 @ swith back to cache level 0
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
@@ -120,6 +138,24 @@ ENTRY(v7_flush_kern_cache_all)
mov pc, lr
ENDPROC(v7_flush_kern_cache_all)
+ /*
+ * v7_flush_kern_cache_louis(void)
+ *
+ * Flush the data cache up to Level of Unification Inner Shareable.
+ * Invalidate the I-cache to the point of unification.
+ */
+ENTRY(v7_flush_kern_cache_louis)
+ ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( stmfd sp!, {r4-r7, r9-r11, lr} )
+ bl v7_flush_dcache_louis
+ mov r0, #0
+ ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable
+ ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
+ ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
+ mov pc, lr
+ENDPROC(v7_flush_kern_cache_louis)
+
/*
* v7_flush_cache_all()
*
@@ -211,6 +247,9 @@ ENTRY(v7_coherent_user_range)
* isn't mapped, fail with -EFAULT.
*/
9001:
+#ifdef CONFIG_ARM_ERRATA_775420
+ dsb
+#endif
mov r0, #-EFAULT
mov pc, lr
UNWIND(.fnend )
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 119bc52ab93e..4e07eec1270d 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -63,10 +63,11 @@ static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd,
pid = task_pid_nr(thread->task) << ASID_BITS;
asm volatile(
" mrc p15, 0, %0, c13, c0, 1\n"
- " bfi %1, %0, #0, %2\n"
- " mcr p15, 0, %1, c13, c0, 1\n"
+ " and %0, %0, %2\n"
+ " orr %0, %0, %1\n"
+ " mcr p15, 0, %0, c13, c0, 1\n"
: "=r" (contextidr), "+r" (pid)
- : "I" (ASID_BITS));
+ : "I" (~ASID_MASK));
isb();
return NOTIFY_OK;
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 4e7d1182e8a3..477a2d23ddf1 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -73,11 +73,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_cpu_to_dev(page, offset, size, dir);
return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}
+static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ return pfn_to_dma(dev, page_to_pfn(page)) + offset;
+}
+
/**
* arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -96,7 +103,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
handle & ~PAGE_MASK, size, dir);
}
@@ -106,8 +113,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev,
{
unsigned int offset = handle & (PAGE_SIZE - 1);
struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
- if (!arch_is_coherent())
- __dma_page_dev_to_cpu(page, offset, size, dir);
+ __dma_page_dev_to_cpu(page, offset, size, dir);
}
static void arm_dma_sync_single_for_device(struct device *dev,
@@ -115,8 +121,7 @@ static void arm_dma_sync_single_for_device(struct device *dev,
{
unsigned int offset = handle & (PAGE_SIZE - 1);
struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
- if (!arch_is_coherent())
- __dma_page_cpu_to_dev(page, offset, size, dir);
+ __dma_page_cpu_to_dev(page, offset, size, dir);
}
static int arm_dma_set_mask(struct device *dev, u64 dma_mask);
@@ -138,6 +143,22 @@ struct dma_map_ops arm_dma_ops = {
};
EXPORT_SYMBOL(arm_dma_ops);
+static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs);
+static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs);
+
+struct dma_map_ops arm_coherent_dma_ops = {
+ .alloc = arm_coherent_dma_alloc,
+ .free = arm_coherent_dma_free,
+ .mmap = arm_dma_mmap,
+ .get_sgtable = arm_dma_get_sgtable,
+ .map_page = arm_coherent_dma_map_page,
+ .map_sg = arm_dma_map_sg,
+ .set_dma_mask = arm_dma_set_mask,
+};
+EXPORT_SYMBOL(arm_coherent_dma_ops);
+
static u64 get_coherent_dma_mask(struct device *dev)
{
u64 mask = (u64)arm_dma_limit;
@@ -267,17 +288,19 @@ static void __dma_free_remap(void *cpu_addr, size_t size)
vunmap(cpu_addr);
}
+#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
+
struct dma_pool {
size_t size;
spinlock_t lock;
unsigned long *bitmap;
unsigned long nr_pages;
void *vaddr;
- struct page *page;
+ struct page **pages;
};
static struct dma_pool atomic_pool = {
- .size = SZ_256K,
+ .size = DEFAULT_DMA_COHERENT_POOL_SIZE,
};
static int __init early_coherent_pool(char *p)
@@ -287,6 +310,21 @@ static int __init early_coherent_pool(char *p)
}
early_param("coherent_pool", early_coherent_pool);
+void __init init_dma_coherent_pool_size(unsigned long size)
+{
+ /*
+ * Catch any attempt to set the pool size too late.
+ */
+ BUG_ON(atomic_pool.vaddr);
+
+ /*
+ * Set architecture specific coherent pool size only if
+ * it has not been changed by kernel command line parameter.
+ */
+ if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE)
+ atomic_pool.size = size;
+}
+
/*
* Initialise the coherent pool for atomic allocations.
*/
@@ -297,6 +335,7 @@ static int __init atomic_pool_init(void)
unsigned long nr_pages = pool->size >> PAGE_SHIFT;
unsigned long *bitmap;
struct page *page;
+ struct page **pages;
void *ptr;
int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
@@ -304,21 +343,33 @@ static int __init atomic_pool_init(void)
if (!bitmap)
goto no_bitmap;
+ pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto no_pages;
+
if (IS_ENABLED(CONFIG_CMA))
ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page);
else
ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot,
&page, NULL);
if (ptr) {
+ int i;
+
+ for (i = 0; i < nr_pages; i++)
+ pages[i] = page + i;
+
spin_lock_init(&pool->lock);
pool->vaddr = ptr;
- pool->page = page;
+ pool->pages = pages;
pool->bitmap = bitmap;
pool->nr_pages = nr_pages;
pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
(unsigned)pool->size / 1024);
return 0;
}
+
+ kfree(pages);
+no_pages:
kfree(bitmap);
no_bitmap:
pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
@@ -443,27 +494,45 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
if (pageno < pool->nr_pages) {
bitmap_set(pool->bitmap, pageno, count);
ptr = pool->vaddr + PAGE_SIZE * pageno;
- *ret_page = pool->page + pageno;
+ *ret_page = pool->pages[pageno];
+ } else {
+ pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n"
+ "Please increase it with coherent_pool= kernel parameter!\n",
+ (unsigned)pool->size / 1024);
}
spin_unlock_irqrestore(&pool->lock, flags);
return ptr;
}
+static bool __in_atomic_pool(void *start, size_t size)
+{
+ struct dma_pool *pool = &atomic_pool;
+ void *end = start + size;
+ void *pool_start = pool->vaddr;
+ void *pool_end = pool->vaddr + pool->size;
+
+ if (start < pool_start || start >= pool_end)
+ return false;
+
+ if (end <= pool_end)
+ return true;
+
+ WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n",
+ start, end - 1, pool_start, pool_end - 1);
+
+ return false;
+}
+
static int __free_from_pool(void *start, size_t size)
{
struct dma_pool *pool = &atomic_pool;
unsigned long pageno, count;
unsigned long flags;
- if (start < pool->vaddr || start > pool->vaddr + pool->size)
+ if (!__in_atomic_pool(start, size))
return 0;
- if (start + size > pool->vaddr + pool->size) {
- WARN(1, "freeing wrong coherent size from pool\n");
- return 0;
- }
-
pageno = (start - pool->vaddr) >> PAGE_SHIFT;
count = size >> PAGE_SHIFT;
@@ -538,7 +607,7 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
- gfp_t gfp, pgprot_t prot, const void *caller)
+ gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
{
u64 mask = get_coherent_dma_mask(dev);
struct page *page;
@@ -571,7 +640,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
*handle = DMA_ERROR_CODE;
size = PAGE_ALIGN(size);
- if (arch_is_coherent() || nommu())
+ if (is_coherent || nommu())
addr = __alloc_simple_buffer(dev, size, gfp, &page);
else if (gfp & GFP_ATOMIC)
addr = __alloc_from_pool(size, &page);
@@ -599,7 +668,20 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (dma_alloc_from_coherent(dev, size, handle, &memory))
return memory;
- return __dma_alloc(dev, size, handle, gfp, prot,
+ return __dma_alloc(dev, size, handle, gfp, prot, false,
+ __builtin_return_address(0));
+}
+
+static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+{
+ pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
+ void *memory;
+
+ if (dma_alloc_from_coherent(dev, size, handle, &memory))
+ return memory;
+
+ return __dma_alloc(dev, size, handle, gfp, prot, true,
__builtin_return_address(0));
}
@@ -636,8 +718,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
/*
* Free a buffer as defined by the above mapping.
*/
-void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t handle, struct dma_attrs *attrs)
+static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs,
+ bool is_coherent)
{
struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
@@ -646,7 +729,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
size = PAGE_ALIGN(size);
- if (arch_is_coherent() || nommu()) {
+ if (is_coherent || nommu()) {
__dma_free_buffer(page, size);
} else if (__free_from_pool(cpu_addr, size)) {
return;
@@ -662,6 +745,18 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
}
}
+void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs)
+{
+ __arm_dma_free(dev, size, cpu_addr, handle, attrs, false);
+}
+
+static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs)
+{
+ __arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
+}
+
int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t handle, size_t size,
struct dma_attrs *attrs)
@@ -964,11 +1059,12 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t
if (!pages[i])
goto error;
- if (order)
+ if (order) {
split_page(pages[i], order);
- j = 1 << order;
- while (--j)
- pages[i + j] = pages[i] + j;
+ j = 1 << order;
+ while (--j)
+ pages[i + j] = pages[i] + j;
+ }
__dma_clear_buffer(pages[i], PAGE_SIZE << order);
i += 1 << order;
@@ -1090,10 +1186,22 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
return 0;
}
+static struct page **__atomic_get_pages(void *addr)
+{
+ struct dma_pool *pool = &atomic_pool;
+ struct page **pages = pool->pages;
+ int offs = (addr - pool->vaddr) >> PAGE_SHIFT;
+
+ return pages + offs;
+}
+
static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
{
struct vm_struct *area;
+ if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
+ return __atomic_get_pages(cpu_addr);
+
if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
return cpu_addr;
@@ -1103,6 +1211,34 @@ static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
return NULL;
}
+static void *__iommu_alloc_atomic(struct device *dev, size_t size,
+ dma_addr_t *handle)
+{
+ struct page *page;
+ void *addr;
+
+ addr = __alloc_from_pool(size, &page);
+ if (!addr)
+ return NULL;
+
+ *handle = __iommu_create_mapping(dev, &page, size);
+ if (*handle == DMA_ERROR_CODE)
+ goto err_mapping;
+
+ return addr;
+
+err_mapping:
+ __free_from_pool(addr, size);
+ return NULL;
+}
+
+static void __iommu_free_atomic(struct device *dev, struct page **pages,
+ dma_addr_t handle, size_t size)
+{
+ __iommu_remove_mapping(dev, handle, size);
+ __free_from_pool(page_address(pages[0]), size);
+}
+
static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
{
@@ -1113,6 +1249,9 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
*handle = DMA_ERROR_CODE;
size = PAGE_ALIGN(size);
+ if (gfp & GFP_ATOMIC)
+ return __iommu_alloc_atomic(dev, size, handle);
+
pages = __iommu_alloc_buffer(dev, size, gfp);
if (!pages)
return NULL;
@@ -1179,6 +1318,11 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;
}
+ if (__in_atomic_pool(cpu_addr, size)) {
+ __iommu_free_atomic(dev, pages, handle, size);
+ return;
+ }
+
if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
unmap_kernel_range((unsigned long)cpu_addr, size);
vunmap(cpu_addr);
@@ -1207,7 +1351,8 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
*/
static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
size_t size, dma_addr_t *handle,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs,
+ bool is_coherent)
{
struct dma_iommu_mapping *mapping = dev->archdata.mapping;
dma_addr_t iova, iova_base;
@@ -1226,8 +1371,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
phys_addr_t phys = page_to_phys(sg_page(s));
unsigned int len = PAGE_ALIGN(s->offset + s->length);
- if (!arch_is_coherent() &&
- !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ if (!is_coherent &&
+ !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
ret = iommu_map(mapping->domain, iova, phys, len, 0);
@@ -1245,20 +1390,9 @@ fail:
return ret;
}
-/**
- * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
- * @dev: valid struct device pointer
- * @sg: list of buffers
- * @nents: number of buffers to map
- * @dir: DMA transfer direction
- *
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * The scatter gather list elements are merged together (if possible) and
- * tagged with the appropriate dma address and length. They are obtained via
- * sg_dma_{address,length}.
- */
-int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir, struct dma_attrs *attrs,
+ bool is_coherent)
{
struct scatterlist *s = sg, *dma = sg, *start = sg;
int i, count = 0;
@@ -1274,7 +1408,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
if (__map_sg_chunk(dev, start, size, &dma->dma_address,
- dir, attrs) < 0)
+ dir, attrs, is_coherent) < 0)
goto bad_mapping;
dma->dma_address += offset;
@@ -1287,7 +1421,8 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
}
size += s->length;
}
- if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs) < 0)
+ if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs,
+ is_coherent) < 0)
goto bad_mapping;
dma->dma_address += offset;
@@ -1302,17 +1437,44 @@ bad_mapping:
}
/**
- * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA
* @dev: valid struct device pointer
* @sg: list of buffers
- * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
- * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
*
- * Unmap a set of streaming mode DMA translations. Again, CPU access
- * rules concerning calls here are the same as for dma_unmap_single().
+ * Map a set of i/o coherent buffers described by scatterlist in streaming
+ * mode for DMA. The scatter gather list elements are merged together (if
+ * possible) and tagged with the appropriate dma address and length. They are
+ * obtained via sg_dma_{address,length}.
*/
-void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ return __iommu_map_sg(dev, sg, nents, dir, attrs, true);
+}
+
+/**
+ * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * The scatter gather list elements are merged together (if possible) and
+ * tagged with the appropriate dma address and length. They are obtained via
+ * sg_dma_{address,length}.
+ */
+int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ return __iommu_map_sg(dev, sg, nents, dir, attrs, false);
+}
+
+static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, struct dma_attrs *attrs,
+ bool is_coherent)
{
struct scatterlist *s;
int i;
@@ -1321,7 +1483,7 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
if (sg_dma_len(s))
__iommu_remove_mapping(dev, sg_dma_address(s),
sg_dma_len(s));
- if (!arch_is_coherent() &&
+ if (!is_coherent &&
!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_dev_to_cpu(sg_page(s), s->offset,
s->length, dir);
@@ -1329,6 +1491,38 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
}
/**
+ * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Unmap a set of streaming mode DMA translations. Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ __iommu_unmap_sg(dev, sg, nents, dir, attrs, true);
+}
+
+/**
+ * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Unmap a set of streaming mode DMA translations. Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ __iommu_unmap_sg(dev, sg, nents, dir, attrs, false);
+}
+
+/**
* arm_iommu_sync_sg_for_cpu
* @dev: valid struct device pointer
* @sg: list of buffers
@@ -1342,8 +1536,7 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int i;
for_each_sg(sg, s, nents, i)
- if (!arch_is_coherent())
- __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
+ __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
}
@@ -1361,22 +1554,21 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int i;
for_each_sg(sg, s, nents, i)
- if (!arch_is_coherent())
- __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+ __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
}
/**
- * arm_iommu_map_page
+ * arm_coherent_iommu_map_page
* @dev: valid struct device pointer
* @page: page that buffer resides in
* @offset: offset into page for start of buffer
* @size: size of buffer to map
* @dir: DMA transfer direction
*
- * IOMMU aware version of arm_dma_map_page()
+ * Coherent IOMMU aware version of arm_dma_map_page()
*/
-static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
+static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
@@ -1384,9 +1576,6 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
dma_addr_t dma_addr;
int ret, len = PAGE_ALIGN(size + offset);
- if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
- __dma_page_cpu_to_dev(page, offset, size, dir);
-
dma_addr = __alloc_iova(mapping, len);
if (dma_addr == DMA_ERROR_CODE)
return dma_addr;
@@ -1402,6 +1591,51 @@ fail:
}
/**
+ * arm_iommu_map_page
+ * @dev: valid struct device pointer
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * IOMMU aware version of arm_dma_map_page()
+ */
+static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ __dma_page_cpu_to_dev(page, offset, size, dir);
+
+ return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs);
+}
+
+/**
+ * arm_coherent_iommu_unmap_page
+ * @dev: valid struct device pointer
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_page)
+ * @dir: DMA transfer direction (same as passed to dma_map_page)
+ *
+ * Coherent IOMMU aware version of arm_dma_unmap_page()
+ */
+static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+ dma_addr_t iova = handle & PAGE_MASK;
+ int offset = handle & ~PAGE_MASK;
+ int len = PAGE_ALIGN(size + offset);
+
+ if (!iova)
+ return;
+
+ iommu_unmap(mapping->domain, iova, len);
+ __free_iova(mapping, iova, len);
+}
+
+/**
* arm_iommu_unmap_page
* @dev: valid struct device pointer
* @handle: DMA address of buffer
@@ -1423,7 +1657,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
if (!iova)
return;
- if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_dev_to_cpu(page, offset, size, dir);
iommu_unmap(mapping->domain, iova, len);
@@ -1441,8 +1675,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev,
if (!iova)
return;
- if (!arch_is_coherent())
- __dma_page_dev_to_cpu(page, offset, size, dir);
+ __dma_page_dev_to_cpu(page, offset, size, dir);
}
static void arm_iommu_sync_single_for_device(struct device *dev,
@@ -1476,6 +1709,19 @@ struct dma_map_ops iommu_ops = {
.sync_sg_for_device = arm_iommu_sync_sg_for_device,
};
+struct dma_map_ops iommu_coherent_ops = {
+ .alloc = arm_iommu_alloc_attrs,
+ .free = arm_iommu_free_attrs,
+ .mmap = arm_iommu_mmap_attrs,
+ .get_sgtable = arm_iommu_get_sgtable,
+
+ .map_page = arm_coherent_iommu_map_page,
+ .unmap_page = arm_coherent_iommu_unmap_page,
+
+ .map_sg = arm_coherent_iommu_map_sg,
+ .unmap_sg = arm_coherent_iommu_unmap_sg,
+};
+
/**
* arm_iommu_create_mapping
* @bus: pointer to the bus holding the client device (for IOMMU calls)
@@ -1569,7 +1815,7 @@ int arm_iommu_attach_device(struct device *dev,
dev->archdata.mapping = mapping;
set_dma_ops(dev, &iommu_ops);
- pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev));
+ pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
return 0;
}
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 7599e2625c7d..2a5907b5c8d2 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *mpnt;
- struct prio_tree_iter iter;
unsigned long offset;
pgoff_t pgoff;
int aliases = 0;
@@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
* cache coherency.
*/
flush_dcache_mmap_lock(mapping);
- vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
/*
* If this VMA is not in our MM, we can ignore it.
* Note that we intentionally mask out the VMA
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c3bd83450227..5dbf13f954f6 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -336,6 +336,7 @@ retry:
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 40ca11ed6e5f..1c8f7f564175 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
{
struct mm_struct *mm = current->active_mm;
struct vm_area_struct *mpnt;
- struct prio_tree_iter iter;
pgoff_t pgoff;
/*
@@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
flush_dcache_mmap_lock(mapping);
- vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
unsigned long offset;
/*
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9aec41fa80ae..ad722f1208a5 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -324,7 +324,7 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
BUG_ON(!arm_memblock_steal_permitted);
- phys = memblock_alloc(size, align);
+ phys = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
memblock_free(phys, size);
memblock_remove(phys, size);
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 566750fa57d4..5dcc2fd46c46 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -36,6 +36,7 @@
#include <asm/system_info.h>
#include <asm/mach/map.h>
+#include <asm/mach/pci.h>
#include "mm.h"
int ioremap_page(unsigned long virt, unsigned long phys,
@@ -247,6 +248,7 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
if (!area)
return NULL;
addr = (unsigned long)area->addr;
+ area->phys_addr = __pfn_to_phys(pfn);
#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
if (DOMAIN_IO == 0 &&
@@ -383,3 +385,16 @@ void __arm_iounmap(volatile void __iomem *io_addr)
arch_iounmap(io_addr);
}
EXPORT_SYMBOL(__arm_iounmap);
+
+#ifdef CONFIG_PCI
+int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
+{
+ BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT);
+
+ return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
+ PCI_IO_VIRT_BASE + offset + SZ_64K,
+ phys_addr,
+ __pgprot(get_mem_type(MT_DEVICE)->prot_pte));
+}
+EXPORT_SYMBOL_GPL(pci_ioremap_io);
+#endif
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 6776160618ef..a8ee92da3544 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -55,6 +55,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
/* permanent static mappings from iotable_init() */
#define VM_ARM_STATIC_MAPPING 0x40000000
+/* empty mapping */
+#define VM_ARM_EMPTY_MAPPING 0x20000000
+
/* mapping type (attributes) for permanent static mappings */
#define VM_ARM_MTYPE(mt) ((mt) << 20)
#define VM_ARM_MTYPE_MASK (0x1f << 20)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4c2d0451e84a..941dfb9e9a78 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -31,6 +31,7 @@
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
+#include <asm/mach/pci.h>
#include "mm.h"
@@ -216,7 +217,7 @@ static struct mem_type mem_types[] = {
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
.domain = DOMAIN_IO,
- },
+ },
[MT_DEVICE_WC] = { /* ioremap_wc */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
.prot_l1 = PMD_TYPE_TABLE,
@@ -422,17 +423,6 @@ static void __init build_mem_type_table(void)
vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
/*
- * Enable CPU-specific coherency if supported.
- * (Only available on XSC3 at the moment.)
- */
- if (arch_is_coherent() && cpu_is_xsc3()) {
- mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
- mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
- mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
- mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
- mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
- }
- /*
* ARMv6 and above have extended page tables.
*/
if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
@@ -777,14 +767,27 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
create_mapping(md);
vm->addr = (void *)(md->virtual & PAGE_MASK);
vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
- vm->phys_addr = __pfn_to_phys(md->pfn);
- vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
+ vm->phys_addr = __pfn_to_phys(md->pfn);
+ vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
vm->flags |= VM_ARM_MTYPE(md->type);
vm->caller = iotable_init;
vm_area_add_early(vm++);
}
}
+void __init vm_reserve_area_early(unsigned long addr, unsigned long size,
+ void *caller)
+{
+ struct vm_struct *vm;
+
+ vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm));
+ vm->addr = (void *)addr;
+ vm->size = size;
+ vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING;
+ vm->caller = caller;
+ vm_area_add_early(vm);
+}
+
#ifndef CONFIG_ARM_LPAE
/*
@@ -802,14 +805,7 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
static void __init pmd_empty_section_gap(unsigned long addr)
{
- struct vm_struct *vm;
-
- vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm));
- vm->addr = (void *)addr;
- vm->size = SECTION_SIZE;
- vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
- vm->caller = pmd_empty_section_gap;
- vm_area_add_early(vm);
+ vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap);
}
static void __init fill_pmd_gaps(void)
@@ -820,7 +816,7 @@ static void __init fill_pmd_gaps(void)
/* we're still single threaded hence no lock needed here */
for (vm = vmlist; vm; vm = vm->next) {
- if (!(vm->flags & VM_ARM_STATIC_MAPPING))
+ if (!(vm->flags & (VM_ARM_STATIC_MAPPING | VM_ARM_EMPTY_MAPPING)))
continue;
addr = (unsigned long)vm->addr;
if (addr < next)
@@ -858,6 +854,28 @@ static void __init fill_pmd_gaps(void)
#define fill_pmd_gaps() do { } while (0)
#endif
+#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H)
+static void __init pci_reserve_io(void)
+{
+ struct vm_struct *vm;
+ unsigned long addr;
+
+ /* we're still single threaded hence no lock needed here */
+ for (vm = vmlist; vm; vm = vm->next) {
+ if (!(vm->flags & VM_ARM_STATIC_MAPPING))
+ continue;
+ addr = (unsigned long)vm->addr;
+ addr &= ~(SZ_2M - 1);
+ if (addr == PCI_IO_VIRT_BASE)
+ return;
+
+ }
+ vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io);
+}
+#else
+#define pci_reserve_io() do { } while (0)
+#endif
+
static void * __initdata vmalloc_min =
(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
@@ -961,8 +979,8 @@ void __init sanity_check_meminfo(void)
* Check whether this memory bank would partially overlap
* the vmalloc area.
*/
- if (__va(bank->start + bank->size) > vmalloc_min ||
- __va(bank->start + bank->size) < __va(bank->start)) {
+ if (__va(bank->start + bank->size - 1) >= vmalloc_min ||
+ __va(bank->start + bank->size - 1) <= __va(bank->start)) {
unsigned long newsize = vmalloc_min - __va(bank->start);
printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
"to -%.8llx (vmalloc region overlap).\n",
@@ -1141,6 +1159,9 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
mdesc->map_io();
fill_pmd_gaps();
+ /* Reserve fixed i/o space in VMALLOC region */
+ pci_reserve_io();
+
/*
* Finally flush the caches and tlb to ensure that we're in a
* consistent state wrt the writebuffer. This also ensures that
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 0650bb87c1e3..2bb61e703d6c 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -368,6 +368,9 @@ ENTRY(arm1020_dma_unmap_area)
mov pc, lr
ENDPROC(arm1020_dma_unmap_area)
+ .globl arm1020_flush_kern_cache_louis
+ .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm1020
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 4188478325a6..8f96aa40f510 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -354,6 +354,9 @@ ENTRY(arm1020e_dma_unmap_area)
mov pc, lr
ENDPROC(arm1020e_dma_unmap_area)
+ .globl arm1020e_flush_kern_cache_louis
+ .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm1020e
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 33c68824bff0..8ebe4a469a22 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -343,6 +343,9 @@ ENTRY(arm1022_dma_unmap_area)
mov pc, lr
ENDPROC(arm1022_dma_unmap_area)
+ .globl arm1022_flush_kern_cache_louis
+ .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm1022
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index fbc1d5fc24dc..093fc7e520c3 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -337,6 +337,9 @@ ENTRY(arm1026_dma_unmap_area)
mov pc, lr
ENDPROC(arm1026_dma_unmap_area)
+ .globl arm1026_flush_kern_cache_louis
+ .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm1026
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 1a8c138eb897..2c3b9421ab5e 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -319,6 +319,9 @@ ENTRY(arm920_dma_unmap_area)
mov pc, lr
ENDPROC(arm920_dma_unmap_area)
+ .globl arm920_flush_kern_cache_louis
+ .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm920
#endif
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 4c44d7e1c3ca..4464c49d7449 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -321,6 +321,9 @@ ENTRY(arm922_dma_unmap_area)
mov pc, lr
ENDPROC(arm922_dma_unmap_area)
+ .globl arm922_flush_kern_cache_louis
+ .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm922
#endif
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ec5b1180994f..281eb9b9c1d6 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -376,6 +376,9 @@ ENTRY(arm925_dma_unmap_area)
mov pc, lr
ENDPROC(arm925_dma_unmap_area)
+ .globl arm925_flush_kern_cache_louis
+ .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm925
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index c31e62c606c0..f1803f7e2972 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -339,6 +339,9 @@ ENTRY(arm926_dma_unmap_area)
mov pc, lr
ENDPROC(arm926_dma_unmap_area)
+ .globl arm926_flush_kern_cache_louis
+ .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm926
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index a613a7dd7146..8da189d4a402 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -267,6 +267,9 @@ ENTRY(arm940_dma_unmap_area)
mov pc, lr
ENDPROC(arm940_dma_unmap_area)
+ .globl arm940_flush_kern_cache_louis
+ .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm940
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 9f4f2999fdd0..f666cf34075a 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -310,6 +310,9 @@ ENTRY(arm946_dma_unmap_area)
mov pc, lr
ENDPROC(arm946_dma_unmap_area)
+ .globl arm946_flush_kern_cache_louis
+ .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions arm946
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 23a8e4c7f2bd..4106b09e0c29 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -415,6 +415,9 @@ ENTRY(feroceon_dma_unmap_area)
mov pc, lr
ENDPROC(feroceon_dma_unmap_area)
+ .globl feroceon_flush_kern_cache_louis
+ .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions feroceon
@@ -431,6 +434,7 @@ ENDPROC(feroceon_dma_unmap_area)
range_alias flush_icache_all
range_alias flush_user_cache_all
range_alias flush_kern_cache_all
+ range_alias flush_kern_cache_louis
range_alias flush_user_cache_range
range_alias coherent_kern_range
range_alias coherent_user_range
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 2d8ff3ad86d3..b29a2265af01 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -299,6 +299,7 @@ ENTRY(\name\()_processor_functions)
ENTRY(\name\()_cache_fns)
.long \name\()_flush_icache_all
.long \name\()_flush_kern_cache_all
+ .long \name\()_flush_kern_cache_louis
.long \name\()_flush_user_cache_all
.long \name\()_flush_user_cache_range
.long \name\()_coherent_kern_range
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index fbb2124a547d..82f9cdc751d6 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -303,6 +303,9 @@ ENTRY(mohawk_dma_unmap_area)
mov pc, lr
ENDPROC(mohawk_dma_unmap_area)
+ .globl mohawk_flush_kern_cache_louis
+ .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions mohawk
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index c2e2b66f72b5..846d279f3176 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -172,7 +172,7 @@ __v7_ca15mp_setup:
__v7_setup:
adr r12, __v7_setup_stack @ the local stack
stmia r12, {r0-r5, r7, r9, r11, lr}
- bl v7_flush_dcache_all
+ bl v7_flush_dcache_louis
ldmia r12, {r0-r5, r7, r9, r11, lr}
mrc p15, 0, r0, c0, c0, 0 @ read main ID register
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index b0d57869da2d..eb93d6487f35 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -337,6 +337,9 @@ ENTRY(xsc3_dma_unmap_area)
mov pc, lr
ENDPROC(xsc3_dma_unmap_area)
+ .globl xsc3_flush_kern_cache_louis
+ .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions xsc3
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 4ffebaa595ee..25510361aa18 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -410,6 +410,9 @@ ENTRY(xscale_dma_unmap_area)
mov pc, lr
ENDPROC(xscale_dma_unmap_area)
+ .globl xscale_flush_kern_cache_louis
+ .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all
+
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions xscale
@@ -439,6 +442,7 @@ ENDPROC(xscale_dma_unmap_area)
a0_alias flush_icache_all
a0_alias flush_user_cache_all
a0_alias flush_kern_cache_all
+ a0_alias flush_kern_cache_louis
a0_alias flush_user_cache_range
a0_alias coherent_kern_range
a0_alias coherent_user_range