Diffstat (limited to 'arch/arm/mm')
36 files changed, 636 insertions, 187 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 101b9681c08c..94186b6c685f 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -10,8 +10,8 @@ config CPU_ARM7TDMI
 	depends on !MMU
 	select CPU_32v4T
 	select CPU_ABRT_LV4T
-	select CPU_PABRT_LEGACY
 	select CPU_CACHE_V4
+	select CPU_PABRT_LEGACY
 	help
 	  A 32-bit RISC microprocessor based on the ARM7 processor core
 	  which has no memory control unit and cache.
@@ -24,11 +24,11 @@ config CPU_ARM720T
 	bool "Support ARM720T processor" if ARCH_INTEGRATOR
 	select CPU_32v4T
 	select CPU_ABRT_LV4T
-	select CPU_PABRT_LEGACY
 	select CPU_CACHE_V4
 	select CPU_CACHE_VIVT
-	select CPU_CP15_MMU
 	select CPU_COPY_V4WT if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
 	select CPU_TLB_V4WT if MMU
 	help
 	  A 32-bit RISC processor with 8kByte Cache, Write Buffer and
@@ -43,9 +43,9 @@ config CPU_ARM740T
 	depends on !MMU
 	select CPU_32v4T
 	select CPU_ABRT_LV4T
-	select CPU_PABRT_LEGACY
 	select CPU_CACHE_V3	# although the core is v4t
 	select CPU_CP15_MPU
+	select CPU_PABRT_LEGACY
 	help
 	  A 32-bit RISC processor with 8KB cache or 4KB variants,
 	  write buffer and MPU(Protection Unit) built around
@@ -60,8 +60,8 @@ config CPU_ARM9TDMI
 	depends on !MMU
 	select CPU_32v4T
 	select CPU_ABRT_NOMMU
-	select CPU_PABRT_LEGACY
 	select CPU_CACHE_V4
+	select CPU_PABRT_LEGACY
 	help
 	  A 32-bit RISC microprocessor based on the ARM9 processor core
 	  which has no memory control unit and cache.
@@ -74,11 +74,11 @@ config CPU_ARM920T
 	bool "Support ARM920T processor" if ARCH_INTEGRATOR
 	select CPU_32v4T
 	select CPU_ABRT_EV4T
-	select CPU_PABRT_LEGACY
 	select CPU_CACHE_V4WT
 	select CPU_CACHE_VIVT
-	select CPU_CP15_MMU
 	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
 	select CPU_TLB_V4WBI if MMU
 	help
 	  The ARM920T is licensed to be produced by numerous vendors,
@@ -92,11 +92,11 @@ config CPU_ARM922T
 	bool "Support ARM922T processor" if ARCH_INTEGRATOR
 	select CPU_32v4T
 	select CPU_ABRT_EV4T
-	select CPU_PABRT_LEGACY
 	select CPU_CACHE_V4WT
 	select CPU_CACHE_VIVT
-	select CPU_CP15_MMU
 	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
 	select CPU_TLB_V4WBI if MMU
 	help
 	  The ARM922T is a version of the ARM920T, but with smaller
@@ -111,11 +111,11 @@ config CPU_ARM925T
 	bool "Support ARM925T processor" if ARCH_OMAP1
 	select CPU_32v4T
 	select CPU_ABRT_EV4T
-	select CPU_PABRT_LEGACY
 	select CPU_CACHE_V4WT
 	select CPU_CACHE_VIVT
-	select CPU_CP15_MMU
 	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
 	select CPU_TLB_V4WBI if MMU
 	help
 	  The ARM925T is a mix between the ARM920T and ARM926T, but with
@@ -130,10 +130,10 @@ config CPU_ARM926T
 	bool "Support ARM926T processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB
 	select CPU_32v5
 	select CPU_ABRT_EV5TJ
-	select CPU_PABRT_LEGACY
 	select CPU_CACHE_VIVT
-	select CPU_CP15_MMU
 	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
 	select CPU_TLB_V4WBI if MMU
 	help
 	  This is a variant of the ARM920.
It has slightly different @@ -148,11 +148,11 @@ config CPU_FA526 bool select CPU_32v4 select CPU_ABRT_EV4 - select CPU_PABRT_LEGACY - select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_CACHE_FA + select CPU_CACHE_VIVT select CPU_COPY_FA if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_FA if MMU help The FA526 is a version of the ARMv4 compatible processor with @@ -167,9 +167,9 @@ config CPU_ARM940T depends on !MMU select CPU_32v4T select CPU_ABRT_NOMMU - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT select CPU_CP15_MPU + select CPU_PABRT_LEGACY help ARM940T is a member of the ARM9TDMI family of general- purpose microprocessors with MPU and separate 4KB @@ -185,9 +185,9 @@ config CPU_ARM946E depends on !MMU select CPU_32v5 select CPU_ABRT_NOMMU - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT select CPU_CP15_MPU + select CPU_PABRT_LEGACY help ARM946E-S is a member of the ARM9E-S family of high- performance, 32-bit system-on-chip processor solutions. @@ -201,11 +201,11 @@ config CPU_ARM1020 bool "Support ARM1020T (rev 0) processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM1020 is the 32K cached version of the ARM10 processor, @@ -217,25 +217,25 @@ config CPU_ARM1020 # ARM1020E - needs validating config CPU_ARM1020E bool "Support ARM1020E processor" if ARCH_INTEGRATOR + depends on n select CPU_32v5 select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU - depends on n # ARM1022E config CPU_ARM1022 bool "Support ARM1022E processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU # can probably do better + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM1022E is an implementation of the ARMv5TE architecture @@ -250,10 +250,10 @@ config CPU_ARM1026 bool "Support ARM1026EJ-S processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10 - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU # can probably do better + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture @@ -268,11 +268,11 @@ config CPU_SA110 select CPU_32v3 if ARCH_RPC select CPU_32v4 if !ARCH_RPC select CPU_ABRT_EV4 - select CPU_PABRT_LEGACY select CPU_CACHE_V4WB select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WB if MMU help The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and @@ -288,10 +288,10 @@ config CPU_SA1100 bool select CPU_32v4 select CPU_ABRT_EV4 - select CPU_PABRT_LEGACY select CPU_CACHE_V4WB select CPU_CACHE_VIVT select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WB if MMU # XScale @@ -299,9 +299,9 @@ config CPU_XSCALE bool select CPU_32v5 select CPU_ABRT_EV5T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU # XScale Core Version 3 @@ -309,9 +309,9 @@ config CPU_XSC3 bool select CPU_32v5 select CPU_ABRT_EV5T - select 
CPU_PABRT_LEGACY select CPU_CACHE_VIVT select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU select IO_36 @@ -320,21 +320,21 @@ config CPU_MOHAWK bool select CPU_32v5 select CPU_ABRT_EV5T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU - select CPU_COPY_V4WB if MMU # Feroceon config CPU_FEROCEON bool select CPU_32v5 select CPU_ABRT_EV5T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_FEROCEON if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_FEROCEON if MMU config CPU_FEROCEON_OLD_ID @@ -349,20 +349,20 @@ config CPU_FEROCEON_OLD_ID # Marvell PJ4 config CPU_PJ4 bool - select CPU_V7 select ARM_THUMBEE + select CPU_V7 # ARMv6 config CPU_V6 bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX select CPU_32v6 select CPU_ABRT_EV6 - select CPU_PABRT_V6 select CPU_CACHE_V6 select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU select CPU_CP15_MMU select CPU_HAS_ASID if MMU - select CPU_COPY_V6 if MMU + select CPU_PABRT_V6 select CPU_TLB_V6 if MMU # ARMv6k @@ -371,12 +371,12 @@ config CPU_V6K select CPU_32v6 select CPU_32v6K select CPU_ABRT_EV6 - select CPU_PABRT_V6 select CPU_CACHE_V6 select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU select CPU_CP15_MMU select CPU_HAS_ASID if MMU - select CPU_COPY_V6 if MMU + select CPU_PABRT_V6 select CPU_TLB_V6 if MMU # ARMv7 @@ -385,44 +385,44 @@ config CPU_V7 select CPU_32v6K select CPU_32v7 select CPU_ABRT_EV7 - select CPU_PABRT_V7 select CPU_CACHE_V7 select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU select CPU_CP15_MMU select CPU_HAS_ASID if MMU - select CPU_COPY_V6 if MMU + select CPU_PABRT_V7 select CPU_TLB_V7 if MMU # Figure out what processor architecture version we should be using. # This defines the compiler instruction set which depends on the machine type. config CPU_32v3 bool - select TLS_REG_EMUL if SMP || !MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select CPU_USE_DOMAINS if MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select TLS_REG_EMUL if SMP || !MMU config CPU_32v4 bool - select TLS_REG_EMUL if SMP || !MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select CPU_USE_DOMAINS if MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select TLS_REG_EMUL if SMP || !MMU config CPU_32v4T bool - select TLS_REG_EMUL if SMP || !MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select CPU_USE_DOMAINS if MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select TLS_REG_EMUL if SMP || !MMU config CPU_32v5 bool - select TLS_REG_EMUL if SMP || !MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select CPU_USE_DOMAINS if MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select TLS_REG_EMUL if SMP || !MMU config CPU_32v6 bool - select TLS_REG_EMUL if !CPU_32v6K && !MMU select CPU_USE_DOMAINS if CPU_V6 && MMU + select TLS_REG_EMUL if !CPU_32v6K && !MMU config CPU_32v6K bool @@ -624,11 +624,28 @@ config ARM_THUMBEE Say Y here if you have a CPU with the ThumbEE extension and code to make use of it. Say N for code that can run on CPUs without ThumbEE. +config ARM_VIRT_EXT + bool "Native support for the ARM Virtualization Extensions" + depends on MMU && CPU_V7 + help + Enable the kernel to make use of the ARM Virtualization + Extensions to install hypervisors without run-time firmware + assistance. + + A compliant bootloader is required in order to make maximum + use of this feature. Refer to Documentation/arm/Booting for + details. 
+ + It is safe to enable this option even if the kernel may not be + booted in HYP mode, may not have support for the + virtualization extensions, or may be booted with a + non-compliant bootloader. + config SWP_EMULATE bool "Emulate SWP/SWPB instructions" depends on !CPU_USE_DOMAINS && CPU_V7 - select HAVE_PROC_CPU if PROC_FS default y if SMP + select HAVE_PROC_CPU if PROC_FS help ARMv6 architecture deprecates use of the SWP/SWPB instructions. ARMv7 multiprocessing extensions introduce the ability to disable diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 9107231aacc5..b9f60ebe3bc4 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -699,7 +699,6 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, unsigned long instr = *pinstr; u16 tinst1 = (instr >> 16) & 0xffff; u16 tinst2 = instr & 0xffff; - poffset->un = 0; switch (tinst1 & 0xffe0) { /* A6.3.5 Load/Store multiple */ @@ -854,9 +853,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) break; case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */ - if (thumb2_32b) + if (thumb2_32b) { + offset.un = 0; handler = do_alignment_t32_to_handler(&instr, regs, &offset); - else + } else handler = do_alignment_ldmstm; break; diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index 072016371093..e505befe51b5 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -240,6 +240,9 @@ ENTRY(fa_dma_unmap_area) mov pc, lr ENDPROC(fa_dma_unmap_area) + .globl fa_flush_kern_cache_louis + .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 2a8e380501e8..8a97e6443c62 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -368,14 +368,18 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) /* l2x0 controller is disabled */ writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); - l2x0_saved_regs.aux_ctrl = aux; - l2x0_inv_all(); /* enable L2X0 */ writel_relaxed(1, l2x0_base + L2X0_CTRL); } + /* Re-read it in case some bits are reserved. */ + aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + + /* Save the value for resuming. */ + l2x0_saved_regs.aux_ctrl = aux; + outer_cache.inv_range = l2x0_inv_range; outer_cache.clean_range = l2x0_clean_range; outer_cache.flush_range = l2x0_flush_range; @@ -554,7 +558,7 @@ static const struct of_device_id l2x0_ids[] __initconst = { int __init l2x0_of_init(u32 aux_val, u32 aux_mask) { struct device_node *np; - struct l2x0_of_data *data; + const struct l2x0_of_data *data; struct resource res; np = of_find_matching_node(NULL, l2x0_ids); diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c index 23a7643e9a87..1be0f4e5e6eb 100644 --- a/arch/arm/mm/cache-tauros2.c +++ b/arch/arm/mm/cache-tauros2.c @@ -15,8 +15,11 @@ */ #include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h> #include <asm/cacheflush.h> #include <asm/cp15.h> +#include <asm/cputype.h> #include <asm/hardware/cache-tauros2.h> @@ -144,25 +147,8 @@ static inline void __init write_extra_features(u32 u) __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); } -static void __init disable_l2_prefetch(void) -{ - u32 u; - - /* - * Read the CPU Extra Features register and verify that the - * Disable L2 Prefetch bit is set. 
- */ - u = read_extra_features(); - if (!(u & 0x01000000)) { - printk(KERN_INFO "Tauros2: Disabling L2 prefetch.\n"); - write_extra_features(u | 0x01000000); - } -} - static inline int __init cpuid_scheme(void) { - extern int processor_id; - return !!((processor_id & 0x000f0000) == 0x000f0000); } @@ -189,12 +175,36 @@ static inline void __init write_actlr(u32 actlr) __asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr)); } -void __init tauros2_init(void) +static void enable_extra_feature(unsigned int features) +{ + u32 u; + + u = read_extra_features(); + + if (features & CACHE_TAUROS2_PREFETCH_ON) + u &= ~0x01000000; + else + u |= 0x01000000; + printk(KERN_INFO "Tauros2: %s L2 prefetch.\n", + (features & CACHE_TAUROS2_PREFETCH_ON) + ? "Enabling" : "Disabling"); + + if (features & CACHE_TAUROS2_LINEFILL_BURST8) + u |= 0x00100000; + else + u &= ~0x00100000; + printk(KERN_INFO "Tauros2: %s line fill burt8.\n", + (features & CACHE_TAUROS2_LINEFILL_BURST8) + ? "Enabling" : "Disabling"); + + write_extra_features(u); +} + +static void __init tauros2_internal_init(unsigned int features) { - extern int processor_id; - char *mode; + char *mode = NULL; - disable_l2_prefetch(); + enable_extra_feature(features); #ifdef CONFIG_CPU_32v5 if ((processor_id & 0xff0f0000) == 0x56050000) { @@ -286,3 +296,34 @@ void __init tauros2_init(void) printk(KERN_INFO "Tauros2: L2 cache support initialised " "in %s mode.\n", mode); } + +#ifdef CONFIG_OF +static const struct of_device_id tauros2_ids[] __initconst = { + { .compatible = "marvell,tauros2-cache"}, + {} +}; +#endif + +void __init tauros2_init(unsigned int features) +{ +#ifdef CONFIG_OF + struct device_node *node; + int ret; + unsigned int f; + + node = of_find_matching_node(NULL, tauros2_ids); + if (!node) { + pr_info("Not found marvell,tauros2-cache, disable it\n"); + return; + } + + ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); + if (ret) { + pr_info("Not found marvell,tauros-cache-features property, " + "disable extra features\n"); + features = 0; + } else + features = f; +#endif + tauros2_internal_init(features); +} diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S index 52e35f32eefb..8a3fadece8d3 100644 --- a/arch/arm/mm/cache-v3.S +++ b/arch/arm/mm/cache-v3.S @@ -128,6 +128,9 @@ ENTRY(v3_dma_map_area) ENDPROC(v3_dma_unmap_area) ENDPROC(v3_dma_map_area) + .globl v3_flush_kern_cache_louis + .equ v3_flush_kern_cache_louis, v3_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 022135d2b7e4..43e5d77be677 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -140,6 +140,9 @@ ENTRY(v4_dma_map_area) ENDPROC(v4_dma_unmap_area) ENDPROC(v4_dma_map_area) + .globl v4_flush_kern_cache_louis + .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index 8f1eeae340c8..cd4945321407 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -251,6 +251,9 @@ ENTRY(v4wb_dma_unmap_area) mov pc, lr ENDPROC(v4wb_dma_unmap_area) + .globl v4wb_flush_kern_cache_louis + .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index b34a5f908a82..11e5e5838bc5 100644 --- a/arch/arm/mm/cache-v4wt.S 
+++ b/arch/arm/mm/cache-v4wt.S @@ -196,6 +196,9 @@ ENTRY(v4wt_dma_map_area) ENDPROC(v4wt_dma_unmap_area) ENDPROC(v4wt_dma_map_area) + .globl v4wt_flush_kern_cache_louis + .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 4b10760c56d6..d8fd4d4bd3d4 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -326,6 +326,9 @@ ENTRY(v6_dma_unmap_area) mov pc, lr ENDPROC(v6_dma_unmap_area) + .globl v6_flush_kern_cache_louis + .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 39e3fb3db801..cd956647c21a 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -33,6 +33,24 @@ ENTRY(v7_flush_icache_all) mov pc, lr ENDPROC(v7_flush_icache_all) + /* + * v7_flush_dcache_louis() + * + * Flush the D-cache up to the Level of Unification Inner Shareable + * + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + */ + +ENTRY(v7_flush_dcache_louis) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr + ands r3, r0, #0xe00000 @ extract LoUIS from clidr + mov r3, r3, lsr #20 @ r3 = LoUIS * 2 + moveq pc, lr @ return if level == 0 + mov r10, #0 @ r10 (starting level) = 0 + b flush_levels @ start flushing cache levels +ENDPROC(v7_flush_dcache_louis) + /* * v7_flush_dcache_all() * @@ -49,7 +67,7 @@ ENTRY(v7_flush_dcache_all) mov r3, r3, lsr #23 @ left align loc bit field beq finished @ if loc is 0, then no need to clean mov r10, #0 @ start clean at cache level 0 -loop1: +flush_levels: add r2, r10, r10, lsr #1 @ work out 3x current cache level mov r1, r0, lsr r2 @ extract cache type bits from clidr and r1, r1, #7 @ mask of the bits for current cache only @@ -71,9 +89,9 @@ loop1: clz r5, r4 @ find bit position of way size increment ldr r7, =0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop2: +loop1: mov r9, r4 @ create working copy of max way size -loop3: +loop2: ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 THUMB( lsl r6, r9, r5 ) THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 @@ -82,13 +100,13 @@ loop3: THUMB( orr r11, r11, r6 ) @ factor index number into r11 mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way subs r9, r9, #1 @ decrement the way - bge loop3 - subs r7, r7, #1 @ decrement the index bge loop2 + subs r7, r7, #1 @ decrement the index + bge loop1 skip: add r10, r10, #2 @ increment cache number cmp r3, r10 - bgt loop1 + bgt flush_levels finished: mov r10, #0 @ swith back to cache level 0 mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr @@ -120,6 +138,24 @@ ENTRY(v7_flush_kern_cache_all) mov pc, lr ENDPROC(v7_flush_kern_cache_all) + /* + * v7_flush_kern_cache_louis(void) + * + * Flush the data cache up to Level of Unification Inner Shareable. + * Invalidate the I-cache to the point of unification. 
+ */ +ENTRY(v7_flush_kern_cache_louis) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_flush_dcache_louis + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_flush_kern_cache_louis) + /* * v7_flush_cache_all() * @@ -211,6 +247,9 @@ ENTRY(v7_coherent_user_range) * isn't mapped, fail with -EFAULT. */ 9001: +#ifdef CONFIG_ARM_ERRATA_775420 + dsb +#endif mov r0, #-EFAULT mov pc, lr UNWIND(.fnend ) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 119bc52ab93e..4e07eec1270d 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -63,10 +63,11 @@ static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd, pid = task_pid_nr(thread->task) << ASID_BITS; asm volatile( " mrc p15, 0, %0, c13, c0, 1\n" - " bfi %1, %0, #0, %2\n" - " mcr p15, 0, %1, c13, c0, 1\n" + " and %0, %0, %2\n" + " orr %0, %0, %1\n" + " mcr p15, 0, %0, c13, c0, 1\n" : "=r" (contextidr), "+r" (pid) - : "I" (ASID_BITS)); + : "I" (~ASID_MASK)); isb(); return NOTIFY_OK; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4e7d1182e8a3..477a2d23ddf1 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -73,11 +73,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(page, offset, size, dir); return pfn_to_dma(dev, page_to_pfn(page)) + offset; } +static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + /** * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -96,7 +103,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), handle & ~PAGE_MASK, size, dir); } @@ -106,8 +113,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(page, offset, size, dir); + __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_dma_sync_single_for_device(struct device *dev, @@ -115,8 +121,7 @@ static void arm_dma_sync_single_for_device(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(page, offset, size, dir); + __dma_page_cpu_to_dev(page, offset, size, dir); } static int arm_dma_set_mask(struct device *dev, u64 dma_mask); @@ -138,6 +143,22 @@ struct dma_map_ops arm_dma_ops = { }; EXPORT_SYMBOL(arm_dma_ops); +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t 
*handle, gfp_t gfp, struct dma_attrs *attrs); +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs); + +struct dma_map_ops arm_coherent_dma_ops = { + .alloc = arm_coherent_dma_alloc, + .free = arm_coherent_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = arm_coherent_dma_map_page, + .map_sg = arm_dma_map_sg, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_coherent_dma_ops); + static u64 get_coherent_dma_mask(struct device *dev) { u64 mask = (u64)arm_dma_limit; @@ -267,17 +288,19 @@ static void __dma_free_remap(void *cpu_addr, size_t size) vunmap(cpu_addr); } +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K + struct dma_pool { size_t size; spinlock_t lock; unsigned long *bitmap; unsigned long nr_pages; void *vaddr; - struct page *page; + struct page **pages; }; static struct dma_pool atomic_pool = { - .size = SZ_256K, + .size = DEFAULT_DMA_COHERENT_POOL_SIZE, }; static int __init early_coherent_pool(char *p) @@ -287,6 +310,21 @@ static int __init early_coherent_pool(char *p) } early_param("coherent_pool", early_coherent_pool); +void __init init_dma_coherent_pool_size(unsigned long size) +{ + /* + * Catch any attempt to set the pool size too late. + */ + BUG_ON(atomic_pool.vaddr); + + /* + * Set architecture specific coherent pool size only if + * it has not been changed by kernel command line parameter. + */ + if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE) + atomic_pool.size = size; +} + /* * Initialise the coherent pool for atomic allocations. */ @@ -297,6 +335,7 @@ static int __init atomic_pool_init(void) unsigned long nr_pages = pool->size >> PAGE_SHIFT; unsigned long *bitmap; struct page *page; + struct page **pages; void *ptr; int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); @@ -304,21 +343,33 @@ static int __init atomic_pool_init(void) if (!bitmap) goto no_bitmap; + pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto no_pages; + if (IS_ENABLED(CONFIG_CMA)) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page); else ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot, &page, NULL); if (ptr) { + int i; + + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; + spin_lock_init(&pool->lock); pool->vaddr = ptr; - pool->page = page; + pool->pages = pages; pool->bitmap = bitmap; pool->nr_pages = nr_pages; pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", (unsigned)pool->size / 1024); return 0; } + + kfree(pages); +no_pages: kfree(bitmap); no_bitmap: pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", @@ -443,27 +494,45 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page) if (pageno < pool->nr_pages) { bitmap_set(pool->bitmap, pageno, count); ptr = pool->vaddr + PAGE_SIZE * pageno; - *ret_page = pool->page + pageno; + *ret_page = pool->pages[pageno]; + } else { + pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n" + "Please increase it with coherent_pool= kernel parameter!\n", + (unsigned)pool->size / 1024); } spin_unlock_irqrestore(&pool->lock, flags); return ptr; } +static bool __in_atomic_pool(void *start, size_t size) +{ + struct dma_pool *pool = &atomic_pool; + void *end = start + size; + void *pool_start = pool->vaddr; + void *pool_end = pool->vaddr + pool->size; + + if (start < pool_start || start >= pool_end) + return false; + + if (end <= pool_end) + return true; + + WARN(1, "Wrong coherent size(%p-%p) from 
atomic pool(%p-%p)\n", + start, end - 1, pool_start, pool_end - 1); + + return false; +} + static int __free_from_pool(void *start, size_t size) { struct dma_pool *pool = &atomic_pool; unsigned long pageno, count; unsigned long flags; - if (start < pool->vaddr || start > pool->vaddr + pool->size) + if (!__in_atomic_pool(start, size)) return 0; - if (start + size > pool->vaddr + pool->size) { - WARN(1, "freeing wrong coherent size from pool\n"); - return 0; - } - pageno = (start - pool->vaddr) >> PAGE_SHIFT; count = size >> PAGE_SHIFT; @@ -538,7 +607,7 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, pgprot_t prot, const void *caller) + gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) { u64 mask = get_coherent_dma_mask(dev); struct page *page; @@ -571,7 +640,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) + if (is_coherent || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (gfp & GFP_ATOMIC) addr = __alloc_from_pool(size, &page); @@ -599,7 +668,20 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - return __dma_alloc(dev, size, handle, gfp, prot, + return __dma_alloc(dev, size, handle, gfp, prot, false, + __builtin_return_address(0)); +} + +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + + return __dma_alloc(dev, size, handle, gfp, prot, true, __builtin_return_address(0)); } @@ -636,8 +718,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, /* * Free a buffer as defined by the above mapping. 
*/ -void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs) +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs, + bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); @@ -646,7 +729,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) { + if (is_coherent || nommu()) { __dma_free_buffer(page, size); } else if (__free_from_pool(cpu_addr, size)) { return; @@ -662,6 +745,18 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, } } +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); +} + +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); +} + int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, struct dma_attrs *attrs) @@ -964,11 +1059,12 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t if (!pages[i]) goto error; - if (order) + if (order) { split_page(pages[i], order); - j = 1 << order; - while (--j) - pages[i + j] = pages[i] + j; + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } __dma_clear_buffer(pages[i], PAGE_SIZE << order); i += 1 << order; @@ -1090,10 +1186,22 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si return 0; } +static struct page **__atomic_get_pages(void *addr) +{ + struct dma_pool *pool = &atomic_pool; + struct page **pages = pool->pages; + int offs = (addr - pool->vaddr) >> PAGE_SHIFT; + + return pages + offs; +} + static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) { struct vm_struct *area; + if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) + return __atomic_get_pages(cpu_addr); + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) return cpu_addr; @@ -1103,6 +1211,34 @@ static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) return NULL; } +static void *__iommu_alloc_atomic(struct device *dev, size_t size, + dma_addr_t *handle) +{ + struct page *page; + void *addr; + + addr = __alloc_from_pool(size, &page); + if (!addr) + return NULL; + + *handle = __iommu_create_mapping(dev, &page, size); + if (*handle == DMA_ERROR_CODE) + goto err_mapping; + + return addr; + +err_mapping: + __free_from_pool(addr, size); + return NULL; +} + +static void __iommu_free_atomic(struct device *dev, struct page **pages, + dma_addr_t handle, size_t size) +{ + __iommu_remove_mapping(dev, handle, size); + __free_from_pool(page_address(pages[0]), size); +} + static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { @@ -1113,6 +1249,9 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); + if (gfp & GFP_ATOMIC) + return __iommu_alloc_atomic(dev, size, handle); + pages = __iommu_alloc_buffer(dev, size, gfp); if (!pages) return NULL; @@ -1179,6 +1318,11 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, return; } + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, pages, handle, size); + return; + } + if 
(!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { unmap_kernel_range((unsigned long)cpu_addr, size); vunmap(cpu_addr); @@ -1207,7 +1351,8 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, */ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, size_t size, dma_addr_t *handle, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t iova, iova_base; @@ -1226,8 +1371,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, phys_addr_t phys = page_to_phys(sg_page(s)); unsigned int len = PAGE_ALIGN(s->offset + s->length); - if (!arch_is_coherent() && - !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); ret = iommu_map(mapping->domain, iova, phys, len, 0); @@ -1245,20 +1390,9 @@ fail: return ret; } -/** - * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA - * @dev: valid struct device pointer - * @sg: list of buffers - * @nents: number of buffers to map - * @dir: DMA transfer direction - * - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * The scatter gather list elements are merged together (if possible) and - * tagged with the appropriate dma address and length. They are obtained via - * sg_dma_{address,length}. - */ -int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) +static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct scatterlist *s = sg, *dma = sg, *start = sg; int i, count = 0; @@ -1274,7 +1408,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { if (__map_sg_chunk(dev, start, size, &dma->dma_address, - dir, attrs) < 0) + dir, attrs, is_coherent) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1287,7 +1421,8 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, } size += s->length; } - if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs) < 0) + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, + is_coherent) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1302,17 +1437,44 @@ bad_mapping: } /** - * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer * @sg: list of buffers - * @nents: number of buffers to unmap (same as was passed to dma_map_sg) - * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * @nents: number of buffers to map + * @dir: DMA transfer direction * - * Unmap a set of streaming mode DMA translations. Again, CPU access - * rules concerning calls here are the same as for dma_unmap_single(). + * Map a set of i/o coherent buffers described by scatterlist in streaming + * mode for DMA. The scatter gather list elements are merged together (if + * possible) and tagged with the appropriate dma address and length. They are + * obtained via sg_dma_{address,length}. 
*/ -void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) +int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, false); +} + +static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct scatterlist *s; int i; @@ -1321,7 +1483,7 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, if (sg_dma_len(s)) __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); - if (!arch_is_coherent() && + if (!is_coherent && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); @@ -1329,6 +1491,38 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, } /** + * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). 
+ */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); +} + +/** * arm_iommu_sync_sg_for_cpu * @dev: valid struct device pointer * @sg: list of buffers @@ -1342,8 +1536,7 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); } @@ -1361,22 +1554,21 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); } /** - * arm_iommu_map_page + * arm_coherent_iommu_map_page * @dev: valid struct device pointer * @page: page that buffer resides in * @offset: offset into page for start of buffer * @size: size of buffer to map * @dir: DMA transfer direction * - * IOMMU aware version of arm_dma_map_page() + * Coherent IOMMU aware version of arm_dma_map_page() */ -static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, +static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { @@ -1384,9 +1576,6 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, dma_addr_t dma_addr; int ret, len = PAGE_ALIGN(size + offset); - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) - __dma_page_cpu_to_dev(page, offset, size, dir); - dma_addr = __alloc_iova(mapping, len); if (dma_addr == DMA_ERROR_CODE) return dma_addr; @@ -1402,6 +1591,51 @@ fail: } /** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + + return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); +} + +/** + * arm_coherent_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Coherent IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +/** * arm_iommu_unmap_page * @dev: valid struct device pointer * @handle: DMA address of buffer @@ -1423,7 +1657,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, if (!iova) return; - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + 
if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(page, offset, size, dir); iommu_unmap(mapping->domain, iova, len); @@ -1441,8 +1675,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev, if (!iova) return; - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(page, offset, size, dir); + __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_iommu_sync_single_for_device(struct device *dev, @@ -1476,6 +1709,19 @@ struct dma_map_ops iommu_ops = { .sync_sg_for_device = arm_iommu_sync_sg_for_device, }; +struct dma_map_ops iommu_coherent_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_coherent_iommu_map_page, + .unmap_page = arm_coherent_iommu_unmap_page, + + .map_sg = arm_coherent_iommu_map_sg, + .unmap_sg = arm_coherent_iommu_unmap_sg, +}; + /** * arm_iommu_create_mapping * @bus: pointer to the bus holding the client device (for IOMMU calls) @@ -1569,7 +1815,7 @@ int arm_iommu_attach_device(struct device *dev, dev->archdata.mapping = mapping; set_dma_ops(dev, &iommu_ops); - pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev)); + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); return 0; } diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 7599e2625c7d..2a5907b5c8d2 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; unsigned long offset; pgoff_t pgoff; int aliases = 0; @@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, * cache coherency. */ flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { /* * If this VMA is not in our MM, we can ignore it. * Note that we intentionally mask out the VMA diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c3bd83450227..5dbf13f954f6 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -336,6 +336,7 @@ retry: /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. 
*/ flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 40ca11ed6e5f..1c8f7f564175 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p { struct mm_struct *mm = current->active_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; pgoff_t pgoff; /* @@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { unsigned long offset; /* diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9aec41fa80ae..ad722f1208a5 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -324,7 +324,7 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) BUG_ON(!arm_memblock_steal_permitted); - phys = memblock_alloc(size, align); + phys = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); memblock_free(phys, size); memblock_remove(phys, size); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 566750fa57d4..5dcc2fd46c46 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -36,6 +36,7 @@ #include <asm/system_info.h> #include <asm/mach/map.h> +#include <asm/mach/pci.h> #include "mm.h" int ioremap_page(unsigned long virt, unsigned long phys, @@ -247,6 +248,7 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, if (!area) return NULL; addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(pfn); #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) if (DOMAIN_IO == 0 && @@ -383,3 +385,16 @@ void __arm_iounmap(volatile void __iomem *io_addr) arch_iounmap(io_addr); } EXPORT_SYMBOL(__arm_iounmap); + +#ifdef CONFIG_PCI +int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) +{ + BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + + return ioremap_page_range(PCI_IO_VIRT_BASE + offset, + PCI_IO_VIRT_BASE + offset + SZ_64K, + phys_addr, + __pgprot(get_mem_type(MT_DEVICE)->prot_pte)); +} +EXPORT_SYMBOL_GPL(pci_ioremap_io); +#endif diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 6776160618ef..a8ee92da3544 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -55,6 +55,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page /* permanent static mappings from iotable_init() */ #define VM_ARM_STATIC_MAPPING 0x40000000 +/* empty mapping */ +#define VM_ARM_EMPTY_MAPPING 0x20000000 + /* mapping type (attributes) for permanent static mappings */ #define VM_ARM_MTYPE(mt) ((mt) << 20) #define VM_ARM_MTYPE_MASK (0x1f << 20) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4c2d0451e84a..941dfb9e9a78 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -31,6 +31,7 @@ #include <asm/mach/arch.h> #include <asm/mach/map.h> +#include <asm/mach/pci.h> #include "mm.h" @@ -216,7 +217,7 @@ static struct mem_type mem_types[] = { .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, .domain = DOMAIN_IO, - }, + }, [MT_DEVICE_WC] = { /* ioremap_wc */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, .prot_l1 = PMD_TYPE_TABLE, @@ -422,17 +423,6 @@ static void __init build_mem_type_table(void) vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; /* - * Enable CPU-specific coherency if supported. 
- * (Only available on XSC3 at the moment.) - */ - if (arch_is_coherent() && cpu_is_xsc3()) { - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; - } - /* * ARMv6 and above have extended page tables. */ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { @@ -777,14 +767,27 @@ void __init iotable_init(struct map_desc *io_desc, int nr) create_mapping(md); vm->addr = (void *)(md->virtual & PAGE_MASK); vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); - vm->phys_addr = __pfn_to_phys(md->pfn); - vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->phys_addr = __pfn_to_phys(md->pfn); + vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; vm->flags |= VM_ARM_MTYPE(md->type); vm->caller = iotable_init; vm_area_add_early(vm++); } } +void __init vm_reserve_area_early(unsigned long addr, unsigned long size, + void *caller) +{ + struct vm_struct *vm; + + vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm)); + vm->addr = (void *)addr; + vm->size = size; + vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; + vm->caller = caller; + vm_area_add_early(vm); +} + #ifndef CONFIG_ARM_LPAE /* @@ -802,14 +805,7 @@ void __init iotable_init(struct map_desc *io_desc, int nr) static void __init pmd_empty_section_gap(unsigned long addr) { - struct vm_struct *vm; - - vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm)); - vm->addr = (void *)addr; - vm->size = SECTION_SIZE; - vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; - vm->caller = pmd_empty_section_gap; - vm_area_add_early(vm); + vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap); } static void __init fill_pmd_gaps(void) @@ -820,7 +816,7 @@ static void __init fill_pmd_gaps(void) /* we're still single threaded hence no lock needed here */ for (vm = vmlist; vm; vm = vm->next) { - if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + if (!(vm->flags & (VM_ARM_STATIC_MAPPING | VM_ARM_EMPTY_MAPPING))) continue; addr = (unsigned long)vm->addr; if (addr < next) @@ -858,6 +854,28 @@ static void __init fill_pmd_gaps(void) #define fill_pmd_gaps() do { } while (0) #endif +#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H) +static void __init pci_reserve_io(void) +{ + struct vm_struct *vm; + unsigned long addr; + + /* we're still single threaded hence no lock needed here */ + for (vm = vmlist; vm; vm = vm->next) { + if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + continue; + addr = (unsigned long)vm->addr; + addr &= ~(SZ_2M - 1); + if (addr == PCI_IO_VIRT_BASE) + return; + + } + vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io); +} +#else +#define pci_reserve_io() do { } while (0) +#endif + static void * __initdata vmalloc_min = (void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET); @@ -961,8 +979,8 @@ void __init sanity_check_meminfo(void) * Check whether this memory bank would partially overlap * the vmalloc area. 
*/ - if (__va(bank->start + bank->size) > vmalloc_min || - __va(bank->start + bank->size) < __va(bank->start)) { + if (__va(bank->start + bank->size - 1) >= vmalloc_min || + __va(bank->start + bank->size - 1) <= __va(bank->start)) { unsigned long newsize = vmalloc_min - __va(bank->start); printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " "to -%.8llx (vmalloc region overlap).\n", @@ -1141,6 +1159,9 @@ static void __init devicemaps_init(struct machine_desc *mdesc) mdesc->map_io(); fill_pmd_gaps(); + /* Reserve fixed i/o space in VMALLOC region */ + pci_reserve_io(); + /* * Finally flush the caches and tlb to ensure that we're in a * consistent state wrt the writebuffer. This also ensures that diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 0650bb87c1e3..2bb61e703d6c 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -368,6 +368,9 @@ ENTRY(arm1020_dma_unmap_area) mov pc, lr ENDPROC(arm1020_dma_unmap_area) + .globl arm1020_flush_kern_cache_louis + .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm1020 diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 4188478325a6..8f96aa40f510 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -354,6 +354,9 @@ ENTRY(arm1020e_dma_unmap_area) mov pc, lr ENDPROC(arm1020e_dma_unmap_area) + .globl arm1020e_flush_kern_cache_louis + .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm1020e diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 33c68824bff0..8ebe4a469a22 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -343,6 +343,9 @@ ENTRY(arm1022_dma_unmap_area) mov pc, lr ENDPROC(arm1022_dma_unmap_area) + .globl arm1022_flush_kern_cache_louis + .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm1022 diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index fbc1d5fc24dc..093fc7e520c3 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -337,6 +337,9 @@ ENTRY(arm1026_dma_unmap_area) mov pc, lr ENDPROC(arm1026_dma_unmap_area) + .globl arm1026_flush_kern_cache_louis + .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm1026 diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 1a8c138eb897..2c3b9421ab5e 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -319,6 +319,9 @@ ENTRY(arm920_dma_unmap_area) mov pc, lr ENDPROC(arm920_dma_unmap_area) + .globl arm920_flush_kern_cache_louis + .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm920 #endif diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 4c44d7e1c3ca..4464c49d7449 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -321,6 +321,9 @@ ENTRY(arm922_dma_unmap_area) mov pc, lr ENDPROC(arm922_dma_unmap_area) + .globl arm922_flush_kern_cache_louis + .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and 
proc-macros.S) define_cache_functions arm922 #endif diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ec5b1180994f..281eb9b9c1d6 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -376,6 +376,9 @@ ENTRY(arm925_dma_unmap_area) mov pc, lr ENDPROC(arm925_dma_unmap_area) + .globl arm925_flush_kern_cache_louis + .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm925 diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index c31e62c606c0..f1803f7e2972 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -339,6 +339,9 @@ ENTRY(arm926_dma_unmap_area) mov pc, lr ENDPROC(arm926_dma_unmap_area) + .globl arm926_flush_kern_cache_louis + .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm926 diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index a613a7dd7146..8da189d4a402 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -267,6 +267,9 @@ ENTRY(arm940_dma_unmap_area) mov pc, lr ENDPROC(arm940_dma_unmap_area) + .globl arm940_flush_kern_cache_louis + .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm940 diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 9f4f2999fdd0..f666cf34075a 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -310,6 +310,9 @@ ENTRY(arm946_dma_unmap_area) mov pc, lr ENDPROC(arm946_dma_unmap_area) + .globl arm946_flush_kern_cache_louis + .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions arm946 diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 23a8e4c7f2bd..4106b09e0c29 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -415,6 +415,9 @@ ENTRY(feroceon_dma_unmap_area) mov pc, lr ENDPROC(feroceon_dma_unmap_area) + .globl feroceon_flush_kern_cache_louis + .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions feroceon @@ -431,6 +434,7 @@ ENDPROC(feroceon_dma_unmap_area) range_alias flush_icache_all range_alias flush_user_cache_all range_alias flush_kern_cache_all + range_alias flush_kern_cache_louis range_alias flush_user_cache_range range_alias coherent_kern_range range_alias coherent_user_range diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 2d8ff3ad86d3..b29a2265af01 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -299,6 +299,7 @@ ENTRY(\name\()_processor_functions) ENTRY(\name\()_cache_fns) .long \name\()_flush_icache_all .long \name\()_flush_kern_cache_all + .long \name\()_flush_kern_cache_louis .long \name\()_flush_user_cache_all .long \name\()_flush_user_cache_range .long \name\()_coherent_kern_range diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index fbb2124a547d..82f9cdc751d6 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -303,6 +303,9 @@ ENTRY(mohawk_dma_unmap_area) mov pc, lr ENDPROC(mohawk_dma_unmap_area) + .globl mohawk_flush_kern_cache_louis + .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all + 
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions mohawk

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index c2e2b66f72b5..846d279f3176 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -172,7 +172,7 @@ __v7_ca15mp_setup:

 __v7_setup:
 	adr	r12, __v7_setup_stack		@ the local stack
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
-	bl	v7_flush_dcache_all
+	bl	v7_flush_dcache_louis
 	ldmia	r12, {r0-r5, r7, r9, r11, lr}
 	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index b0d57869da2d..eb93d6487f35 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -337,6 +337,9 @@ ENTRY(xsc3_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(xsc3_dma_unmap_area)

+	.globl	xsc3_flush_kern_cache_louis
+	.equ	xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions xsc3
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 4ffebaa595ee..25510361aa18 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -410,6 +410,9 @@ ENTRY(xscale_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(xscale_dma_unmap_area)

+	.globl	xscale_flush_kern_cache_louis
+	.equ	xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions xscale
@@ -439,6 +442,7 @@ ENDPROC(xscale_dma_unmap_area)
 	a0_alias flush_icache_all
 	a0_alias flush_user_cache_all
 	a0_alias flush_kern_cache_all
+	a0_alias flush_kern_cache_louis
 	a0_alias flush_user_cache_range
 	a0_alias coherent_kern_range
 	a0_alias coherent_user_range