diff options
author | Gary King <GKing@nvidia.com> | 2010-01-29 17:46:27 -0800 |
---|---|---|
committer | Gary King <GKing@nvidia.com> | 2010-02-02 10:52:45 -0800 |
commit | 40c86b9c228b7b04300c52dd8955e9dc585ba881 (patch) | |
tree | 88ceedf3d19c7abc1afb95863a3f193314fbfbf5 /arch/arm | |
parent | 8b8ffa33276fc5b2069a14724ce82cdeaf7e848c (diff) |
[ARM] expose full data cache clean and flush DMA maintenance
Drivers which perform DMA mapping can optimize necessary cache
maintenance by using a full-cache clean or flush rather than
looping over large regions line-by-line.
There was no previous full-cache operation other than
flush_kern_cache_all, and that operation both invalidates the
data cache (not always necessary) and invalidates the instruction
cache, both of which unnecessarily hurt performance on CPUs with
Harvard caches.
Change-Id: If71015525457e9e7e481fc2afcdc76bc3fa8f8f4
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/include/asm/cacheflush.h | 25 | ||||
-rw-r--r-- | arch/arm/kernel/smp.c | 6 | ||||
-rw-r--r-- | arch/arm/mm/cache-v4.S | 2 | ||||
-rw-r--r-- | arch/arm/mm/cache-v4wb.S | 2 | ||||
-rw-r--r-- | arch/arm/mm/cache-v4wt.S | 2 | ||||
-rw-r--r-- | arch/arm/mm/cache-v6.S | 32 | ||||
-rw-r--r-- | arch/arm/mm/cache-v7.S | 65 |
7 files changed, 122 insertions, 12 deletions
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 262120acc888..c4fbcc59203b 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -208,6 +208,8 @@ struct cpu_cache_fns { void (*dma_inv_range)(const void *, const void *); void (*dma_clean_range)(const void *, const void *); void (*dma_flush_range)(const void *, const void *); + void (*dma_clean_all)(void); + void (*dma_flush_all)(void); }; struct outer_cache_fns { @@ -239,6 +241,8 @@ extern struct cpu_cache_fns cpu_cache; #define dmac_inv_range cpu_cache.dma_inv_range #define dmac_clean_range cpu_cache.dma_clean_range #define dmac_flush_range cpu_cache.dma_flush_range +#define dmac_clean_all cpu_cache.dma_clean_all +#define dmac_flush_all cpu_cache.dma_flush_all #else @@ -255,6 +259,8 @@ static inline void v7m_flush_kern_dcache_page(void *a) { } static inline void v7m_dma_inv_range(const void *a, const void *b) { } static inline void v7m_dma_clean_range(const void *a, const void *b) { } static inline void v7m_dma_flush_range(const void *a, const void *b) { } +static inline void v7m_dma_clean_all(void) { } +static inline void v7m_dma_flush_all(void) { } #endif @@ -281,10 +287,14 @@ extern void __cpuc_flush_dcache_page(void *); #define dmac_inv_range __glue(_CACHE,_dma_inv_range) #define dmac_clean_range __glue(_CACHE,_dma_clean_range) #define dmac_flush_range __glue(_CACHE,_dma_flush_range) +#define dmac_clean_all __glue(_CACHE,_dma_clean_all) +#define dmac_flush_all __glue(_CACHE,_dma_flush_all) extern void dmac_inv_range(const void *, const void *); extern void dmac_clean_range(const void *, const void *); extern void dmac_flush_range(const void *, const void *); +extern void dmac_clean_all(void); +extern void dmac_flush_all(void); #endif @@ -293,6 +303,8 @@ enum smp_dma_cache_type { SMP_DMA_CACHE_INV, SMP_DMA_CACHE_CLEAN, SMP_DMA_CACHE_FLUSH, + SMP_DMA_CACHE_CLEAN_ALL, + SMP_DMA_CACHE_FLUSH_ALL, }; extern void smp_dma_cache_op(int type, 
const void *start, const void *end); @@ -311,10 +323,23 @@ static inline void smp_dma_flush_range(const void *start, const void *end) { smp_dma_cache_op(SMP_DMA_CACHE_FLUSH, start, end); } + +static inline void smp_dma_clean_all(void) +{ + smp_dma_cache_op(SMP_DMA_CACHE_CLEAN_ALL, NULL, NULL); +} + +static inline void smp_dma_flush_all(void) +{ + smp_dma_cache_op(SMP_DMA_CACHE_FLUSH_ALL, NULL, NULL); +} + #else #define smp_dma_inv_range dmac_inv_range #define smp_dma_clean_range dmac_clean_range #define smp_dma_flush_range dmac_flush_range +#define smp_dma_clean_all dmac_clean_all +#define smp_dma_flush_all dmac_flush_all #endif #ifdef CONFIG_OUTER_CACHE diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index f130fda2fbbe..40eff61d6d8e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -751,6 +751,12 @@ static void local_dma_cache_op(int type, const void *start, const void *end) case SMP_DMA_CACHE_FLUSH: dmac_flush_range(start, end); break; + case SMP_DMA_CACHE_CLEAN_ALL: + dmac_clean_all(); + break; + case SMP_DMA_CACHE_FLUSH_ALL: + dmac_flush_all(); + break; default: printk(KERN_CRIT "CPU%u: Unknown SMP DMA cache type %d\n", smp_processor_id(), type); diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 3668611cb400..f8ff33d66ede 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -145,4 +145,6 @@ ENTRY(v4_cache_fns) .long v4_dma_inv_range .long v4_dma_clean_range .long v4_dma_flush_range + .long v4_flush_kern_cache_all + .long v4_flush_kern_cache_all .size v4_cache_fns, . - v4_cache_fns diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index 2ebc1b3bf856..771b978c59df 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -228,4 +228,6 @@ ENTRY(v4wb_cache_fns) .long v4wb_dma_inv_range .long v4wb_dma_clean_range .long v4wb_dma_flush_range + .long v4wb_flush_kern_cache_all + .long v4wb_flush_kern_cache_all .size v4wb_cache_fns, . 
- v4wb_cache_fns diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index c54fa2cc40e6..1dfe16103e42 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -184,4 +184,6 @@ ENTRY(v4wt_cache_fns) .long v4wt_dma_inv_range .long v4wt_dma_clean_range .long v4wt_dma_flush_range + .long v4wt_flush_kern_cache_all + .long v4wt_flush_kern_cache_all .size v4wt_cache_fns, . - v4wt_cache_fns diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 55f7ecd1264e..12d17c702e16 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -274,6 +274,36 @@ v6_dma_flush_dcache_all: mov pc, lr #endif +/* + * v6_dma_clean_all() + * + * cleans the entire L1 data cache + */ +ENTRY(v6_dma_clean_all) +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c10, 0 @ D cache clean +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * v6_dma_flush_all() + * + * flushes the entire L1 data cache + */ +ENTRY(v6_dma_flush_all) +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + + + __INITDATA .type v6_cache_fns, #object @@ -287,4 +317,6 @@ ENTRY(v6_cache_fns) .long v6_dma_inv_range .long v6_dma_clean_range .long v6_dma_flush_range + .long v6_dma_clean_all + .long v6_dma_flush_all .size v6_cache_fns, . 
- v6_cache_fns diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 5327bd1b9bcf..8c2975898da9 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -26,19 +26,19 @@ * * - mm - mm_struct describing address space */ -ENTRY(v7_flush_dcache_all) + .macro v7_way_op, op dmb @ ensure ordering with previous memory accesses mrc p15, 1, r0, c0, c0, 1 @ read clidr ands r3, r0, #0x7000000 @ extract loc from clidr mov r3, r3, lsr #23 @ left align loc bit field - beq finished @ if loc is 0, then no need to clean + beq 50f @ if loc is 0, then no need to clean mov r10, #0 @ start clean at cache level 0 -loop1: +10: add r2, r10, r10, lsr #1 @ work out 3x current cache level mov r1, r0, lsr r2 @ extract cache type bits from clidr and r1, r1, #7 @ mask of the bits for current cache only cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache + blt 40f @ skip if no cache, or just i-cache mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr isb @ isb to sych the new cssr&csidr mrc p15, 1, r1, c0, c0, 0 @ read the new csidr @@ -49,32 +49,42 @@ loop1: clz r5, r4 @ find bit position of way size increment ldr r7, =0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop2: +20: mov r9, r4 @ create working copy of max way size -loop3: +30: ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 THUMB( lsl r6, r9, r5 ) THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11 THUMB( lsl r6, r7, r2 ) THUMB( orr r11, r11, r6 ) @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way + mcr p15, 0, r11, c7, \op , 2 @ clean & invalidate by set/way subs r9, r9, #1 @ decrement the way - bge loop3 + bge 30b subs r7, r7, #1 @ decrement the index - bge loop2 -skip: + bge 20b +40: add r10, r10, #2 @ increment cache number cmp r3, r10 - bgt loop1 -finished: + bgt 10b +50: mov r10, #0 @ 
swith back to cache level 0 mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr dsb isb + .endm + +ENTRY(v7_flush_dcache_all) + v7_way_op c14 mov pc, lr ENDPROC(v7_flush_dcache_all) + +ENTRY(v7_clean_dcache_all) + v7_way_op c10 + mov pc, lr +ENDPROC(v7_clean_dcache_all) + /* * v7_flush_cache_all() * @@ -266,6 +276,35 @@ ENTRY(v7_dma_flush_range) mov pc, lr ENDPROC(v7_dma_flush_range) +/* + * v7_dma_flush_all() + * + * flushes the entire L1 data cache + */ +ENTRY(v7_dma_flush_all) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_flush_dcache_all + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_dma_flush_all) + +/* + * v7_dma_clean_all() + * + * cleans the entire L1 data cache + */ +ENTRY(v7_dma_clean_all) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_clean_dcache_all + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_dma_clean_all) + + __INITDATA .type v7_cache_fns, #object @@ -279,4 +318,6 @@ ENTRY(v7_cache_fns) .long v7_dma_inv_range .long v7_dma_clean_range .long v7_dma_flush_range + .long v7_dma_clean_all + .long v7_dma_flush_all .size v7_cache_fns, . - v7_cache_fns |