summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorvdumpa <vdumpa@nvidia.com>2010-10-29 11:37:42 -0700
committerVarun Colbert <vcolbert@nvidia.com>2010-11-24 15:18:45 -0800
commit05074ac6ce18a61467ad6d5dd5e53d56725c8d58 (patch)
treed71845194cb37ea94937ba0855cebbb440f41861
parentf2b1e18bc44cd6bf8e76d00a5f0fcb0d2b2e0995 (diff)
video: tegra: nvmap: Clean whole L1 instead of VA cleaning
For large allocations, cleaning each page of the allocation can take a significant amount of time. If an allocation that nvmap needs to clean or invalidate out of the cache is significantly larger than the cache, just flush the entire cache. bug 711478 and bug 744221 Revert "video: tegra: nvmap: perform cache maintenance for rw_handle" This reverts commit d963f09c3ebb2d690d266f8f607b4876acaf2ab1. Reviewed-on: http://git-master/r/10213 (cherry picked from commit 6469f378a596f0572035cd27a17851ea86b763c9) Change-Id: I1e514f505db860eb01d575a98f0f80c8794e8463 Reviewed-on: http://git-master/r/10497 Reviewed-by: Eric Werness <ewerness@nvidia.com> Reviewed-by: Markus Holtmanns <mholtmanns@nvidia.com> Tested-by: Markus Holtmanns <mholtmanns@nvidia.com> Reviewed-by: Janne Hellsten <jhellsten@nvidia.com> Reviewed-by: Varun Colbert <vcolbert@nvidia.com> Tested-by: Varun Colbert <vcolbert@nvidia.com>
-rw-r--r--arch/arm/mm/cache-v7.S53
-rw-r--r--drivers/video/tegra/nvmap.c26
2 files changed, 55 insertions, 24 deletions
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 884ac48a2010..24494f0ef917 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -18,27 +18,28 @@
#include "proc-macros.S"
/*
- * v7_flush_dcache_all()
+ * v7_op_dcache_all op
*
- * Flush the whole D-cache.
+ * op=c14, Flush the whole D-cache.
+ * op=c10, Clean the whole D-cache.
*
* Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
*
* - mm - mm_struct describing address space
*/
-ENTRY(v7_flush_dcache_all)
+.macro v7_op_dcache_all op @ op=c10 clean, op=c14 flush
dmb @ ensure ordering with previous memory accesses
mrc p15, 1, r0, c0, c0, 1 @ read clidr
ands r3, r0, #0x7000000 @ extract loc from clidr
mov r3, r3, lsr #23 @ left align loc bit field
- beq finished @ if loc is 0, then no need to clean
+ beq 1005f @ if loc is 0, then no need to clean
mov r10, #0 @ start clean at cache level 0
-loop1:
+1001:
add r2, r10, r10, lsr #1 @ work out 3x current cache level
mov r1, r0, lsr r2 @ extract cache type bits from clidr
and r1, r1, #7 @ mask of the bits for current cache only
cmp r1, #2 @ see what cache we have at this level
- blt skip @ skip if no cache, or just i-cache
+ blt 1004f @ skip if no cache, or just i-cache
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
isb @ isb to sync the new cssr&csidr
mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
@@ -49,32 +50,40 @@ loop1:
clz r5, r4 @ find bit position of way size increment
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
-loop2:
+1002:
mov r9, r4 @ create working copy of max way size
-loop3:
+1003:
ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
THUMB( lsl r6, r9, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
THUMB( lsl r6, r7, r2 )
THUMB( orr r11, r11, r6 ) @ factor index number into r11
- mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
+ mcr p15, 0, r11, c7, \op, 2 @ op=c10/c14, clean/clean & invalidate by set/way
subs r9, r9, #1 @ decrement the way
- bge loop3
+ bge 1003b
subs r7, r7, #1 @ decrement the index
- bge loop2
-skip:
+ bge 1002b
+1004:
add r10, r10, #2 @ increment cache number
cmp r3, r10
- bgt loop1
-finished:
+ bgt 1001b
+1005:
mov r10, #0 @ switch back to cache level 0
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
dsb
isb
mov pc, lr
+.endm
+
+ENTRY(v7_flush_dcache_all)
+ v7_op_dcache_all c14
ENDPROC(v7_flush_dcache_all)
+ENTRY(v7_clean_dcache_all)
+ v7_op_dcache_all c10
+ENDPROC(v7_clean_dcache_all)
+
/*
* v7_flush_cache_all()
*
@@ -102,6 +111,22 @@ ENTRY(v7_flush_kern_cache_all)
ENDPROC(v7_flush_kern_cache_all)
/*
+ * v7_clean_kern_cache_all()
+ */
+ENTRY(v7_clean_kern_cache_all)
+ ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( stmfd sp!, {r4-r7, r9-r11, lr} )
+ bl v7_clean_dcache_all
+ mov r0, #0
+#ifdef CONFIG_SMP
+ mcr p15, 0, r0, c7, c1, 0 @ invalidate I-cache inner shareable
+#endif
+ ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
+ mov pc, lr
+ENDPROC(v7_clean_kern_cache_all)
+
+/*
* v7_flush_cache_all()
*
* Flush all TLB entries in a particular address space
diff --git a/drivers/video/tegra/nvmap.c b/drivers/video/tegra/nvmap.c
index f260e1cb9573..ce0da9904afe 100644
--- a/drivers/video/tegra/nvmap.c
+++ b/drivers/video/tegra/nvmap.c
@@ -149,6 +149,9 @@ static struct rb_root nvmap_handles = RB_ROOT;
static struct tegra_iovmm_client *nvmap_vm_client = NULL;
+extern void v7_flush_kern_cache_all(void);
+extern void v7_clean_kern_cache_all(void);
+
/* default heap order policy */
static unsigned int _nvmap_heap_policy (unsigned int heaps, int numpages)
{
@@ -2831,6 +2834,16 @@ static int _nvmap_do_cache_maint(struct nvmap_handle *h,
outer_maint = NULL;
}
+ if (end - start > PAGE_SIZE * 3) {
+ if (op == NVMEM_CACHE_OP_WB) {
+ v7_clean_kern_cache_all();
+ inner_maint = NULL;
+ } else if (op == NVMEM_CACHE_OP_WB_INV) {
+ v7_flush_kern_cache_all();
+ inner_maint = NULL;
+ }
+ }
+
prot = _nvmap_flag_to_pgprot(h->flags, pgprot_kernel);
if (h->alloc && !h->heap_pgalloc) {
@@ -2839,7 +2852,7 @@ static int _nvmap_do_cache_maint(struct nvmap_handle *h,
spin_unlock(&h->carveout.co_heap->lock);
}
- while (start < end) {
+ while (start < end && (inner_maint || outer_maint)) {
struct page *page = NULL;
unsigned long phys;
void *src;
@@ -2868,7 +2881,7 @@ static int _nvmap_do_cache_maint(struct nvmap_handle *h,
src = addr + (phys & ~PAGE_MASK);
count = min_t(size_t, end-start, PAGE_SIZE-(phys&~PAGE_MASK));
- inner_maint(src, src+count);
+ if (inner_maint) inner_maint(src, src+count);
if (outer_maint) outer_maint(phys, phys+count);
start += count;
if (page) put_page(page);
@@ -3013,19 +3026,12 @@ static ssize_t _nvmap_do_rw_handle(struct nvmap_handle *h, int is_read,
}
while (count--) {
- size_t ret;
- if (is_read)
- _nvmap_do_cache_maint(h, h_offs, h_offs + elem_size,
- NVMEM_CACHE_OP_INV, false);
- ret = _nvmap_do_one_rw_handle(h, is_read,
+ size_t ret = _nvmap_do_one_rw_handle(h, is_read,
is_user, h_offs, sys_addr, elem_size, &addr);
if (ret < 0) {
if (!bytes_copied) bytes_copied = ret;
break;
}
- if (!is_read)
- _nvmap_do_cache_maint(h, h_offs, h_offs + ret,
- NVMEM_CACHE_OP_WB, false);
bytes_copied += ret;
if (ret < elem_size) break;
sys_addr += sys_stride;