diff options
Diffstat (limited to 'arch/tile/lib')
-rw-r--r-- | arch/tile/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/tile/lib/cacheflush.c | 30 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_user_64.c | 8 | ||||
-rw-r--r-- | arch/tile/lib/spinlock_32.c | 2 | ||||
-rw-r--r-- | arch/tile/lib/spinlock_common.h | 2 |
5 files changed, 37 insertions, 6 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 0c26086ecbef..985f59858234 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -7,6 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ strchr_$(BITS).o strlen_$(BITS).o ifeq ($(CONFIG_TILEGX),y) +CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer lib-y += memcpy_user_64.o else lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 8928aace7a64..db4fb89e12d8 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) { char *p, *base; size_t step_size, load_count; + + /* + * On TILEPro the striping granularity is a fixed 8KB; on + * TILE-Gx it is configurable, and we rely on the fact that + * the hypervisor always configures maximum striping, so that + * bits 9 and 10 of the PA are part of the stripe function, so + * every 512 bytes we hit a striping boundary. + * + */ +#ifdef __tilegx__ + const unsigned long STRIPE_WIDTH = 512; +#else const unsigned long STRIPE_WIDTH = 8192; +#endif + #ifdef __tilegx__ /* * On TILE-Gx, we must disable the dstream prefetcher before doing @@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * memory, that one load would be sufficient, but since we may * be, we also need to back up to the last load issued to * another memory controller, which would be the point where - * we crossed an 8KB boundary (the granularity of striping + * we crossed a "striping" boundary (the granularity of striping * across memory controllers). Keep backing up and doing this * until we are before the beginning of the buffer, or have * hit all the controllers. @@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * every cache line on a full memory stripe on each * controller" that we simply do that, to simplify the logic. * - * FIXME: See bug 9535 for some issues with this code. + * On TILE-Gx the hash-for-home function is much more complex, + * with the upshot being we can't readily guarantee we have + * hit both entries in the 128-entry AMT that were hit by any + * load in the entire range, so we just re-load them all. + * With larger buffers, we may want to consider using a hypervisor + * trap to issue loads directly to each hash-for-home tile for + * each controller (doing it from Linux would trash the TLB). */ if (hfh) { step_size = L2_CACHE_BYTES; +#ifdef __tilegx__ + load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES; +#else load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * (1 << CHIP_LOG_NUM_MSHIMS()); +#endif } else { step_size = STRIPE_WIDTH; load_count = (1 << CHIP_LOG_NUM_MSHIMS()); @@ -109,7 +133,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) /* Figure out how far back we need to go. */ base = p - (step_size * (load_count - 2)); - if ((long)base < (long)buffer) + if ((unsigned long)base < (unsigned long)buffer) base = buffer; /* diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 4763b3aff1cc..37440caa7370 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c @@ -14,7 +14,13 @@ * Do memcpy(), but trap and return "n" when a load or store faults. * * Note: this idiom only works when memcpy() compiles to a leaf function. - * If "sp" is updated during memcpy, the "jrp lr" will be incorrect. + * Here leaf function not only means it does not have calls, but also + * requires no stack operations (sp, stack frame pointer) and no + * use of callee-saved registers, else "jrp lr" will be incorrect since + * unwinding stack frame is bypassed. Since memcpy() is not complex so + * these conditions are satisfied here, but we need to be careful when + * modifying this file. This is not a clean solution but is the best + * one so far. * * Also note that we are capturing "n" from the containing scope here. */ diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index cb0999fb64b4..b16ac49a968e 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -144,7 +144,7 @@ void arch_read_unlock(arch_rwlock_t *rwlock) for (;;) { __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); val = __insn_tns((int *)&rwlock->lock); - if (likely(val & 1) == 0) { + if (likely((val & 1) == 0)) { rwlock->lock = val - (1 << _RD_COUNT_SHIFT); __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); break; diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h index c10109809132..6ac37509faca 100644 --- a/arch/tile/lib/spinlock_common.h +++ b/arch/tile/lib/spinlock_common.h @@ -60,5 +60,5 @@ static void delay_backoff(int iterations) loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & (loops - 1); - relax(1 << exponent); + relax(loops); } |