summaryrefslogtreecommitdiff
path: root/arch/arm/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/include/asm')
-rw-r--r--arch/arm/include/asm/arch_timer.h3
-rw-r--r--arch/arm/include/asm/delay.h32
-rw-r--r--arch/arm/include/asm/locks.h274
-rw-r--r--arch/arm/include/asm/memory.h2
-rw-r--r--arch/arm/include/asm/perf_event.h17
-rw-r--r--arch/arm/include/asm/pmu.h3
-rw-r--r--arch/arm/include/asm/spinlock.h76
-rw-r--r--arch/arm/include/asm/spinlock_types.h17
-rw-r--r--arch/arm/include/asm/timex.h10
-rw-r--r--arch/arm/include/asm/uaccess.h27
-rw-r--r--arch/arm/include/asm/word-at-a-time.h96
11 files changed, 204 insertions, 353 deletions
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index ed2e95d46e29..62e75475e57e 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -1,7 +1,10 @@
#ifndef __ASMARM_ARCH_TIMER_H
#define __ASMARM_ARCH_TIMER_H
+#include <asm/errno.h>
+
#ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_HAS_READ_CURRENT_TIMER
int arch_timer_of_register(void);
int arch_timer_sched_clock_init(void);
#else
diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index b2deda181549..dc6145120de3 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -6,9 +6,22 @@
#ifndef __ASM_ARM_DELAY_H
#define __ASM_ARM_DELAY_H
+#include <asm/memory.h>
#include <asm/param.h> /* HZ */
-extern void __delay(int loops);
+#define MAX_UDELAY_MS 2
+#define UDELAY_MULT ((UL(2199023) * HZ) >> 11)
+#define UDELAY_SHIFT 30
+
+#ifndef __ASSEMBLY__
+
+extern struct arm_delay_ops {
+ void (*delay)(unsigned long);
+ void (*const_udelay)(unsigned long);
+ void (*udelay)(unsigned long);
+} arm_delay_ops;
+
+#define __delay(n) arm_delay_ops.delay(n)
/*
* This function intentionally does not exist; if you see references to
@@ -23,22 +36,27 @@ extern void __bad_udelay(void);
* division by multiplication: you don't have to worry about
* loss of precision.
*
- * Use only for very small delays ( < 1 msec). Should probably use a
+ * Use only for very small delays ( < 2 msec). Should probably use a
* lookup table, really, as the multiplications take much too long with
* short delays. This is a "reasonable" implementation, though (and the
* first constant multiplications gets optimized away if the delay is
* a constant)
*/
-extern void __udelay(unsigned long usecs);
-extern void __const_udelay(unsigned long);
-
-#define MAX_UDELAY_MS 2
+#define __udelay(n) arm_delay_ops.udelay(n)
+#define __const_udelay(n) arm_delay_ops.const_udelay(n)
#define udelay(n) \
(__builtin_constant_p(n) ? \
((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() : \
- __const_udelay((n) * ((2199023U*HZ)>>11))) : \
+ __const_udelay((n) * UDELAY_MULT)) : \
__udelay(n))
+/* Loop-based definitions for assembly code. */
+extern void __loop_delay(unsigned long loops);
+extern void __loop_udelay(unsigned long usecs);
+extern void __loop_const_udelay(unsigned long);
+
+#endif /* __ASSEMBLY__ */
+
#endif /* defined(_ARM_DELAY_H) */
diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h
deleted file mode 100644
index ef4c897772d1..000000000000
--- a/arch/arm/include/asm/locks.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * arch/arm/include/asm/locks.h
- *
- * Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Interrupt safe locking assembler.
- */
-#ifndef __ASM_PROC_LOCKS_H
-#define __ASM_PROC_LOCKS_H
-
-#if __LINUX_ARM_ARCH__ >= 6
-
-#define __down_op(ptr,fail) \
- ({ \
- __asm__ __volatile__( \
- "@ down_op\n" \
-"1: ldrex lr, [%0]\n" \
-" sub lr, lr, %1\n" \
-" strex ip, lr, [%0]\n" \
-" teq ip, #0\n" \
-" bne 1b\n" \
-" teq lr, #0\n" \
-" movmi ip, %0\n" \
-" blmi " #fail \
- : \
- : "r" (ptr), "I" (1) \
- : "ip", "lr", "cc"); \
- smp_mb(); \
- })
-
-#define __down_op_ret(ptr,fail) \
- ({ \
- unsigned int ret; \
- __asm__ __volatile__( \
- "@ down_op_ret\n" \
-"1: ldrex lr, [%1]\n" \
-" sub lr, lr, %2\n" \
-" strex ip, lr, [%1]\n" \
-" teq ip, #0\n" \
-" bne 1b\n" \
-" teq lr, #0\n" \
-" movmi ip, %1\n" \
-" movpl ip, #0\n" \
-" blmi " #fail "\n" \
-" mov %0, ip" \
- : "=&r" (ret) \
- : "r" (ptr), "I" (1) \
- : "ip", "lr", "cc"); \
- smp_mb(); \
- ret; \
- })
-
-#define __up_op(ptr,wake) \
- ({ \
- smp_mb(); \
- __asm__ __volatile__( \
- "@ up_op\n" \
-"1: ldrex lr, [%0]\n" \
-" add lr, lr, %1\n" \
-" strex ip, lr, [%0]\n" \
-" teq ip, #0\n" \
-" bne 1b\n" \
-" cmp lr, #0\n" \
-" movle ip, %0\n" \
-" blle " #wake \
- : \
- : "r" (ptr), "I" (1) \
- : "ip", "lr", "cc"); \
- })
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes. BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS 0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail) \
- ({ \
- __asm__ __volatile__( \
- "@ down_op_write\n" \
-"1: ldrex lr, [%0]\n" \
-" sub lr, lr, %1\n" \
-" strex ip, lr, [%0]\n" \
-" teq ip, #0\n" \
-" bne 1b\n" \
-" teq lr, #0\n" \
-" movne ip, %0\n" \
-" blne " #fail \
- : \
- : "r" (ptr), "I" (RW_LOCK_BIAS) \
- : "ip", "lr", "cc"); \
- smp_mb(); \
- })
-
-#define __up_op_write(ptr,wake) \
- ({ \
- smp_mb(); \
- __asm__ __volatile__( \
- "@ up_op_write\n" \
-"1: ldrex lr, [%0]\n" \
-" adds lr, lr, %1\n" \
-" strex ip, lr, [%0]\n" \
-" teq ip, #0\n" \
-" bne 1b\n" \
-" movcs ip, %0\n" \
-" blcs " #wake \
- : \
- : "r" (ptr), "I" (RW_LOCK_BIAS) \
- : "ip", "lr", "cc"); \
- })
-
-#define __down_op_read(ptr,fail) \
- __down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake) \
- ({ \
- smp_mb(); \
- __asm__ __volatile__( \
- "@ up_op_read\n" \
-"1: ldrex lr, [%0]\n" \
-" add lr, lr, %1\n" \
-" strex ip, lr, [%0]\n" \
-" teq ip, #0\n" \
-" bne 1b\n" \
-" teq lr, #0\n" \
-" moveq ip, %0\n" \
-" bleq " #wake \
- : \
- : "r" (ptr), "I" (1) \
- : "ip", "lr", "cc"); \
- })
-
-#else
-
-#define __down_op(ptr,fail) \
- ({ \
- __asm__ __volatile__( \
- "@ down_op\n" \
-" mrs ip, cpsr\n" \
-" orr lr, ip, #128\n" \
-" msr cpsr_c, lr\n" \
-" ldr lr, [%0]\n" \
-" subs lr, lr, %1\n" \
-" str lr, [%0]\n" \
-" msr cpsr_c, ip\n" \
-" movmi ip, %0\n" \
-" blmi " #fail \
- : \
- : "r" (ptr), "I" (1) \
- : "ip", "lr", "cc"); \
- smp_mb(); \
- })
-
-#define __down_op_ret(ptr,fail) \
- ({ \
- unsigned int ret; \
- __asm__ __volatile__( \
- "@ down_op_ret\n" \
-" mrs ip, cpsr\n" \
-" orr lr, ip, #128\n" \
-" msr cpsr_c, lr\n" \
-" ldr lr, [%1]\n" \
-" subs lr, lr, %2\n" \
-" str lr, [%1]\n" \
-" msr cpsr_c, ip\n" \
-" movmi ip, %1\n" \
-" movpl ip, #0\n" \
-" blmi " #fail "\n" \
-" mov %0, ip" \
- : "=&r" (ret) \
- : "r" (ptr), "I" (1) \
- : "ip", "lr", "cc"); \
- smp_mb(); \
- ret; \
- })
-
-#define __up_op(ptr,wake) \
- ({ \
- smp_mb(); \
- __asm__ __volatile__( \
- "@ up_op\n" \
-" mrs ip, cpsr\n" \
-" orr lr, ip, #128\n" \
-" msr cpsr_c, lr\n" \
-" ldr lr, [%0]\n" \
-" adds lr, lr, %1\n" \
-" str lr, [%0]\n" \
-" msr cpsr_c, ip\n" \
-" movle ip, %0\n" \
-" blle " #wake \
- : \
- : "r" (ptr), "I" (1) \
- : "ip", "lr", "cc"); \
- })
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes. BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS 0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail) \
- ({ \
- __asm__ __volatile__( \
- "@ down_op_write\n" \
-" mrs ip, cpsr\n" \
-" orr lr, ip, #128\n" \
-" msr cpsr_c, lr\n" \
-" ldr lr, [%0]\n" \
-" subs lr, lr, %1\n" \
-" str lr, [%0]\n" \
-" msr cpsr_c, ip\n" \
-" movne ip, %0\n" \
-" blne " #fail \
- : \
- : "r" (ptr), "I" (RW_LOCK_BIAS) \
- : "ip", "lr", "cc"); \
- smp_mb(); \
- })
-
-#define __up_op_write(ptr,wake) \
- ({ \
- __asm__ __volatile__( \
- "@ up_op_write\n" \
-" mrs ip, cpsr\n" \
-" orr lr, ip, #128\n" \
-" msr cpsr_c, lr\n" \
-" ldr lr, [%0]\n" \
-" adds lr, lr, %1\n" \
-" str lr, [%0]\n" \
-" msr cpsr_c, ip\n" \
-" movcs ip, %0\n" \
-" blcs " #wake \
- : \
- : "r" (ptr), "I" (RW_LOCK_BIAS) \
- : "ip", "lr", "cc"); \
- smp_mb(); \
- })
-
-#define __down_op_read(ptr,fail) \
- __down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake) \
- ({ \
- smp_mb(); \
- __asm__ __volatile__( \
- "@ up_op_read\n" \
-" mrs ip, cpsr\n" \
-" orr lr, ip, #128\n" \
-" msr cpsr_c, lr\n" \
-" ldr lr, [%0]\n" \
-" adds lr, lr, %1\n" \
-" str lr, [%0]\n" \
-" msr cpsr_c, ip\n" \
-" moveq ip, %0\n" \
-" bleq " #wake \
- : \
- : "r" (ptr), "I" (1) \
- : "ip", "lr", "cc"); \
- })
-
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index fcb575747e5e..e965f1b560f1 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -16,7 +16,7 @@
#include <linux/compiler.h>
#include <linux/const.h>
#include <linux/types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
#ifdef CONFIG_NEED_MACH_MEMORY_H
#include <mach/memory.h>
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 00cbe10a50e3..e074948d8143 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,21 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
-/* ARM perf PMU IDs for use by internal perf clients. */
-enum arm_perf_pmu_ids {
- ARM_PERF_PMU_ID_XSCALE1 = 0,
- ARM_PERF_PMU_ID_XSCALE2,
- ARM_PERF_PMU_ID_V6,
- ARM_PERF_PMU_ID_V6MP,
- ARM_PERF_PMU_ID_CA8,
- ARM_PERF_PMU_ID_CA9,
- ARM_PERF_PMU_ID_CA5,
- ARM_PERF_PMU_ID_CA15,
- ARM_PERF_PMU_ID_CA7,
- ARM_NUM_PMU_IDS,
-};
-
-extern enum arm_perf_pmu_ids
-armpmu_get_pmu_id(void);
+/* Nothing to see here... */
#endif /* __ARM_PERF_EVENT_H__ */
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 90114faa9f3c..4432305f4a2a 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -103,10 +103,9 @@ struct pmu_hw_events {
struct arm_pmu {
struct pmu pmu;
- enum arm_perf_pmu_ids id;
enum arm_pmu_type type;
cpumask_t active_irqs;
- const char *name;
+ char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct hw_perf_event *evt, int idx);
void (*disable)(struct hw_perf_event *evt, int idx);
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 65fa3c88095c..b4ca707d0a69 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -59,18 +59,13 @@ static inline void dsb_sev(void)
}
/*
- * ARMv6 Spin-locking.
+ * ARMv6 ticket-based spin-locking.
*
- * We exclusively read the old value. If it is zero, we may have
- * won the lock, so we try exclusively storing it. A memory barrier
- * is required after we get a lock, and before we release it, because
- * V6 CPUs are assumed to have weakly ordered memory.
- *
- * Unlocked value: 0
- * Locked value: 1
+ * A memory barrier is required after we get a lock, and before we
+ * release it, because V6 CPUs are assumed to have weakly ordered
+ * memory.
*/
-#define arch_spin_is_locked(x) ((x)->lock != 0)
#define arch_spin_unlock_wait(lock) \
do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
@@ -79,31 +74,39 @@ static inline void dsb_sev(void)
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned long tmp;
+ u32 newval;
+ arch_spinlock_t lockval;
__asm__ __volatile__(
-"1: ldrex %0, [%1]\n"
-" teq %0, #0\n"
- WFE("ne")
-" strexeq %0, %2, [%1]\n"
-" teqeq %0, #0\n"
+"1: ldrex %0, [%3]\n"
+" add %1, %0, %4\n"
+" strex %2, %1, [%3]\n"
+" teq %2, #0\n"
" bne 1b"
- : "=&r" (tmp)
- : "r" (&lock->lock), "r" (1)
+ : "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
+ : "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
+ while (lockval.tickets.next != lockval.tickets.owner) {
+ wfe();
+ lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
+ }
+
smp_mb();
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned long tmp;
+ u32 slock;
__asm__ __volatile__(
-" ldrex %0, [%1]\n"
-" teq %0, #0\n"
-" strexeq %0, %2, [%1]"
- : "=&r" (tmp)
- : "r" (&lock->lock), "r" (1)
+" ldrex %0, [%2]\n"
+" subs %1, %0, %0, ror #16\n"
+" addeq %0, %0, %3\n"
+" strexeq %1, %0, [%2]"
+ : "=&r" (slock), "=&r" (tmp)
+ : "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
if (tmp == 0) {
@@ -116,17 +119,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
+ unsigned long tmp;
+ u32 slock;
+
smp_mb();
__asm__ __volatile__(
-" str %1, [%0]\n"
- :
- : "r" (&lock->lock), "r" (0)
+" mov %1, #1\n"
+"1: ldrex %0, [%2]\n"
+" uadd16 %0, %0, %1\n"
+" strex %1, %0, [%2]\n"
+" teq %1, #0\n"
+" bne 1b"
+ : "=&r" (slock), "=&r" (tmp)
+ : "r" (&lock->slock)
: "cc");
dsb_sev();
}
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+ struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+ return tickets.owner != tickets.next;
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+ struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+ return (tickets.next - tickets.owner) > 1;
+}
+#define arch_spin_is_contended arch_spin_is_contended
+
/*
* RWLOCKS
*
@@ -158,7 +182,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
unsigned long tmp;
__asm__ __volatile__(
-"1: ldrex %0, [%1]\n"
+" ldrex %0, [%1]\n"
" teq %0, #0\n"
" strexeq %0, %2, [%1]"
: "=&r" (tmp)
@@ -244,7 +268,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
unsigned long tmp, tmp2 = 1;
__asm__ __volatile__(
-"1: ldrex %0, [%2]\n"
+" ldrex %0, [%2]\n"
" adds %0, %0, #1\n"
" strexpl %1, %0, [%2]\n"
: "=&r" (tmp), "+r" (tmp2)
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index d14d197ae04a..b262d2f8b478 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -5,11 +5,24 @@
# error "please don't include this file directly"
#endif
+#define TICKET_SHIFT 16
+
typedef struct {
- volatile unsigned int lock;
+ union {
+ u32 slock;
+ struct __raw_tickets {
+#ifdef __ARMEB__
+ u16 next;
+ u16 owner;
+#else
+ u16 owner;
+ u16 next;
+#endif
+ } tickets;
+ };
} arch_spinlock_t;
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
typedef struct {
volatile unsigned int lock;
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 3be8de3adaba..ce119442277c 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -12,13 +12,15 @@
#ifndef _ASMARM_TIMEX_H
#define _ASMARM_TIMEX_H
+#include <asm/arch_timer.h>
#include <mach/timex.h>
typedef unsigned long cycles_t;
-static inline cycles_t get_cycles (void)
-{
- return 0;
-}
+#ifdef ARCH_HAS_READ_CURRENT_TIMER
+#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+#else
+#define get_cycles() (0)
+#endif
#endif
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 71f6536d17ac..479a6352e0b5 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -189,6 +189,9 @@ static inline void set_fs(mm_segment_t fs)
#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
+#define user_addr_max() \
+ (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
+
/*
* The "__xxx" versions of the user access functions do not verify the
* address space - it must have been done previously with a separate
@@ -398,9 +401,6 @@ extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned l
#define __clear_user(addr,n) (memset((void __force *)addr, 0, n), 0)
#endif
-extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count);
-extern unsigned long __must_check __strnlen_user(const char __user *s, long n);
-
static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
{
if (access_ok(VERIFY_READ, from, n))
@@ -427,24 +427,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
return n;
}
-static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res = -EFAULT;
- if (access_ok(VERIFY_READ, src, 1))
- res = __strncpy_from_user(dst, src, count);
- return res;
-}
-
-#define strlen_user(s) strnlen_user(s, ~0UL >> 1)
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
-static inline long __must_check strnlen_user(const char __user *s, long n)
-{
- unsigned long res = 0;
-
- if (__addr_ok(s))
- res = __strnlen_user(s, n);
-
- return res;
-}
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
#endif /* _ASMARM_UACCESS_H */
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..4d52f92967a6
--- /dev/null
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -0,0 +1,96 @@
+#ifndef __ASM_ARM_WORD_AT_A_TIME_H
+#define __ASM_ARM_WORD_AT_A_TIME_H
+
+#ifndef __ARMEB__
+
+/*
+ * Little-endian word-at-a-time zero byte handling.
+ * Heavily based on the x86 algorithm.
+ */
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+ const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
+ const struct word_at_a_time *c)
+{
+ unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+ *bits = mask;
+ return mask;
+}
+
+#define prep_zero_mask(a, bits, c) (bits)
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+ unsigned long ret;
+
+#if __LINUX_ARM_ARCH__ >= 5
+ /* We have clz available. */
+ ret = fls(mask) >> 3;
+#else
+ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+ ret = (0x0ff0001 + mask) >> 23;
+ /* Fix the 1 for 00 case */
+ ret &= mask;
+#endif
+
+ return ret;
+}
+
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#define zero_bytemask(mask) (mask)
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+ unsigned long ret, offset;
+
+ /* Load word from unaligned pointer addr */
+ asm(
+ "1: ldr %0, [%2]\n"
+ "2:\n"
+ " .pushsection .fixup,\"ax\"\n"
+ " .align 2\n"
+ "3: and %1, %2, #0x3\n"
+ " bic %2, %2, #0x3\n"
+ " ldr %0, [%2]\n"
+ " lsl %1, %1, #0x3\n"
+ " lsr %0, %0, %1\n"
+ " b 2b\n"
+ " .popsection\n"
+ " .pushsection __ex_table,\"a\"\n"
+ " .align 3\n"
+ " .long 1b, 3b\n"
+ " .popsection"
+ : "=&r" (ret), "=&r" (offset)
+ : "r" (addr), "Qo" (*(unsigned long *)addr));
+
+ return ret;
+}
+
+
+#endif /* DCACHE_WORD_ACCESS */
+
+#else /* __ARMEB__ */
+#include <asm-generic/word-at-a-time.h>
+#endif
+
+#endif /* __ASM_ARM_WORD_AT_A_TIME_H */