From a42548a18866e87092db93b771e6c5b060d78401 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka
Date: Wed, 29 Jul 2009 12:15:29 +0200
Subject: cputime: Optimize jiffies_to_cputime(1)

For powerpc with CONFIG_VIRT_CPU_ACCOUNTING, jiffies_to_cputime(1) is
not a compile-time constant and run-time calculations are quite
expensive.  To optimize, we use a precomputed value.  For all other
architectures it is a preprocessor definition.

Signed-off-by: Stanislaw Gruszka
Acked-by: Peter Zijlstra
Acked-by: Thomas Gleixner
Cc: Oleg Nesterov
Cc: Andrew Morton
Cc: Paul Mackerras
Cc: Benjamin Herrenschmidt
LKML-Reference: <1248862529-6063-5-git-send-email-sgruszka@redhat.com>
Signed-off-by: Ingo Molnar
---
 arch/ia64/include/asm/cputime.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index d20b998cb91d..7fa8a8594660 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -30,6 +30,7 @@ typedef u64 cputime_t;
 typedef u64 cputime64_t;
 
 #define cputime_zero			((cputime_t)0)
+#define cputime_one_jiffy		jiffies_to_cputime(1)
 #define cputime_max			((~((cputime_t)0) >> 1) - 1)
 #define cputime_add(__a, __b)		((__a) + (__b))
 #define cputime_sub(__a, __b)		((__a) - (__b))
--
cgit v1.2.3

From 90f72aa58bbf076b68e289fbd71eb829bc505923 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 21 Sep 2009 17:03:45 -0700
Subject: mm: add MAP_HUGETLB for mmaping pseudo-anonymous huge page regions

Add a flag for mmap that will be used to request a huge page region that
will look like anonymous memory to user space.  This is accomplished by
using a file on the internal vfsmount.  MAP_HUGETLB is a modifier of
MAP_ANONYMOUS and so must be specified with it.  The region will behave
the same as a MAP_ANONYMOUS region using small pages.

The patch also adds the MAP_STACK flag, which was previously defined only
on some architectures but not on others.  Since MAP_STACK is meant to be
a hint only, architectures can define it without assigning a specific
meaning to it.

Signed-off-by: Arnd Bergmann
Cc: Eric B Munson
Cc: Hugh Dickins
Cc: David Rientjes
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/ia64/include/asm/mman.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
index 48cf8b98a0b4..cf55884e7f39 100644
--- a/arch/ia64/include/asm/mman.h
+++ b/arch/ia64/include/asm/mman.h
@@ -18,6 +18,8 @@
 #define MAP_NORESERVE	0x04000		/* don't check for reservations */
 #define MAP_POPULATE	0x08000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
--
cgit v1.2.3
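A usage sketch for the new flag (not part of the patch above; the 2 MiB
huge page size, the fallback define, and the error handling are
illustrative assumptions):

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* value from the ia64 hunk above */
#endif

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* assumed huge page size */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (p == MAP_FAILED) {		/* e.g. no huge pages reserved */
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}
	/* behaves like an anonymous small-page mapping from here on */
	munmap(p, len);
	return 0;
}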
From 6e17b17f1fc7b2f24383a693d63550d9e1460081 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 21 Sep 2009 17:03:48 -0700
Subject: mm: remove duplicate asm/mman.h files

A number of architectures have identical asm/mman.h files so they can
all be merged by using the new generic file.  The remaining asm/mman.h
files are substantially different from each other.

Signed-off-by: Arnd Bergmann
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/ia64/include/asm/mman.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
index cf55884e7f39..4459028e5aa8 100644
--- a/arch/ia64/include/asm/mman.h
+++ b/arch/ia64/include/asm/mman.h
@@ -8,21 +8,9 @@
  * David Mosberger-Tang, Hewlett-Packard Co
  */
 
-#include <asm-generic/mman-common.h>
+#include <asm-generic/mman.h>
 
-#define MAP_GROWSDOWN	0x00100	/* stack-like segment */
-#define MAP_GROWSUP	0x00200	/* register stack-like segment */
-#define MAP_DENYWRITE	0x00800	/* ETXTBSY */
-#define MAP_EXECUTABLE	0x01000	/* mark it as an executable */
-#define MAP_LOCKED	0x02000	/* pages are locked */
-#define MAP_NORESERVE	0x04000	/* don't check for reservations */
-#define MAP_POPULATE	0x08000	/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000	/* do not block on IO */
-#define MAP_STACK	0x20000	/* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB	0x40000	/* create a huge page mapping */
-
-#define MCL_CURRENT	1	/* lock all current mappings */
-#define MCL_FUTURE	2	/* lock all future mappings */
+#define MAP_GROWSUP	0x0200	/* register stack-like segment */
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
--
cgit v1.2.3

From 29c337a034b5526e80a785409d15d3b7c7edecf4 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 24 Sep 2009 09:34:26 -0600
Subject: cpumask: remove obsolete node_to_cpumask now everyone uses
 cpumask_of_node

Signed-off-by: Rusty Russell
---
 arch/ia64/include/asm/topology.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index d0141fbf51d0..e85da7f1db56 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -33,7 +33,6 @@
 /*
  * Returns a bitmask of CPUs on Node 'node'.
  */
-#define node_to_cpumask(node)	(node_to_cpu_mask[node])
 #define cpumask_of_node(node)	(&node_to_cpu_mask[node])
 
 /*
--
cgit v1.2.3

From e50a6f19537362ed61f7f74af724345975e602ed Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 24 Sep 2009 09:34:42 -0600
Subject: cpumask: remove obsolete topology_core_siblings and
 topology_thread_siblings: ia64

They were replaced by topology_core_cpumask and topology_thread_cpumask.

Signed-off-by: Rusty Russell
---
 arch/ia64/include/asm/topology.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index e85da7f1db56..3ddb4e709dba 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -103,8 +103,6 @@ void build_cpu_to_node_map(void);
 #ifdef CONFIG_SMP
 #define topology_physical_package_id(cpu)	(cpu_data(cpu)->socket_id)
 #define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
-#define topology_core_siblings(cpu)		(cpu_core_map[cpu])
-#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #define topology_core_cpumask(cpu)		(&cpu_core_map[cpu])
 #define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 #define smt_capable()				(smp_num_siblings > 1)
--
cgit v1.2.3
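An illustrative caller for the surviving interfaces (a sketch, not from
these patches; the function name and message are invented):

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/topology.h>

/* With node_to_cpumask() gone, callers use the pointer-returning
 * cpumask_of_node() and iterate the mask directly. */
static void print_node_cpus(int node)
{
	int cpu;

	for_each_cpu(cpu, cpumask_of_node(node))
		pr_info("node %d: cpu %d core %d\n",
			node, cpu, topology_core_id(cpu));
}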
From 0748bd01773395003208996c4c0b3f80caf80976 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 24 Sep 2009 09:34:46 -0600
Subject: cpumask: remove arch_send_call_function_ipi

Now everyone is converted to arch_send_call_function_ipi_mask, remove
the shim and the #defines.

Signed-off-by: Rusty Russell
---
 arch/ia64/include/asm/smp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index d217d1d4e051..0b3b3997decd 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -127,7 +127,6 @@ extern int is_multithreading_enabled(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #else /* CONFIG_SMP */
--
cgit v1.2.3
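For context, the generic cross-call API that sits on top of the hook
kept above (an invented example; smp_call_function_many() is the path
that ends up invoking arch_send_call_function_ipi_mask()):

#include <linux/kernel.h>
#include <linux/smp.h>

static void note_cpu(void *info)
{
	pr_info("cpu %d handled cross-call\n", smp_processor_id());
}

static void poke_cpus(const struct cpumask *mask)
{
	preempt_disable();	/* smp_call_function_many() requires it */
	/* wait=1: return only after every targeted cpu ran note_cpu() */
	smp_call_function_many(mask, note_cpu, NULL, 1);
	preempt_enable();
}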
From 2c86963b093c1a0887dfc6b32c6e5ea3a80f2922 Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Fri, 25 Sep 2009 08:42:16 -0700
Subject: [IA64] implement ticket locks for Itanium

Back in January 2008 Nick Piggin implemented "ticket" spinlocks for X86
(see commit 314cdbefd1fd0a7acf3780e9628465b77ea6a836).  The IA64
implementation has a couple of differences because of the available
atomic operations: e.g. we have no fetchadd2 instruction that operates
on a 16-bit quantity, so we make ticket locks use a 32-bit word for each
of the current-ticket and now-serving values.

Performance on uncontended locks is about 8% worse than the previous
implementation, but this seems a good trade for determinism in the
contended case.  Performance impact on macro-level benchmarks is in the
noise.

Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/spinlock.h       | 175 +++++++++++++++++----------------
 arch/ia64/include/asm/spinlock_types.h |   2 +-
 2 files changed, 90 insertions(+), 87 deletions(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 13ab71576bc7..30bb930e1111 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -19,103 +19,106 @@
 
 #define __raw_spin_lock_init(x)			((x)->lock = 0)
 
-#ifdef ASM_SUPPORTED
 /*
- * Try to get the lock.  If we fail to get the lock, make a non-standard call to
- * ia64_spinlock_contention().  We do not use a normal call because that would force all
- * callers of __raw_spin_lock() to be non-leaf routines.  Instead, ia64_spinlock_contention() is
- * carefully coded to touch only those registers that __raw_spin_lock() marks "clobbered".
+ * Ticket locks are conceptually two parts, one indicating the current head of
+ * the queue, and the other indicating the current tail. The lock is acquired
+ * by atomically noting the tail and incrementing it by one (thus adding
+ * ourself to the queue and noting our position), then waiting until the head
+ * becomes equal to the initial value of the tail.
+ *
+ *  63                     32  31                       0
+ *  +----------------------------------------------------+
+ *  |  next_ticket_number      |  now_serving            |
+ *  +----------------------------------------------------+
  */
 
-#define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory"
+#define TICKET_SHIFT	32
 
-static inline void
-__raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags)
+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
-	register volatile unsigned int *ptr asm ("r31") = &lock->lock;
-
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-# ifdef CONFIG_ITANIUM
-	/* don't use brl on Itanium... */
-	asm volatile ("{\n\t"
-		      "  mov ar.ccv = r0\n\t"
-		      "  mov r28 = ip\n\t"
-		      "  mov r30 = 1;;\n\t"
-		      "}\n\t"
-		      "cmpxchg4.acq r30 = [%1], r30, ar.ccv\n\t"
-		      "movl r29 = ia64_spinlock_contention_pre3_4;;\n\t"
-		      "cmp4.ne p14, p0 = r30, r0\n\t"
-		      "mov b6 = r29;;\n\t"
-		      "mov r27=%2\n\t"
-		      "(p14) br.cond.spnt.many b6"
-		      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# else
-	asm volatile ("{\n\t"
-		      "  mov ar.ccv = r0\n\t"
-		      "  mov r28 = ip\n\t"
-		      "  mov r30 = 1;;\n\t"
-		      "}\n\t"
-		      "cmpxchg4.acq r30 = [%1], r30, ar.ccv;;\n\t"
-		      "cmp4.ne p14, p0 = r30, r0\n\t"
-		      "mov r27=%2\n\t"
-		      "(p14) brl.cond.spnt.many ia64_spinlock_contention_pre3_4;;"
-		      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# endif /* CONFIG_MCKINLEY */
-#else
-# ifdef CONFIG_ITANIUM
-	/* don't use brl on Itanium... */
-	/* mis-declare, so we get the entry-point, not its function descriptor: */
-	asm volatile ("mov r30 = 1\n\t"
-		      "mov r27=%2\n\t"
-		      "mov ar.ccv = r0;;\n\t"
-		      "cmpxchg4.acq r30 = [%0], r30, ar.ccv\n\t"
-		      "movl r29 = ia64_spinlock_contention;;\n\t"
-		      "cmp4.ne p14, p0 = r30, r0\n\t"
-		      "mov b6 = r29;;\n\t"
-		      "(p14) br.call.spnt.many b6 = b6"
-		      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# else
-	asm volatile ("mov r30 = 1\n\t"
-		      "mov r27=%2\n\t"
-		      "mov ar.ccv = r0;;\n\t"
-		      "cmpxchg4.acq r30 = [%0], r30, ar.ccv;;\n\t"
-		      "cmp4.ne p14, p0 = r30, r0\n\t"
-		      "(p14) brl.call.spnt.many b6=ia64_spinlock_contention;;"
-		      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# endif /* CONFIG_MCKINLEY */
-#endif
+	int	*p = (int *)&lock->lock, turn, now_serving;
+
+	now_serving = *p;
+	turn = ia64_fetchadd(1, p+1, acq);
+
+	if (turn == now_serving)
+		return;
+
+	do {
+		cpu_relax();
+	} while (ACCESS_ONCE(*p) != turn);
 }
 
-#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock), try;
 
-/* Unlock by doing an ordered store and releasing the cacheline with nta */
-static inline void __raw_spin_unlock(raw_spinlock_t *x) {
-	barrier();
-	asm volatile ("st4.rel.nta [%0] = r0\n\t" :: "r"(x));
+	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1))) {
+		try = tmp + (1L << TICKET_SHIFT);
+
+		return ia64_cmpxchg(acq, &lock->lock, tmp, try, sizeof (tmp)) == tmp;
+	}
+	return 0;
 }
 
-#else /* !ASM_SUPPORTED */
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+{
+	int	*p = (int *)&lock->lock;
+
+	(void)ia64_fetchadd(1, p, rel);
+}
+
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock);
+
+	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1));
+}
+
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock);
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-# define __raw_spin_lock(x)								\
-do {											\
-	__u32 *ia64_spinlock_ptr = (__u32 *) (x);					\
-	__u64 ia64_spinlock_val;							\
-	ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);			\
-	if (unlikely(ia64_spinlock_val)) {						\
-		do {									\
-			while (*ia64_spinlock_ptr)					\
-				ia64_barrier();						\
-			ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-		} while (ia64_spinlock_val);						\
-	}										\
-} while (0)
-#define __raw_spin_unlock(x)	do { barrier(); ((raw_spinlock_t *) x)->lock = 0; } while (0)
-#endif /* !ASM_SUPPORTED */
-
-#define __raw_spin_is_locked(x)	((x)->lock != 0)
-#define __raw_spin_trylock(x)	(cmpxchg_acq(&(x)->lock, 0, 1) == 0)
-#define __raw_spin_unlock_wait(lock) \
-	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
+	return (((tmp >> TICKET_SHIFT) - tmp) & ((1L << TICKET_SHIFT) - 1)) > 1;
+}
+
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+	return __ticket_spin_is_locked(lock);
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+	return __ticket_spin_is_contended(lock);
+}
+#define __raw_spin_is_contended	__raw_spin_is_contended
+
+static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+	__ticket_spin_lock(lock);
+}
+
+static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	return __ticket_spin_trylock(lock);
+}
+
+static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	__ticket_spin_unlock(lock);
+}
+
+static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+						  unsigned long flags)
+{
+	__raw_spin_lock(lock);
+}
+
+static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+{
+	while (__raw_spin_is_locked(lock))
+		cpu_relax();
+}
 
 #define __raw_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
 #define __raw_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index 474e46f1ab4a..b61d136d9bc2 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
 #endif
 
 typedef struct {
-	volatile unsigned int lock;
+	volatile unsigned long lock;
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
--
cgit v1.2.3
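For readers new to the scheme, a portable sketch of the same algorithm
using C11 atomics (an illustration only; the kernel code above packs
both counters into one word and uses the ia64 fetchadd primitive, while
this sketch keeps two separate counters for clarity):

#include <stdatomic.h>
#include <stdint.h>

typedef struct {
	_Atomic uint32_t next;		/* next ticket to hand out (tail) */
	_Atomic uint32_t serving;	/* ticket now being served (head) */
} ticket_lock;

static void ticket_lock_acquire(ticket_lock *l)
{
	/* atomically take a ticket, i.e. join the queue */
	uint32_t me = atomic_fetch_add_explicit(&l->next, 1,
						memory_order_relaxed);

	/* spin until our number comes up */
	while (atomic_load_explicit(&l->serving,
				    memory_order_acquire) != me)
		;	/* the kernel would cpu_relax() here */
}

static void ticket_lock_release(ticket_lock *l)
{
	/* hand the lock to the next waiter, in strict FIFO order */
	atomic_fetch_add_explicit(&l->serving, 1, memory_order_release);
}

FIFO ordering is what buys the determinism the commit message mentions:
a simple test-and-set lock lets an unlucky CPU lose the race forever,
while a ticket lock bounds each waiter's delay by the queue length.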
From e56d953d190061938b31cabbe01b7f3d76c60bd0 Mon Sep 17 00:00:00 2001
From: Len Brown
Date: Sun, 27 Sep 2009 04:17:21 -0400
Subject: ACPI: IA64=y ACPI=n build fix

ia64's sim_defconfig uses CONFIG_ACPI=n, which now #define's
acpi_disabled in <linux/acpi.h>, so we shouldn't re-define it here
in <asm/acpi.h>.

Signed-off-by: Len Brown
---
 arch/ia64/include/asm/acpi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index 0f82cc2934e1..91df9686a0da 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -89,10 +89,12 @@ ia64_acpi_release_global_lock (unsigned int *lock)
 #define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
 	((Acq) = ia64_acpi_release_global_lock(&facs->global_lock))
 
+#ifdef CONFIG_ACPI
 #define acpi_disabled 0	/* ACPI always enabled on IA64 */
 #define acpi_noirq 0	/* ACPI always enabled on IA64 */
 #define acpi_pci_disabled 0	/* ACPI PCI always enabled on IA64 */
 #define acpi_strict 1	/* no ACPI spec workarounds on IA64 */
+#endif
 
 #define acpi_processor_cstate_check(x) (x)	/* no idle limits on IA64 :) */
 static inline void disable_acpi(void) { }
--
cgit v1.2.3
From 9d40ee200a527ce08ab8c793ba8ae3e242edbb0e Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Wed, 7 Oct 2009 10:54:19 -0700
Subject: [IA64] Squeeze ticket locks back into 4 bytes.

Linus pointed out that other people have spent large amounts of time
and effort to optimize the layout of frequently used structures.  Often
these have embedded locks, and the assumption is that a lock takes
4 bytes.  Linus also pointed out how to work with the limited options
for atomic instructions on Itanium.

Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/spinlock.h       | 45 ++++++++++++++++++++--------------
 arch/ia64/include/asm/spinlock_types.h |  2 +-
 2 files changed, 27 insertions(+), 20 deletions(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 30bb930e1111..4fa502739d64 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -25,61 +25,68 @@
  * by atomically noting the tail and incrementing it by one (thus adding
  * ourself to the queue and noting our position), then waiting until the head
  * becomes equal to the initial value of the tail.
+ * The pad bits in the middle are used to prevent the next_ticket number
+ * overflowing into the now_serving number.
  *
- *  63                     32  31                       0
+ *  31             17  16    15  14                     0
  *  +----------------------------------------------------+
- *  |  next_ticket_number      |  now_serving            |
+ *  |  now_serving     | padding |   next_ticket         |
  *  +----------------------------------------------------+
  */
 
-#define TICKET_SHIFT	32
+#define TICKET_SHIFT	17
+#define TICKET_BITS	15
+#define TICKET_MASK	((1 << TICKET_BITS) - 1)
 
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
-	int	*p = (int *)&lock->lock, turn, now_serving;
+	int	*p = (int *)&lock->lock, ticket, serve;
 
-	now_serving = *p;
-	turn = ia64_fetchadd(1, p+1, acq);
+	ticket = ia64_fetchadd(1, p, acq);
 
-	if (turn == now_serving)
+	if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
 		return;
 
-	do {
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory");
+
+		if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+			return;
 		cpu_relax();
-	} while (ACCESS_ONCE(*p) != turn);
+	}
 }
 
 static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
-	long tmp = ACCESS_ONCE(lock->lock), try;
+	int tmp = ACCESS_ONCE(lock->lock);
 
-	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1))) {
-		try = tmp + (1L << TICKET_SHIFT);
-
-		return ia64_cmpxchg(acq, &lock->lock, tmp, try, sizeof (tmp)) == tmp;
-	}
+	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK))
+		return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp;
 	return 0;
 }
 
 static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 {
-	int	*p = (int *)&lock->lock;
+	unsigned short	*p = (unsigned short *)&lock->lock + 1, tmp;
 
-	(void)ia64_fetchadd(1, p, rel);
+	asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
+	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
 }
 
 static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
 
-	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1));
+	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK);
 }
 
 static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
 
-	return (((tmp >> TICKET_SHIFT) - tmp) & ((1L << TICKET_SHIFT) - 1)) > 1;
+	return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1;
 }
 
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index b61d136d9bc2..474e46f1ab4a 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
 #endif
 
 typedef struct {
-	volatile unsigned long lock;
+	volatile unsigned int lock;
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
--
cgit v1.2.3
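To make the packed layout concrete, a small worked example of the field
arithmetic (a user-space sketch, not kernel code; the sample values are
invented):

#include <stdio.h>

#define TICKET_SHIFT	17
#define TICKET_BITS	15
#define TICKET_MASK	((1 << TICKET_BITS) - 1)

int main(void)
{
	/* sample word: now_serving = 5, next_ticket = 7 */
	unsigned int lock = (5u << TICKET_SHIFT) | 7u;
	unsigned int next  = lock & TICKET_MASK;
	unsigned int serve = (lock >> TICKET_SHIFT) & TICKET_MASK;

	/* locked iff the fields differ -- cf. __ticket_spin_is_locked() */
	printf("next=%u serve=%u locked=%d queued=%u\n",
	       next, serve, next != serve, (next - serve) & TICKET_MASK);
	return 0;
}

With these values the word decodes to next=7, serve=5: the lock is held
and one more CPU is queued, which is exactly the "> 1" test in
__ticket_spin_is_contended().  The unlock path's "(tmp + 2) & ~1" is the
same arithmetic seen through the upper halfword: now_serving starts at
bit 1 there, so +2 advances it by one and "& ~1" keeps the padding bit
clear.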
From 1502f08edc040b6ba4b986454416564088995e79 Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Mon, 12 Oct 2009 09:51:41 -0700
Subject: [IA64] SMT friendly version of spin_unlock_wait()

We can be kinder to SMT systems in spin_unlock_wait.

Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/spinlock.h | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 4fa502739d64..239ecdc9516d 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -75,6 +75,20 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
 }
 
+static __always_inline void __ticket_spin_unlock_wait(raw_spinlock_t *lock)
+{
+	int	*p = (int *)&lock->lock, ticket;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
+		if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+			return;
+		cpu_relax();
+	}
+}
+
 static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
@@ -123,8 +137,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
-	while (__raw_spin_is_locked(lock))
-		cpu_relax();
+	__ticket_spin_unlock_wait(lock);
 }
--
cgit v1.2.3

From b94b08081fcecf83fa690d6c5664f6316fe72208 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Wed, 14 Oct 2009 15:10:03 -0700
Subject: [IA64] fix percpu warnings

Fix percpu types warning in ia64/sn:

  arch/ia64/sn/kernel/setup.c:74: error: conflicting types for '__pcpu_scope___sn_cnodeid_to_nasid'
  arch/ia64/include/asm/sn/arch.h:74: error: previous declaration of '__pcpu_scope___sn_cnodeid_to_nasid' was here

Signed-off-by: Randy Dunlap
Cc: Jes Sorensen
Acked-by: Tejun Heo
Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/sn/arch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/sn/arch.h b/arch/ia64/include/asm/sn/arch.h
index 7caa1f44cd95..f5f493b0c077 100644
--- a/arch/ia64/include/asm/sn/arch.h
+++ b/arch/ia64/include/asm/sn/arch.h
@@ -71,7 +71,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
  * Compact node ID to nasid mappings kept in the per-cpu data areas of each
  * cpu.
  */
-DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
+DECLARE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid);
 
 #define sn_cnodeid_to_nasid	(&__get_cpu_var(__sn_cnodeid_to_nasid[0]))
--
cgit v1.2.3
From e8c93fc7b7221b6ac7e7ddbd0e21e205bf9e801a Mon Sep 17 00:00:00 2001
From: Tony Luck
Date: Mon, 2 Nov 2009 09:23:08 -0800
Subject: Revert "[IA64] fix percpu warnings"

This reverts commit b94b08081fcecf83fa690d6c5664f6316fe72208.

genksyms currently cannot handle complicated types for exported percpu
variables.  Drop this patch for now as it prevents a module from being
loaded on sn2 systems:

  xpc: no symbol version for per_cpu____sn_cnodeid_to_nasid
  xpc: Unknown symbol per_cpu____sn_cnodeid_to_nasid

Signed-off-by: Tony Luck
---
 arch/ia64/include/asm/sn/arch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/ia64/include')

diff --git a/arch/ia64/include/asm/sn/arch.h b/arch/ia64/include/asm/sn/arch.h
index f5f493b0c077..7caa1f44cd95 100644
--- a/arch/ia64/include/asm/sn/arch.h
+++ b/arch/ia64/include/asm/sn/arch.h
@@ -71,7 +71,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
  * Compact node ID to nasid mappings kept in the per-cpu data areas of each
  * cpu.
  */
-DECLARE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid);
+DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
 
 #define sn_cnodeid_to_nasid	(&__get_cpu_var(__sn_cnodeid_to_nasid[0]))
--
cgit v1.2.3
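For reference, the two declaration forms this pair of commits toggles
between (a sketch; the MAX_COMPACT_NODES value is assumed, and only the
placement of the array dimension differs):

#include <linux/percpu.h>

#define MAX_COMPACT_NODES 2048	/* value assumed for illustration */

/* Form restored by the revert: scalar element type, dimension on the
 * name.  genksyms can compute a symbol version for this one. */
DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);

/* Form from the reverted patch: array type, bare name.  It fixed the
 * conflicting-types build error but genksyms emitted no symbol version
 * for it, so the xpc module failed to load:
 *
 *	DECLARE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid);
 */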