From a6fa8e5a6172a5a5bc06ed04f34e50b36c978127 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 30 Jan 2008 13:30:00 +0100
Subject: time: clean hungarian notation from timers

Clean up hungarian notation from timer code.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/timer.c | 80 ++++++++++++++++++++++++++++------------------------------
 1 file changed, 39 insertions(+), 41 deletions(-)

(limited to 'kernel')

diff --git a/kernel/timer.c b/kernel/timer.c
index f739dfb539ce..aadfbc8367f5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -58,59 +58,57 @@ EXPORT_SYMBOL(jiffies_64);
 #define TVN_MASK (TVN_SIZE - 1)
 #define TVR_MASK (TVR_SIZE - 1)
 
-typedef struct tvec_s {
+struct tvec {
 	struct list_head vec[TVN_SIZE];
-} tvec_t;
+};
 
-typedef struct tvec_root_s {
+struct tvec_root {
 	struct list_head vec[TVR_SIZE];
-} tvec_root_t;
+};
 
-struct tvec_t_base_s {
+struct tvec_base {
 	spinlock_t lock;
 	struct timer_list *running_timer;
 	unsigned long timer_jiffies;
-	tvec_root_t tv1;
-	tvec_t tv2;
-	tvec_t tv3;
-	tvec_t tv4;
-	tvec_t tv5;
+	struct tvec_root tv1;
+	struct tvec tv2;
+	struct tvec tv3;
+	struct tvec tv4;
+	struct tvec tv5;
 } ____cacheline_aligned;
 
-typedef struct tvec_t_base_s tvec_base_t;
-
-tvec_base_t boot_tvec_bases;
+struct tvec_base boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
+static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /*
- * Note that all tvec_bases is 2 byte aligned and lower bit of
+ * Note that all tvec_bases are 2 byte aligned and lower bit of
  * base in timer_list is guaranteed to be zero. Use the LSB for
  * the new flag to indicate whether the timer is deferrable
  */
 #define TBASE_DEFERRABLE_FLAG		(0x1)
 
 /* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
+static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
 {
 	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
 }
 
-static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
 {
-	return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
+	return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void timer_set_deferrable(struct timer_list *timer)
 {
-	timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
+	timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
 				       TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void
-timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
+timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
 {
-	timer->base = (tvec_base_t *)((unsigned long)(new_base) |
+	timer->base = (struct tvec_base *)((unsigned long)(new_base) |
 				      tbase_get_deferrable(timer->base));
 }
 
@@ -246,7 +244,7 @@ unsigned long round_jiffies_relative(unsigned long j)
 EXPORT_SYMBOL_GPL(round_jiffies_relative);
 
 
-static inline void set_running_timer(tvec_base_t *base,
+static inline void set_running_timer(struct tvec_base *base,
 					struct timer_list *timer)
 {
 #ifdef CONFIG_SMP
@@ -254,7 +252,7 @@ static inline void set_running_timer(tvec_base_t *base,
 #endif
 }
 
-static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
 	unsigned long expires = timer->expires;
 	unsigned long idx = expires - base->timer_jiffies;
@@ -371,14 +369,14 @@ static inline void detach_timer(struct timer_list *timer,
  * possible to set timer->base = NULL and drop the lock: the timer remains
  * locked.
  */
-static tvec_base_t *lock_timer_base(struct timer_list *timer,
+static struct tvec_base *lock_timer_base(struct timer_list *timer,
 					unsigned long *flags)
 	__acquires(timer->base->lock)
 {
-	tvec_base_t *base;
+	struct tvec_base *base;
 
 	for (;;) {
-		tvec_base_t *prelock_base = timer->base;
+		struct tvec_base *prelock_base = timer->base;
 		base = tbase_get_base(prelock_base);
 		if (likely(base != NULL)) {
 			spin_lock_irqsave(&base->lock, *flags);
@@ -393,7 +391,7 @@ static tvec_base_t *lock_timer_base(struct timer_list *timer,
 
 int __mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	tvec_base_t *base, *new_base;
+	struct tvec_base *base, *new_base;
 	unsigned long flags;
 	int ret = 0;
 
@@ -445,7 +443,7 @@ EXPORT_SYMBOL(__mod_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-	tvec_base_t *base = per_cpu(tvec_bases, cpu);
+	struct tvec_base *base = per_cpu(tvec_bases, cpu);
 	unsigned long flags;
 
 	timer_stats_timer_set_start_info(timer);
@@ -508,7 +506,7 @@ EXPORT_SYMBOL(mod_timer);
  */
 int del_timer(struct timer_list *timer)
 {
-	tvec_base_t *base;
+	struct tvec_base *base;
 	unsigned long flags;
 	int ret = 0;
 
@@ -539,7 +537,7 @@ EXPORT_SYMBOL(del_timer);
  */
 int try_to_del_timer_sync(struct timer_list *timer)
 {
-	tvec_base_t *base;
+	struct tvec_base *base;
 	unsigned long flags;
 	int ret = -1;
 
@@ -591,7 +589,7 @@ int del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(del_timer_sync);
 #endif
 
-static int cascade(tvec_base_t *base, tvec_t *tv, int index)
+static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 {
 	/* cascade all the timers from tv up one level */
 	struct timer_list *timer, *tmp;
@@ -620,7 +618,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
  * This function cascades all vectors and executes all expired timer
  * vectors.
  */
-static inline void __run_timers(tvec_base_t *base)
+static inline void __run_timers(struct tvec_base *base)
 {
 	struct timer_list *timer;
 
@@ -678,13 +676,13 @@ static inline void __run_timers(tvec_base_t *base)
  * is used on S/390 to stop all activity when a cpus is idle.
  * This functions needs to be called disabled.
  */
-static unsigned long __next_timer_interrupt(tvec_base_t *base)
+static unsigned long __next_timer_interrupt(struct tvec_base *base)
 {
 	unsigned long timer_jiffies = base->timer_jiffies;
 	unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
 	int index, slot, array, found = 0;
 	struct timer_list *nte;
-	tvec_t *varray[4];
+	struct tvec *varray[4];
 
 	/* Look for timer events in tv1. */
 	index = slot = timer_jiffies & TVR_MASK;
@@ -716,7 +714,7 @@ cascade:
 	varray[3] = &base->tv5;
 
 	for (array = 0; array < 4; array++) {
-		tvec_t *varp = varray[array];
+		struct tvec *varp = varray[array];
 
 		index = slot = timer_jiffies & TVN_MASK;
 		do {
@@ -795,7 +793,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
  */
 unsigned long get_next_timer_interrupt(unsigned long now)
 {
-	tvec_base_t *base = __get_cpu_var(tvec_bases);
+	struct tvec_base *base = __get_cpu_var(tvec_bases);
 	unsigned long expires;
 
 	spin_lock(&base->lock);
@@ -894,7 +892,7 @@ static inline void calc_load(unsigned long ticks)
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-	tvec_base_t *base = __get_cpu_var(tvec_bases);
+	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
 	hrtimer_run_pending();
 
@@ -1223,7 +1221,7 @@ static struct lock_class_key base_lock_keys[NR_CPUS];
 static int __cpuinit init_timers_cpu(int cpu)
 {
 	int j;
-	tvec_base_t *base;
+	struct tvec_base *base;
 	static char __cpuinitdata tvec_base_done[NR_CPUS];
 
 	if (!tvec_base_done[cpu]) {
@@ -1278,7 +1276,7 @@ static int __cpuinit init_timers_cpu(int cpu)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
 {
 	struct timer_list *timer;
 
@@ -1292,8 +1290,8 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
 
 static void __cpuinit migrate_timers(int cpu)
 {
-	tvec_base_t *old_base;
-	tvec_base_t *new_base;
+	struct tvec_base *old_base;
+	struct tvec_base *new_base;
 	int i;
 
 	BUG_ON(cpu_online(cpu));
-- 
cgit v1.2.3


From 4c9dc6412247abf4972080c51cd16a58c4009c19 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 30 Jan 2008 13:30:00 +0100
Subject: time: timer cleanups

Small cleanups to tick-related code. Wrong preempt count is followed
by BUG(), so it is hardly KERN_WARNING.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 4 ++--
 kernel/timer.c           | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1a21b6fdb674..d36ee2fd1a3b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -296,7 +296,7 @@ void tick_nohz_stop_sched_tick(void)
 			/* Check, if the timer was already in the past */
 			if (hrtimer_active(&ts->sched_timer))
 				goto out;
-		} else if(!tick_program_event(expires, 0))
+		} else if (!tick_program_event(expires, 0))
 				goto out;
 		/*
 		 * We are past the event already. So we crossed a
@@ -507,7 +507,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
  */
 #ifdef CONFIG_HIGH_RES_TIMERS
 /*
- * We rearm the timer until we get disabled by the idle code
+ * We rearm the timer until we get disabled by the idle code.
  * Called with interrupts disabled and timer->base->cpu_base->lock held.
  */
 static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
diff --git a/kernel/timer.c b/kernel/timer.c
index aadfbc8367f5..23f7ead78fae 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -655,7 +655,7 @@ static inline void __run_timers(struct tvec_base *base)
 				int preempt_count = preempt_count();
 				fn(data);
 				if (preempt_count != preempt_count()) {
-					printk(KERN_WARNING "huh, entered %p "
+					printk(KERN_ERR "huh, entered %p "
 					       "with preempt_count %08x, exited"
 					       " with %08x?\n",
 					       fn, preempt_count,
-- 
cgit v1.2.3


From b10db7f0d2b589a7f88dc3026e150756cb437a28 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 30 Jan 2008 13:30:00 +0100
Subject: time: more timer related cleanups

I was confused by FSEC = 10^15 NSEC statement, plus small whitespace
fixes. When there's copyright, there should be GPL.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/softirq.c          | 4 +++-
 kernel/time/tick-sched.c  | 2 +-
 kernel/time/timer_stats.c | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/softirq.c b/kernel/softirq.c
index bd89bc4eb0b9..8fe1ff40102d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -3,7 +3,9 @@
  *
  *	Copyright (C) 1992 Linus Torvalds
  *
- * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *	Distribute under GPLv2.
+ *
+ *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
  */
 
 #include <linux/module.h>
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d36ee2fd1a3b..49e12f6a4bab 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -9,7 +9,7 @@
  *
  *  Started by: Thomas Gleixner and Ingo Molnar
  *
- *  For licencing details see kernel-base/COPYING
+ *  Distribute under GPLv2.
  */
 #include <linux/cpu.h>
 #include <linux/err.h>
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index c36bb7ed0301..417da8c5bc72 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -26,7 +26,7 @@
  * the pid and cmdline from the owner process if applicable.
  *
  * Start/stop data collection:
- * # echo 1[0] >/proc/timer_stats
+ * # echo [1|0] >/proc/timer_stats
  *
  * Display the information collected so far:
  * # cat /proc/timer_stats
-- 
cgit v1.2.3


From 186e3cb8a465bac010ee3b020768d2fa2b505aef Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:01 +0100
Subject: timer: clean up tick-broadcast.c

clean up tick-broadcast.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/time/tick-broadcast.c | 7 ++-----
 kernel/time/tick-internal.h  | 2 --
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 5b86698faa0b..e1bd50cbbf5d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -126,9 +126,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 /*
  * Broadcast the event to the cpus, which are set in the mask
  */
-int tick_do_broadcast(cpumask_t mask)
+static void tick_do_broadcast(cpumask_t mask)
 {
-	int ret = 0, cpu = smp_processor_id();
+	int cpu = smp_processor_id();
 	struct tick_device *td;
 
 	/*
@@ -138,7 +138,6 @@ int tick_do_broadcast(cpumask_t mask)
 		cpu_clear(cpu, mask);
 		td = &per_cpu(tick_cpu_device, cpu);
 		td->evtdev->event_handler(td->evtdev);
-		ret = 1;
 	}
 
 	if (!cpus_empty(mask)) {
@@ -151,9 +150,7 @@ int tick_do_broadcast(cpumask_t mask)
 		cpu = first_cpu(mask);
 		td = &per_cpu(tick_cpu_device, cpu);
 		td->evtdev->broadcast(mask);
-		ret = 1;
 	}
-	return ret;
 }
 
 /*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index bb13f2724905..f13f2b7f4fd4 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -70,8 +70,6 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  * Broadcasting support
  */
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern int tick_do_broadcast(cpumask_t mask);
-
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
 extern int tick_check_broadcast_device(struct clock_event_device *dev);
 extern int tick_is_broadcast_device(struct clock_event_device *dev);
-- 
cgit v1.2.3


From efd9ac8630e89b9ee7ce64008bd7783952374f37 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Wed, 30 Jan 2008 13:30:01 +0100
Subject: time: fold __get_realtime_clock_ts() into getnstimeofday()

  - getnstimeofday() was just a wrapper around __get_realtime_clock_ts()
  - Replace calls to __get_realtime_clock_ts() by calls to getnstimeofday()
  - Fix bogus reference to get_realtime_clock_ts(), which never existed

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index ab46ae8c062b..77680195cf84 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -82,13 +82,12 @@ static inline s64 __get_nsec_offset(void)
 }
 
 /**
- * __get_realtime_clock_ts - Returns the time of day in a timespec
+ * getnstimeofday - Returns the time of day in a timespec
  * @ts:		pointer to the timespec to be set
  *
- * Returns the time of day in a timespec. Used by
- * do_gettimeofday() and get_realtime_clock_ts().
+ * Returns the time of day in a timespec.
  */
-static inline void __get_realtime_clock_ts(struct timespec *ts)
+void getnstimeofday(struct timespec *ts)
 {
 	unsigned long seq;
 	s64 nsecs;
@@ -104,30 +103,19 @@ static inline void __get_realtime_clock_ts(struct timespec *ts)
 	timespec_add_ns(ts, nsecs);
 }
 
-/**
- * getnstimeofday - Returns the time of day in a timespec
- * @ts:		pointer to the timespec to be set
- *
- * Returns the time of day in a timespec.
- */
-void getnstimeofday(struct timespec *ts)
-{
-	__get_realtime_clock_ts(ts);
-}
-
 EXPORT_SYMBOL(getnstimeofday);
 
 /**
  * do_gettimeofday - Returns the time of day in a timeval
  * @tv:		pointer to the timeval to be set
  *
- * NOTE: Users should be converted to using get_realtime_clock_ts()
+ * NOTE: Users should be converted to using getnstimeofday()
  */
 void do_gettimeofday(struct timeval *tv)
 {
 	struct timespec now;
 
-	__get_realtime_clock_ts(&now);
+	getnstimeofday(&now);
 	tv->tv_sec = now.tv_sec;
 	tv->tv_usec = now.tv_nsec/1000;
 }
-- 
cgit v1.2.3


From 1077f5a917b7c630231037826b344b2f7f5b903f Mon Sep 17 00:00:00 2001
From: Parag Warudkar <parag.warudkar@gmail.com>
Date: Wed, 30 Jan 2008 13:30:01 +0100
Subject: clocksource.c: use init_timer_deferrable for clocksource_watchdog

clocksource_watchdog can use a deferrable timer - reduces wakeups from
idle per second.

Signed-off-by: Parag Warudkar <parag.warudkar@gmail.com>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/clocksource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 8d6125ad2cf0..cabfa193efb3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -175,7 +175,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 			if (watchdog)
 				del_timer(&watchdog_timer);
 			watchdog = cs;
-			init_timer(&watchdog_timer);
+			init_timer_deferrable(&watchdog_timer);
 			watchdog_timer.function = clocksource_watchdog;
 
 			/* Reset watchdog cycles */
-- 
cgit v1.2.3


From 1ada5cba6a0318f90e45b38557e7b5206a9cba38 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:30:02 +0100
Subject: clocksource: make clocksource watchdog cycle through online CPUs

This way it checks if the clocks are synchronized between CPUs too.
This might be able to detect slowly drifting TSCs which only
go wrong over longer time.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/clocksource.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cabfa193efb3..edd5ef8e1765 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,8 +142,13 @@ static void clocksource_watchdog(unsigned long data)
 	}
 
 	if (!list_empty(&watchdog_list)) {
-		__mod_timer(&watchdog_timer,
-			    watchdog_timer.expires + WATCHDOG_INTERVAL);
+		/* Cycle through CPUs to check if the CPUs stay synchronized to
+		 * each other. */
+		int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map);
+		if (next_cpu >= NR_CPUS)
+			next_cpu = first_cpu(cpu_online_map);
+		watchdog_timer.expires += WATCHDOG_INTERVAL;
+		add_timer_on(&watchdog_timer, next_cpu);
 	}
 	spin_unlock(&watchdog_lock);
 }
@@ -165,7 +170,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 		if (!started && watchdog) {
 			watchdog_last = watchdog->read();
 			watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
-			add_timer(&watchdog_timer);
+			add_timer_on(&watchdog_timer, first_cpu(cpu_online_map));
 		}
 	} else {
 		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
@@ -186,7 +191,8 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 				watchdog_last = watchdog->read();
 				watchdog_timer.expires =
 					jiffies + WATCHDOG_INTERVAL;
-				add_timer(&watchdog_timer);
+				add_timer_on(&watchdog_timer,
+						first_cpu(cpu_online_map));
 			}
 		}
 	}
-- 
cgit v1.2.3


From 4713e22ce81eb8b3353e16435362eb3d0ec95640 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 30 Jan 2008 13:30:02 +0100
Subject: clocksource: add unregister function to disable unusable clocksources

On x86 the PIT might become an unusable clocksource. Add an unregister
function to provide a possibilty to remove the PIT from the list of
available clock sources.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/time/clocksource.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'kernel')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index edd5ef8e1765..6e9259a5d501 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -337,6 +337,21 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
 	spin_unlock_irqrestore(&clocksource_lock, flags);
 }
 
+/**
+ * clocksource_unregister - remove a registered clocksource
+ */
+void clocksource_unregister(struct clocksource *cs)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clocksource_lock, flags);
+	list_del(&cs->list);
+	if (clocksource_override == cs)
+		clocksource_override = NULL;
+	next_clocksource = select_clocksource();
+	spin_unlock_irqrestore(&clocksource_lock, flags);
+}
+
 #ifdef CONFIG_SYSFS
 /**
  * sysfs_show_current_clocksources - sysfs interface for current clocksource
-- 
cgit v1.2.3


From 45fe4fe19120a22f7339f5bb110447170c25fca9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:03 +0100
Subject: x86: make clockevents more robust

detect zero event-device multiplicators - they then cause
division-by-zero crashes if a clockevent has been initialized
incorrectly.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/clockevents.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'kernel')

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 5fb139fef9fa..3e59fce6dd43 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -41,6 +41,11 @@ unsigned long clockevent_delta2ns(unsigned long latch,
 {
 	u64 clc = ((u64) latch << evt->shift);
 
+	if (unlikely(!evt->mult)) {
+		evt->mult = 1;
+		WARN_ON(1);
+	}
+
 	do_div(clc, evt->mult);
 	if (clc < 1000)
 		clc = 1000;
@@ -151,6 +156,14 @@ static void clockevents_notify_released(void)
 void clockevents_register_device(struct clock_event_device *dev)
 {
 	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+	/*
+	 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
+	 * on it, so fix it up and emit a warning:
+	 */
+	if (unlikely(!dev->mult)) {
+		dev->mult = 1;
+		WARN_ON(1);
+	}
 
 	spin_lock(&clockevents_lock);
 
-- 
cgit v1.2.3


From bbe4d18ac2e058c56adb0cd71f49d9ed3216a405 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Wed, 30 Jan 2008 13:30:03 +0100
Subject: NTP: correct inconsistent ntp interval/tick_length usage

I recently noticed on one of my boxes that when synched with an NTP
server, the drift value reported for the system was ~283ppm. While in
some cases, clock hardware can be that bad, it struck me as unusual as
the system was using the acpi_pm clocksource, which is one of the more
trustworthy and accurate clocksources on x86 hardware.

I brought up another system and let it sync to the same NTP server, and
I noticed a similar 280some ppm drift.

In looking at the code, I found that the acpi_pm's constant frequency
was being computed correctly at boot-up, however once the system was up,
even without the ntp daemon running, the clocksource's frequency was
being modified by the clocksource_adjust() function.

Digging deeper, I realized that in the code that keeps track of how much
the clocksource is skewing from the ntp desired time, we were using
different lengths to establish how long an time interval was.

The clocksource was being setup with the following interval:
	NTP_INTERVAL_LENGTH = NSEC_PER_SEC/NTP_INTERVAL_FREQ

While the ntp code was using the tick_length_base value:
	tick_length_base ~= (tick_usec * NSEC_PER_USEC * USER_HZ)
					/NTP_INTERVAL_FREQ

The subtle difference is:
	(tick_usec * NSEC_PER_USEC * USER_HZ) != NSEC_PER_SEC

This difference in calculation was causing the clocksource correction
code to apply a correction factor to the clocksource so the two
intervals were the same, however this results in the actual frequency of
the clocksource to be made incorrect. I believe this difference would
affect all clocksources, although to differing degrees depending on the
clocksource resolution.

The issue was introduced when my HZ free ntp patch landed in 2.6.21-rc1,
so my apologies for the mistake, and for not noticing it until now.

The following patch, corrects the clocksource's initialization code so
it uses the same interval length as the code in ntp.c. After applying
this patch, the drift value for the same system went from ~283ppm to
only 2.635ppm.

I believe this patch to be good, however it does affect all arches and
I've only tested on x86, so some caution is advised. I do think it would
be a likely candidate for a stable 2.6.24.x release.

Any thoughts or feedback would be appreciated.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 77680195cf84..092a2366b5a9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -186,7 +186,8 @@ static void change_clocksource(void)
 
 	clock->error = 0;
 	clock->xtime_nsec = 0;
-	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+	clocksource_calculate_interval(clock,
+		(unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT));
 
 	tick_clock_notify();
 
@@ -243,7 +244,8 @@ void __init timekeeping_init(void)
 	ntp_clear();
 
 	clock = clocksource_get_next();
-	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+	clocksource_calculate_interval(clock,
+		(unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT));
 	clock->cycle_last = clocksource_read(clock);
 
 	xtime.tv_sec = sec;
-- 
cgit v1.2.3


From 6378ddb592158db4b42197f1bc8666228800e379 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed, 30 Jan 2008 13:30:04 +0100
Subject: time: track accurate idle time with tick_sched.idle_sleeptime

Current idle time in kstat is based on jiffies and is coarse grained.
tick_sched.idle_sleeptime is making some attempt to keep track of idle time
in a fine grained manner.  But, it is not handling the time spent in
interrupts fully.

Make tick_sched.idle_sleeptime accurate with respect to time spent on
handling interrupts and also add tick_sched.idle_lastupdate, which keeps
track of last time when idle_sleeptime was updated.

This statistics will be crucial for cpufreq-ondemand governor, which can
shed some conservative gaurd band that is uses today while setting the
frequency.  The ondemand changes that uses the exact idle time is coming
soon.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/softirq.c         |  7 ++++-
 kernel/time/tick-sched.c | 70 +++++++++++++++++++++++++++++++++---------------
 2 files changed, 54 insertions(+), 23 deletions(-)

(limited to 'kernel')

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8fe1ff40102d..d7837d45419e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -280,9 +280,14 @@ asmlinkage void do_softirq(void)
  */
 void irq_enter(void)
 {
+#ifdef CONFIG_NO_HZ
+	int cpu = smp_processor_id();
+	if (idle_cpu(cpu) && !in_interrupt())
+		tick_nohz_stop_idle(cpu);
+#endif
 	__irq_enter();
 #ifdef CONFIG_NO_HZ
-	if (idle_cpu(smp_processor_id()))
+	if (idle_cpu(cpu))
 		tick_nohz_update_jiffies();
 #endif
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 49e12f6a4bab..63f24b550695 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -143,6 +143,44 @@ void tick_nohz_update_jiffies(void)
 	local_irq_restore(flags);
 }
 
+void tick_nohz_stop_idle(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (ts->idle_active) {
+		ktime_t now, delta;
+		now = ktime_get();
+		delta = ktime_sub(now, ts->idle_entrytime);
+		ts->idle_lastupdate = now;
+		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+		ts->idle_active = 0;
+	}
+}
+
+static ktime_t tick_nohz_start_idle(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	ktime_t now, delta;
+
+	now = ktime_get();
+	if (ts->idle_active) {
+		delta = ktime_sub(now, ts->idle_entrytime);
+		ts->idle_lastupdate = now;
+		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+	}
+	ts->idle_entrytime = now;
+	ts->idle_active = 1;
+	return now;
+}
+
+u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	*last_update_time = ktime_to_us(ts->idle_lastupdate);
+	return ktime_to_us(ts->idle_sleeptime);
+}
+
 /**
  * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
  *
@@ -155,13 +193,14 @@ void tick_nohz_stop_sched_tick(void)
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
 	unsigned long rt_jiffies;
 	struct tick_sched *ts;
-	ktime_t last_update, expires, now, delta;
+	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	int cpu;
 
 	local_irq_save(flags);
 
 	cpu = smp_processor_id();
+	now = tick_nohz_start_idle(cpu);
 	ts = &per_cpu(tick_cpu_sched, cpu);
 
 	/*
@@ -193,19 +232,7 @@ void tick_nohz_stop_sched_tick(void)
 		}
 	}
 
-	now = ktime_get();
-	/*
-	 * When called from irq_exit we need to account the idle sleep time
-	 * correctly.
-	 */
-	if (ts->tick_stopped) {
-		delta = ktime_sub(now, ts->idle_entrytime);
-		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-	}
-
-	ts->idle_entrytime = now;
 	ts->idle_calls++;
-
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
 		seq = read_seqbegin(&xtime_lock);
@@ -337,23 +364,22 @@ void tick_nohz_restart_sched_tick(void)
 	int cpu = smp_processor_id();
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	unsigned long ticks;
-	ktime_t now, delta;
+	ktime_t now;
 
-	if (!ts->tick_stopped)
+	local_irq_disable();
+	tick_nohz_stop_idle(cpu);
+
+	if (!ts->tick_stopped) {
+		local_irq_enable();
 		return;
+	}
 
 	/* Update jiffies first */
-	now = ktime_get();
-
-	local_irq_disable();
 	select_nohz_load_balancer(0);
+	now = ktime_get();
 	tick_do_update_jiffies64(now);
 	cpu_clear(cpu, nohz_cpu_mask);
 
-	/* Account the idle time */
-	delta = ktime_sub(now, ts->idle_entrytime);
-	ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick
-- 
cgit v1.2.3


From 6e7c402590b75b6b45138792445ee0f0315a8473 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:30:05 +0100
Subject: x86: various changes and cleanups to in_p/out_p delay details

various changes to the in_p/out_p delay details:

- add the io_delay=none method
- make each method selectable from the kernel config
- simplify the delay code a bit by getting rid of an indirect function call
- add the /proc/sys/kernel/io_delay_type sysctl
- change 'io_delay=standard|alternate' to io_delay=0x80 and io_delay=0xed
- make the io delay config not depend on CONFIG_DEBUG_KERNEL

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: "David P. Reed" <dpreed@reed.com>
---
 kernel/sysctl.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4bc8e48434a7..357b68ba23ec 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -53,6 +53,7 @@
 #ifdef CONFIG_X86
 #include <asm/nmi.h>
 #include <asm/stacktrace.h>
+#include <asm/io.h>
 #endif
 
 static int deprecated_sysctl_warning(struct __sysctl_args *args);
@@ -727,6 +728,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "io_delay_type",
+		.data		= &io_delay_type,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 #if defined(CONFIG_MMU)
 	{
-- 
cgit v1.2.3


From 36df29d7994180cf7f0ccc5d46495855d56d2721 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:51 +0100
Subject: ptrace: generic resume

This makes ptrace_request handle all the ptrace requests that wake
up the traced task.  These do low-level ptrace implementation magic
that is not arch-specific and should be kept out of arch code.  The
implementations on each arch usually do the same thing.  The new
generic code makes use of the arch_has_single_step macro and generic
entry points to handle PTRACE_SINGLESTEP.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/ptrace.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index c719bb9d79ab..fad5f1e8511f 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -366,6 +366,50 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
 	return error;
 }
 
+
+#ifdef PTRACE_SINGLESTEP
+#define is_singlestep(request)		((request) == PTRACE_SINGLESTEP)
+#else
+#define is_singlestep(request)		0
+#endif
+
+#ifdef PTRACE_SYSEMU
+#define is_sysemu_singlestep(request)	((request) == PTRACE_SYSEMU_SINGLESTEP)
+#else
+#define is_sysemu_singlestep(request)	0
+#endif
+
+static int ptrace_resume(struct task_struct *child, long request, long data)
+{
+	if (!valid_signal(data))
+		return -EIO;
+
+	if (request == PTRACE_SYSCALL)
+		set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+	else
+		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+
+#ifdef TIF_SYSCALL_EMU
+	if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
+		set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+	else
+		clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
+
+	if (is_singlestep(request) || is_sysemu_singlestep(request)) {
+		if (unlikely(!arch_has_single_step()))
+			return -EIO;
+		user_enable_single_step(child);
+	}
+	else
+		user_disable_single_step(child);
+
+	child->exit_code = data;
+	wake_up_process(child);
+
+	return 0;
+}
+
 int ptrace_request(struct task_struct *child, long request,
 		   long addr, long data)
 {
@@ -390,6 +434,23 @@ int ptrace_request(struct task_struct *child, long request,
 	case PTRACE_DETACH:	 /* detach a process that was attached. */
 		ret = ptrace_detach(child, data);
 		break;
+
+#ifdef PTRACE_SINGLESTEP
+	case PTRACE_SINGLESTEP:
+#endif
+#ifdef PTRACE_SYSEMU
+	case PTRACE_SYSEMU:
+	case PTRACE_SYSEMU_SINGLESTEP:
+#endif
+	case PTRACE_SYSCALL:
+	case PTRACE_CONT:
+		return ptrace_resume(child, request, data);
+
+	case PTRACE_KILL:
+		if (child->exit_state)	/* already dead */
+			return 0;
+		return ptrace_resume(child, request, SIGKILL);
+
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 5b88abbf770a0e1975c668743100f42934f385e8 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:30:53 +0100
Subject: ptrace: generic PTRACE_SINGLEBLOCK

This makes ptrace_request handle PTRACE_SINGLEBLOCK along with
PTRACE_CONT et al.  The new generic code makes use of the
arch_has_block_step macro and generic entry points on machines
that define them.

[ mingo@elte.hu: bugfix ]

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/ptrace.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index fad5f1e8511f..973d727f5e84 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -373,6 +373,12 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
 #define is_singlestep(request)		0
 #endif
 
+#ifdef PTRACE_SINGLEBLOCK
+#define is_singleblock(request)		((request) == PTRACE_SINGLEBLOCK)
+#else
+#define is_singleblock(request)		0
+#endif
+
 #ifdef PTRACE_SYSEMU
 #define is_sysemu_singlestep(request)	((request) == PTRACE_SYSEMU_SINGLESTEP)
 #else
@@ -396,7 +402,11 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
 		clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
 
-	if (is_singlestep(request) || is_sysemu_singlestep(request)) {
+	if (is_singleblock(request)) {
+		if (unlikely(!arch_has_block_step()))
+			return -EIO;
+		user_enable_block_step(child);
+	} else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
 		if (unlikely(!arch_has_single_step()))
 			return -EIO;
 		user_enable_single_step(child);
@@ -438,6 +448,9 @@ int ptrace_request(struct task_struct *child, long request,
 #ifdef PTRACE_SINGLESTEP
 	case PTRACE_SINGLESTEP:
 #endif
+#ifdef PTRACE_SINGLEBLOCK
+	case PTRACE_SINGLEBLOCK:
+#endif
 #ifdef PTRACE_SYSEMU
 	case PTRACE_SYSEMU:
 	case PTRACE_SYSEMU_SINGLESTEP:
-- 
cgit v1.2.3


From 65ea5b0349903585bfed9720fa06f5edb4f1cd25 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 30 Jan 2008 13:30:56 +0100
Subject: x86: rename the struct pt_regs members for 32/64-bit consistency

We have a lot of code which differs only by the naming of specific
members of structures that contain registers.  In order to enable
additional unifications, this patch drops the e- or r- size prefix
from the register names in struct pt_regs, and drops the x- prefixes
for segment registers on the 32-bit side.

This patch also performs the equivalent renames in some additional
places that might be candidates for unification in the future.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/signal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index afa4f781f924..bf49ce6f016b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -733,13 +733,13 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
 		current->comm, task_pid_nr(current), signr);
 
 #if defined(__i386__) && !defined(__arch_um__)
-	printk("code at %08lx: ", regs->eip);
+	printk("code at %08lx: ", regs->ip);
 	{
 		int i;
 		for (i = 0; i < 16; i++) {
 			unsigned char insn;
 
-			__get_user(insn, (unsigned char *)(regs->eip + i));
+			__get_user(insn, (unsigned char *)(regs->ip + i));
 			printk("%02x ", insn);
 		}
 	}
-- 
cgit v1.2.3


From 95c354fe9f7d6decc08a92aa26eb233ecc2155bf Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 30 Jan 2008 13:31:20 +0100
Subject: spinlock: lockbreak cleanup

The break_lock data structure and code for spinlocks is quite nasty.
Not only does it double the size of a spinlock but it changes locking to
a potentially less optimal trylock.

Put all of that under CONFIG_GENERIC_LOCKBREAK, and introduce a
__raw_spin_is_contended that uses the lock data itself to determine whether
there are waiters on the lock, to be used if CONFIG_GENERIC_LOCKBREAK is
not set.

Rename need_lockbreak to spin_needbreak, make it use spin_is_contended to
decouple it from the spinlock implementation, and make it typesafe (rwlocks
do not have any need_lockbreak sites -- why do they even get bloated up
with that break_lock then?).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/sched.c    | 16 ++++++----------
 kernel/spinlock.c |  3 +--
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 524285e46fa7..ba4c88088f62 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4945,19 +4945,15 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int cond_resched_lock(spinlock_t *lock)
 {
+	int resched = need_resched() && system_state == SYSTEM_RUNNING;
 	int ret = 0;
 
-	if (need_lockbreak(lock)) {
+	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
-		cpu_relax();
-		ret = 1;
-		spin_lock(lock);
-	}
-	if (need_resched() && system_state == SYSTEM_RUNNING) {
-		spin_release(&lock->dep_map, 1, _THIS_IP_);
-		_raw_spin_unlock(lock);
-		preempt_enable_no_resched();
-		__cond_resched();
+		if (resched && need_resched())
+			__cond_resched();
+		else
+			cpu_relax();
 		ret = 1;
 		spin_lock(lock);
 	}
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index cd72424c2662..ae28c8245123 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -65,8 +65,7 @@ EXPORT_SYMBOL(_write_trylock);
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
-	defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
 void __lockfunc _read_lock(rwlock_t *lock)
 {
-- 
cgit v1.2.3


From 16c3e389e7a7254ff8dc7029ac4fbe996c3c75bf Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:47 +0100
Subject: x86: ptrace_request peekdata/pokedata

This makes ptrace_request handle {PEEK,POKE}{TEXT,DATA} directly.
Every arch_ptrace that could call generic_ptrace_peekdata already
has a default case calling ptrace_request, so this keeps things
simpler for the arch code.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/ptrace.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 973d727f5e84..e6a99d2793b3 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -426,6 +426,13 @@ int ptrace_request(struct task_struct *child, long request,
 	int ret = -EIO;
 
 	switch (request) {
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
+		return generic_ptrace_peekdata(child, addr, data);
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEDATA:
+		return generic_ptrace_pokedata(child, addr, data);
+
 #ifdef PTRACE_OLDSETOPTIONS
 	case PTRACE_OLDSETOPTIONS:
 #endif
-- 
cgit v1.2.3


From 032d82d9065dec0e26718eca376c2029e4bd0595 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:47 +0100
Subject: x86: compat_ptrace_request

This adds a compat_ptrace_request that is the analogue of ptrace_request
for the things that 32-on-64 ptrace implementations can share in common.
So far there are just a couple of requests handled generically.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/ptrace.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e6a99d2793b3..ed1c3d56c2cd 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -607,3 +607,41 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
 	copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
 	return (copied == sizeof(data)) ? 0 : -EIO;
 }
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_ptrace_request(struct task_struct *child, compat_long_t request,
+			  compat_ulong_t addr, compat_ulong_t data)
+{
+	compat_ulong_t __user *datap = compat_ptr(data);
+	compat_ulong_t word;
+	int ret;
+
+	switch (request) {
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
+		ret = access_process_vm(child, addr, &word, sizeof(word), 0);
+		if (ret != sizeof(word))
+			ret = -EIO;
+		else
+			ret = put_user(word, datap);
+		break;
+
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEDATA:
+		ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+		ret = (ret != sizeof(data) ? -EIO : 0);
+		break;
+
+	case PTRACE_GETEVENTMSG:
+		ret = put_user((compat_ulong_t) child->ptrace_message, datap);
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+	}
+
+	return ret;
+}
+#endif	/* CONFIG_COMPAT */
-- 
cgit v1.2.3


From c269f19617f508cc5c29c0b064c1a437d7011a46 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 30 Jan 2008 13:31:48 +0100
Subject: x86: compat_sys_ptrace

This adds a generic definition of compat_sys_ptrace that calls
compat_arch_ptrace, parallel to sys_ptrace/arch_ptrace.  Some
machines needing this already define a function by that name.
The new generic function is defined only on machines that
put #define __ARCH_WANT_COMPAT_SYS_PTRACE into asm/ptrace.h.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/ptrace.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ed1c3d56c2cd..e6e9b8be4b05 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -644,4 +644,50 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 
 	return ret;
 }
+
+#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+				  compat_long_t addr, compat_long_t data)
+{
+	struct task_struct *child;
+	long ret;
+
+	/*
+	 * This lock_kernel fixes a subtle race with suid exec
+	 */
+	lock_kernel();
+	if (request == PTRACE_TRACEME) {
+		ret = ptrace_traceme();
+		goto out;
+	}
+
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
+		goto out;
+	}
+
+	if (request == PTRACE_ATTACH) {
+		ret = ptrace_attach(child);
+		/*
+		 * Some architectures need to do book-keeping after
+		 * a ptrace attach.
+		 */
+		if (!ret)
+			arch_ptrace_attach(child);
+		goto out_put_task_struct;
+	}
+
+	ret = ptrace_check_attach(child, request == PTRACE_KILL);
+	if (!ret)
+		ret = compat_arch_ptrace(child, request, addr, data);
+
+ out_put_task_struct:
+	put_task_struct(child);
+ out:
+	unlock_kernel();
+	return ret;
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
+
 #endif	/* CONFIG_COMPAT */
-- 
cgit v1.2.3


From 9e094c17ee2b85130ab7b2ea37456f6867eb687a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:48 +0100
Subject: genirq: turn irq debugging options into module params

This allows to change them at runtime using sysfs. No need to
reboot to set them.

I only added aliases (kernel.noirqdebug etc.) so the old options
still work.

Signed-off-by: Andi Kleen <ak@suse.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/spurious.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 32b161972fad..a6b2bc831dd0 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
+#include <linux/moduleparam.h>
 
 static int irqfixup __read_mostly;
 
@@ -225,6 +226,8 @@ int noirqdebug_setup(char *str)
 }
 
 __setup("noirqdebug", noirqdebug_setup);
+module_param(noirqdebug, bool, 0644);
+MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
 
 static int __init irqfixup_setup(char *str)
 {
@@ -236,6 +239,8 @@ static int __init irqfixup_setup(char *str)
 }
 
 __setup("irqfixup", irqfixup_setup);
+module_param(irqfixup, int, 0644);
+MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode");
 
 static int __init irqpoll_setup(char *str)
 {
-- 
cgit v1.2.3


From 96d97cf03b3d68e6c857623da93acd522b2b7e1a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 30 Jan 2008 13:32:48 +0100
Subject: x86: add /proc/irq/*/spurious to dump the spurious irq debugging
 state

This is useful to debug problems with interrupt handlers that return
sometimes IRQ_NONE.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/proc.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 50b81b98046a..c2f2ccb0549a 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -75,6 +75,18 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
 
 #endif
 
+static int irq_spurious_read(char *page, char **start, off_t off,
+				  int count, int *eof, void *data)
+{
+	struct irq_desc *d = &irq_desc[(long) data];
+	return sprintf(page, "count %u\n"
+			     "unhandled %u\n"
+			     "last_unhandled %u ms\n",
+			d->irq_count,
+			d->irqs_unhandled,
+			jiffies_to_msecs(d->last_unhandled));
+}
+
 #define MAX_NAMELEN 128
 
 static int name_unique(unsigned int irq, struct irqaction *new_action)
@@ -118,6 +130,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 void register_irq_proc(unsigned int irq)
 {
 	char name [MAX_NAMELEN];
+	struct proc_dir_entry *entry;
 
 	if (!root_irq_dir ||
 		(irq_desc[irq].chip == &no_irq_chip) ||
@@ -132,8 +145,6 @@ void register_irq_proc(unsigned int irq)
 
 #ifdef CONFIG_SMP
 	{
-		struct proc_dir_entry *entry;
-
 		/* create /proc/irq/<irq>/smp_affinity */
 		entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
 
@@ -144,6 +155,12 @@ void register_irq_proc(unsigned int irq)
 		}
 	}
 #endif
+
+	entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
+	if (entry) {
+		entry->data = (void *)(long)irq;
+		entry->read_proc = irq_spurious_read;
+	}
 }
 
 #undef MAX_NAMELEN
-- 
cgit v1.2.3


From 79b4cc5ee7a8086ac2c9c0afa52e6d687ce1ffef Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Jan 2008 13:32:50 +0100
Subject: debug: move WARN_ON() out of line

A quick grep shows that there are currently 1145 instances of WARN_ON
in the kernel. Currently, WARN_ON is pretty much entirely inlined,
which makes it hard to enhance it without growing the size of the kernel
(and getting Andrew unhappy).

This patch build on top of Olof's patch that introduces __WARN,
and places the slowpath out of line. It also uses Ingo's suggestion
to not use __FUNCTION__ but to use kallsyms to do the lookup;
this saves a ton of extra space since gcc doesn't need to store the function
string twice now:

3936367  833603  624736 5394706  525112 vmlinux.before
3917508  833603  624736 5375847  520767 vmlinux-slowpath

15Kb savings...

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Olof Johansson <olof@lixom.net>
Acked-by: Matt Meckall <mpm@selenic.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/panic.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'kernel')

diff --git a/kernel/panic.c b/kernel/panic.c
index da4d6bac270e..0ebea438278a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -20,6 +20,7 @@
 #include <linux/kexec.h>
 #include <linux/debug_locks.h>
 #include <linux/random.h>
+#include <linux/kallsyms.h>
 
 int panic_on_oops;
 int tainted;
@@ -292,6 +293,20 @@ void oops_exit(void)
 		(unsigned long long)oops_id);
 }
 
+#ifdef WANT_WARN_ON_SLOWPATH
+void warn_on_slowpath(const char *file, int line)
+{
+	char function[KSYM_SYMBOL_LEN];
+	unsigned long caller = (unsigned long) __builtin_return_address(0);
+
+	sprint_symbol(function, caller);
+	printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
+		line, function);
+	dump_stack();
+}
+EXPORT_SYMBOL(warn_on_slowpath);
+#endif
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 /*
  * Called when gcc's -fstack-protector feature is used, and
-- 
cgit v1.2.3


From 71c339116a216b181fc5e203ef51a033fe5e38cf Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 30 Jan 2008 13:32:50 +0100
Subject: debug: add the end-of-trace marker and the module list to

Unlike oopses, WARN_ON() currently does't print the loaded modules list.
This makes it harder to take action on certain bug reports. For example,
recently there were a set of WARN_ON()s reported in the mac80211 stack,
which were just signalling a driver bug. It takes then anther round trip
to the bug reporter (if he responds at all) to find out which driver
is at fault.

Another issue is that, unlike oopses, WARN_ON() doesn't currently printk
the helpful "cut here" line, nor the "end of trace" marker.
Now that WARN_ON() is out of line, the size increase due to this is
minimal and it's worth adding.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/panic.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/panic.c b/kernel/panic.c
index 0ebea438278a..d9e90cfe3298 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -281,6 +281,13 @@ static int init_oops_id(void)
 }
 late_initcall(init_oops_id);
 
+static void print_oops_end_marker(void)
+{
+	init_oops_id();
+	printk(KERN_WARNING "---[ end trace %016llx ]---\n",
+		(unsigned long long)oops_id);
+}
+
 /*
  * Called when the architecture exits its oops handler, after printing
  * everything.
@@ -288,9 +295,7 @@ late_initcall(init_oops_id);
 void oops_exit(void)
 {
 	do_oops_enter_exit();
-	init_oops_id();
-	printk(KERN_WARNING "---[ end trace %016llx ]---\n",
-		(unsigned long long)oops_id);
+	print_oops_end_marker();
 }
 
 #ifdef WANT_WARN_ON_SLOWPATH
@@ -298,11 +303,14 @@ void warn_on_slowpath(const char *file, int line)
 {
 	char function[KSYM_SYMBOL_LEN];
 	unsigned long caller = (unsigned long) __builtin_return_address(0);
-
 	sprint_symbol(function, caller);
+
+	printk(KERN_WARNING "------------[ cut here ]------------\n");
 	printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
 		line, function);
+	print_modules();
 	dump_stack();
+	print_oops_end_marker();
 }
 EXPORT_SYMBOL(warn_on_slowpath);
 #endif
-- 
cgit v1.2.3


From 8c1c9356429741a82ff176d0f3400fb9e06b2a30 Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Date: Wed, 30 Jan 2008 13:32:53 +0100
Subject: x86: kprobes: add kprobes smoke tests that run on boot

Here is a quick and naive smoke test for kprobes. This is intended to
just verify if some unrelated change broke the *probes subsystem. It is
self contained, architecture agnostic and isn't of any great use by itself.

This needs to be built in the kernel and runs a basic set of tests to
verify if kprobes, jprobes and kretprobes run fine on the kernel. In case
of an error, it'll print out a message with a "BUG" prefix.

This is a start; we intend to add more tests to this bucket over time.

Thanks to Jim Keniston and Masami Hiramatsu for comments and suggestions.

Tested on x86 (32/64) and powerpc.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/Makefile       |   1 +
 kernel/kprobes.c      |   2 +
 kernel/test_kprobes.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 219 insertions(+)
 create mode 100644 kernel/test_kprobes.c

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index 390d42146267..62015c3d8d91 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
+obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e3a5d817ac9b..d0493eafea3e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -824,6 +824,8 @@ static int __init init_kprobes(void)
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
 
+	if (!err)
+		init_test_probes();
 	return err;
 }
 
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
new file mode 100644
index 000000000000..88cdb109e13c
--- /dev/null
+++ b/kernel/test_kprobes.c
@@ -0,0 +1,216 @@
+/*
+ * test_kprobes.c - simple sanity test for *probes
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+
+#define div_factor 3
+
+static u32 rand1, preh_val, posth_val, jph_val;
+static int errors, handler_errors, num_tests;
+
+static noinline u32 kprobe_target(u32 value)
+{
+	/*
+	 * gcc ignores noinline on some architectures unless we stuff
+	 * sufficient lard into the function. The get_kprobe() here is
+	 * just for that.
+	 *
+	 * NOTE: We aren't concerned about the correctness of get_kprobe()
+	 * here; hence, this call is neither under !preempt nor with the
+	 * kprobe_mutex held. This is fine(tm)
+	 */
+	if (get_kprobe((void *)0xdeadbeef))
+		printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n");
+
+	return (value / div_factor);
+}
+
+static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	preh_val = (rand1 / div_factor);
+	return 0;
+}
+
+static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
+		unsigned long flags)
+{
+	if (preh_val != (rand1 / div_factor)) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"incorrect value in post_handler\n");
+	}
+	posth_val = preh_val + div_factor;
+}
+
+static struct kprobe kp = {
+	.symbol_name = "kprobe_target",
+	.pre_handler = kp_pre_handler,
+	.post_handler = kp_post_handler
+};
+
+static int test_kprobe(void)
+{
+	int ret;
+
+	ret = register_kprobe(&kp);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_kprobe returned %d\n", ret);
+		return ret;
+	}
+
+	ret = kprobe_target(rand1);
+	unregister_kprobe(&kp);
+
+	if (preh_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kprobe pre_handler not called\n");
+		handler_errors++;
+	}
+
+	if (posth_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kprobe post_handler not called\n");
+		handler_errors++;
+	}
+
+	return 0;
+}
+
+static u32 j_kprobe_target(u32 value)
+{
+	if (value != rand1) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"incorrect value in jprobe handler\n");
+	}
+
+	jph_val = rand1;
+	jprobe_return();
+	return 0;
+}
+
+static struct jprobe jp = {
+	.entry		= j_kprobe_target,
+	.kp.symbol_name = "kprobe_target"
+};
+
+static int test_jprobe(void)
+{
+	int ret;
+
+	ret = register_jprobe(&jp);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_jprobe returned %d\n", ret);
+		return ret;
+	}
+
+	ret = kprobe_target(rand1);
+	unregister_jprobe(&jp);
+	if (jph_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"jprobe handler not called\n");
+		handler_errors++;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_KRETPROBES
+static u32 krph_val;
+
+static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	unsigned long ret = regs_return_value(regs);
+
+	if (ret != (rand1 / div_factor)) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"incorrect value in kretprobe handler\n");
+	}
+
+	krph_val = (rand1 / div_factor);
+	return 0;
+}
+
+static struct kretprobe rp = {
+	.handler	= return_handler,
+	.kp.symbol_name = "kprobe_target"
+};
+
+static int test_kretprobe(void)
+{
+	int ret;
+
+	ret = register_kretprobe(&rp);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_kretprobe returned %d\n", ret);
+		return ret;
+	}
+
+	ret = kprobe_target(rand1);
+	unregister_kretprobe(&rp);
+	if (krph_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kretprobe handler not called\n");
+		handler_errors++;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_KRETPROBES */
+
+int init_test_probes(void)
+{
+	int ret;
+
+	do {
+		rand1 = random32();
+	} while (rand1 <= div_factor);
+
+	printk(KERN_INFO "Kprobe smoke test started\n");
+	num_tests++;
+	ret = test_kprobe();
+	if (ret < 0)
+		errors++;
+
+	num_tests++;
+	ret = test_jprobe();
+	if (ret < 0)
+		errors++;
+
+#ifdef CONFIG_KRETPROBES
+	num_tests++;
+	ret = test_kretprobe();
+	if (ret < 0)
+		errors++;
+#endif /* CONFIG_KRETPROBES */
+
+	if (errors)
+		printk(KERN_ERR "BUG: Kprobe smoke test: %d out of "
+				"%d tests failed\n", errors, num_tests);
+	else if (handler_errors)
+		printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) "
+				"running handlers\n", handler_errors);
+	else
+		printk(KERN_INFO "Kprobe smoke test passed successfully\n");
+
+	return 0;
+}
-- 
cgit v1.2.3


From 076f9776f5d8d131b36955db8641aba3893c2c1b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:06 +0100
Subject: x86: make early printk selectable on 64-bit as well

Enable CONFIG_EMBEDDED to select CONFIG_EARLY_PRINTK on 64-bit as well.

saves ~2K:

   text    data     bss     dec     hex filename
   7290283 3672091 1907848 12870222         c4624e vmlinux.before
   7288373 3671795 1907848 12868016         c459b0 vmlinux.after

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/printk.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 3b7c968d0ef9..58bbec684119 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -36,6 +36,13 @@
 
 #include <asm/uaccess.h>
 
+/*
+ * Architectures can override it:
+ */
+void __attribute__((weak)) early_printk(const char *fmt, ...)
+{
+}
+
 #define __LOG_BUF_LEN	(1 << CONFIG_LOG_BUF_SHIFT)
 
 /* printk's without a loglevel use this.. */
-- 
cgit v1.2.3


From 6dab27784b2a97823b522e1cb88e40be40a93d45 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 30 Jan 2008 13:33:08 +0100
Subject: x86: add a simple backtrace test module

During the work on the x86 32 and 64 bit backtrace code I found it useful
to have a simple test module to test a process and irq context backtrace.
Since the existing backtrace code was buggy, I figure it might be useful
to have such a test module in the kernel so that maybe we can even
detect such bugs earlier..

[ mingo@elte.hu: build fix ]

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/Makefile        |  1 +
 kernel/backtracetest.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 kernel/backtracetest.c

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index 62015c3d8d91..8885627ea021 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
+obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
new file mode 100644
index 000000000000..d1a7605c5b8f
--- /dev/null
+++ b/kernel/backtracetest.c
@@ -0,0 +1,48 @@
+/*
+ * Simple stack backtrace regression test module
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+
+static struct timer_list backtrace_timer;
+
+static void backtrace_test_timer(unsigned long data)
+{
+	printk("Testing a backtrace from irq context.\n");
+	printk("The following trace is a kernel self test and not a bug!\n");
+	dump_stack();
+}
+static int backtrace_regression_test(void)
+{
+	printk("====[ backtrace testing ]===========\n");
+	printk("Testing a backtrace from process context.\n");
+	printk("The following trace is a kernel self test and not a bug!\n");
+	dump_stack();
+
+	init_timer(&backtrace_timer);
+	backtrace_timer.function = backtrace_test_timer;
+	mod_timer(&backtrace_timer, jiffies + 10);
+
+	msleep(10);
+	printk("====[ end of backtrace testing ]====\n");
+	return 0;
+}
+
+static void exitf(void)
+{
+}
+
+module_init(backtrace_regression_test);
+module_exit(exitf);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
-- 
cgit v1.2.3


From 70edcd77a0d6d0f8731c826764f5eb6732f521e9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 30 Jan 2008 13:33:24 +0100
Subject: genirq: stackdump after the "Trying to free already-free IRQ" message

these bugs are harder to find than they seem, a stackdump helps.

make it dependent on CONFIG_DEBUG_SHIRQ so that people can turn it off
if it annoys them.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/manage.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1f314221d534..438a01464287 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -479,6 +479,9 @@ void free_irq(unsigned int irq, void *dev_id)
 			return;
 		}
 		printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
+#ifdef CONFIG_DEBUG_SHIRQ
+		dump_stack();
+#endif
 		spin_unlock_irqrestore(&desc->lock, flags);
 		return;
 	}
-- 
cgit v1.2.3


From dd5af90a7f3d79e04b7eace9a98644dbf2038f4d Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 30 Jan 2008 13:33:32 +0100
Subject: x86/non-x86: percpu, node ids, apic ids x86.git fixup

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/module.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index f6a4e721fd49..bd60278ee703 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -430,6 +430,14 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
 	return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
 }
 
+static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		memcpy(pcpudest + per_cpu_offset(cpu), from, size);
+}
+
 static int percpu_modinit(void)
 {
 	pcpu_num_used = 2;
-- 
cgit v1.2.3