From 41df0d61c266998b8049df7fec119cd518a43aa1 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 12 May 2008 21:21:13 +0200
Subject: x86: Add performance variants of cpumask operators

  * Increase performance for systems with large count NR_CPUS by limiting
    the range of the cpumask operators that loop over the bits in a cpumask_t
    variable.  This removes a large amount of wasted cpu cycles.

  * Add performance variants of the cpumask operators:

    int cpus_weight_nr(mask)	     Same using nr_cpu_ids instead of NR_CPUS
    int first_cpu_nr(mask)	     Number lowest set bit, or nr_cpu_ids
    int next_cpu_nr(cpu, mask)	     Next cpu past 'cpu', or nr_cpu_ids
    for_each_cpu_mask_nr(cpu, mask)  for-loop cpu over mask using nr_cpu_ids

  * Modify following to use performance variants:

    #define num_online_cpus()	cpus_weight_nr(cpu_online_map)
    #define num_possible_cpus()	cpus_weight_nr(cpu_possible_map)
    #define num_present_cpus()	cpus_weight_nr(cpu_present_map)

    #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
    #define for_each_online_cpu(cpu)   for_each_cpu_mask_nr((cpu), ...)
    #define for_each_present_cpu(cpu)  for_each_cpu_mask_nr((cpu), ...)

  * Comment added to include/linux/cpumask.h:

    Note: The alternate operations with the suffix "_nr" are used
	  to limit the range of the loop to nr_cpu_ids instead of
	  NR_CPUS when NR_CPUS > 64 for performance reasons.
	  If NR_CPUS is <= 64 then most assembler bitmask
	  operators execute faster with a constant range, so
	  the operator will continue to use NR_CPUS.

	  Another consideration is that nr_cpu_ids is initialized
	  to NR_CPUS and isn't lowered until the possible cpus are
	  discovered (including any disabled cpus).  So early uses
	  will span the entire range of NR_CPUS.

    (The net effect is that for systems with 64 or less CPU's there are no
     functional changes.)

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Reviewed-by: Paul Jackson <pj@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/cpumask.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/cpumask.c b/lib/cpumask.c
index bb4f76d3c3e7..5f97dc25ef9c 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -15,6 +15,15 @@ int __next_cpu(int n, const cpumask_t *srcp)
 }
 EXPORT_SYMBOL(__next_cpu);
 
+#if NR_CPUS > 64
+int __next_cpu_nr(int n, const cpumask_t *srcp)
+{
+	return min_t(int, nr_cpu_ids,
+				find_next_bit(srcp->bits, nr_cpu_ids, n+1));
+}
+EXPORT_SYMBOL(__next_cpu_nr);
+#endif
+
 int __any_online_cpu(const cpumask_t *mask)
 {
 	int cpu;
-- 
cgit v1.2.3


From 9c44bc03fff44ff04237a7d92e35304a0e50c331 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 12 May 2008 21:21:04 +0200
Subject: softlockup: allow panic on lockup

allow users to configure the softlockup detector to generate a panic
instead of a warning message.

high-availability systems might opt for this strict method (combined
with panic_timeout= boot option/sysctl), instead of generating
softlockup warnings ad infinitum.

also, automated tests work better if the system reboots reliably (into
a safe kernel) in case of a lockup.

The full spectrum of configurability is supported: boot option, sysctl
option and Kconfig option.

it's default-disabled.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 lib/Kconfig.debug | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..509ae35a9ef5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -147,7 +147,7 @@ config DETECT_SOFTLOCKUP
 	help
 	  Say Y here to enable the kernel to detect "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
-	  mode for more than 10 seconds, without giving other tasks a
+	  mode for more than 60 seconds, without giving other tasks a
 	  chance to run.
 
 	  When a soft-lockup is detected, the kernel will print the
@@ -159,6 +159,30 @@ config DETECT_SOFTLOCKUP
 	   can be detected via the NMI-watchdog, on platforms that
 	   support it.)
 
+config BOOTPARAM_SOFTLOCKUP_PANIC
+	bool "Panic (Reboot) On Soft Lockups"
+	depends on DETECT_SOFTLOCKUP
+	help
+	  Say Y here to enable the kernel to panic on "soft lockups",
+	  which are bugs that cause the kernel to loop in kernel
+	  mode for more than 60 seconds, without giving other tasks a
+	  chance to run.
+
+	  The panic can be used in combination with panic_timeout,
+	  to cause the system to reboot automatically after a
+	  lockup has been detected. This feature is useful for
+	  high-availability systems that have uptime guarantees and
+	  where a lockup must be resolved ASAP.
+
+	  Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+	int
+	depends on DETECT_SOFTLOCKUP
+	range 0 1
+	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
 config SCHED_DEBUG
 	bool "Collect scheduler debugging info"
 	depends on DEBUG_KERNEL && PROC_FS
-- 
cgit v1.2.3


From 4755b9291204982ac908e069674d6e5eb45815b3 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 15 Jul 2008 14:14:35 -0700
Subject: cpumask: Optimize cpumask_of_cpu in lib/smp_processor_id.c

  * Optimize various places where a pointer to the cpumask_of_cpu value
    will result in reducing stack pressure.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/smp_processor_id.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 3b4dc098181e..9fb2df0d8dfb 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -11,7 +11,7 @@ notrace unsigned int debug_smp_processor_id(void)
 {
 	unsigned long preempt_count = preempt_count();
 	int this_cpu = raw_smp_processor_id();
-	cpumask_t this_mask;
+	cpumask_of_cpu_ptr_declare(this_mask);
 
 	if (likely(preempt_count))
 		goto out;
@@ -23,9 +23,9 @@ notrace unsigned int debug_smp_processor_id(void)
 	 * Kernel threads bound to a single CPU can safely use
 	 * smp_processor_id():
 	 */
-	this_mask = cpumask_of_cpu(this_cpu);
+	cpumask_of_cpu_ptr_next(this_mask, cpu);
 
-	if (cpus_equal(current->cpus_allowed, this_mask))
+	if (cpus_equal(current->cpus_allowed, *this_mask))
 		goto out;
 
 	/*
-- 
cgit v1.2.3


From 06f8d00e9eecb738c99b737ac38a585ea7583ad5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 18 Jul 2008 22:34:00 +0200
Subject: cpumask: Optimize cpumask_of_cpu in lib/smp_processor_id.c, fix

fix typo.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/smp_processor_id.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 9fb2df0d8dfb..c4381d9516f6 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -23,7 +23,7 @@ notrace unsigned int debug_smp_processor_id(void)
 	 * Kernel threads bound to a single CPU can safely use
 	 * smp_processor_id():
 	 */
-	cpumask_of_cpu_ptr_next(this_mask, cpu);
+	cpumask_of_cpu_ptr_next(this_mask, this_cpu);
 
 	if (cpus_equal(current->cpus_allowed, *this_mask))
 		goto out;
-- 
cgit v1.2.3


From 9f255651fb41c111ee35a2ae632df8ce9bd61def Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Tue, 6 May 2008 22:24:04 +0200
Subject: kobject: replace '/' with '!' in name

Some (block) devices have a '/' in the name, and need special
handling. Let's have that rule to the core, so we can remove it
from the block class.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/kobject.c b/lib/kobject.c
index dcade0543bd2..744401571ed7 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -216,13 +216,19 @@ static int kobject_add_internal(struct kobject *kobj)
 static int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
 				  va_list vargs)
 {
-	/* Free the old name, if necessary. */
-	kfree(kobj->name);
+	const char *old_name = kobj->name;
+	char *s;
 
 	kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs);
 	if (!kobj->name)
 		return -ENOMEM;
 
+	/* ewww... some of these buggers have '/' in the name ... */
+	s = strchr(kobj->name, '/');
+	if (s)
+		s[0] = '!';
+
+	kfree(old_name);
 	return 0;
 }
 
-- 
cgit v1.2.3


From ff543332ec5d23833994fca9e7789f067fb51221 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Tue, 20 May 2008 00:06:00 +0200
Subject: debugfs: Add a reference to the debugfs API documentation.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/Kconfig.debug | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index df27132a56f4..ba106db5a65b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -74,6 +74,9 @@ config DEBUG_FS
 	  debugging files into.  Enable this option to be able to read and
 	  write to these files.
 
+	  For detailed documentation on the debugfs API, see
+	  Documentation/DocBook/filesystems.
+
 	  If unsure, say N.
 
 config HEADERS_CHECK
-- 
cgit v1.2.3


From 0ad1d6f37cc3bb234c6e7ae30e40d1d40b9aa258 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Tue, 24 Jun 2008 16:59:02 +0800
Subject: kobject: Transmit return value of call_usermodehelper() to caller

kobject_uevent_env() drops the return value of call_usermodehelper().
It will make upper caller, such as dm_send_uevents(), to lose error
information.

BTW, Previously kobject_uevent_env() transmitted return of
call_usermodehelper() to callers, but
commit	5f123fbd80f4f788554636f02bf73e40f914e0d6
"[PATCH] merge kobject_uevent and kobject_hotplug" removed it.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Cc: Kay Sievers <kay.sievers@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject_uevent.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 2fa545a63160..9f8d599459d1 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -245,7 +245,8 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 		if (retval)
 			goto exit;
 
-		call_usermodehelper(argv[0], argv, env->envp, UMH_WAIT_EXEC);
+		retval = call_usermodehelper(argv[0], argv,
+					     env->envp, UMH_WAIT_EXEC);
 	}
 
 exit:
-- 
cgit v1.2.3


From a00caa1fa954c734f4214a074727a329a9ba6568 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 8 Jul 2008 19:00:24 +0200
Subject: remove CONFIG_KMOD from lib

textsearch algorithms can be loaded, make the code depend
on CONFIG_MODULES instead of CONFIG_KMOD.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 lib/textsearch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/textsearch.c b/lib/textsearch.c
index 4b7c6075256f..9fbcb44c554f 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -267,7 +267,7 @@ struct ts_config *textsearch_prepare(const char *algo, const void *pattern,
 		return ERR_PTR(-EINVAL);
 
 	ops = lookup_ts_algo(algo);
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	/*
 	 * Why not always autoload you may ask. Some users are
 	 * in a situation where requesting a module may deadlock,
-- 
cgit v1.2.3


From 137d3edb48425f82a6a4226b664f90ed5e42eea5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Sat, 19 Jul 2008 23:03:35 +0900
Subject: sg: reimplement sg mapping iterator

This is alternative implementation of sg content iterator introduced
by commit 83e7d317... from Pierre Ossman in next-20080716.  As there's
already an sg iterator which iterates over sg entries themselves, name
this sg_mapping_iterator.

Slightly edited description from the original implementation follows.

Iteration over a sg list is not that trivial when you take into
account that memory pages might have to be mapped before being used.
Unfortunately, that means that some parts of the kernel restrict
themselves to directly accesible memory just to not have to deal with
the mess.

This patch adds a simple iterator system that allows any code to
easily traverse an sg list and not have to deal with all the details.
The user can decide to consume part of the iteration.  Also, iteration
can be stopped and resumed later if releasing the kmap between
iteration steps is necessary.  These features are useful to implement
piecemeal sg copying for interrupt drive PIO for example.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 lib/scatterlist.c | 176 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 130 insertions(+), 46 deletions(-)

(limited to 'lib')

diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index b80c21100d78..876ba6d5b670 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -294,6 +294,117 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(sg_alloc_table);
 
+/**
+ * sg_miter_start - start mapping iteration over a sg list
+ * @miter: sg mapping iter to be started
+ * @sgl: sg list to iterate over
+ * @nents: number of sg entries
+ *
+ * Description:
+ *   Starts mapping iterator @miter.
+ *
+ * Context:
+ *   Don't care.
+ */
+void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
+		    unsigned int nents, unsigned int flags)
+{
+	memset(miter, 0, sizeof(struct sg_mapping_iter));
+
+	miter->__sg = sgl;
+	miter->__nents = nents;
+	miter->__offset = 0;
+	miter->__flags = flags;
+}
+EXPORT_SYMBOL(sg_miter_start);
+
+/**
+ * sg_miter_next - proceed mapping iterator to the next mapping
+ * @miter: sg mapping iter to proceed
+ *
+ * Description:
+ *   Proceeds @miter@ to the next mapping.  @miter@ should have been
+ *   started using sg_miter_start().  On successful return,
+ *   @miter@->page, @miter@->addr and @miter@->length point to the
+ *   current mapping.
+ *
+ * Context:
+ *   IRQ disabled if SG_MITER_ATOMIC.  IRQ must stay disabled till
+ *   @miter@ is stopped.  May sleep if !SG_MITER_ATOMIC.
+ *
+ * Returns:
+ *   true if @miter contains the next mapping.  false if end of sg
+ *   list is reached.
+ */
+bool sg_miter_next(struct sg_mapping_iter *miter)
+{
+	unsigned int off, len;
+
+	/* check for end and drop resources from the last iteration */
+	if (!miter->__nents)
+		return false;
+
+	sg_miter_stop(miter);
+
+	/* get to the next sg if necessary.  __offset is adjusted by stop */
+	if (miter->__offset == miter->__sg->length && --miter->__nents) {
+		miter->__sg = sg_next(miter->__sg);
+		miter->__offset = 0;
+	}
+
+	/* map the next page */
+	off = miter->__sg->offset + miter->__offset;
+	len = miter->__sg->length - miter->__offset;
+
+	miter->page = nth_page(sg_page(miter->__sg), off >> PAGE_SHIFT);
+	off &= ~PAGE_MASK;
+	miter->length = min_t(unsigned int, len, PAGE_SIZE - off);
+	miter->consumed = miter->length;
+
+	if (miter->__flags & SG_MITER_ATOMIC)
+		miter->addr = kmap_atomic(miter->page, KM_BIO_SRC_IRQ) + off;
+	else
+		miter->addr = kmap(miter->page) + off;
+
+	return true;
+}
+EXPORT_SYMBOL(sg_miter_next);
+
+/**
+ * sg_miter_stop - stop mapping iteration
+ * @miter: sg mapping iter to be stopped
+ *
+ * Description:
+ *   Stops mapping iterator @miter.  @miter should have been started
+ *   started using sg_miter_start().  A stopped iteration can be
+ *   resumed by calling sg_miter_next() on it.  This is useful when
+ *   resources (kmap) need to be released during iteration.
+ *
+ * Context:
+ *   IRQ disabled if the SG_MITER_ATOMIC is set.  Don't care otherwise.
+ */
+void sg_miter_stop(struct sg_mapping_iter *miter)
+{
+	WARN_ON(miter->consumed > miter->length);
+
+	/* drop resources from the last iteration */
+	if (miter->addr) {
+		miter->__offset += miter->consumed;
+
+		if (miter->__flags & SG_MITER_ATOMIC) {
+			WARN_ON(!irqs_disabled());
+			kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
+		} else
+			kunmap(miter->addr);
+
+		miter->page = NULL;
+		miter->addr = NULL;
+		miter->length = 0;
+		miter->consumed = 0;
+	}
+}
+EXPORT_SYMBOL(sg_miter_stop);
+
 /**
  * sg_copy_buffer - Copy data between a linear buffer and an SG list
  * @sgl:		 The SG list
@@ -309,56 +420,29 @@ EXPORT_SYMBOL(sg_alloc_table);
 static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 			     void *buf, size_t buflen, int to_buffer)
 {
-	struct scatterlist *sg;
-	size_t buf_off = 0;
-	int i;
-
-	WARN_ON(!irqs_disabled());
-
-	for_each_sg(sgl, sg, nents, i) {
-		struct page *page;
-		int n = 0;
-		unsigned int sg_off = sg->offset;
-		unsigned int sg_copy = sg->length;
-
-		if (sg_copy > buflen)
-			sg_copy = buflen;
-		buflen -= sg_copy;
-
-		while (sg_copy > 0) {
-			unsigned int page_copy;
-			void *p;
-
-			page_copy = PAGE_SIZE - sg_off;
-			if (page_copy > sg_copy)
-				page_copy = sg_copy;
-
-			page = nth_page(sg_page(sg), n);
-			p = kmap_atomic(page, KM_BIO_SRC_IRQ);
-
-			if (to_buffer)
-				memcpy(buf + buf_off, p + sg_off, page_copy);
-			else {
-				memcpy(p + sg_off, buf + buf_off, page_copy);
-				flush_kernel_dcache_page(page);
-			}
-
-			kunmap_atomic(p, KM_BIO_SRC_IRQ);
-
-			buf_off += page_copy;
-			sg_off += page_copy;
-			if (sg_off == PAGE_SIZE) {
-				sg_off = 0;
-				n++;
-			}
-			sg_copy -= page_copy;
+	unsigned int offset = 0;
+	struct sg_mapping_iter miter;
+
+	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC);
+
+	while (sg_miter_next(&miter) && offset < buflen) {
+		unsigned int len;
+
+		len = min(miter.length, buflen - offset);
+
+		if (to_buffer)
+			memcpy(buf + offset, miter.addr, len);
+		else {
+			memcpy(miter.addr, buf + offset, len);
+			flush_kernel_dcache_page(miter.page);
 		}
 
-		if (!buflen)
-			break;
+		offset += len;
 	}
 
-	return buf_off;
+	sg_miter_stop(&miter);
+
+	return offset;
 }
 
 /**
-- 
cgit v1.2.3


From 68afab1cb31436fc9b256a5f44771fa58ed019e2 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 23 Jul 2008 11:30:15 -0500
Subject: kgdb: remove unused HAVE_ARCH_KGDB_SHADOW_INFO config variable

Remove HAVE_ARCH_KGDB_SHADOW_INFO because it does not
exist anywhere in the kernel mainline sources

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 lib/Kconfig.kgdb | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index a5d4b1dac2a5..2cfd2721f7ed 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -1,7 +1,4 @@
 
-config HAVE_ARCH_KGDB_SHADOW_INFO
-	bool
-
 config HAVE_ARCH_KGDB
 	bool
 
-- 
cgit v1.2.3


From 6b74ab97bc12ce74acec900f1d89a4aee2e4d70d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 23 Jul 2008 21:26:49 -0700
Subject: mm: add a basic debugging framework for memory initialisation

Boot initialisation is very complex, with significant numbers of
architecture-specific routines, hooks and code ordering.  While significant
amounts of the initialisation is architecture-independent, it trusts the data
received from the architecture layer.  This is a mistake, and has resulted in
a number of difficult-to-diagnose bugs.

This patchset adds some validation and tracing to memory initialisation.  It
also introduces a few basic defensive measures.  The validation code can be
explicitly disabled for embedded systems.

This patch:

Add additional debugging and verification code for memory initialisation.

Once enabled, the verification checks are always run and when required
additional debugging information may be outputted via a mminit_loglevel=
command-line parameter.

The verification code is placed in a new file mm/mm_init.c.  Ideally other mm
initialisation code will be moved here over time.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 882c51048993..e1d4764435ed 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -505,6 +505,18 @@ config DEBUG_WRITECOUNT
 
 	  If unsure, say N.
 
+config DEBUG_MEMORY_INIT
+	bool "Debug memory initialisation" if EMBEDDED
+	default !EMBEDDED
+	help
+	  Enable this for additional checks during memory initialisation.
+	  The sanity checks verify aspects of the VM such as the memory model
+	  and other information provided by the architecture. Verbose
+	  information will be printed at KERN_DEBUG loglevel depending
+	  on the mminit_loglevel= command-line option.
+
+	  If unsure, say Y
+
 config DEBUG_LIST
 	bool "Debug linked list manipulation"
 	depends on DEBUG_KERNEL
-- 
cgit v1.2.3


From 8b05c7e6e159d2f33c9275281b8b909a89eb7c5d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 23 Jul 2008 21:26:53 -0700
Subject: add a helper function to test if an object is on the stack

lib/debugobjects.c has a function to test if an object is on the stack.
The block layer and ide needs it (they need to avoid DMA from/to stack
buffers).  This patch moves the function to include/linux/sched.h so that
everyone can use it.

lib/debugobjects.c uses current->stack but this patch uses a
task_stack_page() accessor, which is a preferable way to access the stack.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/debugobjects.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 85b18d79be89..f86196390cfd 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -226,15 +226,13 @@ debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state),
 
 static void debug_object_is_on_stack(void *addr, int onstack)
 {
-	void *stack = current->stack;
 	int is_on_stack;
 	static int limit;
 
 	if (limit > 4)
 		return;
 
-	is_on_stack = (addr >= stack && addr < (stack + THREAD_SIZE));
-
+	is_on_stack = object_is_on_stack(addr);
 	if (is_on_stack == onstack)
 		return;
 
-- 
cgit v1.2.3


From d3de851a445123f24ad8ece18662014b5e8a8b4e Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 23 Jul 2008 21:30:37 -0700
Subject: rtc: BCD codeshrink

This updates <linux/bcd.h> to define the key routines as constant
functions, which the macros will then call.  Newer code can now call
bcd2bin() instead of SCREAMING BCD2BIN() TO THE FOUR WINDS.

This lets each driver shrink their codespace by using N function calls to
a single (global) copy of those routines, instead of N inlined copies of
these functions per driver.

These routines aren't used in speed-critical code.  Almost all callers are
in the RTC framework.  Typical per-driver savings is near 300 bytes.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Acked-by: Adrian Bunk <bunk@kernel.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Makefile |  2 +-
 lib/bcd.c    | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 lib/bcd.c

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 818c4d455518..9085ad6fa53d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,7 +18,7 @@ lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o kref.o klist.o
 
-obj-y += div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
diff --git a/lib/bcd.c b/lib/bcd.c
new file mode 100644
index 000000000000..d74257fd0fe7
--- /dev/null
+++ b/lib/bcd.c
@@ -0,0 +1,14 @@
+#include <linux/bcd.h>
+#include <linux/module.h>
+
+unsigned bcd2bin(unsigned char val)
+{
+	return (val & 0x0f) + (val >> 4) * 10;
+}
+EXPORT_SYMBOL(bcd2bin);
+
+unsigned char bin2bcd(unsigned val)
+{
+	return ((val / 10) << 4) + val % 10;
+}
+EXPORT_SYMBOL(bin2bcd);
-- 
cgit v1.2.3


From 545e400619b24b6b17b7f1f1e838e9ff6d036949 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 25 Jul 2008 01:45:27 -0700
Subject: lzo: use get/put_unaligned_* helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/lzo/lzo1x_decompress.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 77f0f9b775a9..5dc6b29c1575 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -138,8 +138,7 @@ match:
 					t += 31 + *ip++;
 				}
 				m_pos = op - 1;
-				m_pos -= le16_to_cpu(get_unaligned(
-					(const unsigned short *)ip)) >> 2;
+				m_pos -= get_unaligned_le16(ip) >> 2;
 				ip += 2;
 			} else if (t >= 16) {
 				m_pos = op;
@@ -157,8 +156,7 @@ match:
 					}
 					t += 7 + *ip++;
 				}
-				m_pos -= le16_to_cpu(get_unaligned(
-					(const unsigned short *)ip)) >> 2;
+				m_pos -= get_unaligned_le16(ip) >> 2;
 				ip += 2;
 				if (m_pos == op)
 					goto eof_found;
-- 
cgit v1.2.3


From fd193829744bc77392395cf8f47889235c97f0a3 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:31 -0700
Subject: lib: allow memparse() to accept a NULL and ignorable second parm

Extend memparse() to allow the caller to use a NULL second parameter, which
would represent no interest in returning the address of the end of the parsed
string.

In numerous cases, callers invoke memparse() to parse a possibly-suffixed
string (such as "64K" or "2G" or whatever) and define a character pointer to
accept the end pointer being returned by memparse() even though they have no
interest in it and promptly throw it away.

This (backward-compatible) enhancement allows callers to use NULL in the cases
where they just don't care about getting back that end pointer.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/cmdline.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/cmdline.c b/lib/cmdline.c
index f596c08d213a..5ba8a942a478 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -116,7 +116,7 @@ char *get_options(const char *str, int nints, int *ints)
 /**
  *	memparse - parse a string with mem suffixes into a number
  *	@ptr: Where parse begins
- *	@retptr: (output) Pointer to next char after parse completes
+ *	@retptr: (output) Optional pointer to next char after parse completes
  *
  *	Parses a string into a number.  The number stored at @ptr is
  *	potentially suffixed with %K (for kilobytes, or 1024 bytes),
@@ -126,11 +126,13 @@ char *get_options(const char *str, int nints, int *ints)
  *	megabyte, or one gigabyte, respectively.
  */
 
-unsigned long long memparse (char *ptr, char **retptr)
+unsigned long long memparse(char *ptr, char **retptr)
 {
-	unsigned long long ret = simple_strtoull (ptr, retptr, 0);
+	char *endptr;	/* local pointer to end of parsed string */
 
-	switch (**retptr) {
+	unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
+
+	switch (*endptr) {
 	case 'G':
 	case 'g':
 		ret <<= 10;
@@ -140,10 +142,14 @@ unsigned long long memparse (char *ptr, char **retptr)
 	case 'K':
 	case 'k':
 		ret <<= 10;
-		(*retptr)++;
+		endptr++;
 	default:
 		break;
 	}
+
+	if (retptr)
+		*retptr = endptr;
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From e0ce0da9fefcc723dc006c35a7f91a32750abd40 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 25 Jul 2008 01:45:32 -0700
Subject: lists: remove a redundant conditional definition of list_add()

Remove the conditional surrounding the definition of list_add() from list.h
since, if you define CONFIG_DEBUG_LIST, the definition you will subsequently
pick up from lib/list_debug.c will be absolutely identical, at which point you
can remove that redundant definition from list_debug.c as well.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/list_debug.c | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'lib')

diff --git a/lib/list_debug.c b/lib/list_debug.c
index 4350ba9655bd..45c03fd608dd 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -39,20 +39,6 @@ void __list_add(struct list_head *new,
 }
 EXPORT_SYMBOL(__list_add);
 
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-void list_add(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head, head->next);
-}
-EXPORT_SYMBOL(list_add);
-
 /**
  * list_del - deletes entry from list.
  * @entry: the element to delete from the list.
-- 
cgit v1.2.3


From 2d6ffcca623a9a16df6cdfbe8250b7a5904a5f5e Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Fri, 25 Jul 2008 01:45:44 -0700
Subject: inflate: refactor inflate malloc code

Inflate requires some dynamic memory allocation very early in the boot
process and this is provided with a set of four functions:
malloc/free/gzip_mark/gzip_release.

The old inflate code used a mark/release strategy rather than implement
free.  This new version instead keeps a count on the number of outstanding
allocations and when it hits zero, it resets the malloc arena.

This allows removing all the mark and release implementations and unifying
all the malloc/free implementations.

The architecture-dependent code must define two addresses:
 - free_mem_ptr, the address of the beginning of the area in which
   allocations should be made
 - free_mem_end_ptr, the address of the end of the area in which
   allocations should be made. If set to 0, then no check is made on
   the number of allocations, it just grows as much as needed

The architecture-dependent code can also provide an arch_decomp_wdog()
function call.  This function will be called several times during the
decompression process, and allow to notify the watchdog that the system is
still running.  If an architecture provides such a call, then it must
define ARCH_HAS_DECOMP_WDOG so that the generic inflate code calls
arch_decomp_wdog().

Work initially done by Matt Mackall, updated to a recent version of the
kernel and improved by me.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Mikael Starvik <mikael.starvik@axis.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/inflate.c | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/inflate.c b/lib/inflate.c
index 9762294be062..1a8e8a978128 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -230,6 +230,45 @@ STATIC const ush mask_bits[] = {
 #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
 #define DUMPBITS(n) {b>>=(n);k-=(n);}
 
+#ifndef NO_INFLATE_MALLOC
+/* A trivial malloc implementation, adapted from
+ *  malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ */
+
+static unsigned long malloc_ptr;
+static int malloc_count;
+
+static void *malloc(int size)
+{
+       void *p;
+
+       if (size < 0)
+		error("Malloc error");
+       if (!malloc_ptr)
+		malloc_ptr = free_mem_ptr;
+
+       malloc_ptr = (malloc_ptr + 3) & ~3;     /* Align */
+
+       p = (void *)malloc_ptr;
+       malloc_ptr += size;
+
+       if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
+		error("Out of memory");
+
+       malloc_count++;
+       return p;
+}
+
+static void free(void *where)
+{
+       malloc_count--;
+       if (!malloc_count)
+		malloc_ptr = free_mem_ptr;
+}
+#else
+#define malloc(a) kmalloc(a, GFP_KERNEL)
+#define free(a) kfree(a)
+#endif
 
 /*
    Huffman code decoding is performed using a multi-level table lookup.
@@ -1045,7 +1084,6 @@ STATIC int INIT inflate(void)
   int e;                /* last block flag */
   int r;                /* result code */
   unsigned h;           /* maximum struct huft's malloc'ed */
-  void *ptr;
 
   /* initialize window, bit buffer */
   wp = 0;
@@ -1057,12 +1095,12 @@ STATIC int INIT inflate(void)
   h = 0;
   do {
     hufts = 0;
-    gzip_mark(&ptr);
-    if ((r = inflate_block(&e)) != 0) {
-      gzip_release(&ptr);	    
-      return r;
-    }
-    gzip_release(&ptr);
+#ifdef ARCH_HAS_DECOMP_WDOG
+    arch_decomp_wdog();
+#endif
+    r = inflate_block(&e);
+    if (r)
+	    return r;
     if (hufts > h)
       h = hufts;
   } while (!e);
-- 
cgit v1.2.3


From d955c78ac4699ac9c3fe07be62982cda13d13267 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 25 Jul 2008 01:45:55 -0700
Subject: Example use of WARN()

Now that WARN() exists, we can fold some of the printk's into it.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/kobject.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/kobject.c b/lib/kobject.c
index 744401571ed7..bd732ffebc85 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -164,9 +164,8 @@ static int kobject_add_internal(struct kobject *kobj)
 		return -ENOENT;
 
 	if (!kobj->name || !kobj->name[0]) {
-		pr_debug("kobject: (%p): attempted to be registered with empty "
+		WARN(1, "kobject: (%p): attempted to be registered with empty "
 			 "name!\n", kobj);
-		WARN_ON(1);
 		return -EINVAL;
 	}
 
@@ -583,12 +582,10 @@ static void kobject_release(struct kref *kref)
 void kobject_put(struct kobject *kobj)
 {
 	if (kobj) {
-		if (!kobj->state_initialized) {
-			printk(KERN_WARNING "kobject: '%s' (%p): is not "
+		if (!kobj->state_initialized)
+			WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
 			       "initialized, yet kobject_put() is being "
 			       "called.\n", kobject_name(kobj), kobj);
-			WARN_ON(1);
-		}
 		kref_put(&kobj->kref, kobject_release);
 	}
 }
-- 
cgit v1.2.3


From 924d9addb9b1474fc81a78a5c6706755efea7aaa Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 25 Jul 2008 01:45:55 -0700
Subject: list debugging: use WARN() instead of BUG()

Arjan noted that the list_head debugging is BUG'ing when it detects
corruption.  By causing the box to panic immediately, we're possibly
losing some bug reports.  Changing this to a WARN() should mean we at the
least start seeing reports collected at kerneloops.org

Signed-off-by: Dave Jones <davej@redhat.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/list_debug.c | 36 ++++++++++++++----------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

(limited to 'lib')

diff --git a/lib/list_debug.c b/lib/list_debug.c
index 45c03fd608dd..1a39f4e3ae1f 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,18 +20,14 @@ void __list_add(struct list_head *new,
 			      struct list_head *prev,
 			      struct list_head *next)
 {
-	if (unlikely(next->prev != prev)) {
-		printk(KERN_ERR "list_add corruption. next->prev should be "
-			"prev (%p), but was %p. (next=%p).\n",
-			prev, next->prev, next);
-		BUG();
-	}
-	if (unlikely(prev->next != next)) {
-		printk(KERN_ERR "list_add corruption. prev->next should be "
-			"next (%p), but was %p. (prev=%p).\n",
-			next, prev->next, prev);
-		BUG();
-	}
+	WARN(next->prev != prev,
+		"list_add corruption. next->prev should be "
+		"prev (%p), but was %p. (next=%p).\n",
+		prev, next->prev, next);
+	WARN(prev->next != next,
+		"list_add corruption. prev->next should be "
+		"next (%p), but was %p. (prev=%p).\n",
+		next, prev->next, prev);
 	next->prev = new;
 	new->next = next;
 	new->prev = prev;
@@ -47,16 +43,12 @@ EXPORT_SYMBOL(__list_add);
  */
 void list_del(struct list_head *entry)
 {
-	if (unlikely(entry->prev->next != entry)) {
-		printk(KERN_ERR "list_del corruption. prev->next should be %p, "
-				"but was %p\n", entry, entry->prev->next);
-		BUG();
-	}
-	if (unlikely(entry->next->prev != entry)) {
-		printk(KERN_ERR "list_del corruption. next->prev should be %p, "
-				"but was %p\n", entry, entry->next->prev);
-		BUG();
-	}
+	WARN(entry->prev->next != entry,
+		"list_del corruption. prev->next should be %p, "
+		"but was %p\n", entry, entry->prev->next);
+	WARN(entry->next->prev != entry,
+		"list_del corruption. next->prev should be %p, "
+		"but was %p\n", entry, entry->next->prev);
 	__list_del(entry->prev, entry->next);
 	entry->next = LIST_POISON1;
 	entry->prev = LIST_POISON2;
-- 
cgit v1.2.3


From 717115e1a5856b57af0f71e1df7149108294fc10 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Fri, 25 Jul 2008 01:45:58 -0700
Subject: printk ratelimiting rewrite

All ratelimit user use same jiffies and burst params, so some messages
(callbacks) will be lost.

For example:
a call printk_ratelimit(5 * HZ, 1)
b call printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b will
will be supressed.

- rewrite __ratelimit, and use a ratelimit_state as parameter.  Thanks for
  hints from andrew.

- Add WARN_ON_RATELIMIT, update rcupreempt.h

- remove __printk_ratelimit

- use __ratelimit in net_ratelimit

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/ratelimit.c | 55 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 30 insertions(+), 25 deletions(-)

(limited to 'lib')

diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 485e3040dcd4..35136671b215 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -3,6 +3,9 @@
  *
  * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
  *
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ *
  * This file is released under the GPLv2.
  *
  */
@@ -11,41 +14,43 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
+static DEFINE_SPINLOCK(ratelimit_lock);
+static unsigned long flags;
+
 /*
  * __ratelimit - rate limiting
- * @ratelimit_jiffies: minimum time in jiffies between two callbacks
- * @ratelimit_burst: number of callbacks we do before ratelimiting
+ * @rs: ratelimit_state data
  *
- * This enforces a rate limit: not more than @ratelimit_burst callbacks
- * in every ratelimit_jiffies
+ * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
+ * in every @rs->ratelimit_jiffies
  */
-int __ratelimit(int ratelimit_jiffies, int ratelimit_burst)
+int __ratelimit(struct ratelimit_state *rs)
 {
-	static DEFINE_SPINLOCK(ratelimit_lock);
-	static unsigned toks = 10 * 5 * HZ;
-	static unsigned long last_msg;
-	static int missed;
-	unsigned long flags;
-	unsigned long now = jiffies;
+	if (!rs->interval)
+		return 1;
 
 	spin_lock_irqsave(&ratelimit_lock, flags);
-	toks += now - last_msg;
-	last_msg = now;
-	if (toks > (ratelimit_burst * ratelimit_jiffies))
-		toks = ratelimit_burst * ratelimit_jiffies;
-	if (toks >= ratelimit_jiffies) {
-		int lost = missed;
+	if (!rs->begin)
+		rs->begin = jiffies;
 
-		missed = 0;
-		toks -= ratelimit_jiffies;
-		spin_unlock_irqrestore(&ratelimit_lock, flags);
-		if (lost)
-			printk(KERN_WARNING "%s: %d messages suppressed\n",
-				__func__, lost);
-		return 1;
+	if (time_is_before_jiffies(rs->begin + rs->interval)) {
+		if (rs->missed)
+			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
+				__func__, rs->missed);
+		rs->begin = 0;
+		rs->printed = 0;
+		rs->missed = 0;
 	}
-	missed++;
+	if (rs->burst && rs->burst > rs->printed)
+		goto print;
+
+	rs->missed++;
 	spin_unlock_irqrestore(&ratelimit_lock, flags);
 	return 0;
+
+print:
+	rs->printed++;
+	spin_unlock_irqrestore(&ratelimit_lock, flags);
+	return 1;
 }
 EXPORT_SYMBOL(__ratelimit);
-- 
cgit v1.2.3


From 4ae537892ab9858f71c78701f4651ad1ca531a1b Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:58 -0700
Subject: idr: rename some of the idr APIs internal routines

This is a trivial patch that renames:

   . alloc_layer to get_from_free_list since it idr_pre_get that actually
     allocates memory.
   . free_layer to move_to_free_list since memory is not actually freed there.

This makes things more clear for the next patches.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 7a02e173f027..8170ace154fb 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -35,7 +35,7 @@
 
 static struct kmem_cache *idr_layer_cache;
 
-static struct idr_layer *alloc_layer(struct idr *idp)
+static struct idr_layer *get_from_free_list(struct idr *idp)
 {
 	struct idr_layer *p;
 	unsigned long flags;
@@ -51,14 +51,14 @@ static struct idr_layer *alloc_layer(struct idr *idp)
 }
 
 /* only called when idp->lock is held */
-static void __free_layer(struct idr *idp, struct idr_layer *p)
+static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
 {
 	p->ary[0] = idp->id_free;
 	idp->id_free = p;
 	idp->id_free_cnt++;
 }
 
-static void free_layer(struct idr *idp, struct idr_layer *p)
+static void move_to_free_list(struct idr *idp, struct idr_layer *p)
 {
 	unsigned long flags;
 
@@ -66,7 +66,7 @@ static void free_layer(struct idr *idp, struct idr_layer *p)
 	 * Depends on the return element being zeroed.
 	 */
 	spin_lock_irqsave(&idp->lock, flags);
-	__free_layer(idp, p);
+	__move_to_free_list(idp, p);
 	spin_unlock_irqrestore(&idp->lock, flags);
 }
 
@@ -109,7 +109,7 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
 		new = kmem_cache_alloc(idr_layer_cache, gfp_mask);
 		if (new == NULL)
 			return (0);
-		free_layer(idp, new);
+		move_to_free_list(idp, new);
 	}
 	return 1;
 }
@@ -167,7 +167,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 		 * Create the layer below if it is missing.
 		 */
 		if (!p->ary[m]) {
-			if (!(new = alloc_layer(idp)))
+			new = get_from_free_list(idp);
+			if (!new)
 				return -1;
 			p->ary[m] = new;
 			p->count++;
@@ -192,7 +193,7 @@ build_up:
 	p = idp->top;
 	layers = idp->layers;
 	if (unlikely(!p)) {
-		if (!(p = alloc_layer(idp)))
+		if (!(p = get_from_free_list(idp)))
 			return -1;
 		layers = 1;
 	}
@@ -204,7 +205,7 @@ build_up:
 		layers++;
 		if (!p->count)
 			continue;
-		if (!(new = alloc_layer(idp))) {
+		if (!(new = get_from_free_list(idp))) {
 			/*
 			 * The allocation failed.  If we built part of
 			 * the structure tear it down.
@@ -214,7 +215,7 @@ build_up:
 				p = p->ary[0];
 				new->ary[0] = NULL;
 				new->bitmap = new->count = 0;
-				__free_layer(idp, new);
+				__move_to_free_list(idp, new);
 			}
 			spin_unlock_irqrestore(&idp->lock, flags);
 			return -1;
@@ -351,7 +352,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
 		__clear_bit(n, &p->bitmap);
 		p->ary[n] = NULL;
 		while(*paa && ! --((**paa)->count)){
-			free_layer(idp, **paa);
+			move_to_free_list(idp, **paa);
 			**paa-- = NULL;
 		}
 		if (!*paa)
@@ -378,12 +379,12 @@ void idr_remove(struct idr *idp, int id)
 
 		p = idp->top->ary[0];
 		idp->top->bitmap = idp->top->count = 0;
-		free_layer(idp, idp->top);
+		move_to_free_list(idp, idp->top);
 		idp->top = p;
 		--idp->layers;
 	}
 	while (idp->id_free_cnt >= IDR_FREE_MAX) {
-		p = alloc_layer(idp);
+		p = get_from_free_list(idp);
 		kmem_cache_free(idr_layer_cache, p);
 	}
 	return;
@@ -426,7 +427,7 @@ void idr_remove_all(struct idr *idp)
 		while (n < fls(id)) {
 			if (p) {
 				memset(p, 0, sizeof *p);
-				free_layer(idp, p);
+				move_to_free_list(idp, p);
 			}
 			n += IDR_BITS;
 			p = *--paa;
@@ -444,7 +445,7 @@ EXPORT_SYMBOL(idr_remove_all);
 void idr_destroy(struct idr *idp)
 {
 	while (idp->id_free_cnt) {
-		struct idr_layer *p = alloc_layer(idp);
+		struct idr_layer *p = get_from_free_list(idp);
 		kmem_cache_free(idr_layer_cache, p);
 	}
 }
@@ -749,7 +750,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 	 * allocation.
 	 */
 	if (ida->idr.id_free_cnt || ida->free_bitmap) {
-		struct idr_layer *p = alloc_layer(&ida->idr);
+		struct idr_layer *p = get_from_free_list(&ida->idr);
 		if (p)
 			kmem_cache_free(idr_layer_cache, p);
 	}
-- 
cgit v1.2.3


From f098ad655f4dd8e3da98ffbeda9cedcc4459c01a Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:59 -0700
Subject: idr: fix a printk call

Fix the incomplete printk call.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 8170ace154fb..9d905b131ecb 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -326,7 +326,8 @@ EXPORT_SYMBOL(idr_get_new);
 
 static void idr_remove_warning(int id)
 {
-	printk("idr_remove called for id=%d which is not allocated.\n", id);
+	printk(KERN_WARNING
+		"idr_remove called for id=%d which is not allocated.\n", id);
 	dump_stack();
 }
 
-- 
cgit v1.2.3


From 944ca05c7b4972f2ebf37262e0f4933d178ad6db Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:47:59 -0700
Subject: idr: error checking factorization

Do some code factorization in the return code analysis.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 9d905b131ecb..80ba06f29d36 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -143,7 +143,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			/* if already at the top layer, we need to grow */
 			if (!(p = pa[l])) {
 				*starting_id = id;
-				return -2;
+				return IDR_NEED_TO_GROW;
 			}
 
 			/* If we need to go up one layer, continue the
@@ -160,7 +160,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			id = ((id >> sh) ^ n ^ m) << sh;
 		}
 		if ((id >= MAX_ID_BIT) || (id < 0))
-			return -3;
+			return IDR_NOMORE_SPACE;
 		if (l == 0)
 			break;
 		/*
@@ -229,7 +229,7 @@ build_up:
 	idp->top = p;
 	idp->layers = layers;
 	v = sub_alloc(idp, &id, pa);
-	if (v == -2)
+	if (v == IDR_NEED_TO_GROW)
 		goto build_up;
 	return(v);
 }
@@ -278,12 +278,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 	 * This is a cheap hack until the IDR code can be fixed to
 	 * return proper error values.
 	 */
-	if (rv < 0) {
-		if (rv == -1)
-			return -EAGAIN;
-		else /* Will be -3 */
-			return -ENOSPC;
-	}
+	if (rv < 0)
+		return _idr_rc_to_errno(rv);
 	*id = rv;
 	return 0;
 }
@@ -313,12 +309,8 @@ int idr_get_new(struct idr *idp, void *ptr, int *id)
 	 * This is a cheap hack until the IDR code can be fixed to
 	 * return proper error values.
 	 */
-	if (rv < 0) {
-		if (rv == -1)
-			return -EAGAIN;
-		else /* Will be -3 */
-			return -ENOSPC;
-	}
+	if (rv < 0)
+		return _idr_rc_to_errno(rv);
 	*id = rv;
 	return 0;
 }
@@ -696,12 +688,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
  restart:
 	/* get vacant slot */
 	t = idr_get_empty_slot(&ida->idr, idr_id, pa);
-	if (t < 0) {
-		if (t == -1)
-			return -EAGAIN;
-		else /* will be -3 */
-			return -ENOSPC;
-	}
+	if (t < 0)
+		return _idr_rc_to_errno(t);
 
 	if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
 		return -ENOSPC;
-- 
cgit v1.2.3


From 3219b3b7456d5cf15ba7b1fe7b1bcf15ce8840e2 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:00 -0700
Subject: idr: make idr_get_new* rcu-safe

Make the idr_get_new* routines rcu-safe.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 80ba06f29d36..44ab3b2a4eba 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,6 +6,8 @@
  * Modified by George Anzinger to reuse immediately and to use
  * find bit instructions.  Also removed _irq on spinlocks.
  *
+ * Modified by Nadia Derbey to make it RCU safe.
+ *
  * Small id to pointer translation service.
  *
  * It uses a radix tree like structure as a sparse array indexed
@@ -96,7 +98,7 @@ static void idr_mark_full(struct idr_layer **pa, int id)
  * @gfp_mask:	memory allocation flags
  *
  * This function should be called prior to locking and calling the
- * following function.  It preallocates enough memory to satisfy
+ * idr_get_new* functions. It preallocates enough memory to satisfy
  * the worst possible allocation.
  *
  * If the system is REALLY out of memory this function returns 0,
@@ -170,7 +172,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			new = get_from_free_list(idp);
 			if (!new)
 				return -1;
-			p->ary[m] = new;
+			rcu_assign_pointer(p->ary[m], new);
 			p->count++;
 		}
 		pa[l--] = p;
@@ -226,7 +228,7 @@ build_up:
 			__set_bit(0, &new->bitmap);
 		p = new;
 	}
-	idp->top = p;
+	rcu_assign_pointer(idp->top, p);
 	idp->layers = layers;
 	v = sub_alloc(idp, &id, pa);
 	if (v == IDR_NEED_TO_GROW)
@@ -245,7 +247,8 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
 		 * Successfully found an empty slot.  Install the user
 		 * pointer and mark the slot full.
 		 */
-		pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr;
+		rcu_assign_pointer(pa[0]->ary[id & IDR_MASK],
+				(struct idr_layer *)ptr);
 		pa[0]->count++;
 		idr_mark_full(pa, id);
 	}
@@ -710,7 +713,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 			return -EAGAIN;
 
 		memset(bitmap, 0, sizeof(struct ida_bitmap));
-		pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap;
+		rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
+				(void *)bitmap);
 		pa[0]->count++;
 	}
 
-- 
cgit v1.2.3


From f9c46d6ea5ce138a886c3a0f10a46130afab75f5 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:01 -0700
Subject: idr: make idr_find rcu-safe

Make idr_find rcu-safe: it can now be called inside an rcu_read critical
section.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 44ab3b2a4eba..21e12af1f231 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -456,7 +456,8 @@ EXPORT_SYMBOL(idr_destroy);
  * return indicates that @id is not valid or you passed %NULL in
  * idr_get_new().
  *
- * The caller must serialize idr_find() vs idr_get_new() and idr_remove().
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
  */
 void *idr_find(struct idr *idp, int id)
 {
@@ -464,7 +465,7 @@ void *idr_find(struct idr *idp, int id)
 	struct idr_layer *p;
 
 	n = idp->layers * IDR_BITS;
-	p = idp->top;
+	p = rcu_dereference(idp->top);
 
 	/* Mask off upper bits we don't use for the search. */
 	id &= MAX_ID_MASK;
@@ -474,7 +475,7 @@ void *idr_find(struct idr *idp, int id)
 
 	while (n > 0 && p) {
 		n -= IDR_BITS;
-		p = p->ary[(id >> n) & IDR_MASK];
+		p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
 	}
 	return((void *)p);
 }
@@ -507,7 +508,7 @@ int idr_for_each(struct idr *idp,
 	struct idr_layer **paa = &pa[0];
 
 	n = idp->layers * IDR_BITS;
-	p = idp->top;
+	p = rcu_dereference(idp->top);
 	max = 1 << n;
 
 	id = 0;
@@ -515,7 +516,7 @@ int idr_for_each(struct idr *idp,
 		while (n > 0 && p) {
 			n -= IDR_BITS;
 			*paa++ = p;
-			p = p->ary[(id >> n) & IDR_MASK];
+			p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
 		}
 
 		if (p) {
-- 
cgit v1.2.3


From cf481c20c476ad2c0febdace9ce23f5a4db19582 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Fri, 25 Jul 2008 01:48:02 -0700
Subject: idr: make idr_remove rcu-safe

Introduce the free_layer() routine: it is the one that actually frees memory
after a grace period has elapsed.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Reviewed-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jim Houston <jim.houston@comcast.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 57 +++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 14 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 21e12af1f231..3476f8203e97 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -52,6 +52,19 @@ static struct idr_layer *get_from_free_list(struct idr *idp)
 	return(p);
 }
 
+static void idr_layer_rcu_free(struct rcu_head *head)
+{
+	struct idr_layer *layer;
+
+	layer = container_of(head, struct idr_layer, rcu_head);
+	kmem_cache_free(idr_layer_cache, layer);
+}
+
+static inline void free_layer(struct idr_layer *p)
+{
+	call_rcu(&p->rcu_head, idr_layer_rcu_free);
+}
+
 /* only called when idp->lock is held */
 static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
 {
@@ -331,6 +344,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
 	struct idr_layer *p = idp->top;
 	struct idr_layer **pa[MAX_LEVEL];
 	struct idr_layer ***paa = &pa[0];
+	struct idr_layer *to_free;
 	int n;
 
 	*paa = NULL;
@@ -346,13 +360,18 @@ static void sub_remove(struct idr *idp, int shift, int id)
 	n = id & IDR_MASK;
 	if (likely(p != NULL && test_bit(n, &p->bitmap))){
 		__clear_bit(n, &p->bitmap);
-		p->ary[n] = NULL;
+		rcu_assign_pointer(p->ary[n], NULL);
+		to_free = NULL;
 		while(*paa && ! --((**paa)->count)){
-			move_to_free_list(idp, **paa);
+			if (to_free)
+				free_layer(to_free);
+			to_free = **paa;
 			**paa-- = NULL;
 		}
 		if (!*paa)
 			idp->layers = 0;
+		if (to_free)
+			free_layer(to_free);
 	} else
 		idr_remove_warning(id);
 }
@@ -365,22 +384,34 @@ static void sub_remove(struct idr *idp, int shift, int id)
 void idr_remove(struct idr *idp, int id)
 {
 	struct idr_layer *p;
+	struct idr_layer *to_free;
 
 	/* Mask off upper bits we don't use for the search. */
 	id &= MAX_ID_MASK;
 
 	sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
 	if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
-	    idp->top->ary[0]) {  // We can drop a layer
-
+	    idp->top->ary[0]) {
+		/*
+		 * Single child at leftmost slot: we can shrink the tree.
+		 * This level is not needed anymore since when layers are
+		 * inserted, they are inserted at the top of the existing
+		 * tree.
+		 */
+		to_free = idp->top;
 		p = idp->top->ary[0];
-		idp->top->bitmap = idp->top->count = 0;
-		move_to_free_list(idp, idp->top);
-		idp->top = p;
+		rcu_assign_pointer(idp->top, p);
 		--idp->layers;
+		to_free->bitmap = to_free->count = 0;
+		free_layer(to_free);
 	}
 	while (idp->id_free_cnt >= IDR_FREE_MAX) {
 		p = get_from_free_list(idp);
+		/*
+		 * Note: we don't call the rcu callback here, since the only
+		 * layers that fall into the freelist are those that have been
+		 * preallocated.
+		 */
 		kmem_cache_free(idr_layer_cache, p);
 	}
 	return;
@@ -421,15 +452,13 @@ void idr_remove_all(struct idr *idp)
 
 		id += 1 << n;
 		while (n < fls(id)) {
-			if (p) {
-				memset(p, 0, sizeof *p);
-				move_to_free_list(idp, p);
-			}
+			if (p)
+				free_layer(p);
 			n += IDR_BITS;
 			p = *--paa;
 		}
 	}
-	idp->top = NULL;
+	rcu_assign_pointer(idp->top, NULL);
 	idp->layers = 0;
 }
 EXPORT_SYMBOL(idr_remove_all);
@@ -546,7 +575,7 @@ EXPORT_SYMBOL(idr_for_each);
  * A -ENOENT return indicates that @id was not found.
  * A -EINVAL return indicates that @id was not within valid constraints.
  *
- * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove().
+ * The caller must serialize with writers.
  */
 void *idr_replace(struct idr *idp, void *ptr, int id)
 {
@@ -572,7 +601,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
 		return ERR_PTR(-ENOENT);
 
 	old_p = p->ary[n];
-	p->ary[n] = ptr;
+	rcu_assign_pointer(p->ary[n], ptr);
 
 	return old_p;
 }
-- 
cgit v1.2.3


From 3bc9f79ee1ddc913be0a6d3592036683ef8a3148 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 25 Jul 2008 14:57:58 +0200
Subject: iommu: add iommu_num_pages helper function

Calculating the number of pages from given address and length numbers is a task
required in multiple IOMMU implementations. So implement this as a generic
function into the IOMMU helper code.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/iommu-helper.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'lib')

diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index a3b8d4c3f77a..889ddce2021e 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -80,3 +80,11 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
 	}
 }
 EXPORT_SYMBOL(iommu_area_free);
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+{
+	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+
+	return size >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL(iommu_num_pages);
-- 
cgit v1.2.3


From 0bc3cc03fa6e1c20aecb5a33356bcaae410640b9 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Thu, 24 Jul 2008 18:21:31 -0700
Subject: cpumask: change cpumask_of_cpu_ptr to use new cpumask_of_cpu

  * Replace previous instances of the cpumask_of_cpu_ptr* macros
    with a the new (lvalue capable) generic cpumask_of_cpu().

Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/smp_processor_id.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index c4381d9516f6..0f8fc22ed103 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -11,7 +11,6 @@ notrace unsigned int debug_smp_processor_id(void)
 {
 	unsigned long preempt_count = preempt_count();
 	int this_cpu = raw_smp_processor_id();
-	cpumask_of_cpu_ptr_declare(this_mask);
 
 	if (likely(preempt_count))
 		goto out;
@@ -23,9 +22,7 @@ notrace unsigned int debug_smp_processor_id(void)
 	 * Kernel threads bound to a single CPU can safely use
 	 * smp_processor_id():
 	 */
-	cpumask_of_cpu_ptr_next(this_mask, this_cpu);
-
-	if (cpus_equal(current->cpus_allowed, *this_mask))
+	if (cpus_equal(current->cpus_allowed, cpumask_of_cpu(this_cpu)))
 		goto out;
 
 	/*
-- 
cgit v1.2.3


From 8d8bb39b9eba32dd70e87fd5ad5c5dd4ba118e06 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 25 Jul 2008 19:44:49 -0700
Subject: dma-mapping: add the device argument to dma_mapping_error()

Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER
architecture does:

This enables us to cleanly fix the Calgary IOMMU issue that some devices
are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423).

I think that per-device dma_mapping_ops support would be also helpful for
KVM people to support PCI passthrough but Andi thinks that this makes it
difficult to support the PCI passthrough (see the above thread).  So I
CC'ed this to KVM camp.  Comments are appreciated.

A pointer to dma_mapping_ops to struct dev_archdata is added.  If the
pointer is non NULL, DMA operations in asm/dma-mapping.h use it.  If it's
NULL, the system-wide dma_ops pointer is used as before.

If it's useful for KVM people, I plan to implement a mechanism to register
a hook called when a new pci (or dma capable) device is created (it works
with hot plugging).  It enables IOMMUs to set up an appropriate
dma_mapping_ops per device.

The major obstacle is that dma_mapping_error doesn't take a pointer to the
device unlike other DMA operations.  So x86 can't have dma_mapping_ops per
device.  Note all the POWER IOMMUs use the same dma_mapping_error function
so this is not a problem for POWER but x86 IOMMUs use different
dma_mapping_error functions.

The first patch adds the device argument to dma_mapping_error.  The patch
is trivial but large since it touches lots of drivers and dma-mapping.h in
all the architecture.

This patch:

dma_mapping_error() doesn't take a pointer to the device unlike other DMA
operations.  So we can't have dma_mapping_ops per device.

Note that POWER already has dma_mapping_ops per device but all the POWER
IOMMUs use the same dma_mapping_error function.  x86 IOMMUs use device
argument.

[akpm@linux-foundation.org: fix sge]
[akpm@linux-foundation.org: fix svc_rdma]
[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: fix bnx2x]
[akpm@linux-foundation.org: fix s2io]
[akpm@linux-foundation.org: fix pasemi_mac]
[akpm@linux-foundation.org: fix sdhci]
[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: fix sparc]
[akpm@linux-foundation.org: fix ibmvscsi]
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Avi Kivity <avi@qumranet.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/swiotlb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d568894df8cc..977edbdbc1de 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -492,7 +492,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 */
 		dma_addr_t handle;
 		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
-		if (swiotlb_dma_mapping_error(handle))
+		if (swiotlb_dma_mapping_error(hwdev, handle))
 			return NULL;
 
 		ret = bus_to_virt(handle);
@@ -824,7 +824,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 }
 
 int
-swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
 	return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
 }
-- 
cgit v1.2.3


From 47feff2c8eefe85099f87c43d3096855f0085ca0 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Fri, 25 Jul 2008 19:45:29 -0700
Subject: radix-tree: add gang_lookup_slot, gang_lookup_slot_tag

Introduce gang_lookup_slot() and gang_lookup_slot_tag() functions, which
are used by lockless pagecache.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/radix-tree.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 155 insertions(+), 23 deletions(-)

(limited to 'lib')

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 56ec21a7f73d..9c4f1ffa2864 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -359,18 +359,17 @@ EXPORT_SYMBOL(radix_tree_insert);
  *	Returns:  the slot corresponding to the position @index in the
  *	radix tree @root. This is useful for update-if-exists operations.
  *
- *	This function cannot be called under rcu_read_lock, it must be
- *	excluded from writers, as must the returned slot for subsequent
- *	use by radix_tree_deref_slot() and radix_tree_replace slot.
- *	Caller must hold tree write locked across slot lookup and
- *	replace.
+ *	This function can be called under rcu_read_lock iff the slot is not
+ *	modified by radix_tree_replace_slot, otherwise it must be called
+ *	exclusive from other writers. Any dereference of the slot must be done
+ *	using radix_tree_deref_slot.
  */
 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 {
 	unsigned int height, shift;
 	struct radix_tree_node *node, **slot;
 
-	node = root->rnode;
+	node = rcu_dereference(root->rnode);
 	if (node == NULL)
 		return NULL;
 
@@ -390,7 +389,7 @@ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 	do {
 		slot = (struct radix_tree_node **)
 			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
-		node = *slot;
+		node = rcu_dereference(*slot);
 		if (node == NULL)
 			return NULL;
 
@@ -667,7 +666,7 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 EXPORT_SYMBOL(radix_tree_next_hole);
 
 static unsigned int
-__lookup(struct radix_tree_node *slot, void **results, unsigned long index,
+__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
 	unsigned int max_items, unsigned long *next_index)
 {
 	unsigned int nr_found = 0;
@@ -701,11 +700,9 @@ __lookup(struct radix_tree_node *slot, void **results, unsigned long index,
 
 	/* Bottom level: grab some items */
 	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
-		struct radix_tree_node *node;
 		index++;
-		node = slot->slots[i];
-		if (node) {
-			results[nr_found++] = rcu_dereference(node);
+		if (slot->slots[i]) {
+			results[nr_found++] = &(slot->slots[i]);
 			if (nr_found == max_items)
 				goto out;
 		}
@@ -759,13 +756,22 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 
 	ret = 0;
 	while (ret < max_items) {
-		unsigned int nr_found;
+		unsigned int nr_found, slots_found, i;
 		unsigned long next_index;	/* Index of next search */
 
 		if (cur_index > max_index)
 			break;
-		nr_found = __lookup(node, results + ret, cur_index,
+		slots_found = __lookup(node, (void ***)results + ret, cur_index,
 					max_items - ret, &next_index);
+		nr_found = 0;
+		for (i = 0; i < slots_found; i++) {
+			struct radix_tree_node *slot;
+			slot = *(((void ***)results)[ret + i]);
+			if (!slot)
+				continue;
+			results[ret + nr_found] = rcu_dereference(slot);
+			nr_found++;
+		}
 		ret += nr_found;
 		if (next_index == 0)
 			break;
@@ -776,12 +782,71 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup);
 
+/**
+ *	radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
+ *	@root:		radix tree root
+ *	@results:	where the results of the lookup are placed
+ *	@first_index:	start the lookup from this key
+ *	@max_items:	place up to this many items at *results
+ *
+ *	Performs an index-ascending scan of the tree for present items.  Places
+ *	their slots at *@results and returns the number of items which were
+ *	placed at *@results.
+ *
+ *	The implementation is naive.
+ *
+ *	Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
+ *	be dereferenced with radix_tree_deref_slot, and if using only RCU
+ *	protection, radix_tree_deref_slot may fail requiring a retry.
+ */
+unsigned int
+radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
+			unsigned long first_index, unsigned int max_items)
+{
+	unsigned long max_index;
+	struct radix_tree_node *node;
+	unsigned long cur_index = first_index;
+	unsigned int ret;
+
+	node = rcu_dereference(root->rnode);
+	if (!node)
+		return 0;
+
+	if (!radix_tree_is_indirect_ptr(node)) {
+		if (first_index > 0)
+			return 0;
+		results[0] = (void **)&root->rnode;
+		return 1;
+	}
+	node = radix_tree_indirect_to_ptr(node);
+
+	max_index = radix_tree_maxindex(node->height);
+
+	ret = 0;
+	while (ret < max_items) {
+		unsigned int slots_found;
+		unsigned long next_index;	/* Index of next search */
+
+		if (cur_index > max_index)
+			break;
+		slots_found = __lookup(node, results + ret, cur_index,
+					max_items - ret, &next_index);
+		ret += slots_found;
+		if (next_index == 0)
+			break;
+		cur_index = next_index;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
+
 /*
  * FIXME: the two tag_get()s here should use find_next_bit() instead of
  * open-coding the search.
  */
 static unsigned int
-__lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index,
+__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
 	unsigned int max_items, unsigned long *next_index, unsigned int tag)
 {
 	unsigned int nr_found = 0;
@@ -811,11 +876,9 @@ __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index,
 			unsigned long j = index & RADIX_TREE_MAP_MASK;
 
 			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
-				struct radix_tree_node *node;
 				index++;
 				if (!tag_get(slot, tag, j))
 					continue;
-				node = slot->slots[j];
 				/*
 				 * Even though the tag was found set, we need to
 				 * recheck that we have a non-NULL node, because
@@ -826,9 +889,8 @@ __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index,
 				 * lookup ->slots[x] without a lock (ie. can't
 				 * rely on its value remaining the same).
 				 */
-				if (node) {
-					node = rcu_dereference(node);
-					results[nr_found++] = node;
+				if (slot->slots[j]) {
+					results[nr_found++] = &(slot->slots[j]);
 					if (nr_found == max_items)
 						goto out;
 				}
@@ -887,13 +949,22 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 
 	ret = 0;
 	while (ret < max_items) {
-		unsigned int nr_found;
+		unsigned int nr_found, slots_found, i;
 		unsigned long next_index;	/* Index of next search */
 
 		if (cur_index > max_index)
 			break;
-		nr_found = __lookup_tag(node, results + ret, cur_index,
-					max_items - ret, &next_index, tag);
+		slots_found = __lookup_tag(node, (void ***)results + ret,
+				cur_index, max_items - ret, &next_index, tag);
+		nr_found = 0;
+		for (i = 0; i < slots_found; i++) {
+			struct radix_tree_node *slot;
+			slot = *(((void ***)results)[ret + i]);
+			if (!slot)
+				continue;
+			results[ret + nr_found] = rcu_dereference(slot);
+			nr_found++;
+		}
 		ret += nr_found;
 		if (next_index == 0)
 			break;
@@ -904,6 +975,67 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
 
+/**
+ *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
+ *					  radix tree based on a tag
+ *	@root:		radix tree root
+ *	@results:	where the results of the lookup are placed
+ *	@first_index:	start the lookup from this key
+ *	@max_items:	place up to this many items at *results
+ *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ *	Performs an index-ascending scan of the tree for present items which
+ *	have the tag indexed by @tag set.  Places the slots at *@results and
+ *	returns the number of slots which were placed at *@results.
+ */
+unsigned int
+radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
+		unsigned long first_index, unsigned int max_items,
+		unsigned int tag)
+{
+	struct radix_tree_node *node;
+	unsigned long max_index;
+	unsigned long cur_index = first_index;
+	unsigned int ret;
+
+	/* check the root's tag bit */
+	if (!root_tag_get(root, tag))
+		return 0;
+
+	node = rcu_dereference(root->rnode);
+	if (!node)
+		return 0;
+
+	if (!radix_tree_is_indirect_ptr(node)) {
+		if (first_index > 0)
+			return 0;
+		results[0] = (void **)&root->rnode;
+		return 1;
+	}
+	node = radix_tree_indirect_to_ptr(node);
+
+	max_index = radix_tree_maxindex(node->height);
+
+	ret = 0;
+	while (ret < max_items) {
+		unsigned int slots_found;
+		unsigned long next_index;	/* Index of next search */
+
+		if (cur_index > max_index)
+			break;
+		slots_found = __lookup_tag(node, results + ret,
+				cur_index, max_items - ret, &next_index, tag);
+		ret += slots_found;
+		if (next_index == 0)
+			break;
+		cur_index = next_index;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
+
+
 /**
  *	radix_tree_shrink    -    shrink height of a radix tree to minimal
  *	@root		radix tree root
-- 
cgit v1.2.3


From 51cc50685a4275c6a02653670af9f108a64e01cf Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 25 Jul 2008 19:45:34 -0700
Subject: SL*B: drop kmem cache argument from constructor

Kmem cache passed to constructor is only needed for constructors that are
themselves multiplexeres.  Nobody uses this "feature", nor does anybody uses
passed kmem cache in non-trivial way, so pass only pointer to object.

Non-trivial places are:
	arch/powerpc/mm/init_64.c
	arch/powerpc/mm/hugetlbpage.c

This is flag day, yes.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Jon Tollefson <kniht@linux.vnet.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Matt Mackall <mpm@selenic.com>
[akpm@linux-foundation.org: fix arch/powerpc/mm/hugetlbpage.c]
[akpm@linux-foundation.org: fix mm/slab.c]
[akpm@linux-foundation.org: fix ubifs]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c        | 2 +-
 lib/radix-tree.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 3476f8203e97..e728c7fccc4d 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -607,7 +607,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
 }
 EXPORT_SYMBOL(idr_replace);
 
-static void idr_cache_ctor(struct kmem_cache *idr_layer_cache, void *idr_layer)
+static void idr_cache_ctor(void *idr_layer)
 {
 	memset(idr_layer, 0, sizeof(struct idr_layer));
 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 9c4f1ffa2864..be86b32bc874 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1183,7 +1183,7 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
 EXPORT_SYMBOL(radix_tree_tagged);
 
 static void
-radix_tree_node_ctor(struct kmem_cache *cachep, void *node)
+radix_tree_node_ctor(void *node)
 {
 	memset(node, 0, sizeof(struct radix_tree_node));
 }
-- 
cgit v1.2.3


From 5cd2b459d326a424671dcd95f038649f7bf7cb96 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 25 Jul 2008 19:45:39 -0700
Subject: Use WARN() in lib/

Use WARN() instead of a printk+WARN_ON() pair; this way the message becomes
part of the warning section for better reporting/collection.  In addition, one
of the if() clauses collapes into the WARN() entirely now.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/debugobjects.c   | 15 +++++----------
 lib/iomap.c          |  3 +--
 lib/kobject_uevent.c |  6 ++----
 lib/plist.c          | 13 +++++++------
 4 files changed, 15 insertions(+), 22 deletions(-)

(limited to 'lib')

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index f86196390cfd..45a6bde762d1 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -205,9 +205,8 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
 
 	if (limit < 5 && obj->descr != descr_test) {
 		limit++;
-		printk(KERN_ERR "ODEBUG: %s %s object type: %s\n", msg,
+		WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg,
 		       obj_states[obj->state], obj->descr->name);
-		WARN_ON(1);
 	}
 	debug_objects_warnings++;
 }
@@ -733,26 +732,22 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
 
 	obj = lookup_object(addr, db);
 	if (!obj && state != ODEBUG_STATE_NONE) {
-		printk(KERN_ERR "ODEBUG: selftest object not found\n");
-		WARN_ON(1);
+		WARN(1, KERN_ERR "ODEBUG: selftest object not found\n");
 		goto out;
 	}
 	if (obj && obj->state != state) {
-		printk(KERN_ERR "ODEBUG: selftest wrong state: %d != %d\n",
+		WARN(1, KERN_ERR "ODEBUG: selftest wrong state: %d != %d\n",
 		       obj->state, state);
-		WARN_ON(1);
 		goto out;
 	}
 	if (fixups != debug_objects_fixups) {
-		printk(KERN_ERR "ODEBUG: selftest fixups failed %d != %d\n",
+		WARN(1, KERN_ERR "ODEBUG: selftest fixups failed %d != %d\n",
 		       fixups, debug_objects_fixups);
-		WARN_ON(1);
 		goto out;
 	}
 	if (warnings != debug_objects_warnings) {
-		printk(KERN_ERR "ODEBUG: selftest warnings failed %d != %d\n",
+		WARN(1, KERN_ERR "ODEBUG: selftest warnings failed %d != %d\n",
 		       warnings, debug_objects_warnings);
-		WARN_ON(1);
 		goto out;
 	}
 	res = 0;
diff --git a/lib/iomap.c b/lib/iomap.c
index 37a3ea4cac9f..d32229385151 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -40,8 +40,7 @@ static void bad_io_access(unsigned long port, const char *access)
 	static int count = 10;
 	if (count) {
 		count--;
-		printk(KERN_ERR "Bad IO access at port %#lx (%s)\n", port, access);
-		WARN_ON(1);
+		WARN(1, KERN_ERR "Bad IO access at port %#lx (%s)\n", port, access);
 	}
 }
 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9f8d599459d1..3f914725bda8 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -285,8 +285,7 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 	int len;
 
 	if (env->envp_idx >= ARRAY_SIZE(env->envp)) {
-		printk(KERN_ERR "add_uevent_var: too many keys\n");
-		WARN_ON(1);
+		WARN(1, KERN_ERR "add_uevent_var: too many keys\n");
 		return -ENOMEM;
 	}
 
@@ -297,8 +296,7 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 	va_end(args);
 
 	if (len >= (sizeof(env->buf) - env->buflen)) {
-		printk(KERN_ERR "add_uevent_var: buffer size too small\n");
-		WARN_ON(1);
+		WARN(1, KERN_ERR "add_uevent_var: buffer size too small\n");
 		return -ENOMEM;
 	}
 
diff --git a/lib/plist.c b/lib/plist.c
index 3074a02272f3..d6c64a824e1d 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -31,12 +31,13 @@
 static void plist_check_prev_next(struct list_head *t, struct list_head *p,
 				  struct list_head *n)
 {
-	if (n->prev != p || p->next != n) {
-		printk("top: %p, n: %p, p: %p\n", t, t->next, t->prev);
-		printk("prev: %p, n: %p, p: %p\n", p, p->next, p->prev);
-		printk("next: %p, n: %p, p: %p\n", n, n->next, n->prev);
-		WARN_ON(1);
-	}
+	WARN(n->prev != p || p->next != n,
+			"top: %p, n: %p, p: %p\n"
+			"prev: %p, n: %p, p: %p\n"
+			"next: %p, n: %p, p: %p\n",
+			 t, t->next, t->prev,
+			p, p->next, p->prev,
+			n, n->next, n->prev);
 }
 
 static void plist_check_list(struct list_head *top)
-- 
cgit v1.2.3


From bbc698636ed48b6fcd323964e0f847a6a796325d Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 25 Jul 2008 19:45:59 -0700
Subject: task_current_syscall

This adds the new function task_current_syscall() on machines where the
asm/syscall.h interface is supported (CONFIG_HAVE_ARCH_TRACEHOOK).  It's
exported for modules to use in the future.  This function safely samples
the state of a blocked thread to collect what system call it is blocked
in, and the six system call argument registers.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Makefile  |  2 ++
 lib/syscall.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 lib/syscall.c

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 9085ad6fa53d..942c7250f603 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -78,6 +78,8 @@ lib-$(CONFIG_GENERIC_BUG) += bug.o
 
 obj-$(CONFIG_HAVE_LMB) += lmb.o
 
+obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/syscall.c b/lib/syscall.c
new file mode 100644
index 000000000000..a4f7067f72fa
--- /dev/null
+++ b/lib/syscall.c
@@ -0,0 +1,75 @@
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <asm/syscall.h>
+
+static int collect_syscall(struct task_struct *target, long *callno,
+			   unsigned long args[6], unsigned int maxargs,
+			   unsigned long *sp, unsigned long *pc)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	if (unlikely(!regs))
+		return -EAGAIN;
+
+	*sp = user_stack_pointer(regs);
+	*pc = instruction_pointer(regs);
+
+	*callno = syscall_get_nr(target, regs);
+	if (*callno != -1L && maxargs > 0)
+		syscall_get_arguments(target, regs, 0, maxargs, args);
+
+	return 0;
+}
+
+/**
+ * task_current_syscall - Discover what a blocked task is doing.
+ * @target:		thread to examine
+ * @callno:		filled with system call number or -1
+ * @args:		filled with @maxargs system call arguments
+ * @maxargs:		number of elements in @args to fill
+ * @sp:			filled with user stack pointer
+ * @pc:			filled with user PC
+ *
+ * If @target is blocked in a system call, returns zero with *@callno
+ * set to the the call's number and @args filled in with its arguments.
+ * Registers not used for system call arguments may not be available and
+ * it is not kosher to use &struct user_regset calls while the system
+ * call is still in progress.  Note we may get this result if @target
+ * has finished its system call but not yet returned to user mode, such
+ * as when it's stopped for signal handling or syscall exit tracing.
+ *
+ * If @target is blocked in the kernel during a fault or exception,
+ * returns zero with *@callno set to -1 and does not fill in @args.
+ * If so, it's now safe to examine @target using &struct user_regset
+ * get() calls as long as we're sure @target won't return to user mode.
+ *
+ * Returns -%EAGAIN if @target does not remain blocked.
+ *
+ * Returns -%EINVAL if @maxargs is too large (maximum is six).
+ */
+int task_current_syscall(struct task_struct *target, long *callno,
+			 unsigned long args[6], unsigned int maxargs,
+			 unsigned long *sp, unsigned long *pc)
+{
+	long state;
+	unsigned long ncsw;
+
+	if (unlikely(maxargs > 6))
+		return -EINVAL;
+
+	if (target == current)
+		return collect_syscall(target, callno, args, maxargs, sp, pc);
+
+	state = target->state;
+	if (unlikely(!state))
+		return -EAGAIN;
+
+	ncsw = wait_task_inactive(target, state);
+	if (unlikely(!ncsw) ||
+	    unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) ||
+	    unlikely(wait_task_inactive(target, state) != ncsw))
+		return -EAGAIN;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(task_current_syscall);
-- 
cgit v1.2.3


From 454c63b02e530f10b4345343f63596dd705888d0 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@saeurebad.de>
Date: Fri, 25 Jul 2008 19:46:07 -0700
Subject: lib: generic show_mem()

This implements a platform-independent version of show_mem().

Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Bryan Wu <cooloney@kernel.org>
Cc: Chris Zankel <chris@zankel.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Greg Ungerer <gerg@uclinux.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Mikael Starvik <starvik@axis.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Makefile   |  2 +-
 lib/show_mem.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 lib/show_mem.c

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 942c7250f603..3b1f94bbe9de 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
-	 proportions.o prio_heap.o ratelimit.o
+	 proportions.o prio_heap.o ratelimit.o show_mem.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/show_mem.c b/lib/show_mem.c
new file mode 100644
index 000000000000..238e72a18ce1
--- /dev/null
+++ b/lib/show_mem.c
@@ -0,0 +1,63 @@
+/*
+ * Generic show_mem() implementation
+ *
+ * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de>
+ * All code subject to the GPL version 2.
+ */
+
+#include <linux/mm.h>
+#include <linux/nmi.h>
+#include <linux/quicklist.h>
+
+void show_mem(void)
+{
+	pg_data_t *pgdat;
+	unsigned long total = 0, reserved = 0, shared = 0,
+		nonshared = 0, highmem = 0;
+
+	printk(KERN_INFO "Mem-Info:\n");
+	show_free_areas();
+
+	for_each_online_pgdat(pgdat) {
+		unsigned long i, flags;
+
+		pgdat_resize_lock(pgdat, &flags);
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			struct page *page;
+			unsigned long pfn = pgdat->node_start_pfn + i;
+
+			if (unlikely(!(i % MAX_ORDER_NR_PAGES)))
+				touch_nmi_watchdog();
+
+			if (!pfn_valid(pfn))
+				continue;
+
+			page = pfn_to_page(pfn);
+
+			if (PageHighMem(page))
+				highmem++;
+
+			if (PageReserved(page))
+				reserved++;
+			else if (page_count(page) == 1)
+				nonshared++;
+			else if (page_count(page) > 1)
+				shared += page_count(page) - 1;
+
+			total++;
+		}
+		pgdat_resize_unlock(pgdat, &flags);
+	}
+
+	printk(KERN_INFO "%lu pages RAM\n", total);
+#ifdef CONFIG_HIGHMEM
+	printk(KERN_INFO "%lu pages HighMem\n", highmem);
+#endif
+	printk(KERN_INFO "%lu pages reserved\n", reserved);
+	printk(KERN_INFO "%lu pages shared\n", shared);
+	printk(KERN_INFO "%lu pages non-shared\n", nonshared);
+#ifdef CONFIG_QUICKLIST
+	printk(KERN_INFO "%lu pages in pagetable cache\n",
+		quicklist_total_size());
+#endif
+}
-- 
cgit v1.2.3


From 4d9c377c81d37740b25cacf025f95c084eafabbb Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 28 Jul 2008 15:46:21 -0700
Subject: __ratelimit() cpu flags can't be static

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/ratelimit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 35136671b215..26187edcc7ea 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -15,7 +15,6 @@
 #include <linux/module.h>
 
 static DEFINE_SPINLOCK(ratelimit_lock);
-static unsigned long flags;
 
 /*
  * __ratelimit - rate limiting
@@ -26,6 +25,8 @@ static unsigned long flags;
  */
 int __ratelimit(struct ratelimit_state *rs)
 {
+	unsigned long flags;
+
 	if (!rs->interval)
 		return 1;
 
-- 
cgit v1.2.3


From 8978b74253280d59e97cf49a3ec2c0cbccd5b801 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 29 Jul 2008 13:38:53 +0900
Subject: generic, x86: fix add iommu_num_pages helper function

This IOMMU helper function doesn't work for some architectures:

  http://marc.info/?l=linux-kernel&m=121699304403202&w=2

It also breaks POWER and SPARC builds:

  http://marc.info/?l=linux-kernel&m=121730388001890&w=2

Currently, only x86 IOMMUs use this so let's move it to x86 for
now.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/iommu-helper.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'lib')

diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 889ddce2021e..a3b8d4c3f77a 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -80,11 +80,3 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
 	}
 }
 EXPORT_SYMBOL(iommu_area_free);
-
-unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
-{
-	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
-
-	return size >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(iommu_num_pages);
-- 
cgit v1.2.3


From 697f8d0348a652593d195a13dd1067d9df911a82 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 30 Jul 2008 16:29:19 -0700
Subject: random32: seeding improvement

The rationale is:
   * use u32 consistently
   * no need to do LCG on values from (better) get_random_bytes
   * use more data from get_random_bytes for secondary seeding
   * don't reduce state space on srandom32()
   * enforce state variable initialization restrictions

Note: the second paper has a version of random32() with even longer period
and a version of random64() if needed.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/random32.c | 48 +++++++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

(limited to 'lib')

diff --git a/lib/random32.c b/lib/random32.c
index ca87d86992bd..217d5c4b666d 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -56,23 +56,12 @@ static u32 __random32(struct rnd_state *state)
 	return (state->s1 ^ state->s2 ^ state->s3);
 }
 
-static void __set_random32(struct rnd_state *state, unsigned long s)
+/*
+ * Handle minimum values for seeds
+ */
+static inline u32 __seed(u32 x, u32 m)
 {
-	if (s == 0)
-		s = 1;      /* default seed is 1 */
-
-#define LCG(n) (69069 * n)
-	state->s1 = LCG(s);
-	state->s2 = LCG(state->s1);
-	state->s3 = LCG(state->s2);
-
-	/* "warm it up" */
-	__random32(state);
-	__random32(state);
-	__random32(state);
-	__random32(state);
-	__random32(state);
-	__random32(state);
+	return (x < m) ? x + m : x;
 }
 
 /**
@@ -107,7 +96,7 @@ void srandom32(u32 entropy)
 	 */
 	for_each_possible_cpu (i) {
 		struct rnd_state *state = &per_cpu(net_rand_state, i);
-		__set_random32(state, state->s1 ^ entropy);
+		state->s1 = __seed(state->s1 ^ entropy, 1);
 	}
 }
 EXPORT_SYMBOL(srandom32);
@@ -122,7 +111,19 @@ static int __init random32_init(void)
 
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
-		__set_random32(state, i + jiffies);
+
+#define LCG(x)	((x) * 69069)	/* super-duper LCG */
+		state->s1 = __seed(LCG(i + jiffies), 1);
+		state->s2 = __seed(LCG(state->s1), 7);
+		state->s3 = __seed(LCG(state->s2), 15);
+
+		/* "warm it up" */
+		__random32(state);
+		__random32(state);
+		__random32(state);
+		__random32(state);
+		__random32(state);
+		__random32(state);
 	}
 	return 0;
 }
@@ -135,13 +136,18 @@ core_initcall(random32_init);
 static int __init random32_reseed(void)
 {
 	int i;
-	unsigned long seed;
 
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
+		u32 seeds[3];
+
+		get_random_bytes(&seeds, sizeof(seeds));
+		state->s1 = __seed(seeds[0], 1);
+		state->s2 = __seed(seeds[1], 7);
+		state->s3 = __seed(seeds[2], 15);
 
-		get_random_bytes(&seed, sizeof(seed));
-		__set_random32(state, seed);
+		/* mix it in */
+		__random32(state);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From e0fdace10e75dac67d906213b780ff1b1a4cc360 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Fri, 1 Aug 2008 01:11:22 -0700
Subject: debug_locks: set oops_in_progress if we will log messages.

Otherwise lock debugging messages on runqueue locks can deadlock the
system due to the wakeups performed by printk().

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/debug_locks.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib')

diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 0ef01d14727c..0218b4693dd8 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -8,6 +8,7 @@
  *
  *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
+#include <linux/kernel.h>
 #include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
@@ -37,6 +38,7 @@ int debug_locks_off(void)
 {
 	if (xchg(&debug_locks, 0)) {
 		if (!debug_locks_silent) {
+			oops_in_progress = 1;
 			console_verbose();
 			return 1;
 		}
-- 
cgit v1.2.3


From 5f5ddfb3605d2a4f555a7ff034859e623eafcd27 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Fri, 1 Aug 2008 08:39:34 -0500
Subject: kgdb: remove the requirement for CONFIG_FRAME_POINTER

There is no technical reason that the kgdb core requires frame
pointers.  It is up to the end user of KGDB to decide if they need
them or not.

[ anemo@mba.ocn.ne.jp: removed frame pointers on mips ]

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 lib/Kconfig.kgdb | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 2cfd2721f7ed..9b5d1d7f2ef7 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -4,14 +4,17 @@ config HAVE_ARCH_KGDB
 
 menuconfig KGDB
 	bool "KGDB: kernel debugging with remote gdb"
-	select FRAME_POINTER
 	depends on HAVE_ARCH_KGDB
 	depends on DEBUG_KERNEL && EXPERIMENTAL
 	help
 	  If you say Y here, it will be possible to remotely debug the
-	  kernel using gdb.  Documentation of kernel debugger is available
-	  at http://kgdb.sourceforge.net as well as in DocBook form
-	  in Documentation/DocBook/.  If unsure, say N.
+	  kernel using gdb.  It is recommended but not required, that
+	  you also turn on the kernel config option
+	  CONFIG_FRAME_POINTER to aid in producing more reliable stack
+	  backtraces in the external debugger.  Documentation of
+	  kernel debugger is available at http://kgdb.sourceforge.net
+	  as well as in DocBook form in Documentation/DocBook/.  If
+	  unsure, say N.
 
 if KGDB
 
-- 
cgit v1.2.3


From 29a6d39bf3a890ad1d29e66baa9f4bc8d9334f3a Mon Sep 17 00:00:00 2001
From: Yi Yang <yi.y.yang@intel.com>
Date: Tue, 12 Aug 2008 15:08:58 -0700
Subject: lib/vsprintf.c: wrong conversion function used

Fix wrong conversion function used by strict_strtou*

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Reported-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 1dc2d1d18fa8..d8d1d1142248 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -220,7 +220,7 @@ int strict_strtou##type(const char *cp, unsigned int base, valtype *res)\
 	if (len == 0)							\
 		return -EINVAL;						\
 									\
-	val = simple_strtoul(cp, &tail, base);				\
+	val = simple_strtou##type(cp, &tail, base);			\
 	if ((*tail == '\0') ||						\
 		((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {\
 		*res = val;						\
-- 
cgit v1.2.3


From 50ac2d694f2dd1658341cf97bcf2ffb836d772cb Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 12 Aug 2008 15:09:02 -0700
Subject: seq_file: add seq_cpumask(), seq_nodemask()

Short enough reads from /proc/irq/*/smp_affinity return -EINVAL for no
good reason.

This became noticed with NR_CPUS=4096 patches, when length of printed
representation of cpumask becase 1152, but cat(1) continued to read with
1024-byte chunks.  bitmap_scnprintf() in good faith fills buffer, returns
1023, check returns -EINVAL.

Fix it by switching to seq_file, so handler will just fill buffer and
doesn't care about offsets, length, filling EOF and all this crap.

For that add seq_bitmap(), and wrappers around it -- seq_cpumask() and
seq_nodemask().

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Paul Jackson <pj@sgi.com>
Cc: Mike Travis <travis@sgi.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/bitmap.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'lib')

diff --git a/lib/bitmap.c b/lib/bitmap.c
index 482df94ea21e..06fb57c86de0 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -315,6 +315,17 @@ int bitmap_scnprintf(char *buf, unsigned int buflen,
 }
 EXPORT_SYMBOL(bitmap_scnprintf);
 
+/**
+ * bitmap_scnprintf_len - return buffer length needed to convert
+ * bitmap to an ASCII hex string
+ * @nr_bits: number of bits to be converted
+ */
+int bitmap_scnprintf_len(unsigned int nr_bits)
+{
+	unsigned int nr_nibbles = ALIGN(nr_bits, 4) / 4;
+	return nr_nibbles + ALIGN(nr_nibbles, CHUNKSZ / 4) / (CHUNKSZ / 4) - 1;
+}
+
 /**
  * __bitmap_parse - convert an ASCII hex string into a bitmap.
  * @buf: pointer to buffer containing string.
-- 
cgit v1.2.3


From 3794f3e812ef707a4f7931742274d1d0ca6597b4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 12 Aug 2008 15:09:06 -0700
Subject: docsrc: build Documentation/ sources

Currently source files in the Documentation/ sub-dir can easily bit-rot
since they are not generally buildable, either because they are hidden in
text files or because there are no Makefile rules for them.  This needs to
be fixed so that the source files remain usable and good examples of code
instead of bad examples.

Add the ability to build source files that are in the Documentation/ dir.
Add to Kconfig as "BUILD_DOCSRC" config symbol.

Use "CONFIG_BUILD_DOCSRC=1 make ..." to build objects from the
Documentation/ sources.  Or enable BUILD_DOCSRC in the *config system.
However, this symbol depends on HEADERS_CHECK since the header files need
to be installed (for userspace builds).

Built (using cross-tools) for x86-64, i386, alpha, ia64, sparc32,
sparc64, powerpc, sh, m68k, & mips.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e1d4764435ed..800ac8485544 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -735,6 +735,15 @@ config FIREWIRE_OHCI_REMOTE_DMA
 
 	  If unsure, say N.
 
+menuconfig BUILD_DOCSRC
+	bool "Build targets in Documentation/ tree"
+	depends on HEADERS_CHECK
+	help
+	  This option attempts to build objects from the source files in the
+	  kernel Documentation/ tree.
+
+	  Say N if you are unsure.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
-- 
cgit v1.2.3