From d705ae6b133f9f6a8beee617b1224b6a5c99c5da Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 15 Feb 2012 09:45:49 +0100
Subject: block: replace icq->changed with icq->flags

icq->changed was used for ICQ_*_CHANGED bits.  Rename it to flags and
access it under ioc->lock instead of using atomic bitops.
ioc_get_changed() is added so that the changed part can be fetched and
cleared as before.

icq->flags will be used to carry other flags.

Signed-off-by: Tejun Heo <tj@kernel.org>
Tested-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/iocontext.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 119773eebe31..17839c7b9614 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -6,8 +6,10 @@
 #include <linux/workqueue.h>
 
 enum {
-	ICQ_IOPRIO_CHANGED,
-	ICQ_CGROUP_CHANGED,
+	ICQ_IOPRIO_CHANGED	= 1 << 0,
+	ICQ_CGROUP_CHANGED	= 1 << 1,
+
+	ICQ_CHANGED_MASK	= ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED,
 };
 
 /*
@@ -88,7 +90,7 @@ struct io_cq {
 		struct rcu_head		__rcu_head;
 	};
 
-	unsigned long		changed;
+	unsigned int		flags;
 };
 
 /*
@@ -139,6 +141,7 @@ struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
 void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
 void ioc_cgroup_changed(struct io_context *ioc);
+unsigned int icq_get_changed(struct io_cq *icq);
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
-- 
cgit v1.2.3


From 621032ad6eaabf2fe771c4fa0d8f58e1fcfcdba6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 15 Feb 2012 09:45:53 +0100
Subject: block: exit_io_context() should call elevator_exit_icq_fn()

While updating locking, b2efa05265 "block, cfq: unlink
cfq_io_context's immediately" moved elevator_exit_icq_fn() invocation
from exit_io_context() to the final ioc put.  While this doesn't cause
catastrophic failure, it effectively removes task exit notification to
elevator and cause noticeable IO performance degradation with CFQ.

On task exit, CFQ used to immediately expire the slice if it was being
used by the exiting task as no more IO would be issued by the task;
however, after b2efa05265, the notification is lost and disk could sit
idle needlessly, leading to noticeable IO performance degradation for
certain workloads.

This patch renames ioc_exit_icq() to ioc_destroy_icq(), separates
elevator_exit_icq_fn() invocation into ioc_exit_icq() and invokes it
from exit_io_context().  ICQ_EXITED flag is added to avoid invoking
the callback more than once for the same icq.

Walking icq_list from ioc side and invoking elevator callback requires
reverse double locking.  This may be better implemented using RCU;
unfortunately, using RCU isn't trivial.  e.g. RCU protection would
need to cover request_queue and queue_lock switch on cleanup makes
grabbing queue_lock from RCU unsafe.  Reverse double locking should
do, at least for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-bisected-by: Shaohua Li <shli@kernel.org>
LKML-Reference: <CANejiEVzs=pUhQSTvUppkDcc2TNZyfohBRLygW5zFmXyk5A-xQ@mail.gmail.com>
Tested-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/iocontext.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 17839c7b9614..1a3018063034 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -8,6 +8,7 @@
 enum {
 	ICQ_IOPRIO_CHANGED	= 1 << 0,
 	ICQ_CGROUP_CHANGED	= 1 << 1,
+	ICQ_EXITED		= 1 << 2,
 
 	ICQ_CHANGED_MASK	= ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED,
 };
-- 
cgit v1.2.3


From 2261cc627f5453004042b4f694612edae27e492e Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Wed, 15 Feb 2012 10:47:42 -0800
Subject: dt: add empty of_find_compatible_node function

Add empty of_find_compatible_node function for !CONFIG_OF build.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index a75a831e2057..92cf6ad35e0e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -281,6 +281,14 @@ static inline struct property *of_find_property(const struct device_node *np,
 	return NULL;
 }
 
+static inline struct device_node *of_find_compatible_node(
+						struct device_node *from,
+						const char *type,
+						const char *compat)
+{
+	return NULL;
+}
+
 static inline int of_property_read_u32_array(const struct device_node *np,
 					     const char *propname,
 					     u32 *out_values, size_t sz)
-- 
cgit v1.2.3


From 7d96b3e55ad45ebe4ff1a1daad27ac1fff8682ec Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Sun, 19 Feb 2012 18:29:11 +0400
Subject: percpu: fix generic definition of __this_cpu_add_and_return()

This patch adds missed "__" into function prefix.
Otherwise on all archectures (except x86) it expands to irq/preemtion-safe
variant: _this_cpu_generic_add_return(), which do extra irq-save/irq-restore.
Optimal generic implementation is __this_cpu_generic_add_return().

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 32cd1f67462e..3b609eb9cd7d 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -718,7 +718,8 @@ do {									\
 # ifndef __this_cpu_add_return_8
 #  define __this_cpu_add_return_8(pcp, val)	__this_cpu_generic_add_return(pcp, val)
 # endif
-# define __this_cpu_add_return(pcp, val)	__pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
+# define __this_cpu_add_return(pcp, val)	\
+	__pcpu_size_call_return2(__this_cpu_add_return_, pcp, val)
 #endif
 
 #define __this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
-- 
cgit v1.2.3


From e920d5971d706290c5a6281f719e16c25021f964 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Wed, 15 Feb 2012 16:54:38 +0800
Subject: percpu: use raw_local_irq_* in _this_cpu op

It doesn't make sense to trace irq off or do irq flags
lock proving inside 'this_cpu' operations, so replace local_irq_*
with raw_local_irq_* in 'this_cpu' op.

Also the patch fixes onelockdep warning[1] by the replacement, see
below:

In commit: 933393f58fef9963eac61db8093689544e29a600(percpu:
Remove irqsafe_cpu_xxx variants), local_irq_save/restore(flags) are
added inside this_cpu_inc operation, so that trace_hardirqs_off_caller
will be called by trace_hardirqs_on_caller directly because
__debug_atomic_inc is implemented as this_cpu_inc, which may trigger
the lockdep warning[1], for example in the below ARM scenary:

	kernel_thread_helper	/*irq disabled*/
		->trace_hardirqs_on_caller	/*hardirqs_enabled was set*/
			->trace_hardirqs_off_caller	/*hardirqs_enabled cleared*/
				__this_cpu_add(redundant_hardirqs_on)
			->trace_hardirqs_off_caller	/*irq disabled, so call here*/

The 'unannotated irqs-on' warning will be triggered somewhere because
irq is just enabled after the irq trace in kernel_thread_helper.

[1],
[    0.162841] ------------[ cut here ]------------
[    0.167694] WARNING: at kernel/lockdep.c:3493 check_flags+0xc0/0x1d0()
[    0.174468] Modules linked in:
[    0.177703] Backtrace:
[    0.180328] [<c00171f0>] (dump_backtrace+0x0/0x110) from [<c0412320>] (dump_stack+0x18/0x1c)
[    0.189086]  r6:c051f778 r5:00000da5 r4:00000000 r3:60000093
[    0.195007] [<c0412308>] (dump_stack+0x0/0x1c) from [<c00410e8>] (warn_slowpath_common+0x54/0x6c)
[    0.204223] [<c0041094>] (warn_slowpath_common+0x0/0x6c) from [<c0041124>] (warn_slowpath_null+0x24/0x2c)
[    0.214111]  r8:00000000 r7:00000000 r6:ee069598 r5:60000013 r4:ee082000
[    0.220825] r3:00000009
[    0.223693] [<c0041100>] (warn_slowpath_null+0x0/0x2c) from [<c0088f38>] (check_flags+0xc0/0x1d0)
[    0.232910] [<c0088e78>] (check_flags+0x0/0x1d0) from [<c008d348>] (lock_acquire+0x4c/0x11c)
[    0.241668] [<c008d2fc>] (lock_acquire+0x0/0x11c) from [<c0415aa4>] (_raw_spin_lock+0x3c/0x74)
[    0.250610] [<c0415a68>] (_raw_spin_lock+0x0/0x74) from [<c010a844>] (set_task_comm+0x20/0xc0)
[    0.259521]  r6:ee069588 r5:ee0691c0 r4:ee082000
[    0.264404] [<c010a824>] (set_task_comm+0x0/0xc0) from [<c0060780>] (kthreadd+0x28/0x108)
[    0.272857]  r8:00000000 r7:00000013 r6:c0044a08 r5:ee0691c0 r4:ee082000
[    0.279571] r3:ee083fe0
[    0.282470] [<c0060758>] (kthreadd+0x0/0x108) from [<c0044a08>] (do_exit+0x0/0x6dc)
[    0.290405]  r5:c0060758 r4:00000000
[    0.294189] ---[ end trace 1b75b31a2719ed1c ]---
[    0.299041] possible reason: unannotated irqs-on.
[    0.303955] irq event stamp: 5
[    0.307159] hardirqs last  enabled at (4): [<c001331c>] no_work_pending+0x8/0x2c
[    0.314880] hardirqs last disabled at (5): [<c0089b08>] trace_hardirqs_on_caller+0x60/0x26c
[    0.323547] softirqs last  enabled at (0): [<c003f754>] copy_process+0x33c/0xef4
[    0.331207] softirqs last disabled at (0): [<  (null)>]   (null)
[    0.337585] CPU0: thread -1, cpu 0, socket 0, mpidr 80000000

Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 3b609eb9cd7d..594c0040fdd8 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -348,9 +348,9 @@ do {									\
 #define _this_cpu_generic_to_op(pcp, val, op)				\
 do {									\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	*__this_cpu_ptr(&(pcp)) op val;					\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 } while (0)
 
 #ifndef this_cpu_write
@@ -449,10 +449,10 @@ do {									\
 ({									\
 	typeof(pcp) ret__;						\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	__this_cpu_add(pcp, val);					\
 	ret__ = __this_cpu_read(pcp);					\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -479,10 +479,10 @@ do {									\
 #define _this_cpu_generic_xchg(pcp, nval)				\
 ({	typeof(pcp) ret__;						\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	ret__ = __this_cpu_read(pcp);					\
 	__this_cpu_write(pcp, nval);					\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -507,11 +507,11 @@ do {									\
 ({									\
 	typeof(pcp) ret__;						\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	ret__ = __this_cpu_read(pcp);					\
 	if (ret__ == (oval))						\
 		__this_cpu_write(pcp, nval);				\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 	ret__;								\
 })
 
@@ -544,10 +544,10 @@ do {									\
 ({									\
 	int ret__;							\
 	unsigned long flags;						\
-	local_irq_save(flags);						\
+	raw_local_irq_save(flags);					\
 	ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2,		\
 			oval1, oval2, nval1, nval2);			\
-	local_irq_restore(flags);					\
+	raw_local_irq_restore(flags);					\
 	ret__;								\
 })
 
-- 
cgit v1.2.3


From 7e55d0527e4925a49464a5b26fdabae1f7a91a77 Mon Sep 17 00:00:00 2001
From: viresh kumar <viresh.kumar@st.com>
Date: Thu, 23 Feb 2012 04:41:05 +0100
Subject: ARM: 7339/1: amba/serial.h: Include types.h for resolving dependency
 of type bool

serial.h uses bool, but its definition is missing, as it doesn't include
types.h. Fix this by including types.h

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/serial.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 514ed45c462e..d117b29d1062 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -23,6 +23,8 @@
 #ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H
 #define ASM_ARM_HARDWARE_SERIAL_AMBA_H
 
+#include <linux/types.h>
+
 /* -------------------------------------------------------------------------------
  *  From AMBA UART (PL010) Block Specification
  * -------------------------------------------------------------------------------
-- 
cgit v1.2.3


From ecb971923614775a118bc05ad16b2bde450cac7d Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 27 Feb 2012 17:52:52 -0500
Subject: tcp: fix comment for tp->highest_sack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There was an off-by-one error in the comments describing the
highest_sack field in struct tcp_sock. The comments previously claimed
that it was the "start sequence of the highest skb with SACKed
bit". This commit fixes the comments to note that it is the "start
sequence of the skb just *after* the highest skb with SACKed bit".

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 3 ++-
 include/net/tcp.h   | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 46a85c9e1f25..3c7ffdb40dc6 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -412,7 +412,8 @@ struct tcp_sock {
 
 	struct tcp_sack_block recv_sack_cache[4];
 
-	struct sk_buff *highest_sack;   /* highest skb with SACK received
+	struct sk_buff *highest_sack;   /* skb just after the highest
+					 * skb with SACKed bit set
 					 * (validity guaranteed only if
 					 * sacked_out > 0)
 					 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 42c29bfbcee3..2d80c291fffb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1364,8 +1364,9 @@ static inline void tcp_push_pending_frames(struct sock *sk)
 	}
 }
 
-/* Start sequence of the highest skb with SACKed bit, valid only if
- * sacked > 0 or when the caller has ensured validity by itself.
+/* Start sequence of the skb just after the highest skb with SACKed
+ * bit, valid only if sacked_out > 0 or when the caller has ensured
+ * validity by itself.
  */
 static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
 {
-- 
cgit v1.2.3


From fe316bf2d5847bc5dd975668671a7b1067603bc7 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Fri, 2 Mar 2012 10:38:33 +0100
Subject: block: Fix NULL pointer dereference in sd_revalidate_disk

Since 2.6.39 (1196f8b), when a driver returns -ENOMEDIUM for open(),
__blkdev_get() calls rescan_partitions() to remove
in-kernel partition structures and raise KOBJ_CHANGE uevent.

However it ends up calling driver's revalidate_disk without open
and could cause oops.

In the case of SCSI:

  process A                  process B
  ----------------------------------------------
  sys_open
    __blkdev_get
      sd_open
        returns -ENOMEDIUM
                             scsi_remove_device
                               <scsi_device torn down>
      rescan_partitions
        sd_revalidate_disk
          <oops>
Oopses are reported here:
http://marc.info/?l=linux-scsi&m=132388619710052

This patch separates the partition invalidation from rescan_partitions()
and use it for -ENOMEDIUM case.

Reported-by: Huajun Li <huajun.li.lee@gmail.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index fe23ee768589..e61d3192448e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -596,6 +596,7 @@ extern char *disk_name (struct gendisk *hd, int partno, char *buf);
 
 extern int disk_expand_part_tbl(struct gendisk *disk, int target);
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
+extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev);
 extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
 						     int partno, sector_t start,
 						     sector_t len, int flags,
-- 
cgit v1.2.3


From 62d3c5439c534b0e6c653fc63e6d8c67be3a57b1 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 2 Mar 2012 10:51:00 +0100
Subject: Block: use a freezable workqueue for disk-event polling

This patch (as1519) fixes a bug in the block layer's disk-events
polling.  The polling is done by a work routine queued on the
system_nrt_wq workqueue.  Since that workqueue isn't freezable, the
polling continues even in the middle of a system sleep transition.

Obviously, polling a suspended drive for media changes and such isn't
a good thing to do; in the case of USB mass-storage devices it can
lead to real problems requiring device resets and even re-enumeration.

The patch fixes things by creating a new system-wide, non-reentrant,
freezable workqueue and using it for disk-events polling.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: <stable@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/workqueue.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index eb8b9f15f2e0..af155450cabb 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -289,12 +289,16 @@ enum {
  *
  * system_freezable_wq is equivalent to system_wq except that it's
  * freezable.
+ *
+ * system_nrt_freezable_wq is equivalent to system_nrt_wq except that
+ * it's freezable.
  */
 extern struct workqueue_struct *system_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_nrt_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
+extern struct workqueue_struct *system_nrt_freezable_wq;
 
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
-- 
cgit v1.2.3


From adb795062f89b8d67d295ee25e04034bccce6779 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Wed, 29 Feb 2012 00:41:12 +0400
Subject: percpu: fix __this_cpu_{sub,inc,dec}_return() definition

This patch adds missed "__" prefixes, otherwise these functions
works as irq/preemption safe.

Reported-by: Torsten Kaiser <just.for.lkml@googlemail.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 594c0040fdd8..21638ae14e07 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -722,9 +722,9 @@ do {									\
 	__pcpu_size_call_return2(__this_cpu_add_return_, pcp, val)
 #endif
 
-#define __this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
-#define __this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
-#define __this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)
+#define __this_cpu_sub_return(pcp, val)	__this_cpu_add_return(pcp, -(val))
+#define __this_cpu_inc_return(pcp)	__this_cpu_add_return(pcp, 1)
+#define __this_cpu_dec_return(pcp)	__this_cpu_add_return(pcp, -1)
 
 #define __this_cpu_generic_xchg(pcp, nval)				\
 ({	typeof(pcp) ret__;						\
-- 
cgit v1.2.3


From 5483f18e986ed5267b923bec12b407845181350b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 4 Mar 2012 15:51:42 -0800
Subject: vfs: move dentry_cmp from <linux/dcache.h> to fs/dcache.c

It's only used inside fs/dcache.c, and we're going to play games with it
for the word-at-a-time patches.  This time we really don't even want to
export it, because it really is an internal function to fs/dcache.c, and
has been since it was introduced.

Having it in that extremely hot header file (it's included in pretty
much everything, thanks to <linux/fs.h>) is a disaster for testing
different versions, and is utterly pointless.

We really should have some kind of header file diet thing, where we
figure out which parts of header files are really better off private and
only result in more expensive compiles.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4270bedd2308..ff5f5256d175 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -47,26 +47,6 @@ struct dentry_stat_t {
 };
 extern struct dentry_stat_t dentry_stat;
 
-/*
- * Compare 2 name strings, return 0 if they match, otherwise non-zero.
- * The strings are both count bytes long, and count is non-zero.
- */
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-				const unsigned char *ct, size_t tcount)
-{
-	if (scount != tcount)
-		return 1;
-
-	do {
-		if (*cs != *ct)
-			return 1;
-		cs++;
-		ct++;
-		tcount--;
-	} while (tcount);
-	return 0;
-}
-
 /* Name hashing routines. Initial hash value */
 /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
 #define init_name_hash()		0
-- 
cgit v1.2.3


From c22ab332902333f83766017478c1ef6607ace681 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Mon, 5 Mar 2012 14:59:10 -0800
Subject: kmsg_dump: don't run on non-error paths by default

Since commit 04c6862c055f ("kmsg_dump: add kmsg_dump() calls to the
reboot, halt, poweroff and emergency_restart paths"), kmsg_dump() gets
run on normal paths including poweroff and reboot.

This is less than ideal given pstore implementations that can only
represent single backtraces, since a reboot may overwrite a stored oops
before it's been picked up by userspace.  In addition, some pstore
backends may have low performance and provide a significant delay in
reboot as a result.

This patch adds a printk.always_kmsg_dump kernel parameter (which can also
be changed from userspace).  Without it, the code will only be run on
failure paths rather than on normal paths.  The option can be enabled in
environments where there's a desire to attempt to audit whether or not a
reboot was cleanly requested or not.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Marco Stornelli <marco.stornelli@gmail.com>
Cc: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmsg_dump.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index fee66317e071..35f7237ec972 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -15,13 +15,18 @@
 #include <linux/errno.h>
 #include <linux/list.h>
 
+/*
+ * Keep this list arranged in rough order of priority. Anything listed after
+ * KMSG_DUMP_OOPS will not be logged by default unless printk.always_kmsg_dump
+ * is passed to the kernel.
+ */
 enum kmsg_dump_reason {
-	KMSG_DUMP_OOPS,
 	KMSG_DUMP_PANIC,
+	KMSG_DUMP_OOPS,
+	KMSG_DUMP_EMERG,
 	KMSG_DUMP_RESTART,
 	KMSG_DUMP_HALT,
 	KMSG_DUMP_POWEROFF,
-	KMSG_DUMP_EMERG,
 };
 
 /**
-- 
cgit v1.2.3


From c415c3b47ea2754659d915cca387a20999044163 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 5 Mar 2012 14:59:13 -0800
Subject: vfork: introduce complete_vfork_done()

No functional changes.

Move the clear-and-complete-vfork_done code into the new trivial helper,
complete_vfork_done().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6bfd88..1b25a37f2aee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2291,6 +2291,7 @@ extern int do_execve(const char *,
 		     const char __user * const __user *,
 		     const char __user * const __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+extern void complete_vfork_done(struct task_struct *tsk);
 struct task_struct *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
-- 
cgit v1.2.3


From d68b46fe16ad59b3a5f51ec73daaa5dc06753798 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 5 Mar 2012 14:59:13 -0800
Subject: vfork: make it killable

Make vfork() killable.

Change do_fork(CLONE_VFORK) to do wait_for_completion_killable().  If it
fails we do not return to the user-mode and never touch the memory shared
with our child.

However, in this case we should clear child->vfork_done before return, we
use task_lock() in do_fork()->wait_for_vfork_done() and
complete_vfork_done() to serialize with each other.

Note: now that we use task_lock() we don't really need completion, we
could turn task->vfork_done into "task_struct *wake_up_me" but this needs
some complications.

NOTE: this and the next patches do not affect in-kernel users of
CLONE_VFORK, kernel threads run with all signals ignored including
SIGKILL/SIGSTOP.

However this is obviously the user-visible change.  Not only a fatal
signal can kill the vforking parent, a sub-thread can do execve or
exit_group() and kill the thread sleeping in vfork().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1b25a37f2aee..b6467711f12e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2372,7 +2372,7 @@ static inline int thread_group_empty(struct task_struct *p)
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
  * pins the final release of task.io_context.  Also protects ->cpuset and
- * ->cgroup.subsys[].
+ * ->cgroup.subsys[]. And ->vfork_done.
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
-- 
cgit v1.2.3


From 57b59c4a1400fa6c34764eab2e35a8762dc05a09 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 5 Mar 2012 14:59:13 -0800
Subject: coredump_wait: don't call complete_vfork_done()

Now that CLONE_VFORK is killable, coredump_wait() no longer needs
complete_vfork_done().  zap_threads() should find and kill all tasks with
the same ->mm, this includes our parent if ->vfork_done is set.

mm_release() becomes the only caller, unexport complete_vfork_done().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b6467711f12e..11fcafaf4ae4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2291,7 +2291,6 @@ extern int do_execve(const char *,
 		     const char __user * const __user *,
 		     const char __user * const __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
-extern void complete_vfork_done(struct task_struct *tsk);
 struct task_struct *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
-- 
cgit v1.2.3


From 6e27f63edbd7ab893258e16500171dd1270a1369 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 5 Mar 2012 14:59:14 -0800
Subject: vfork: kill PF_STARTING

Previously it was (ab)used by utrace.  Then it was wrongly used by the
scheduler code.

Currently it is not used, kill it before it finds the new erroneous user.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11fcafaf4ae4..0657368bd78f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1777,7 +1777,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 /*
  * Per process flags
  */
-#define PF_STARTING	0x00000002	/* being created */
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
-- 
cgit v1.2.3


From 7512102cf64d36e3c7444480273623c7aab3563f Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Mon, 5 Mar 2012 14:59:18 -0800
Subject: memcg: fix GPF when cgroup removal races with last exit

When moving tasks from old memcg (with move_charge_at_immigrate on new
memcg), followed by removal of old memcg, hit General Protection Fault in
mem_cgroup_lru_del_list() (called from release_pages called from
free_pages_and_swap_cache from tlb_flush_mmu from tlb_finish_mmu from
exit_mmap from mmput from exit_mm from do_exit).

Somewhat reproducible, takes a few hours: the old struct mem_cgroup has
been freed and poisoned by SLAB_DEBUG, but mem_cgroup_lru_del_list() is
still trying to update its stats, and take page off lru before freeing.

A task, or a charge, or a page on lru: each secures a memcg against
removal.  In this case, the last task has been moved out of the old memcg,
and it is exiting: anonymous pages are uncharged one by one from the
memcg, as they are zapped from its pagetables, so the charge gets down to
0; but the pages themselves are queued in an mmu_gather for freeing.

Most of those pages will be on lru (and force_empty is careful to
lru_add_drain_all, to add pages from pagevec to lru first), but not
necessarily all: perhaps some have been isolated for page reclaim, perhaps
some isolated for other reasons.  So, force_empty may find no task, no
charge and no page on lru, and let the removal proceed.

There would still be no problem if these pages were immediately freed; but
typically (and the put_page_testzero protocol demands it) they have to be
added back to lru before they are found freeable, then removed from lru
and freed.  We don't see the issue when adding, because the
mem_cgroup_iter() loops keep their own reference to the memcg being
scanned; but when it comes to mem_cgroup_lru_del_list().

I believe this was not an issue in v3.2: there, PageCgroupAcctLRU and
PageCgroupUsed flags were used (like a trick with mirrors) to deflect view
of pc->mem_cgroup to the stable root_mem_cgroup when neither set.
38c5d72f3ebe ("memcg: simplify LRU handling by new rule") mercifully
removed those convolutions, but left this General Protection Fault.

But it's surprisingly easy to restore the old behaviour: just check
PageCgroupUsed in mem_cgroup_lru_add_list() (which decides on which lruvec
to add), and reset pc to root_mem_cgroup if page is uncharged.  A risky
change?  just going back to how it worked before; testing, and an audit of
uses of pc->mem_cgroup, show no problem.

And there's a nice bonus: with mem_cgroup_lru_add_list() itself making
sure that an uncharged page goes to root lru, mem_cgroup_reset_owner() no
longer has any purpose, and we can safely revert 4e5f01c2b9b9 ("memcg:
clear pc->mem_cgroup if necessary").

Calling update_page_reclaim_stat() after add_page_to_lru_list() in swap.c
is not strictly necessary: the lru_lock there, with RCU before memcg
structures are freed, makes mem_cgroup_get_reclaim_stat_from_page safe
without that; but it seems cleaner to rely on one dependency less.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4d34356fe644..b80de520670b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -129,7 +129,6 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
-extern void mem_cgroup_reset_owner(struct page *page);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
 #endif
@@ -392,10 +391,6 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
 				struct page *newpage)
 {
 }
-
-static inline void mem_cgroup_reset_owner(struct page *page)
-{
-}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
-- 
cgit v1.2.3


From 5faa5df1fa2024bd750089ff21dcc4191798263d Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 6 Mar 2012 21:20:26 +0000
Subject: inetpeer: Invalidate the inetpeer tree along with the routing cache

We initialize the routing metrics with the values cached on the
inetpeer in rt_init_metrics(). So if we have the metrics cached on the
inetpeer, we ignore the user configured fib_metrics.

To fix this issue, we replace the old tree with a fresh initialized
inet_peer_base. The old tree is removed later with a delayed work queue.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 06b795dd5906..ff04a33acf00 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,6 +41,7 @@ struct inet_peer {
 	u32			pmtu_orig;
 	u32			pmtu_learned;
 	struct inetpeer_addr_base redirect_learned;
+	struct list_head	gc_list;
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
 	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
@@ -96,6 +97,8 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
 extern void inet_putpeer(struct inet_peer *p);
 extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
+extern void inetpeer_invalidate_tree(int family);
+
 /*
  * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
  * tcp_ts_stamp if no refcount is taken on inet_peer
-- 
cgit v1.2.3


From ac3f48de09d8f4b73397047e413fadff7f65cfa7 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 6 Mar 2012 21:21:10 +0000
Subject: route: Remove redirect_genid

As we invalidate the inetpeer tree along with the routing cache now,
we don't need a genid to reset the redirect handling when the routing
cache is flushed.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index ff04a33acf00..b94765e38e80 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -35,7 +35,6 @@ struct inet_peer {
 
 	u32			metrics[RTAX_MAX];
 	u32			rate_tokens;	/* rate limiting for ICMP */
-	int			redirect_genid;
 	unsigned long		rate_last;
 	unsigned long		pmtu_expires;
 	u32			pmtu_orig;
-- 
cgit v1.2.3


From e34828298ec542294f4b798606ee73e462d322f5 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 29 Feb 2012 22:16:13 +0900
Subject: sh: introduce sh_clk_ops in parallel with clk_ops

Introduce sh_clk_ops in parallel with clk_ops.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/sh_clk.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index 54341d811685..706c005cd4ba 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -18,6 +18,8 @@ struct clk_mapping {
 	struct kref		ref;
 };
 
+#define sh_clk_ops clk_ops
+
 struct clk_ops {
 #ifdef CONFIG_SH_CLK_CPG_LEGACY
 	void (*init)(struct clk *clk);
-- 
cgit v1.2.3


From 84c36ffd7c580e1a63d7284e318670b082260118 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 29 Feb 2012 22:18:19 +0900
Subject: sh: remove clk_ops

Now when all clk_ops have been renamed it is
safe to rename clk_ops to sh_clk_ops.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/sh_clk.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index 706c005cd4ba..0a9d8f2ac519 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -18,9 +18,8 @@ struct clk_mapping {
 	struct kref		ref;
 };
 
-#define sh_clk_ops clk_ops
 
-struct clk_ops {
+struct sh_clk_ops {
 #ifdef CONFIG_SH_CLK_CPG_LEGACY
 	void (*init)(struct clk *clk);
 #endif
@@ -39,7 +38,7 @@ struct clk {
 	unsigned short		parent_num;	/* choose between */
 	unsigned char		src_shift;	/* source clock field in the */
 	unsigned char		src_width;	/* configuration register */
-	struct clk_ops		*ops;
+	struct sh_clk_ops	*ops;
 
 	struct list_head	children;
 	struct list_head	sibling;	/* node for children */
-- 
cgit v1.2.3