From d705ae6b133f9f6a8beee617b1224b6a5c99c5da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Feb 2012 09:45:49 +0100 Subject: block: replace icq->changed with icq->flags icq->changed was used for ICQ_*_CHANGED bits. Rename it to flags and access it under ioc->lock instead of using atomic bitops. ioc_get_changed() is added so that the changed part can be fetched and cleared as before. icq->flags will be used to carry other flags. Signed-off-by: Tejun Heo Tested-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 119773eebe31..17839c7b9614 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -6,8 +6,10 @@ #include enum { - ICQ_IOPRIO_CHANGED, - ICQ_CGROUP_CHANGED, + ICQ_IOPRIO_CHANGED = 1 << 0, + ICQ_CGROUP_CHANGED = 1 << 1, + + ICQ_CHANGED_MASK = ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED, }; /* @@ -88,7 +90,7 @@ struct io_cq { struct rcu_head __rcu_head; }; - unsigned long changed; + unsigned int flags; }; /* @@ -139,6 +141,7 @@ struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); void ioc_ioprio_changed(struct io_context *ioc, int ioprio); void ioc_cgroup_changed(struct io_context *ioc); +unsigned int icq_get_changed(struct io_cq *icq); #else struct io_context; static inline void put_io_context(struct io_context *ioc) { } -- cgit v1.2.3 From 621032ad6eaabf2fe771c4fa0d8f58e1fcfcdba6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Feb 2012 09:45:53 +0100 Subject: block: exit_io_context() should call elevator_exit_icq_fn() While updating locking, b2efa05265 "block, cfq: unlink cfq_io_context's immediately" moved elevator_exit_icq_fn() invocation from exit_io_context() to the final ioc put. While this doesn't cause catastrophic failure, it effectively removes task exit notification to elevator and cause noticeable IO performance degradation with CFQ. On task exit, CFQ used to immediately expire the slice if it was being used by the exiting task as no more IO would be issued by the task; however, after b2efa05265, the notification is lost and disk could sit idle needlessly, leading to noticeable IO performance degradation for certain workloads. This patch renames ioc_exit_icq() to ioc_destroy_icq(), separates elevator_exit_icq_fn() invocation into ioc_exit_icq() and invokes it from exit_io_context(). ICQ_EXITED flag is added to avoid invoking the callback more than once for the same icq. Walking icq_list from ioc side and invoking elevator callback requires reverse double locking. This may be better implemented using RCU; unfortunately, using RCU isn't trivial. e.g. RCU protection would need to cover request_queue and queue_lock switch on cleanup makes grabbing queue_lock from RCU unsafe. Reverse double locking should do, at least for now. Signed-off-by: Tejun Heo Reported-and-bisected-by: Shaohua Li LKML-Reference: Tested-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 17839c7b9614..1a3018063034 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -8,6 +8,7 @@ enum { ICQ_IOPRIO_CHANGED = 1 << 0, ICQ_CGROUP_CHANGED = 1 << 1, + ICQ_EXITED = 1 << 2, ICQ_CHANGED_MASK = ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED, }; -- cgit v1.2.3 From 2261cc627f5453004042b4f694612edae27e492e Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 15 Feb 2012 10:47:42 -0800 Subject: dt: add empty of_find_compatible_node function Add empty of_find_compatible_node function for !CONFIG_OF build. Signed-off-by: Shawn Guo Signed-off-by: Grant Likely --- include/linux/of.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index a75a831e2057..92cf6ad35e0e 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -281,6 +281,14 @@ static inline struct property *of_find_property(const struct device_node *np, return NULL; } +static inline struct device_node *of_find_compatible_node( + struct device_node *from, + const char *type, + const char *compat) +{ + return NULL; +} + static inline int of_property_read_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz) -- cgit v1.2.3 From 7d96b3e55ad45ebe4ff1a1daad27ac1fff8682ec Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 19 Feb 2012 18:29:11 +0400 Subject: percpu: fix generic definition of __this_cpu_add_and_return() This patch adds missed "__" into function prefix. Otherwise on all archectures (except x86) it expands to irq/preemtion-safe variant: _this_cpu_generic_add_return(), which do extra irq-save/irq-restore. Optimal generic implementation is __this_cpu_generic_add_return(). Signed-off-by: Konstantin Khlebnikov Acked-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/percpu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 32cd1f67462e..3b609eb9cd7d 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -718,7 +718,8 @@ do { \ # ifndef __this_cpu_add_return_8 # define __this_cpu_add_return_8(pcp, val) __this_cpu_generic_add_return(pcp, val) # endif -# define __this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +# define __this_cpu_add_return(pcp, val) \ + __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) #endif #define __this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val)) -- cgit v1.2.3 From e920d5971d706290c5a6281f719e16c25021f964 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 15 Feb 2012 16:54:38 +0800 Subject: percpu: use raw_local_irq_* in _this_cpu op It doesn't make sense to trace irq off or do irq flags lock proving inside 'this_cpu' operations, so replace local_irq_* with raw_local_irq_* in 'this_cpu' op. Also the patch fixes onelockdep warning[1] by the replacement, see below: In commit: 933393f58fef9963eac61db8093689544e29a600(percpu: Remove irqsafe_cpu_xxx variants), local_irq_save/restore(flags) are added inside this_cpu_inc operation, so that trace_hardirqs_off_caller will be called by trace_hardirqs_on_caller directly because __debug_atomic_inc is implemented as this_cpu_inc, which may trigger the lockdep warning[1], for example in the below ARM scenary: kernel_thread_helper /*irq disabled*/ ->trace_hardirqs_on_caller /*hardirqs_enabled was set*/ ->trace_hardirqs_off_caller /*hardirqs_enabled cleared*/ __this_cpu_add(redundant_hardirqs_on) ->trace_hardirqs_off_caller /*irq disabled, so call here*/ The 'unannotated irqs-on' warning will be triggered somewhere because irq is just enabled after the irq trace in kernel_thread_helper. [1], [ 0.162841] ------------[ cut here ]------------ [ 0.167694] WARNING: at kernel/lockdep.c:3493 check_flags+0xc0/0x1d0() [ 0.174468] Modules linked in: [ 0.177703] Backtrace: [ 0.180328] [] (dump_backtrace+0x0/0x110) from [] (dump_stack+0x18/0x1c) [ 0.189086] r6:c051f778 r5:00000da5 r4:00000000 r3:60000093 [ 0.195007] [] (dump_stack+0x0/0x1c) from [] (warn_slowpath_common+0x54/0x6c) [ 0.204223] [] (warn_slowpath_common+0x0/0x6c) from [] (warn_slowpath_null+0x24/0x2c) [ 0.214111] r8:00000000 r7:00000000 r6:ee069598 r5:60000013 r4:ee082000 [ 0.220825] r3:00000009 [ 0.223693] [] (warn_slowpath_null+0x0/0x2c) from [] (check_flags+0xc0/0x1d0) [ 0.232910] [] (check_flags+0x0/0x1d0) from [] (lock_acquire+0x4c/0x11c) [ 0.241668] [] (lock_acquire+0x0/0x11c) from [] (_raw_spin_lock+0x3c/0x74) [ 0.250610] [] (_raw_spin_lock+0x0/0x74) from [] (set_task_comm+0x20/0xc0) [ 0.259521] r6:ee069588 r5:ee0691c0 r4:ee082000 [ 0.264404] [] (set_task_comm+0x0/0xc0) from [] (kthreadd+0x28/0x108) [ 0.272857] r8:00000000 r7:00000013 r6:c0044a08 r5:ee0691c0 r4:ee082000 [ 0.279571] r3:ee083fe0 [ 0.282470] [] (kthreadd+0x0/0x108) from [] (do_exit+0x0/0x6dc) [ 0.290405] r5:c0060758 r4:00000000 [ 0.294189] ---[ end trace 1b75b31a2719ed1c ]--- [ 0.299041] possible reason: unannotated irqs-on. [ 0.303955] irq event stamp: 5 [ 0.307159] hardirqs last enabled at (4): [] no_work_pending+0x8/0x2c [ 0.314880] hardirqs last disabled at (5): [] trace_hardirqs_on_caller+0x60/0x26c [ 0.323547] softirqs last enabled at (0): [] copy_process+0x33c/0xef4 [ 0.331207] softirqs last disabled at (0): [< (null)>] (null) [ 0.337585] CPU0: thread -1, cpu 0, socket 0, mpidr 80000000 Acked-by: Christoph Lameter Signed-off-by: Ming Lei Signed-off-by: Tejun Heo --- include/linux/percpu.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3b609eb9cd7d..594c0040fdd8 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -348,9 +348,9 @@ do { \ #define _this_cpu_generic_to_op(pcp, val, op) \ do { \ unsigned long flags; \ - local_irq_save(flags); \ + raw_local_irq_save(flags); \ *__this_cpu_ptr(&(pcp)) op val; \ - local_irq_restore(flags); \ + raw_local_irq_restore(flags); \ } while (0) #ifndef this_cpu_write @@ -449,10 +449,10 @@ do { \ ({ \ typeof(pcp) ret__; \ unsigned long flags; \ - local_irq_save(flags); \ + raw_local_irq_save(flags); \ __this_cpu_add(pcp, val); \ ret__ = __this_cpu_read(pcp); \ - local_irq_restore(flags); \ + raw_local_irq_restore(flags); \ ret__; \ }) @@ -479,10 +479,10 @@ do { \ #define _this_cpu_generic_xchg(pcp, nval) \ ({ typeof(pcp) ret__; \ unsigned long flags; \ - local_irq_save(flags); \ + raw_local_irq_save(flags); \ ret__ = __this_cpu_read(pcp); \ __this_cpu_write(pcp, nval); \ - local_irq_restore(flags); \ + raw_local_irq_restore(flags); \ ret__; \ }) @@ -507,11 +507,11 @@ do { \ ({ \ typeof(pcp) ret__; \ unsigned long flags; \ - local_irq_save(flags); \ + raw_local_irq_save(flags); \ ret__ = __this_cpu_read(pcp); \ if (ret__ == (oval)) \ __this_cpu_write(pcp, nval); \ - local_irq_restore(flags); \ + raw_local_irq_restore(flags); \ ret__; \ }) @@ -544,10 +544,10 @@ do { \ ({ \ int ret__; \ unsigned long flags; \ - local_irq_save(flags); \ + raw_local_irq_save(flags); \ ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \ oval1, oval2, nval1, nval2); \ - local_irq_restore(flags); \ + raw_local_irq_restore(flags); \ ret__; \ }) -- cgit v1.2.3 From 7e55d0527e4925a49464a5b26fdabae1f7a91a77 Mon Sep 17 00:00:00 2001 From: viresh kumar Date: Thu, 23 Feb 2012 04:41:05 +0100 Subject: ARM: 7339/1: amba/serial.h: Include types.h for resolving dependency of type bool serial.h uses bool, but its definition is missing, as it doesn't include types.h. Fix this by including types.h Signed-off-by: Viresh Kumar Signed-off-by: Russell King --- include/linux/amba/serial.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 514ed45c462e..d117b29d1062 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -23,6 +23,8 @@ #ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H #define ASM_ARM_HARDWARE_SERIAL_AMBA_H +#include + /* ------------------------------------------------------------------------------- * From AMBA UART (PL010) Block Specification * ------------------------------------------------------------------------------- -- cgit v1.2.3 From ecb971923614775a118bc05ad16b2bde450cac7d Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 27 Feb 2012 17:52:52 -0500 Subject: tcp: fix comment for tp->highest_sack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was an off-by-one error in the comments describing the highest_sack field in struct tcp_sock. The comments previously claimed that it was the "start sequence of the highest skb with SACKed bit". This commit fixes the comments to note that it is the "start sequence of the skb just *after* the highest skb with SACKed bit". Signed-off-by: Neal Cardwell Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 46a85c9e1f25..3c7ffdb40dc6 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -412,7 +412,8 @@ struct tcp_sock { struct tcp_sack_block recv_sack_cache[4]; - struct sk_buff *highest_sack; /* highest skb with SACK received + struct sk_buff *highest_sack; /* skb just after the highest + * skb with SACKed bit set * (validity guaranteed only if * sacked_out > 0) */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 42c29bfbcee3..2d80c291fffb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1364,8 +1364,9 @@ static inline void tcp_push_pending_frames(struct sock *sk) } } -/* Start sequence of the highest skb with SACKed bit, valid only if - * sacked > 0 or when the caller has ensured validity by itself. +/* Start sequence of the skb just after the highest skb with SACKed + * bit, valid only if sacked_out > 0 or when the caller has ensured + * validity by itself. */ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) { -- cgit v1.2.3 From fe316bf2d5847bc5dd975668671a7b1067603bc7 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Fri, 2 Mar 2012 10:38:33 +0100 Subject: block: Fix NULL pointer dereference in sd_revalidate_disk Since 2.6.39 (1196f8b), when a driver returns -ENOMEDIUM for open(), __blkdev_get() calls rescan_partitions() to remove in-kernel partition structures and raise KOBJ_CHANGE uevent. However it ends up calling driver's revalidate_disk without open and could cause oops. In the case of SCSI: process A process B ---------------------------------------------- sys_open __blkdev_get sd_open returns -ENOMEDIUM scsi_remove_device rescan_partitions sd_revalidate_disk Oopses are reported here: http://marc.info/?l=linux-scsi&m=132388619710052 This patch separates the partition invalidation from rescan_partitions() and use it for -ENOMEDIUM case. Reported-by: Huajun Li Signed-off-by: Jun'ichi Nomura Acked-by: Tejun Heo Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fe23ee768589..e61d3192448e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -596,6 +596,7 @@ extern char *disk_name (struct gendisk *hd, int partno, char *buf); extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); +extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev); extern struct hd_struct * __must_check add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, -- cgit v1.2.3 From 62d3c5439c534b0e6c653fc63e6d8c67be3a57b1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 2 Mar 2012 10:51:00 +0100 Subject: Block: use a freezable workqueue for disk-event polling This patch (as1519) fixes a bug in the block layer's disk-events polling. The polling is done by a work routine queued on the system_nrt_wq workqueue. Since that workqueue isn't freezable, the polling continues even in the middle of a system sleep transition. Obviously, polling a suspended drive for media changes and such isn't a good thing to do; in the case of USB mass-storage devices it can lead to real problems requiring device resets and even re-enumeration. The patch fixes things by creating a new system-wide, non-reentrant, freezable workqueue and using it for disk-events polling. Signed-off-by: Alan Stern CC: Acked-by: Tejun Heo Acked-by: Rafael J. Wysocki Signed-off-by: Jens Axboe --- include/linux/workqueue.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index eb8b9f15f2e0..af155450cabb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -289,12 +289,16 @@ enum { * * system_freezable_wq is equivalent to system_wq except that it's * freezable. + * + * system_nrt_freezable_wq is equivalent to system_nrt_wq except that + * it's freezable. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; +extern struct workqueue_struct *system_nrt_freezable_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, -- cgit v1.2.3 From adb795062f89b8d67d295ee25e04034bccce6779 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 29 Feb 2012 00:41:12 +0400 Subject: percpu: fix __this_cpu_{sub,inc,dec}_return() definition This patch adds missed "__" prefixes, otherwise these functions works as irq/preemption safe. Reported-by: Torsten Kaiser Signed-off-by: Konstantin Khlebnikov Signed-off-by: Tejun Heo --- include/linux/percpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 594c0040fdd8..21638ae14e07 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -722,9 +722,9 @@ do { \ __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) #endif -#define __this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val)) -#define __this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) -#define __this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(val)) +#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) +#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) #define __this_cpu_generic_xchg(pcp, nval) \ ({ typeof(pcp) ret__; \ -- cgit v1.2.3 From 5483f18e986ed5267b923bec12b407845181350b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Mar 2012 15:51:42 -0800 Subject: vfs: move dentry_cmp from to fs/dcache.c It's only used inside fs/dcache.c, and we're going to play games with it for the word-at-a-time patches. This time we really don't even want to export it, because it really is an internal function to fs/dcache.c, and has been since it was introduced. Having it in that extremely hot header file (it's included in pretty much everything, thanks to ) is a disaster for testing different versions, and is utterly pointless. We really should have some kind of header file diet thing, where we figure out which parts of header files are really better off private and only result in more expensive compiles. Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4270bedd2308..ff5f5256d175 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -47,26 +47,6 @@ struct dentry_stat_t { }; extern struct dentry_stat_t dentry_stat; -/* - * Compare 2 name strings, return 0 if they match, otherwise non-zero. - * The strings are both count bytes long, and count is non-zero. - */ -static inline int dentry_cmp(const unsigned char *cs, size_t scount, - const unsigned char *ct, size_t tcount) -{ - if (scount != tcount) - return 1; - - do { - if (*cs != *ct) - return 1; - cs++; - ct++; - tcount--; - } while (tcount); - return 0; -} - /* Name hashing routines. Initial hash value */ /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ #define init_name_hash() 0 -- cgit v1.2.3 From c22ab332902333f83766017478c1ef6607ace681 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 5 Mar 2012 14:59:10 -0800 Subject: kmsg_dump: don't run on non-error paths by default Since commit 04c6862c055f ("kmsg_dump: add kmsg_dump() calls to the reboot, halt, poweroff and emergency_restart paths"), kmsg_dump() gets run on normal paths including poweroff and reboot. This is less than ideal given pstore implementations that can only represent single backtraces, since a reboot may overwrite a stored oops before it's been picked up by userspace. In addition, some pstore backends may have low performance and provide a significant delay in reboot as a result. This patch adds a printk.always_kmsg_dump kernel parameter (which can also be changed from userspace). Without it, the code will only be run on failure paths rather than on normal paths. The option can be enabled in environments where there's a desire to attempt to audit whether or not a reboot was cleanly requested or not. Signed-off-by: Matthew Garrett Acked-by: Seiji Aguchi Cc: Seiji Aguchi Cc: David Woodhouse Cc: Marco Stornelli Cc: Artem Bityutskiy Cc: KOSAKI Motohiro Cc: Vivek Goyal Cc: Don Zickus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmsg_dump.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index fee66317e071..35f7237ec972 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -15,13 +15,18 @@ #include #include +/* + * Keep this list arranged in rough order of priority. Anything listed after + * KMSG_DUMP_OOPS will not be logged by default unless printk.always_kmsg_dump + * is passed to the kernel. + */ enum kmsg_dump_reason { - KMSG_DUMP_OOPS, KMSG_DUMP_PANIC, + KMSG_DUMP_OOPS, + KMSG_DUMP_EMERG, KMSG_DUMP_RESTART, KMSG_DUMP_HALT, KMSG_DUMP_POWEROFF, - KMSG_DUMP_EMERG, }; /** -- cgit v1.2.3 From c415c3b47ea2754659d915cca387a20999044163 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: vfork: introduce complete_vfork_done() No functional changes. Move the clear-and-complete-vfork_done code into the new trivial helper, complete_vfork_done(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d379a6bfd88..1b25a37f2aee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,6 +2291,7 @@ extern int do_execve(const char *, const char __user * const __user *, const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); +extern void complete_vfork_done(struct task_struct *tsk); struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); -- cgit v1.2.3 From d68b46fe16ad59b3a5f51ec73daaa5dc06753798 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: vfork: make it killable Make vfork() killable. Change do_fork(CLONE_VFORK) to do wait_for_completion_killable(). If it fails we do not return to the user-mode and never touch the memory shared with our child. However, in this case we should clear child->vfork_done before return, we use task_lock() in do_fork()->wait_for_vfork_done() and complete_vfork_done() to serialize with each other. Note: now that we use task_lock() we don't really need completion, we could turn task->vfork_done into "task_struct *wake_up_me" but this needs some complications. NOTE: this and the next patches do not affect in-kernel users of CLONE_VFORK, kernel threads run with all signals ignored including SIGKILL/SIGSTOP. However this is obviously the user-visible change. Not only a fatal signal can kill the vforking parent, a sub-thread can do execve or exit_group() and kill the thread sleeping in vfork(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1b25a37f2aee..b6467711f12e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2372,7 +2372,7 @@ static inline int thread_group_empty(struct task_struct *p) * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. + * ->cgroup.subsys[]. And ->vfork_done. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), -- cgit v1.2.3 From 57b59c4a1400fa6c34764eab2e35a8762dc05a09 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: coredump_wait: don't call complete_vfork_done() Now that CLONE_VFORK is killable, coredump_wait() no longer needs complete_vfork_done(). zap_threads() should find and kill all tasks with the same ->mm, this includes our parent if ->vfork_done is set. mm_release() becomes the only caller, unexport complete_vfork_done(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b6467711f12e..11fcafaf4ae4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,7 +2291,6 @@ extern int do_execve(const char *, const char __user * const __user *, const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); -extern void complete_vfork_done(struct task_struct *tsk); struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); -- cgit v1.2.3 From 6e27f63edbd7ab893258e16500171dd1270a1369 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:14 -0800 Subject: vfork: kill PF_STARTING Previously it was (ab)used by utrace. Then it was wrongly used by the scheduler code. Currently it is not used, kill it before it finds the new erroneous user. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 11fcafaf4ae4..0657368bd78f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1777,7 +1777,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ -#define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -- cgit v1.2.3 From 7512102cf64d36e3c7444480273623c7aab3563f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 5 Mar 2012 14:59:18 -0800 Subject: memcg: fix GPF when cgroup removal races with last exit When moving tasks from old memcg (with move_charge_at_immigrate on new memcg), followed by removal of old memcg, hit General Protection Fault in mem_cgroup_lru_del_list() (called from release_pages called from free_pages_and_swap_cache from tlb_flush_mmu from tlb_finish_mmu from exit_mmap from mmput from exit_mm from do_exit). Somewhat reproducible, takes a few hours: the old struct mem_cgroup has been freed and poisoned by SLAB_DEBUG, but mem_cgroup_lru_del_list() is still trying to update its stats, and take page off lru before freeing. A task, or a charge, or a page on lru: each secures a memcg against removal. In this case, the last task has been moved out of the old memcg, and it is exiting: anonymous pages are uncharged one by one from the memcg, as they are zapped from its pagetables, so the charge gets down to 0; but the pages themselves are queued in an mmu_gather for freeing. Most of those pages will be on lru (and force_empty is careful to lru_add_drain_all, to add pages from pagevec to lru first), but not necessarily all: perhaps some have been isolated for page reclaim, perhaps some isolated for other reasons. So, force_empty may find no task, no charge and no page on lru, and let the removal proceed. There would still be no problem if these pages were immediately freed; but typically (and the put_page_testzero protocol demands it) they have to be added back to lru before they are found freeable, then removed from lru and freed. We don't see the issue when adding, because the mem_cgroup_iter() loops keep their own reference to the memcg being scanned; but when it comes to mem_cgroup_lru_del_list(). I believe this was not an issue in v3.2: there, PageCgroupAcctLRU and PageCgroupUsed flags were used (like a trick with mirrors) to deflect view of pc->mem_cgroup to the stable root_mem_cgroup when neither set. 38c5d72f3ebe ("memcg: simplify LRU handling by new rule") mercifully removed those convolutions, but left this General Protection Fault. But it's surprisingly easy to restore the old behaviour: just check PageCgroupUsed in mem_cgroup_lru_add_list() (which decides on which lruvec to add), and reset pc to root_mem_cgroup if page is uncharged. A risky change? just going back to how it worked before; testing, and an audit of uses of pc->mem_cgroup, show no problem. And there's a nice bonus: with mem_cgroup_lru_add_list() itself making sure that an uncharged page goes to root lru, mem_cgroup_reset_owner() no longer has any purpose, and we can safely revert 4e5f01c2b9b9 ("memcg: clear pc->mem_cgroup if necessary"). Calling update_page_reclaim_stat() after add_page_to_lru_list() in swap.c is not strictly necessary: the lru_lock there, with RCU before memcg structures are freed, makes mem_cgroup_get_reclaim_stat_from_page safe without that; but it seems cleaner to rely on one dependency less. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Johannes Weiner Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4d34356fe644..b80de520670b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -129,7 +129,6 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage); -extern void mem_cgroup_reset_owner(struct page *page); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif @@ -392,10 +391,6 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage) { } - -static inline void mem_cgroup_reset_owner(struct page *page) -{ -} #endif /* CONFIG_CGROUP_MEM_CONT */ #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM) -- cgit v1.2.3 From 5faa5df1fa2024bd750089ff21dcc4191798263d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 6 Mar 2012 21:20:26 +0000 Subject: inetpeer: Invalidate the inetpeer tree along with the routing cache We initialize the routing metrics with the values cached on the inetpeer in rt_init_metrics(). So if we have the metrics cached on the inetpeer, we ignore the user configured fib_metrics. To fix this issue, we replace the old tree with a fresh initialized inet_peer_base. The old tree is removed later with a delayed work queue. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/inetpeer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 06b795dd5906..ff04a33acf00 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -41,6 +41,7 @@ struct inet_peer { u32 pmtu_orig; u32 pmtu_learned; struct inetpeer_addr_base redirect_learned; + struct list_head gc_list; /* * Once inet_peer is queued for deletion (refcnt == -1), following fields * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp @@ -96,6 +97,8 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); +extern void inetpeer_invalidate_tree(int family); + /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, * tcp_ts_stamp if no refcount is taken on inet_peer -- cgit v1.2.3 From ac3f48de09d8f4b73397047e413fadff7f65cfa7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 6 Mar 2012 21:21:10 +0000 Subject: route: Remove redirect_genid As we invalidate the inetpeer tree along with the routing cache now, we don't need a genid to reset the redirect handling when the routing cache is flushed. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/inetpeer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index ff04a33acf00..b94765e38e80 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -35,7 +35,6 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ - int redirect_genid; unsigned long rate_last; unsigned long pmtu_expires; u32 pmtu_orig; -- cgit v1.2.3 From e34828298ec542294f4b798606ee73e462d322f5 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 29 Feb 2012 22:16:13 +0900 Subject: sh: introduce sh_clk_ops in parallel with clk_ops Introduce sh_clk_ops in parallel with clk_ops. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- include/linux/sh_clk.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index 54341d811685..706c005cd4ba 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -18,6 +18,8 @@ struct clk_mapping { struct kref ref; }; +#define sh_clk_ops clk_ops + struct clk_ops { #ifdef CONFIG_SH_CLK_CPG_LEGACY void (*init)(struct clk *clk); -- cgit v1.2.3 From 84c36ffd7c580e1a63d7284e318670b082260118 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 29 Feb 2012 22:18:19 +0900 Subject: sh: remove clk_ops Now when all clk_ops have been renamed it is safe to rename clk_ops to sh_clk_ops. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- include/linux/sh_clk.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index 706c005cd4ba..0a9d8f2ac519 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -18,9 +18,8 @@ struct clk_mapping { struct kref ref; }; -#define sh_clk_ops clk_ops -struct clk_ops { +struct sh_clk_ops { #ifdef CONFIG_SH_CLK_CPG_LEGACY void (*init)(struct clk *clk); #endif @@ -39,7 +38,7 @@ struct clk { unsigned short parent_num; /* choose between */ unsigned char src_shift; /* source clock field in the */ unsigned char src_width; /* configuration register */ - struct clk_ops *ops; + struct sh_clk_ops *ops; struct list_head children; struct list_head sibling; /* node for children */ -- cgit v1.2.3