From 478b360a47b71f3b5030eacd3aae6acb1a32c2b6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 15 Feb 2014 23:48:45 +0100 Subject: netfilter: nf_tables: fix nf_trace always-on with XT_TRACE=n When using nftables with CONFIG_NETFILTER_XT_TARGET_TRACE=n, we get lots of "TRACE: filter:output:policy:1 IN=..." warnings as several places will leave skb->nf_trace uninitialised. Unlike iptables tracing functionality is not conditional in nftables, so always copy/zero nf_trace setting when nftables is enabled. Move this into __nf_copy() helper. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f589c9af8cbf..d40d40b2915b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2725,7 +2725,7 @@ static inline void nf_reset(struct sk_buff *skb) static inline void nf_reset_trace(struct sk_buff *skb) { -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) skb->nf_trace = 0; #endif } @@ -2742,6 +2742,9 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); #endif +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) + dst->nf_trace = src->nf_trace; +#endif } static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) -- cgit v1.2.3 From 994c41ee0ac875797b4dfef509ac7753e2649b4d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 30 Jan 2014 13:17:20 +0200 Subject: ARM: OMAP2+: clock: fix clkoutx2 with CLK_SET_RATE_PARENT If CLK_SET_RATE_PARENT is set for a clkoutx2 clock, calling clk_set_rate() on the clock "skips" the x2 multiplier as there are no set_rate and round_rate functions defined for the clkoutx2. This results in getting double the requested clock rates, breaking the display on omap3430 based devices. This got broken when d0f58bd3bba3877fb1af4664c4e33273d36f00e4 and related patches were merged for v3.14, as omapdss driver now relies more on the clk-framework and CLK_SET_RATE_PARENT. This patch implements set_rate and round_rate for clkoutx2. Tested on OMAP3430, OMAP3630, OMAP4460. Signed-off-by: Tomi Valkeinen Acked-by: Tero Kristo Signed-off-by: Paul Walmsley --- include/linux/clk/ti.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 092b64168d7f..4a21a872dbbd 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -245,6 +245,10 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, void omap2_init_clk_clkdm(struct clk_hw *clk); unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long parent_rate); +int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); +long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate); int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); int omap2_clk_disable_autoidle_all(void); -- cgit v1.2.3 From ee5c23176fcc820f7a56d3e86001532af0d59b1e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 Feb 2014 13:33:24 +0100 Subject: xfrm: Clone states properly on migration We loose a lot of information of the original state if we clone it with xfrm_state_clone(). In particular, there is no crypto algorithm attached if the original state uses an aead algorithm. This patch add the missing information to the clone state. Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index afa5730fb3bd..fb5654a8ca3c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1648,6 +1648,11 @@ static inline int xfrm_aevent_is_on(struct net *net) } #endif +static inline int aead_len(struct xfrm_algo_aead *alg) +{ + return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); +} + static inline int xfrm_alg_len(const struct xfrm_algo *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); @@ -1686,6 +1691,12 @@ static inline int xfrm_replay_clone(struct xfrm_state *x, return 0; } +static inline struct xfrm_algo_aead *xfrm_algo_aead_clone(struct xfrm_algo_aead *orig) +{ + return kmemdup(orig, aead_len(orig), GFP_KERNEL); +} + + static inline struct xfrm_algo *xfrm_algo_clone(struct xfrm_algo *orig) { return kmemdup(orig, xfrm_alg_len(orig), GFP_KERNEL); -- cgit v1.2.3 From c7fbd4158433c1c910c62850247728cb05a94e42 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Thu, 20 Feb 2014 10:49:34 +0200 Subject: clk: tegra124: remove gr2d and gr3d clocks Tegra124 does not have gr2d and gr3d clocks. They have been replaced by the vic03 and gpu clocks respectively. Signed-off-by: Peter De Schrijver --- include/dt-bindings/clock/tegra124-car.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/tegra124-car.h b/include/dt-bindings/clock/tegra124-car.h index a1116a3b54ef..8c1603b10665 100644 --- a/include/dt-bindings/clock/tegra124-car.h +++ b/include/dt-bindings/clock/tegra124-car.h @@ -36,10 +36,10 @@ #define TEGRA124_CLK_PWM 17 #define TEGRA124_CLK_I2S2 18 /* 20 (register bit affects vi and vi_sensor) */ -#define TEGRA124_CLK_GR_2D 21 +/* 21 */ #define TEGRA124_CLK_USBD 22 #define TEGRA124_CLK_ISP 23 -#define TEGRA124_CLK_GR_3D 24 +/* 26 */ /* 25 */ #define TEGRA124_CLK_DISP2 26 #define TEGRA124_CLK_DISP1 27 -- cgit v1.2.3 From cf71d2bc0b8a473209d5c770ce560853bd720d14 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 20 Feb 2014 10:19:31 +0100 Subject: sit: fix panic with route cache in ip tunnels Bug introduced by commit 7d442fab0a67 ("ipv4: Cache dst in tunnels"). Because sit code does not call ip_tunnel_init(), the dst_cache was not initialized. CC: Tom Herbert Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 48ed75c21260..e77c10405d51 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -129,6 +129,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); void ip_tunnel_setup(struct net_device *dev, int net_id); +void ip_tunnel_dst_reset_all(struct ip_tunnel *t); /* Extract dsfield from inner protocol */ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, -- cgit v1.2.3 From feb71dae1f9e0aeb056f7f639a21e620d327fc66 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:37 -0800 Subject: blk-mq: merge blk_mq_insert_request and blk_mq_run_request It's almost identical to blk_mq_insert_request, so fold the two into one slightly more generic function by making the flush special case a bit smarted. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 18ba8a627f46..ff28fe37ddda 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -121,8 +121,7 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request_queue *, struct request *, - bool, bool); +void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -- cgit v1.2.3 From d6a25b31315327eef7785b895c354cc45c3f3742 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:38 -0800 Subject: blk-mq: support partial I/O completions Add a new blk_mq_end_io_partial function to partially complete requests as needed by the SCSI layer. We do this by reusing blk_update_request to advance the bio instead of having a simplified version of it in the blk-mq code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ff28fe37ddda..2ff2e8d982be 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -133,7 +133,13 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_ind struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); -void blk_mq_end_io(struct request *rq, int error); +bool blk_mq_end_io_partial(struct request *rq, int error, + unsigned int nr_bytes); +static inline void blk_mq_end_io(struct request *rq, int error) +{ + bool done = !blk_mq_end_io_partial(rq, error, blk_rq_bytes(rq)); + BUG_ON(!done); +} void blk_mq_complete_request(struct request *rq); -- cgit v1.2.3 From f5ddcbbb40aa0ba7fbfe22355d287603dbeeaaac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Feb 2014 10:09:18 -0800 Subject: net-tcp: fastopen: fix high order allocations This patch fixes two bugs in fastopen : 1) The tcp_sendmsg(..., @size) argument was ignored. Code was relying on user not fooling the kernel with iovec mismatches 2) When MTU is about 64KB, tcp_send_syn_data() attempts order-5 allocations, which are likely to fail when memory gets fragmented. Fixes: 783237e8daf13 ("net-tcp: Fast Open client - sending SYN-data") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Tested-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 56fc366da6d5..8c4dd63134d4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1303,7 +1303,8 @@ struct tcp_fastopen_request { /* Fast Open cookie. Size 0 means a cookie request */ struct tcp_fastopen_cookie cookie; struct msghdr *data; /* data in MSG_FASTOPEN */ - u16 copied; /* queued in tcp_connect() */ + size_t size; + int copied; /* queued in tcp_connect() */ }; void tcp_free_fastopen_req(struct tcp_sock *tp); -- cgit v1.2.3 From b7e63a1079b266866a732cf699d8c4d61391bbda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Feb 2014 11:19:14 -0800 Subject: NFSv4: Fix another nfs4_sequence corruptor nfs4_release_lockowner needs to set the rpc_message reply to point to the nfs4_sequence_res in order to avoid another Oopsable situation in nfs41_assign_slot. Fixes: fbd4bfd1d9d21 (NFS: Add nfs4_sequence calls for RELEASE_LOCKOWNER) Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b2fb167b2e6d..5624e4e2763c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -467,9 +467,14 @@ struct nfs_lockt_res { }; struct nfs_release_lockowner_args { + struct nfs4_sequence_args seq_args; struct nfs_lowner lock_owner; }; +struct nfs_release_lockowner_res { + struct nfs4_sequence_res seq_res; +}; + struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; -- cgit v1.2.3 From 45ab2813d40d88fc575e753c38478de242d03f88 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 13:37:38 -0500 Subject: tracing: Do not add event files for modules that fail tracepoints If a module fails to add its tracepoints due to module tainting, do not create the module event infrastructure in the debugfs directory. As the events will not work and worse yet, they will silently fail, making the user wonder why the events they enable do not display anything. Having a warning on module load and the events not visible to the users will make the cause of the problem much clearer. Link: http://lkml.kernel.org/r/20140227154923.265882695@goodmis.org Fixes: 6d723736e472 "tracing/events: add support for modules to TRACE_EVENT" Acked-by: Mathieu Desnoyers Cc: stable@vger.kernel.org # 2.6.31+ Cc: Rusty Russell Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index accc497f8d72..7159a0a933df 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,6 +60,12 @@ struct tp_module { unsigned int num_tracepoints; struct tracepoint * const *tracepoints_ptrs; }; +bool trace_module_has_bad_taint(struct module *mod); +#else +static inline bool trace_module_has_bad_taint(struct module *mod) +{ + return false; +} #endif /* CONFIG_MODULES */ struct tracepoint_iter { -- cgit v1.2.3 From 668f9abbd4334e6c29fa8acd71635c4f9101caa7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 3 Mar 2014 15:38:18 -0800 Subject: mm: close PageTail race Commit bf6bddf1924e ("mm: introduce compaction and migration for ballooned pages") introduces page_count(page) into memory compaction which dereferences page->first_page if PageTail(page). This results in a very rare NULL pointer dereference on the aforementioned page_count(page). Indeed, anything that does compound_head(), including page_count() is susceptible to racing with prep_compound_page() and seeing a NULL or dangling page->first_page pointer. This patch uses Andrea's implementation of compound_trans_head() that deals with such a race and makes it the default compound_head() implementation. This includes a read memory barrier that ensures that if PageTail(head) is true that we return a head page that is neither NULL nor dangling. The patch then adds a store memory barrier to prep_compound_page() to ensure page->first_page is set. This is the safest way to ensure we see the head page that we are expecting, PageTail(page) is already in the unlikely() path and the memory barriers are unfortunately required. Hugetlbfs is the exception, we don't enforce a store memory barrier during init since no race is possible. Signed-off-by: David Rientjes Cc: Holger Kiehl Cc: Christoph Lameter Cc: Rafael Aquini Cc: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Rik van Riel Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 41 ----------------------------------------- include/linux/mm.h | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index db512014e061..b826239bdce0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -157,46 +157,6 @@ static inline int hpage_nr_pages(struct page *page) return HPAGE_PMD_NR; return 1; } -/* - * compound_trans_head() should be used instead of compound_head(), - * whenever the "page" passed as parameter could be the tail of a - * transparent hugepage that could be undergoing a - * __split_huge_page_refcount(). The page structure layout often - * changes across releases and it makes extensive use of unions. So if - * the page structure layout will change in a way that - * page->first_page gets clobbered by __split_huge_page_refcount, the - * implementation making use of smp_rmb() will be required. - * - * Currently we define compound_trans_head as compound_head, because - * page->private is in the same union with page->first_page, and - * page->private isn't clobbered. However this also means we're - * currently leaving dirt into the page->private field of anonymous - * pages resulting from a THP split, instead of setting page->private - * to zero like for every other page that has PG_private not set. But - * anonymous pages don't use page->private so this is not a problem. - */ -#if 0 -/* This will be needed if page->private will be clobbered in split_huge_page */ -static inline struct page *compound_trans_head(struct page *page) -{ - if (PageTail(page)) { - struct page *head; - head = page->first_page; - smp_rmb(); - /* - * head may be a dangling pointer. - * __split_huge_page_refcount clears PageTail before - * overwriting first_page, so if PageTail is still - * there it means the head pointer isn't dangling. - */ - if (PageTail(page)) - return head; - } - return page; -} -#else -#define compound_trans_head(page) compound_head(page) -#endif extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp); @@ -226,7 +186,6 @@ static inline int split_huge_page(struct page *page) do { } while (0) #define split_huge_page_pmd_mm(__mm, __address, __pmd) \ do { } while (0) -#define compound_trans_head(page) compound_head(page) static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { diff --git a/include/linux/mm.h b/include/linux/mm.h index f28f46eade6a..03ab3e58f511 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -399,8 +399,18 @@ static inline void compound_unlock_irqrestore(struct page *page, static inline struct page *compound_head(struct page *page) { - if (unlikely(PageTail(page))) - return page->first_page; + if (unlikely(PageTail(page))) { + struct page *head = page->first_page; + + /* + * page->first_page may be a dangling pointer to an old + * compound page, so recheck that it is still a tail + * page before returning. + */ + smp_rmb(); + if (likely(PageTail(page))) + return head; + } return page; } -- cgit v1.2.3 From 9050d7eba40b3d79551668f54e68fd6f51945ef3 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 3 Mar 2014 15:38:27 -0800 Subject: mm: include VM_MIXEDMAP flag in the VM_SPECIAL list to avoid m(un)locking Daniel Borkmann reported a VM_BUG_ON assertion failing: ------------[ cut here ]------------ kernel BUG at mm/mlock.c:528! invalid opcode: 0000 [#1] SMP Modules linked in: ccm arc4 iwldvm [...] video CPU: 3 PID: 2266 Comm: netsniff-ng Not tainted 3.14.0-rc2+ #8 Hardware name: LENOVO 2429BP3/2429BP3, BIOS G4ET37WW (1.12 ) 05/29/2012 task: ffff8801f87f9820 ti: ffff88002cb44000 task.ti: ffff88002cb44000 RIP: 0010:[] [] munlock_vma_pages_range+0x2e0/0x2f0 Call Trace: do_munmap+0x18f/0x3b0 vm_munmap+0x41/0x60 SyS_munmap+0x22/0x30 system_call_fastpath+0x1a/0x1f RIP munlock_vma_pages_range+0x2e0/0x2f0 ---[ end trace a0088dcf07ae10f2 ]--- because munlock_vma_pages_range() thinks it's unexpectedly in the middle of a THP page. This can be reproduced with default config since 3.11 kernels. A reproducer can be found in the kernel's selftest directory for networking by running ./psock_tpacket. The problem is that an order=2 compound page (allocated by alloc_one_pg_vec_page() is part of the munlocked VM_MIXEDMAP vma (mapped by packet_mmap()) and mistaken for a THP page and assumed to be order=9. The checks for THP in munlock came with commit ff6a6da60b89 ("mm: accelerate munlock() treatment of THP pages"), i.e. since 3.9, but did not trigger a bug. It just makes munlock_vma_pages_range() skip such compound pages until the next 512-pages-aligned page, when it encounters a head page. This is however not a problem for vma's where mlocking has no effect anyway, but it can distort the accounting. Since commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and munlock+putback using pagevec") this can trigger a VM_BUG_ON in PageTransHuge() check. This patch fixes the issue by adding VM_MIXEDMAP flag to VM_SPECIAL, a list of flags that make vma's non-mlockable and non-mergeable. The reasoning is that VM_MIXEDMAP vma's are similar to VM_PFNMAP, which is already on the VM_SPECIAL list, and both are intended for non-LRU pages where mlocking makes no sense anyway. Related Lkml discussion can be found in [2]. [1] tools/testing/selftests/net/psock_tpacket [2] https://lkml.org/lkml/2014/1/10/427 Signed-off-by: Vlastimil Babka Signed-off-by: Daniel Borkmann Reported-by: Daniel Borkmann Tested-by: Daniel Borkmann Cc: Thomas Hellstrom Cc: John David Anglin Cc: HATAYAMA Daisuke Cc: Konstantin Khlebnikov Cc: Carsten Otte Cc: Jared Hulbert Tested-by: Hannes Frederic Sowa Cc: Kirill A. Shutemov Acked-by: Rik van Riel Cc: Andrea Arcangeli Cc: [3.11.x+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 03ab3e58f511..a1fe25110f50 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -175,7 +175,7 @@ extern unsigned int kobjsize(const void *objp); * Special vmas that are non-mergable, non-mlock()able. * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ -#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP) +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) /* * mapping from the currently active vm_flags protection bits (the -- cgit v1.2.3 From 1ae71d03194ea7424cbd14e449581f67c463d20d Mon Sep 17 00:00:00 2001 From: Liu Ping Fan Date: Mon, 3 Mar 2014 15:38:39 -0800 Subject: mm: numa: bugfix for LAST_CPUPID_NOT_IN_PAGE_FLAGS When doing some numa tests on powerpc, I triggered an oops bug. I find it is caused by using page->_last_cpupid. It should be initialized as "-1 & LAST_CPUPID_MASK", but not "-1". Otherwise, in task_numa_fault(), we will miss the checking (last_cpupid == (-1 & LAST_CPUPID_MASK)). And finally cause an oops bug in task_numa_group(), since the online cpu is less than possible cpu. This happen with CONFIG_SPARSE_VMEMMAP disabled Call trace: SMP NR_CPUS=64 NUMA PowerNV Modules linked in: CPU: 24 PID: 804 Comm: systemd-udevd Not tainted3.13.0-rc1+ #32 task: c000001e2746aa80 ti: c000001e32c50000 task.ti:c000001e32c50000 REGS: c000001e32c53510 TRAP: 0300 Not tainted(3.13.0-rc1+) MSR: 9000000000009032 CR:28024424 XER: 20000000 CFAR: c000000000009324 DAR: 7265717569726857 DSISR:40000000 SOFTE: 1 NIP .task_numa_fault+0x1470/0x2370 LR .task_numa_fault+0x1468/0x2370 Call Trace: .task_numa_fault+0x1468/0x2370 (unreliable) .do_numa_page+0x480/0x4a0 .handle_mm_fault+0x4ec/0xc90 .do_page_fault+0x3a8/0x890 handle_page_fault+0x10/0x30 Instruction dump: 3c82fefb 3884b138 48d9cff1 60000000 48000574 3c62fefb3863af78 3c82fefb 3884b138 48d9cfd5 60000000 e93f0100 <812902e4> 7d2907b45529063e 7d2a07b4 ---[ end trace 15f2510da5ae07cf ]--- Signed-off-by: Liu Ping Fan Signed-off-by: Aneesh Kumar K.V Acked-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a1fe25110f50..c1b7414c7bef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -767,7 +767,7 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS static inline int page_cpupid_xchg_last(struct page *page, int cpupid) { - return xchg(&page->_last_cpupid, cpupid); + return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK); } static inline int page_cpupid_last(struct page *page) @@ -776,7 +776,7 @@ static inline int page_cpupid_last(struct page *page) } static inline void page_cpupid_reset_last(struct page *page) { - page->_last_cpupid = -1; + page->_last_cpupid = -1 & LAST_CPUPID_MASK; } #else static inline int page_cpupid_last(struct page *page) -- cgit v1.2.3 From defd884845297fd5690594bfe89656b01f16d87e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 3 Feb 2014 12:54:39 -0800 Subject: iscsi/iser-target: Fix isert_conn->state hung shutdown issues This patch addresses a couple of different hug shutdown issues related to wait_event() + isert_conn->state. First, it changes isert_conn->conn_wait + isert_conn->conn_wait_comp_err from waitqueues to completions, and sets ISER_CONN_TERMINATING from within isert_disconnect_work(). Second, it splits isert_free_conn() into isert_wait_conn() that is called earlier in iscsit_close_connection() to ensure that all outstanding commands have completed before continuing. Finally, it breaks isert_cq_comp_err() into seperate TX / RX related code, and adds logic in isert_cq_rx_comp_err() to wait for outstanding commands to complete before setting ISER_CONN_DOWN and calling complete(&isert_conn->conn_wait_comp_err). Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.10+ Signed-off-by: Nicholas Bellinger --- include/target/iscsi/iscsi_transport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index ae5a17111968..4483fadfa68d 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -12,6 +12,7 @@ struct iscsit_transport { int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); + void (*iscsit_wait_conn)(struct iscsi_conn *); void (*iscsit_free_conn)(struct iscsi_conn *); int (*iscsit_get_login_rx)(struct iscsi_conn *, struct iscsi_login *); int (*iscsit_put_login_tx)(struct iscsi_conn *, struct iscsi_login *, u32); -- cgit v1.2.3 From 70044d71d31d6973665ced5be04ef39ac1c09a48 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:19:57 -0500 Subject: firewire: don't use PREPARE_DELAYED_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. firewire core-device and sbp2 have been been multiplexing work items with multiple work functions. Introduce fw_device_workfn() and sbp2_lu_workfn() which invoke fw_device->workfn and sbp2_logical_unit->workfn respectively and always use the two functions as the work functions and update the users to set the ->workfn fields instead of overriding work functions using PREPARE_DELAYED_WORK(). This fixes a variety of possible regressions since a2c1c57be8d9 "workqueue: consider work function when searching for busy work items" due to which fw_workqueue lost its required non-reentrancy property. Signed-off-by: Tejun Heo Acked-by: Stefan Richter Cc: linux1394-devel@lists.sourceforge.net Cc: stable@vger.kernel.org # v3.9+ Cc: stable@vger.kernel.org # v3.8.2+ Cc: stable@vger.kernel.org # v3.4.60+ Cc: stable@vger.kernel.org # v3.2.40+ --- include/linux/firewire.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 5d7782e42b8f..c3683bdf28fe 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -200,6 +200,7 @@ struct fw_device { unsigned irmc:1; unsigned bc_implemented:2; + work_func_t workfn; struct delayed_work work; struct fw_attribute_group attribute_group; }; -- cgit v1.2.3 From 2ca310fc4160ed0420da65534a21ae77b24326a8 Mon Sep 17 00:00:00 2001 From: Ditang Chen Date: Fri, 7 Mar 2014 13:27:57 +0800 Subject: SUNRPC: Fix oops when trace sunrpc_task events in nfs client When tracking sunrpc_task events in nfs client, the clnt pointer may be NULL. [ 139.269266] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 139.269915] IP: [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] PGD 1d293067 PUD 1d294067 PMD 0 [ 139.269915] Oops: 0000 [#1] SMP [ 139.269915] Modules linked in: nfsv4 dns_resolver nfs lockd sunrpc fscache sg ppdev e1000 serio_raw pcspkr parport_pc parport i2c_piix4 i2c_core microcode xfs libcrc32c sd_mod sr_mod cdrom ata_generic crc_t10dif crct10dif_common pata_acpi ahci libahci ata_piix libata dm_mirror dm_region_hash dm_log dm_mod [ 139.269915] CPU: 0 PID: 59 Comm: kworker/0:2 Not tainted 3.10.0-84.el7.x86_64 #1 [ 139.269915] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 139.269915] Workqueue: rpciod rpc_async_schedule [sunrpc] [ 139.269915] task: ffff88001b598000 ti: ffff88001b632000 task.ti: ffff88001b632000 [ 139.269915] RIP: 0010:[] [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP: 0018:ffff88001b633d70 EFLAGS: 00010206 [ 139.269915] RAX: ffff88001dfc5338 RBX: ffff88001cc37a00 RCX: ffff88001dfc5334 [ 139.269915] RDX: ffff88001dfc5338 RSI: 0000000000000000 RDI: ffff88001dfc533c [ 139.269915] RBP: ffff88001b633db0 R08: 000000000000002c R09: 000000000000000a [ 139.269915] R10: 0000000000062180 R11: 00000020759fb9dc R12: ffffffffa0292c20 [ 139.269915] R13: ffff88001dfc5334 R14: 0000000000000000 R15: 0000000000000000 [ 139.269915] FS: 0000000000000000(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 [ 139.269915] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 139.269915] CR2: 0000000000000004 CR3: 000000001d290000 CR4: 00000000000006f0 [ 139.269915] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 139.269915] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 139.269915] Stack: [ 139.269915] 000000001b633d98 0000000000000246 ffff88001df1dc00 ffff88001cc37a00 [ 139.269915] ffff88001bc35e60 0000000000000000 ffff88001ffa0a48 ffff88001bc35ee0 [ 139.269915] ffff88001b633e08 ffffffffa02704b5 0000000000010000 ffff88001cc37a70 [ 139.269915] Call Trace: [ 139.269915] [] __rpc_execute+0x1d5/0x400 [sunrpc] [ 139.269915] [] rpc_async_schedule+0x26/0x30 [sunrpc] [ 139.269915] [] process_one_work+0x17b/0x460 [ 139.269915] [] worker_thread+0x11b/0x400 [ 139.269915] [] ? rescuer_thread+0x3e0/0x3e0 [ 139.269915] [] kthread+0xc0/0xd0 [ 139.269915] [] ? kthread_create_on_node+0x110/0x110 [ 139.269915] [] ret_from_fork+0x7c/0xb0 [ 139.269915] [] ? kthread_create_on_node+0x110/0x110 [ 139.269915] Code: 4c 8b 45 c8 48 8d 7d d0 89 4d c4 41 89 c9 b9 28 00 00 00 e8 9d b4 e9 e0 48 85 c0 49 89 c5 74 a2 48 89 c7 e8 9d 3f e9 e0 48 89 c2 <41> 8b 46 04 48 8b 7d d0 4c 89 e9 4c 89 e6 89 42 0c 0f b7 83 d4 [ 139.269915] RIP [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP [ 139.269915] CR2: 0000000000000004 [ 140.946406] ---[ end trace ba486328b98d7622 ]--- Signed-off-by: Ditang Chen Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ddc179b7a105..1fef3e6e9436 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -83,7 +83,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, ), TP_fast_assign( - __entry->client_id = clnt->cl_clid; + __entry->client_id = clnt ? clnt->cl_clid : -1; __entry->task_id = task->tk_pid; __entry->action = action; __entry->runstate = task->tk_runstate; @@ -91,7 +91,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, __entry->flags = task->tk_flags; ), - TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d action=%pf", + TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf", __entry->task_id, __entry->client_id, __entry->flags, __entry->runstate, -- cgit v1.2.3