From 61b590b9ee4221173ad6990a1150c5c9db73564e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 23 Oct 2015 12:43:18 +0200 Subject: netfilter: ingress: don't use nf_hook_list_active nf_hook_list_active() always returns true once at least one device has NF_INGRESS hook enabled. Thus, don't use this function. Instead, inverse the test and use the static key to elide list_empty test if no NF_INGRESS hooks are active. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 187feabe557c..ba7ce8805fe3 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -5,10 +5,13 @@ #include #ifdef CONFIG_NETFILTER_INGRESS -static inline int nf_hook_ingress_active(struct sk_buff *skb) +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) { - return nf_hook_list_active(&skb->dev->nf_hooks_ingress, - NFPROTO_NETDEV, NF_NETDEV_INGRESS); +#ifdef HAVE_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return !list_empty(&skb->dev->nf_hooks_ingress); } static inline int nf_hook_ingress(struct sk_buff *skb) -- cgit v1.2.3 From b4865988eab598e56e6e628b9b32441acd142b28 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 6 Nov 2015 18:35:57 +0100 Subject: netfilter: ingress: fix wrong input interface on hook The input and output interfaces in nf_hook_state_init() are flipped. This fixes iif matching on nftables. 
Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index ba7ce8805fe3..5fcd375ef175 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,8 +19,8 @@ static inline int nf_hook_ingress(struct sk_buff *skb) struct nf_hook_state state; nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, - skb->dev, NULL, dev_net(skb->dev), NULL); + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, + skb->dev, NULL, NULL, dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } -- cgit v1.2.3 From 95ad1f4a9358dff1dcf84bf5c9cc84caa9215f7f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Nov 2015 11:21:47 +0100 Subject: netfilter: ipset: Fix extension alignment The data extensions in ipset lacked the proper memory alignment and thus could lead to kernel crash on several architectures. Therefore the structures have been reorganized and alignment attributes added where needed. The patch was tested on armv7h by Gerhard Wiesinger and on x86_64, sparc64 by Jozsef Kadlecsik. 
Reported-by: Gerhard Wiesinger Tested-by: Gerhard Wiesinger Tested-by: Jozsef Kadlecsik Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 48bb01edcf30..0e1f433cc4b7 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -421,7 +421,7 @@ extern void ip_set_free(void *members); extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], - size_t len); + size_t len, size_t align); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); -- cgit v1.2.3 From fd589a1be20fdd76ef97700dd0185e7a060546dc Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 10 Nov 2015 18:12:42 +0200 Subject: ASoC: dapm: Reset dapm wcache after freeing dapm widgets If there is anything in dapm->path_source_cache or dapm->path_sink_cache, it cannot be valid after the widgets have been freed. Without this patch a repeated remove and load of a machine driver may cause a NULL pointer dereference in dapm_wcache_lookup() when a freed widget, not belonging to any list, still lingers in the wcache. 
Signed-off-by: Jyri Sarha Reported-by: Felipe Balbi Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 7855cfe46b69..95a937eafb79 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -398,6 +398,7 @@ int snd_soc_dapm_del_routes(struct snd_soc_dapm_context *dapm, int snd_soc_dapm_weak_routes(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_route *route, int num); void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w); +void snd_soc_dapm_reset_cache(struct snd_soc_dapm_context *dapm); /* dapm events */ void snd_soc_dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream, -- cgit v1.2.3 From aabc92bbe3cfe4c545f8ccdaaeeea012a46f0abf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 10 Nov 2015 14:31:18 +0100 Subject: net: add __netdev_alloc_pcpu_stats() to indicate gfp flags nf_tables may create percpu counters from the packet path through its dynamic set instantiation infrastructure, so we need a way to allocate this through GFP_ATOMIC. Signed-off-by: Pablo Neira Ayuso Acked-by: David S. 
Miller --- include/linux/netdevice.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c00772bd136..e9d0c8a75380 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2068,20 +2068,23 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; -#define netdev_alloc_pcpu_stats(type) \ -({ \ - typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ - if (pcpu_stats) { \ - int __cpu; \ - for_each_possible_cpu(__cpu) { \ - typeof(type) *stat; \ - stat = per_cpu_ptr(pcpu_stats, __cpu); \ - u64_stats_init(&stat->syncp); \ - } \ - } \ - pcpu_stats; \ +#define __netdev_alloc_pcpu_stats(type, gfp) \ +({ \ + typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ }) +#define netdev_alloc_pcpu_stats(type) \ + __netdev_alloc_pcpu_stats(type, GFP_KERNEL); + #include /* netdevice notifier chain. Please remember to update the rtnetlink -- cgit v1.2.3 From 086f332167d64b645d37405854f049b9ad7371ab Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 10 Nov 2015 13:39:42 +0100 Subject: netfilter: nf_tables: add clone interface to expression operations With the conversion of the counter expressions to make it percpu, we need to clone the percpu memory area, otherwise we crash when using counters from flow tables. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c9149cc0a02d..4bd7508bedc9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -618,6 +618,8 @@ struct nft_expr_ops { void (*eval)(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); + int (*clone)(struct nft_expr *dst, + const struct nft_expr *src); unsigned int size; int (*init)(const struct nft_ctx *ctx, @@ -660,10 +662,20 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); -static inline void nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) +static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { + int err; + __module_get(src->ops->type->owner); - memcpy(dst, src, src->ops->size); + if (src->ops->clone) { + dst->ops = src->ops; + err = src->ops->clone(dst, src); + if (err < 0) + return err; + } else { + memcpy(dst, src, src->ops->size); + } + return 0; } /** -- cgit v1.2.3 From 02bcf4e082e4dc634409a6a6cb7def8806d6e5e6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 11 Nov 2015 11:51:08 -0800 Subject: ipv6: Check rt->dst.from for the DST_NOCACHE route All DST_NOCACHE rt6_info used to have rt->dst.from set to its parent. After commit 8e3d5be73681 ("ipv6: Avoid double dst_free"), DST_NOCACHE is also set to rt6_info which does not have a parent (i.e. rt->dst.from is NULL). This patch catches the rt->dst.from == NULL case. Fixes: 8e3d5be73681 ("ipv6: Avoid double dst_free") Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index aaf9700fc9e5..fb961a576abe 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -167,7 +167,8 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { - if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE)) + if (rt->rt6i_flags & RTF_PCPU || + (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; -- cgit v1.2.3 From 00fd38d938db3f1ab1c486549afc450cb7e751b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Nov 2015 08:43:18 -0800 Subject: tcp: ensure proper barriers in lockless contexts Some functions access TCP sockets without holding a lock and might output non consistent data, depending on compiler and or architecture. tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... Introduce sk_state_load() and sk_state_store() to fix the issues, and more clearly document where this lack of locking is happening. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index bbf7c2cf15b4..7f89e4ba18d1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2226,6 +2226,31 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } +/** + * sk_state_load - read sk->sk_state for lockless contexts + * @sk: socket pointer + * + * Paired with sk_state_store(). Used in places we do not hold socket lock : + * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... 
+ */ +static inline int sk_state_load(const struct sock *sk) +{ + return smp_load_acquire(&sk->sk_state); +} + +/** + * sk_state_store - update sk->sk_state + * @sk: socket pointer + * @newstate: new state + * + * Paired with sk_state_load(). Should be used in contexts where + * state change might impact lockless readers. + */ +static inline void sk_state_store(struct sock *sk, int newstate) +{ + smp_store_release(&sk->sk_state, newstate); +} + void sock_enable_timestamp(struct sock *sk, int flag); int sock_get_timestamp(struct sock *, struct timeval __user *); int sock_get_timestampns(struct sock *, struct timespec __user *); -- cgit v1.2.3 From 66189961e986e53ae39822898fc2ce88f44c61bb Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 12 Nov 2015 19:35:26 +0200 Subject: net/mlx5e: Added self loopback prevention Prevent outgoing multicast frames from looping back to the RX queue. By introducing new HW capability self_lb_en_modifiable, which indicates the support to modify self_lb_en bit in modify_tir command. When this capability is set we can prevent TIRs from sending back loopback multicast traffic to their own RQs, by "refreshing TIRs" with modify_tir command, on every time new channels (SQs/RQs) are created at device open. This is needed since TIRs are static and only allocated once on driver load, and the loopback decision is under their responsibility. Fixes issues of the kind: "IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!" The issue is seen since the IPv6 solicitations multicast messages are loopedback and the network stack thinks they are coming from another host. Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/mlx5/mlx5_ifc.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dd2097455a2e..1565324eb620 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; - u8 reserved_0[0x6]; + u8 reserved_0[0x3]; + u8 self_lb_en_modifiable[0x1]; + u8 reserved_1[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_1[0x4]; + u8 reserved_2[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_2[0x3]; + u8 reserved_3[0x3]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_3[0x2]; + u8 reserved_4[0x2]; u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_4[0x20]; + u8 reserved_5[0x20]; - u8 reserved_5[0x10]; + u8 reserved_6[0x10]; u8 lro_min_mss_size[0x10]; - u8 reserved_6[0x120]; + u8 reserved_7[0x120]; u8 lro_timer_supported_periods[4][0x20]; - u8 reserved_7[0x600]; + u8 reserved_8[0x600]; }; struct mlx5_ifc_roce_cap_bits { @@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits { }; struct mlx5_ifc_modify_tir_bitmask_bits { - u8 reserved[0x20]; + u8 reserved_0[0x20]; - u8 reserved1[0x1f]; + u8 reserved_1[0x1b]; + u8 self_lb_en[0x1]; + u8 reserved_2[0x3]; u8 lro[0x1]; }; -- cgit v1.2.3 From 500404ebcbd074ca11aa0c3fd9a268aa4054fd8b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 3 Nov 2015 12:28:10 +0200 Subject: dmaengine: of_dma: Correct return code for of_dma_request_slave_channel in case !CONFIG_OF of_dma_request_slave_channel should return either pointer for valid dma_chan or ERR_PTR() error code, NULL is not expected to be returned. 
Signed-off-by: Peter Ujfalusi Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/of_dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 36112cdd665a..b90d8ec57c1f 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -80,7 +80,7 @@ static inline int of_dma_router_register(struct device_node *np, static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, -- cgit v1.2.3 From 34c06254ff82a815fdccdfae7517a06c9b768cee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 00:12:24 -0500 Subject: cgroup: fix cftype->file_offset handling 6f60eade2433 ("cgroup: generalize obtaining the handles of and notifying cgroup files") introduced cftype->file_offset so that the handles for per-css file instances can be recorded. These handles then can be used, for example, to generate file modified notifications. Unfortunately, it made the wrong assumption that files are created once for a given css and removed on its destruction. Due to the dependencies among subsystems, a css may be hidden from userland and then later shown again. This is implemented by removing and re-creating the affected files, so the associated kernfs_node for a given cgroup file may change over time. This incorrect assumption led to the corruption of css->files lists. Reimplement cftype->file_offset handling so that cgroup_file->kn is protected by a lock and updated as files are created and destroyed. This also makes keeping them on per-cgroup list unnecessary. 
Signed-off-by: Tejun Heo Reported-by: James Sedgwick Fixes: 6f60eade2433 ("cgroup: generalize obtaining the handles of and notifying cgroup files") Acked-by: Johannes Weiner Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 4 ---- include/linux/cgroup.h | 14 +------------- 2 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 60d44b26276d..869fd4a3d28e 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -90,7 +90,6 @@ enum { */ struct cgroup_file { /* do not access any fields from outside cgroup core */ - struct list_head node; /* anchored at css->files */ struct kernfs_node *kn; }; @@ -134,9 +133,6 @@ struct cgroup_subsys_state { */ u64 serial_nr; - /* all cgroup_files associated with this css */ - struct list_head files; - /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 22e3754f89c5..f64083030ad5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); +void cgroup_file_notify(struct cgroup_file *cfile); char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); @@ -516,19 +517,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } -/** - * cgroup_file_notify - generate a file modified event for a cgroup_file - * @cfile: target cgroup_file - * - * @cfile must have been obtained by setting cftype->file_offset. 
- */ -static inline void cgroup_file_notify(struct cgroup_file *cfile) -{ - /* might not have been created due to one of the CFTYPE selector flags */ - if (cfile->kn) - kernfs_notify(cfile->kn); -} - #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; -- cgit v1.2.3 From b4fe85f9c9146f60457e9512fb6055e69e6a7a65 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 12 Nov 2015 17:35:58 +0100 Subject: ip_tunnel: disable preemption when updating per-cpu tstats Drivers like vxlan use the recently introduced udp_tunnel_xmit_skb/udp_tunnel6_xmit_skb APIs. udp_tunnel6_xmit_skb makes use of ip6tunnel_xmit, and ip6tunnel_xmit, after sending the packet, updates the struct stats using the usual u64_stats_update_begin/end calls on this_cpu_ptr(dev->tstats). udp_tunnel_xmit_skb makes use of iptunnel_xmit, which doesn't touch tstats, so drivers like vxlan, immediately after, call iptunnel_xmit_stats, which does the same thing - calls u64_stats_update_begin/end on this_cpu_ptr(dev->tstats). While vxlan is probably fine (I don't know?), calling a similar function from, say, an unbound workqueue, on a fully preemptable kernel causes real issues: [ 188.434537] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u8:0/6 [ 188.435579] caller is debug_smp_processor_id+0x17/0x20 [ 188.435583] CPU: 0 PID: 6 Comm: kworker/u8:0 Not tainted 4.2.6 #2 [ 188.435607] Call Trace: [ 188.435611] [] dump_stack+0x4f/0x7b [ 188.435615] [] check_preemption_disabled+0x19d/0x1c0 [ 188.435619] [] debug_smp_processor_id+0x17/0x20 The solution would be to protect the whole this_cpu_ptr(dev->tstats)/u64_stats_update_begin/end blocks with disabling preemption and then reenabling it. Signed-off-by: Jason A. Donenfeld Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/net/ip6_tunnel.h | 3 ++- include/net/ip_tunnels.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index aaee6fa02cf1..ff788b665277 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -90,11 +90,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (net_xmit_eval(err) == 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); } else { stats->tx_errors++; stats->tx_aborted_errors++; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index f6dafec9102c..62a750a6a8f8 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -287,12 +287,13 @@ static inline void iptunnel_xmit_stats(int err, struct pcpu_sw_netstats __percpu *stats) { if (err > 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats); + struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += err; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); } else if (err < 0) { err_stats->tx_errors++; err_stats->tx_aborted_errors++; -- cgit v1.2.3 From 24cb7055a3066634a0f3fa0cd6a4780652905d35 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 16 Nov 2015 10:52:48 +0100 Subject: net: switchdev: fix return code of fdb_dump stub rtnl_fdb_dump always expects an index to be returned by the ndo_fdb_dump op, but when CONFIG_NET_SWITCHDEV is off, it returns an error. Fix that by returning the given unmodified idx. A similar fix was 0890cf6cb6ab ("switchdev: fix return value of switchdev_port_fdb_dump in case of error") but for the CONFIG_NET_SWITCHDEV=y case. 
Fixes: 45d4122ca7cd ("switchdev: add support for fdb add/del/dump via switchdev_port_obj ops.") Signed-off-by: Dragos Tatulea Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index bc865e244efe..1d22ce9f352e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -323,7 +323,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, struct net_device *filter_dev, int idx) { - return -EOPNOTSUPP; + return idx; } static inline void switchdev_port_fwd_mark_set(struct net_device *dev, -- cgit v1.2.3 From aedf17f4515b12ba1cd73298e66baa69cf93010e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:36 +0100 Subject: lightnvm: change max_phys_sect to uint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The max_phys_sect variable is defined as a char. We do a boundary check to allow at most 256 physical page descriptors per command. As we are not indexing from zero, this expression is always false. Bump the max_phys_sect to an unsigned int to support the range check. 
Signed-off-by: Matias Bjørling Reported-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 69c9057e1ab8..32b5369e814e 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -220,7 +220,7 @@ struct nvm_dev_ops { nvm_dev_dma_alloc_fn *dev_dma_alloc; nvm_dev_dma_free_fn *dev_dma_free; - uint8_t max_phys_sect; + unsigned int max_phys_sect; }; struct nvm_lun { -- cgit v1.2.3 From 11450469830f2481a9e7cb181609288d40f41323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:37 +0100 Subject: lightnvm: update bad block table format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specification was changed to reflect a multi-value bad block table. Instead of bit-based bad block table, the bad block table now allows eight bad block categories. Currently four are defined: * Factory bad blocks * Grown bad blocks * Device-side reserved blocks * Host-side reserved blocks The factory and grown bad blocks are the regular bad blocks. The reserved blocks are either for internal use or external use. In particular, the device-side reserved blocks allows the host to bootstrap from a limited number of flash blocks. Reducing the flash blocks to scan upon super block initialization. Support for both get bad block table and set bad block table is added. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 32b5369e814e..9b3dc1bc9296 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -191,11 +191,11 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) struct nvm_block; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); -typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *); +typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int, +typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int, nvm_bb_update_fn *, void *); typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); @@ -210,7 +210,7 @@ struct nvm_dev_ops { nvm_id_fn *identity; nvm_get_l2p_tbl_fn *get_l2p_tbl; nvm_op_bb_tbl_fn *get_bb_tbl; - nvm_op_set_bb_fn *set_bb; + nvm_op_set_bb_fn *set_bb_tbl; nvm_submit_io_fn *submit_io; nvm_erase_blk_fn *erase_block; -- cgit v1.2.3 From 12be5edf68e785dd5dc8665db5a88152b49c1fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:39 +0100 Subject: lightnvm: expose mccap in identify command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mccap field is required for I/O command option support. It defines the following flash access modes: * SLC mode * Erase/Program Suspension * Scramble On/Off * Encryption It is slotted in between mpos and cpar, changing the offset for cpar as well. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 9b3dc1bc9296..2572856e2a89 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -74,6 +74,7 @@ struct nvm_id_group { u32 tbet; u32 tbem; u32 mpos; + u32 mccap; u16 cpar; u8 res[913]; } __packed; -- cgit v1.2.3 From 73387e7bed260c89628fc6a4e3632b45be9776b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:40 +0100 Subject: lightnvm: remove unused attrs in nvm_id structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nvm_id, nvm_id_group and nvm_addr_format data structures contain reserved attributes. They are unused by media managers and targets. Remove them. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2572856e2a89..e6ef8aaf533f 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -58,7 +58,6 @@ enum { struct nvm_id_group { u8 mtype; u8 fmtype; - u16 res16; u8 num_ch; u8 num_lun; u8 num_pln; @@ -76,8 +75,7 @@ struct nvm_id_group { u32 mpos; u32 mccap; u16 cpar; - u8 res[913]; -} __packed; +}; struct nvm_addr_format { u8 ch_offset; @@ -92,19 +90,16 @@ struct nvm_addr_format { u8 pg_len; u8 sect_offset; u8 sect_len; - u8 res[4]; }; struct nvm_id { u8 ver_id; u8 vmnt; u8 cgrps; - u8 res[5]; u32 cap; u32 dom; struct nvm_addr_format ppaf; u8 ppat; - u8 resv[224]; struct nvm_id_group groups[4]; } __packed; -- cgit v1.2.3 From 7386af270c72be65c7cb2ba4ad0d4e70dc373106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:44 +0100 Subject: lightnvm: remove linear and device addr modes MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The linear and device specific address modes can be replaced with a simple offset and bit length conversion that is generic across all devices. This both simplifies the specification and removes the special case for qemu nvme, that previously relied on the linear address mapping. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 154 +++++++++++------------------------------------ 1 file changed, 34 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e6ef8aaf533f..cbe288acb1de 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -99,7 +99,6 @@ struct nvm_id { u32 cap; u32 dom; struct nvm_addr_format ppaf; - u8 ppat; struct nvm_id_group groups[4]; } __packed; @@ -119,39 +118,28 @@ struct nvm_tgt_instance { #define NVM_VERSION_MINOR 0 #define NVM_VERSION_PATCH 0 -#define NVM_SEC_BITS (8) -#define NVM_PL_BITS (6) -#define NVM_PG_BITS (16) #define NVM_BLK_BITS (16) -#define NVM_LUN_BITS (10) +#define NVM_PG_BITS (16) +#define NVM_SEC_BITS (8) +#define NVM_PL_BITS (8) +#define NVM_LUN_BITS (8) #define NVM_CH_BITS (8) struct ppa_addr { + /* Generic structure for all addresses */ union { - /* Channel-based PPA format in nand 4x2x2x2x8x10 */ - struct { - u64 ch : 4; - u64 sec : 2; /* 4 sectors per page */ - u64 pl : 2; /* 4 planes per LUN */ - u64 lun : 2; /* 4 LUNs per channel */ - u64 pg : 8; /* 256 pages per block */ - u64 blk : 10;/* 1024 blocks per plane */ - u64 resved : 36; - } chnl; - - /* Generic structure for all addresses */ struct { + u64 blk : NVM_BLK_BITS; + u64 pg : NVM_PG_BITS; u64 sec : NVM_SEC_BITS; u64 pl : NVM_PL_BITS; - u64 pg : NVM_PG_BITS; - u64 blk : NVM_BLK_BITS; u64 lun : NVM_LUN_BITS; u64 ch : NVM_CH_BITS; } g; u64 ppa; }; -} __packed; +}; struct nvm_rq { struct nvm_tgt_instance *ins; @@ -259,8 +247,7 @@ struct nvm_dev { int blks_per_lun; int 
sec_size; int oob_size; - int addr_mode; - struct nvm_addr_format addr_format; + struct nvm_addr_format ppaf; /* Calculated/Cached values. These do not reflect the actual usable * blocks at run-time. @@ -286,118 +273,45 @@ struct nvm_dev { char name[DISK_NAME_LEN]; }; -/* fallback conversion */ -static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev, - struct ppa_addr r) -{ - struct ppa_addr l; - - l.ppa = r.g.sec + - r.g.pg * dev->sec_per_pg + - r.g.blk * (dev->pgs_per_blk * - dev->sec_per_pg) + - r.g.lun * (dev->blks_per_lun * - dev->pgs_per_blk * - dev->sec_per_pg) + - r.g.ch * (dev->blks_per_lun * - dev->pgs_per_blk * - dev->luns_per_chnl * - dev->sec_per_pg); - - return l; -} - -/* fallback conversion */ -static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev, - struct ppa_addr r) -{ - struct ppa_addr l; - int secs, pgs, blks, luns; - sector_t ppa = r.ppa; - - l.ppa = 0; - - div_u64_rem(ppa, dev->sec_per_pg, &secs); - l.g.sec = secs; - - sector_div(ppa, dev->sec_per_pg); - div_u64_rem(ppa, dev->sec_per_blk, &pgs); - l.g.pg = pgs; - - sector_div(ppa, dev->pgs_per_blk); - div_u64_rem(ppa, dev->blks_per_lun, &blks); - l.g.blk = blks; - - sector_div(ppa, dev->blks_per_lun); - div_u64_rem(ppa, dev->luns_per_chnl, &luns); - l.g.lun = luns; - - sector_div(ppa, dev->luns_per_chnl); - l.g.ch = ppa; - - return l; -} - -static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r) +static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, + struct ppa_addr r) { struct ppa_addr l; - l.ppa = 0; - - l.chnl.sec = r.g.sec; - l.chnl.pl = r.g.pl; - l.chnl.pg = r.g.pg; - l.chnl.blk = r.g.blk; - l.chnl.lun = r.g.lun; - l.chnl.ch = r.g.ch; + l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset; + l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset; + l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset; + l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset; + l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset; + l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset; 
return l; } -static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r) +static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, + struct ppa_addr r) { struct ppa_addr l; - l.ppa = 0; - - l.g.sec = r.chnl.sec; - l.g.pl = r.chnl.pl; - l.g.pg = r.chnl.pg; - l.g.blk = r.chnl.blk; - l.g.lun = r.chnl.lun; - l.g.ch = r.chnl.ch; + /* + * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc. + */ + l.g.blk = (r.ppa >> dev->ppaf.blk_offset) & + (((1 << dev->ppaf.blk_len) - 1)); + l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) & + (((1 << dev->ppaf.pg_len) - 1)); + l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) & + (((1 << dev->ppaf.sect_len) - 1)); + l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) & + (((1 << dev->ppaf.pln_len) - 1)); + l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) & + (((1 << dev->ppaf.lun_len) - 1)); + l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) & + (((1 << dev->ppaf.ch_len) - 1)); return l; } -static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev, - struct ppa_addr gppa) -{ - switch (dev->addr_mode) { - case NVM_ADDRMODE_LINEAR: - return __linear_to_generic_addr(dev, gppa); - case NVM_ADDRMODE_CHANNEL: - return __chnl_to_generic_addr(gppa); - default: - BUG(); - } - return gppa; -} - -static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev, - struct ppa_addr gppa) -{ - switch (dev->addr_mode) { - case NVM_ADDRMODE_LINEAR: - return __generic_to_linear_addr(dev, gppa); - case NVM_ADDRMODE_CHANNEL: - return __generic_to_chnl_addr(gppa); - default: - BUG(); - } - return gppa; -} - static inline int ppa_empty(struct ppa_addr ppa_addr) { return (ppa_addr.ppa == ADDR_EMPTY); -- cgit v1.2.3 From 0f45c26fc302c02b0576db37d4849baa53a2bb41 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 11 Nov 2015 11:29:09 +0100 Subject: drm/atomic: add a drm_atomic_clean_old_fb helper. This is useful for all the boilerplate code about cleaning old_fb. 
Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447237751-9663-4-git-send-email-maarten.lankhorst@ubuntu.com --- include/drm/drm_atomic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index e67aeac2aee0..4b74c97d297a 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -136,6 +136,9 @@ drm_atomic_connectors_for_crtc(struct drm_atomic_state *state, void drm_atomic_legacy_backoff(struct drm_atomic_state *state); +void +drm_atomic_clean_old_fb(struct drm_device *dev, unsigned plane_mask, int ret); + int __must_check drm_atomic_check_only(struct drm_atomic_state *state); int __must_check drm_atomic_commit(struct drm_atomic_state *state); int __must_check drm_atomic_async_commit(struct drm_atomic_state *state); -- cgit v1.2.3 From 28f9ee22bcdd84726dbf6267d0b58f254166b900 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 16 Nov 2015 15:43:45 -0500 Subject: vlan: Do not put vlan headers back on bridge and macvlan ports When a vlan is configured with REORDER_HEADER set to 0, the vlan header is put back into the packet and makes it appear that the vlan header is still there even after it's been processed. This poses a problem for bridge and macvlan ports. The packets passed to those devices may be forwarded and at the time of the forward, vlan headers end up being unexpectedly present. With the patch, we make sure that we do not put the vlan header back (when REORDER_HEADER is 0) if a bridge or macvlan has been configured on top of the vlan device. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc221b967687..67bfac1abfc1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3857,6 +3857,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev) return dev->priv_flags & IFF_EBRIDGE; } +static inline bool netif_is_bridge_port(const struct net_device *dev) +{ + return dev->priv_flags & IFF_BRIDGE_PORT; +} + static inline bool netif_is_ovs_master(const struct net_device *dev) { return dev->priv_flags & IFF_OPENVSWITCH; -- cgit v1.2.3 From 819ec8e1f349f73bdf65bf33a364538e59007a9a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 16 Nov 2015 23:34:41 +0100 Subject: phy: marvell: Add support for 88E1540 PHY The 88E1540 can be found embedded in the Marvell 88E6352 switch. It is compatible with the 88E1510, so add support for it, using the 88E1510 specific functions. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index e6982ac3200d..a57f0dfb6db7 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -16,6 +16,7 @@ #define MARVELL_PHY_ID_88E1318S 0x01410e90 #define MARVELL_PHY_ID_88E1116R 0x01410e40 #define MARVELL_PHY_ID_88E1510 0x01410dd0 +#define MARVELL_PHY_ID_88E1540 0x01410eb0 #define MARVELL_PHY_ID_88E3016 0x01410e60 /* struct phy_device dev_flags definitions */ -- cgit v1.2.3 From 451c2b5caf37b526ae34a1081b71115e1de2d063 Mon Sep 17 00:00:00 2001 From: Aya Mahfouz Date: Wed, 18 Nov 2015 08:36:44 +0200 Subject: net: dns_resolver: convert time_t to time64_t Changes the definition of the pointer _expiry from time_t to time64_t. This is to handle the Y2038 problem where time_t will overflow in the year 2038. 
The change is safe because the kernel subsystems that call dns_query pass NULL. Signed-off-by: Arnd Bergmann Signed-off-by: Aya Mahfouz Signed-off-by: David S. Miller --- include/linux/dns_resolver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h index cc92268af89a..6ac3cad9aef1 100644 --- a/include/linux/dns_resolver.h +++ b/include/linux/dns_resolver.h @@ -27,7 +27,7 @@ #ifdef __KERNEL__ extern int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time_t *_expiry); + const char *options, char **_result, time64_t *_expiry); #endif /* KERNEL */ -- cgit v1.2.3 From db27a7a37aa0b1f8b373f8b0fb72a2ccaafb85b7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:03:50 +0100 Subject: KVM: Provide function for VCPU lookup by id Let's provide a function to lookup a VCPU by id. Reviewed-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger [split patch from refactoring patch] --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5706a2108f0a..c923350ca20a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -460,6 +460,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ idx++) +static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + if (vcpu->vcpu_id == id) + return vcpu; + return NULL; +} + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ -- cgit v1.2.3 From 851df3dc11136fde86ebd78ee7527cb43c7cd349 Mon Sep 17 00:00:00 2001 From: Arnd 
Bergmann Date: Mon, 16 Nov 2015 22:34:58 +0100 Subject: scpi: hide get_scpi_ops in module from built-in code The scpi_clock driver can be built-in when CONFIG_COMPILE_TEST is set even when ARM_SCPI_PROTOCOL is a loadable module, and that results in a link error: drivers/built-in.o: In function `scpi_clocks_probe': (.text+0x14453c): undefined reference to `get_scpi_ops' Using #if IS_REACHABLE() around the get_scpi_ops() declaration makes it build successfully in this case for compile-testing, but the effect is the same as when ARM_SCPI_PROTOCOL is disabled, as the code will not be used. Signed-off-by: Arnd Bergmann Acked-by: Punit Agrawal --- include/linux/scpi_protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index 80af3cd35ae4..72ce932c69b2 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -71,7 +71,7 @@ struct scpi_ops { int (*sensor_get_value)(u16, u32 *); }; -#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL) +#if IS_REACHABLE(CONFIG_ARM_SCPI_PROTOCOL) struct scpi_ops *get_scpi_ops(void); #else static inline struct scpi_ops *get_scpi_ops(void) { return NULL; } -- cgit v1.2.3 From a35bb4458e5e5c9dc19a0daa0629409285f3b25e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 19 Nov 2015 14:17:06 +0100 Subject: scsi: report 'INQUIRY result too short' once per host Some host adapters (e.g. Hyper-V storvsc) are known for not respecting the SPC-2/3/4 requirement for 'INQUIRY data (see table ...) shall contain at least 36 bytes'. As a result we get tons of 'scsi 0:7:1:1: scsi scan: INQUIRY result too short (5), using 36' messages on console. This can be problematic for slow consoles. Introduce short_inquiry flag in struct Scsi_Host to print the message once per host. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- include/scsi/scsi_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index e113c757d555..3a22da73d59a 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -673,6 +673,9 @@ struct Scsi_Host { unsigned use_blk_mq:1; unsigned use_cmd_list:1; + /* Host responded with short (<36 bytes) INQUIRY result */ + unsigned short_inquiry:1; + /* * Optional work queue to be utilized by the transport */ -- cgit v1.2.3 From 2e6edc95382cc36423aff18a237173ad62d5ab52 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2015 13:29:28 -0800 Subject: block: protect rw_page against device teardown Fix use after free crashes like the following: general protection fault: 0000 [#1] SMP Call Trace: [] ? pmem_do_bvec.isra.12+0xa6/0xf0 [nd_pmem] [] pmem_rw_page+0x42/0x80 [nd_pmem] [] bdev_read_page+0x50/0x60 [] do_mpage_readpage+0x510/0x770 [] ? I_BDEV+0x20/0x20 [] ? lru_cache_add+0x1c/0x50 [] mpage_readpages+0x107/0x170 [] ? I_BDEV+0x20/0x20 [] ? I_BDEV+0x20/0x20 [] blkdev_readpages+0x1d/0x20 [] __do_page_cache_readahead+0x28f/0x310 [] ? __do_page_cache_readahead+0x169/0x310 [] ? 
pagecache_get_page+0x2d/0x1d0 [] filemap_fault+0x396/0x530 [] __do_fault+0x4e/0xf0 [] handle_mm_fault+0x11bd/0x1b50 Cc: Cc: Jens Axboe Cc: Alexander Viro Reported-by: kbuild test robot Acked-by: Matthew Wilcox [willy: symmetry fixups] Signed-off-by: Dan Williams --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fe27f8d91f0..c0d2b7927c1f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -794,6 +794,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int blk_queue_enter(struct request_queue *q, gfp_t gfp); +extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -- cgit v1.2.3 From ac0621971a26526cad8cf9db7626d5e50562a441 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Tue, 17 Nov 2015 10:24:38 +0200 Subject: mac80211: always set the buf_size in AddBA req to 64 Advertising reordering window in ADDBA less than 64 can crash some APs, an example is LinkSys WRT120N (with FW v1.0.07 build 002 Jun 18 2012). On the other hand, a driver may need to limit Tx A-MPDU size for its own reasons, like specific HW limitations. Signed-off-by: Gregory Greenman Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 82045fca388b..760bc4d5a2cf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2003,8 +2003,10 @@ enum ieee80211_hw_flags { * it shouldn't be set. 
* * @max_tx_aggregation_subframes: maximum number of subframes in an - * aggregate an HT driver will transmit, used by the peer as a - * hint to size its reorder buffer. + * aggregate an HT driver will transmit. Though ADDBA will advertise + * a constant value of 64 as some older APs can crash if the window + * size is smaller (an example is LinkSys WRT120N with FW v1.0.07 + * build 002 Jun 18 2012). * * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX * (if %IEEE80211_HW_QUEUE_CONTROL is set) -- cgit v1.2.3 From 0b59733b95f9d7af6bee6e6a4d0d444eb694c514 Mon Sep 17 00:00:00 2001 From: Javier Gonzalez Date: Fri, 20 Nov 2015 13:47:56 +0100 Subject: lightnvm: keep track of block counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maintain number of in use blocks, free blocks, and bad blocks in a per lun basis. This allows the upper layers to get information about the state of each lun. Also, account for blocks reserved to the device on the free block count. nr_free_blocks matches now the actual number of blocks on the free list when the device is booted. 
Signed-off-by: Javier Gonzalez Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cbe288acb1de..831a20cf070c 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -213,7 +213,9 @@ struct nvm_lun { int lun_id; int chnl_id; + unsigned int nr_inuse_blocks; /* Number of used blocks */ unsigned int nr_free_blocks; /* Number of unused blocks */ + unsigned int nr_bad_blocks; /* Number of bad blocks */ struct nvm_block *blocks; spinlock_t lock; -- cgit v1.2.3 From 2fde0e482db2b43bb4ed0e9aebfbe78ebcbbf5a6 Mon Sep 17 00:00:00 2001 From: Javier Gonzalez Date: Fri, 20 Nov 2015 13:47:57 +0100 Subject: lightnvm: add free and bad lun info to show luns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add free block, used block, and bad block information to the show debug interface. This information is used to debug how targets track blocks. Also, change debug function name to make it more generic. 
Signed-off-by: Javier Gonzalez Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 831a20cf070c..3db5552b17d5 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -380,7 +380,7 @@ typedef int (nvmm_end_io_fn)(struct nvm_rq *, int); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, unsigned long); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); -typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *); +typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); struct nvmm_type { const char *name; @@ -404,7 +404,7 @@ struct nvmm_type { nvmm_get_lun_fn *get_lun; /* Statistics */ - nvmm_free_blocks_print_fn *free_blocks_print; + nvmm_lun_info_print_fn *lun_info_print; struct list_head list; }; -- cgit v1.2.3 From 94a58c360a45c066ab5472cfd2bf2a4ba63aa532 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 20 Nov 2015 15:56:48 -0800 Subject: slab.h: sprinkle __assume_aligned attributes The various allocators return aligned memory. Telling the compiler that allows it to generate better code in many cases, for example when the return value is immediately passed to memset(). Some code does become larger, but at least we win twice as much as we lose: $ scripts/bloat-o-meter /tmp/vmlinux vmlinux add/remove: 0/0 grow/shrink: 13/52 up/down: 995/-2140 (-1145) An example of the different (and smaller) code can be seen in mm_alloc(). 
Before: : 48 8d 78 08 lea 0x8(%rax),%rdi : 48 89 c1 mov %rax,%rcx : 48 89 c2 mov %rax,%rdx : 48 c7 00 00 00 00 00 movq $0x0,(%rax) : 48 c7 80 48 03 00 00 movq $0x0,0x348(%rax) : 00 00 00 00 : 31 c0 xor %eax,%eax : 48 83 e7 f8 and $0xfffffffffffffff8,%rdi : 48 29 f9 sub %rdi,%rcx : 81 c1 50 03 00 00 add $0x350,%ecx : c1 e9 03 shr $0x3,%ecx : f3 48 ab rep stos %rax,%es:(%rdi) After: : 48 89 c2 mov %rax,%rdx : b9 6a 00 00 00 mov $0x6a,%ecx : 31 c0 xor %eax,%eax : 48 89 d7 mov %rdx,%rdi : f3 48 ab rep stos %rax,%es:(%rdi) So gcc's strategy is to do two possibly (but not really, of course) unaligned stores to the first and last word, then do an aligned rep stos covering the middle part with a little overlap. Maybe arches which do not allow unaligned stores gain even more. I don't know if gcc can actually make use of alignments greater than 8 for anything, so one could probably drop the __assume_xyz_alignment macros and just use __assume_aligned(8). The increases in code size are mostly caused by gcc deciding to opencode strlen() using the check-four-bytes-at-a-time trick when it knows the buffer is sufficiently aligned (one function grew by 200 bytes). Now it turns out that many of these strlen() calls showing up were in fact redundant, and they're gone from -next. 
Applying the two patches to next-20151001 bloat-o-meter instead says add/remove: 0/0 grow/shrink: 6/52 up/down: 244/-2140 (-1896) Signed-off-by: Rasmus Villemoes Acked-by: Christoph Lameter Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7c82e3b307a3..96940772bb92 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -157,6 +157,24 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #endif +/* + * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. + * Intended for arches that get misalignment faults even for 64 bit integer + * aligned buffers. + */ +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + +/* + * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned + * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN + * aligned pointers. 
+ */ +#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) +#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) +#define __assume_page_alignment __assume_aligned(PAGE_SIZE) + /* * Kmalloc array related definitions */ @@ -286,8 +304,8 @@ static __always_inline int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags); -void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags); +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment; +void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment; void kmem_cache_free(struct kmem_cache *, void *); /* @@ -301,8 +319,8 @@ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node); -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) { @@ -316,12 +334,12 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t); +extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment; #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size); + int node, size_t size) __assume_slab_alignment; #else static __always_inline void * kmem_cache_alloc_node_trace(struct kmem_cache *s, @@ -354,10 +372,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order); +extern 
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #else static __always_inline void * kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) @@ -482,15 +500,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) return __kmalloc_node(size, flags, node); } -/* - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. - * Intended for arches that get misalignment faults even for 64 bit integer - * aligned buffers. - */ -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) -#endif - struct memcg_cache_array { struct rcu_head rcu; struct kmem_cache *entries[0]; -- cgit v1.2.3 From 5cf6a51e6062afe7cc507f32f1e5f7e6497ae844 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 20 Nov 2015 15:56:53 -0800 Subject: configfs: allow dynamic group creation This patchset introduces IIO software triggers, offers a way of configuring them via configfs and adds the IIO hrtimer based interrupt source to be used with software triggers. The architecture is now split in 3 parts, to remove all IIO trigger specific parts from IIO configfs core: (1) IIO configfs - creates the root of the IIO configfs subsys. (2) IIO software triggers - software trigger implementation, dynamically creating /config/iio/triggers group. (3) IIO hrtimer trigger - is the first interrupt source for software triggers (with sysfs to follow). Each trigger type can implement its own set of attributes. Lockdep seems to be happy with the locking in configfs patch. This patch (of 5): We don't want to hardcode default groups at subsystem creation time. 
We export: * configfs_register_group * configfs_unregister_group to allow drivers to programmatically create/destroy groups later, after module init time. This is needed for IIO configfs support. (akpm: the other 4 patches to be merged via the IIO tree) Signed-off-by: Daniel Baluta Suggested-by: Lars-Peter Clausen Reviewed-by: Christoph Hellwig Acked-by: Joel Becker Cc: Hartmut Knaack Cc: Octavian Purdila Cc: Paul Bolle Cc: Adriana Reus Cc: Cristina Opriceana Cc: Peter Meerwald Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/configfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index a8a335b7fce0..758a029011b1 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -197,6 +197,16 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro int configfs_register_subsystem(struct configfs_subsystem *subsys); void configfs_unregister_subsystem(struct configfs_subsystem *subsys); +int configfs_register_group(struct config_group *parent_group, + struct config_group *group); +void configfs_unregister_group(struct config_group *group); + +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type); +void configfs_unregister_default_group(struct config_group *group); + /* These functions can sleep and can alloc with GFP_KERNEL */ /* WARNING: These cannot be called underneath configfs callbacks!! */ int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); -- cgit v1.2.3 From 9d8a765211335cfdad464b90fb19f546af5706ae Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 20 Nov 2015 15:57:21 -0800 Subject: kernel/signal.c: unexport sigsuspend() sigsuspend() is nowhere used except in signal.c itself, so we can mark it static and not pollute the global namespace. 
But this patch is more than a boring cleanup patch, it fixes a real issue on UserModeLinux. UML has a special console driver to display ttys using xterm, or other terminal emulators, on the host side. Vegard reported that sometimes UML is unable to spawn an xterm and he's facing the following warning: WARNING: CPU: 0 PID: 908 at include/linux/thread_info.h:128 sigsuspend+0xab/0xc0() It turned out that this warning makes absolutely no sense as the UML xterm code calls sigsuspend() on the host side, at least it tries. But as the kernel itself offers a sigsuspend() symbol the linker chose this one instead of the glibc wrapper. Interestingly this code used to work since ever but always blocked signals on the wrong side. Some recent kernel change made the WARN_ON() trigger and uncovered the bug. It is a wonderful example of how much works by chance on computers. :-) Fixes: 68f3f16d9ad0f1 ("new helper: sigsuspend()") Signed-off-by: Richard Weinberger Reported-by: Vegard Nossum Tested-by: Vegard Nossum Acked-by: Oleg Nesterov Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/signal.h b/include/linux/signal.h index ab1e0392b5ac..92557bbce7e7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -239,7 +239,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -extern int sigsuspend(sigset_t *); struct sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION -- cgit v1.2.3 From 21fa8442799945beaca074cb5bcf7cfe24969d59 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Nov 2015 15:57:32 -0800 Subject: mm: fix up sparse warning in gfpflags_allow_blocking sparse says: include/linux/gfp.h:274:26: warning: incorrect type in return expression (different base types) include/linux/gfp.h:274:26: expected bool 
include/linux/gfp.h:274:26: got restricted gfp_t ...add a forced cast to silence the warning. Signed-off-by: Jeff Layton Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6523109e136d..8942af0813e3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -271,7 +271,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { - return gfp_flags & __GFP_DIRECT_RECLAIM; + return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM); } #ifdef CONFIG_HIGHMEM -- cgit v1.2.3 From 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 8 Nov 2015 08:52:31 -0500 Subject: tty: audit: Fix audit source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data to audit/record is in the 'from' buffer (ie., the input read buffer). 
Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode") Cc: stable # 4.1+ Cc: Miloslav Trmač Signed-off-by: Peter Hurley Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index 5b04b0a5375b..5e31f1b99037 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -607,7 +607,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); @@ -615,8 +615,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, - unsigned char *data, size_t size, unsigned icanon) +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size, unsigned icanon) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) -- cgit v1.2.3 From 865762a8119e74b5f0e236d2d8eaaf8be9292a06 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:58 -0800 Subject: slab/slub: adjust kmem_cache_alloc_bulk API Adjust kmem_cache_alloc_bulk API before we have any real users. Adjust API to return type 'int' instead of previously type 'bool'. This is done to allow future extension of the bulk alloc API. A future extension could be to allow SLUB to stop at a page boundary, when specified by a flag, and then return the number of objects. The advantage of this approach, would make it easier to make bulk alloc run without local IRQs disabled. 
With an approach of cmpxchg "stealing" the entire c->freelist or page->freelist. To avoid overshooting we would stop processing at a slab-page boundary. Else we always end up returning some objects at the cost of another cmpxchg. To keep compatible with future users of this API linking against an older kernel when using the new flag, we need to return the number of allocated objects with this API change. Signed-off-by: Jesper Dangaard Brouer Cc: Vladimir Davydov Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 96940772bb92..2037a861e367 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -316,7 +316,7 @@ void kmem_cache_free(struct kmem_cache *, void *); * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; -- cgit v1.2.3 From 614e4c4ebc75517295bccd29b20ddbc5b52af6fc Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 12 Nov 2015 11:00:04 +0100 Subject: perf/core: Robustify the perf_cgroup_from_task() RCU checks This patch reinforces the lockdep checks performed by perf_cgroup_from_tsk() by passing the perf_event_context whenever possible. It is okay to not hold the RCU read lock when we know we hold the ctx->lock. This patch makes sure this property holds. In some functions, such as perf_cgroup_sched_in(), we do not pass the context because we are sure we are holding the RCU read lock. 
Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: edumazet@google.com Link: http://lkml.kernel.org/r/1447322404-10920-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d841d33bcdc9..f9828a48f16a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -697,9 +697,11 @@ struct perf_cgroup { * if there is no cgroup event for the current CPU context. */ static inline struct perf_cgroup * -perf_cgroup_from_task(struct task_struct *task) +perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) { - return container_of(task_css(task, perf_event_cgrp_id), + return container_of(task_css_check(task, perf_event_cgrp_id, + ctx ? lockdep_is_held(&ctx->lock) + : true), struct perf_cgroup, css); } #endif /* CONFIG_CGROUP_PERF */ -- cgit v1.2.3 From 90eec103b96e30401c0b846045bf8a1c7159b6da Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2015 11:08:45 +0100 Subject: treewide: Remove old email address There were still a number of references to my old Red Hat email address in the kernel source. Remove these while keeping the Red Hat copyright notices intact. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 2 +- include/linux/jump_label.h | 2 +- include/linux/lockdep.h | 2 +- include/linux/proportions.h | 2 +- include/linux/uprobes.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index db284bff29dc..9dbb739cafa0 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -5,7 +5,7 @@ * Copyright 2001 Red Hat, Inc. * Based on code from mm/memory.c Copyright Linus Torvalds and others. * - * Copyright 2011 Red Hat, Inc., Peter Zijlstra + * Copyright 2011 Red Hat, Inc., Peter Zijlstra * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 8dde55974f18..0536524bb9eb 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -5,7 +5,7 @@ * Jump label support * * Copyright (C) 2009-2012 Jason Baron - * Copyright (C) 2011-2012 Peter Zijlstra + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 70400dc7660f..c57e424d914b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -2,7 +2,7 @@ * Runtime locking correctness validator * * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * * see Documentation/locking/lockdep-design.txt for more details. 
*/ diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 5440f64d2942..21221338ad18 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -1,7 +1,7 @@ /* * FLoating proportions * - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * * This file contains the public data structure and API definitions. */ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 0bdc72f36905..4a29c75b146e 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -21,7 +21,7 @@ * Authors: * Srikar Dronamraju * Jim Keniston - * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra */ #include -- cgit v1.2.3 From 7d267278a9ece963d77eefec61630223fce08c6c Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Fri, 20 Nov 2015 22:07:23 +0000 Subject: unix: avoid use-after-free in ep_remove_wait_queue Rainer Weikusat writes: An AF_UNIX datagram socket being the client in an n:1 association with some server socket is only allowed to send messages to the server if the receive queue of this socket contains at most sk_max_ack_backlog datagrams. This implies that prospective writers might be forced to go to sleep despite none of the message presently enqueued on the server receive queue were sent by them. In order to ensure that these will be woken up once space becomes again available, the present unix_dgram_poll routine does a second sock_poll_wait call with the peer_wait wait queue of the server socket as queue argument (unix_dgram_recvmsg does a wake up on this queue after a datagram was received). This is inherently problematic because the server socket is only guaranteed to remain alive for as long as the client still holds a reference to it. 
In case the connection is dissolved via connect or by the dead peer detection logic in unix_dgram_sendmsg, the server socket may be freed even though "the polling mechanism" (in particular, epoll) still has a pointer to the corresponding peer_wait queue. There's no way to forcibly deregister a wait queue with epoll. Based on an idea by Jason Baron, the patch below changes the code such that a wait_queue_t belonging to the client socket is enqueued on the peer_wait queue of the server whenever the peer receive queue full condition is detected by either a sendmsg or a poll. A wake up on the peer queue is then relayed to the ordinary wait queue of the client socket via a wake function. The connection to the peer wait queue is again dissolved when a wake up is about to be relayed, the client socket reconnects, a dead peer is detected, or the client socket is itself closed. This enables removing the second sock_poll_wait from unix_dgram_poll, thus avoiding the use-after-free, while still ensuring that no blocked writer sleeps forever. Signed-off-by: Rainer Weikusat Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets") Reviewed-by: Jason Baron Signed-off-by: David S.
Miller --- include/net/af_unix.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b36d837c701e..2a91a0561a47 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -62,6 +62,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; + wait_queue_t peer_wake; }; static inline struct unix_sock *unix_sk(const struct sock *sk) -- cgit v1.2.3 From c86b3de8c8b02d7e474fdc002c8df533b844524c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Nov 2015 17:48:52 +0100 Subject: thermal: fix thermal_zone_bind_cooling_device prototype When the prototype for thermal_zone_bind_cooling_device changed, the static inline wrapper function was left alone, which in theory can cause build warnings: I have seen this error in the past: drivers/thermal/db8500_thermal.c: In function 'db8500_cdev_bind': drivers/thermal/db8500_thermal.c:78:9: error: too many arguments to function 'thermal_zone_bind_cooling_device' ret = thermal_zone_bind_cooling_device(thermal, i, cdev, while this one no longer shows up, there is no doubt that the prototype is still wrong, so let's just fix it anyway. 
Signed-off-by: Arnd Bergmann Fixes: 6cd9e9f629f1 ("thermal: of: fix cooling device weights in device tree") Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 4014a59828fc..613c29bd6baf 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -438,7 +438,8 @@ static inline void thermal_zone_device_unregister( static inline int thermal_zone_bind_cooling_device( struct thermal_zone_device *tz, int trip, struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower) + unsigned long upper, unsigned long lower, + unsigned int weight) { return -ENODEV; } static inline int thermal_zone_unbind_cooling_device( struct thermal_zone_device *tz, int trip, -- cgit v1.2.3 From 0f42a6a9b807b092841f7e1b381f8c7e80a0d86a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Nov 2015 09:38:48 +0100 Subject: nfs: use btrfs ioctl defintions for clone The NFS CLONE_RANGE definition was wrong and thus never worked. Fix this by simply using the btrfs ioctl definition. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h index 654bae3f1a38..5e6296160361 100644 --- a/include/uapi/linux/nfs.h +++ b/include/uapi/linux/nfs.h @@ -33,17 +33,6 @@ #define NFS_PIPE_DIRNAME "nfs" -/* NFS ioctls */ -/* Let's follow btrfs lead on CLONE to avoid messing userspace */ -#define NFS_IOC_CLONE _IOW(0x94, 9, int) -#define NFS_IOC_CLONE_RANGE _IOW(0x94, 13, int) - -struct nfs_ioctl_clone_range_args { - __s64 src_fd; - __u64 src_off, count; - __u64 dst_off; -}; - /* * NFS stats.
The good thing with these values is that NFSv3 errors are * a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which -- cgit v1.2.3 From 91ab4b4d16e6649fbbf65f303c0c4e20ed680bd1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 19 Nov 2015 14:30:26 -0500 Subject: nfs: use sliding delay when LAYOUTGET gets NFS4ERR_DELAY When LAYOUTGET gets NFS4ERR_DELAY, we currently will wait 15s before retrying the call. That is a _very_ long time, so add a timeout value to struct nfs4_layoutget and pass nfs4_async_handle_error a pointer to it. This allows the RPC engine to use a sliding delay window, instead of a 15s delay. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 570d630f98ae..11bbae44f4cb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -251,6 +251,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_res res; struct rpc_cred *cred; gfp_t gfp_flags; + long timeout; }; struct nfs4_getdeviceinfo_args { -- cgit v1.2.3 From c3ede03c881ca8ad618ad52c82b44ecb72c6e408 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 9 Nov 2015 16:43:09 +0100 Subject: gpu: ipu-v3: drop unused dmfc field from client platform data This field is never used, drop it. Signed-off-by: Philipp Zabel --- include/video/imx-ipu-v3.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 85dedca3dcfb..eeba75395f7d 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -343,7 +343,6 @@ struct ipu_client_platformdata { int di; int dc; int dp; - int dmfc; int dma[2]; }; -- cgit v1.2.3 From 0e3dfda91d9fe8e2c4d0b5d21434b173a241eeaf Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 24 Nov 2015 16:23:05 +0100 Subject: KVM: arm/arm64: arch_timer: Preserve physical dist. 
active state on LR.active We were incorrectly removing the active state from the physical distributor on the timer interrupt when the timer output level was deasserted. We shouldn't be doing this without considering the virtual interrupt's active state, because the architecture requires that when an LR has the HW bit set and the pending or active bits set, then the physical interrupt must also have the corresponding bits set. This addresses an issue where we have been observing an inconsistency between the LR state and the physical distributor state where the LR state was active and the physical distributor was not active, which shouldn't happen. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 9c747cb14ad8..d2f41477f8ae 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -342,10 +342,10 @@ int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, struct irq_phys_map *map, bool level); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) -- cgit v1.2.3 From 264640fc2c5f4f913db5c73fa3eb1ead2c45e9d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Tue, 24 Nov 2015 15:07:11 +0100 Subject: ipv6: distinguish frag queues by device for multicast and link-local packets If a fragmented multicast packet is received on an ethernet device which has an active macvlan on top of it, each fragment 
is duplicated and received both on the underlying device and the macvlan. If some fragments for macvlan are processed before the whole packet for the underlying device is reassembled, the "overlapping fragments" test in ip6_frag_queue() discards the whole fragment queue. To resolve this, add device ifindex to the search key and require it to match reassembling multicast packets and packets to link-local addresses. Note: similar patch has been already submitted by Yoshifuji Hideaki in http://patchwork.ozlabs.org/patch/220979/ but got lost and forgotten for some reason. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e1a10b0ac0b0..ea5a13ef85a6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -490,6 +490,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + int iif; u8 ecn; }; -- cgit v1.2.3 From fbc416ff86183e2203cdf975e2881d7c164b0271 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Nov 2015 12:12:21 +0100 Subject: arm64: fix building without CONFIG_UID16 As reported by Michal Simek, building an ARM64 kernel with CONFIG_UID16 disabled currently fails because the system call table still needs to reference the individual function entry points that are provided by kernel/sys_ni.c in this case, and the declarations are hidden inside of #ifdef CONFIG_UID16: arch/arm64/include/asm/unistd32.h:57:8: error: 'sys_lchown16' undeclared here (not in a function) __SYSCALL(__NR_lchown, sys_lchown16) I believe this problem only exists on ARM64, because older architectures tend to not need declarations when their system call table is built in assembly code, while newer architectures tend to not need UID16 support. ARM64 only uses these system calls for compatibility with 32-bit ARM binaries. 
This changes the CONFIG_UID16 check into CONFIG_HAVE_UID16, which is set unconditionally on ARM64 with CONFIG_COMPAT, so we see the declarations whenever we need them, but otherwise the behavior is unchanged. Fixes: af1839eb4bd4 ("Kconfig: clean up the long arch list for the UID16 config option") Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas --- include/linux/syscalls.h | 2 +- include/linux/types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a156b82dd14c..c2b66a277e98 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -524,7 +524,7 @@ asmlinkage long sys_chown(const char __user *filename, asmlinkage long sys_lchown(const char __user *filename, uid_t user, gid_t group); asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); -#ifdef CONFIG_UID16 +#ifdef CONFIG_HAVE_UID16 asmlinkage long sys_chown16(const char __user *filename, old_uid_t user, old_gid_t group); asmlinkage long sys_lchown16(const char __user *filename, diff --git a/include/linux/types.h b/include/linux/types.h index 70d8500bddf1..70dd3dfde631 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -35,7 +35,7 @@ typedef __kernel_gid16_t gid16_t; typedef unsigned long uintptr_t; -#ifdef CONFIG_UID16 +#ifdef CONFIG_HAVE_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ typedef __kernel_old_uid_t old_uid_t; typedef __kernel_old_gid_t old_gid_t; -- cgit v1.2.3 From c9da161c6517ba12154059d3b965c2cbaf16f90f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 24 Nov 2015 21:28:15 +0100 Subject: bpf: fix clearing on persistent program array maps Currently, when having map file descriptors pointing to program arrays, there's still the issue that we unconditionally flush program array contents via bpf_fd_array_map_clear() in bpf_map_release(). 
This happens when such a file descriptor is released and is independent of the map's refcount. Having this flush independent of the refcount is for a reason: there can be arbitrarily complex dependency chains among tail calls, also circular ones (direct or indirect, nesting limit determined during runtime), and we need to make sure that the map drops all references to eBPF programs it holds, so that the map's refcount can eventually drop to zero and initiate its freeing. Btw, a walk of the whole dependency graph would not be possible for various reasons, one being complexity and another one inconsistency, i.e. new programs can be added to parts of the graph at any time, so there's no guaranteed consistent state for the time of such a walk. Now, the program array pinning itself works, but the issue is that each derived file descriptor on close would nevertheless call unconditionally into bpf_fd_array_map_clear(). Instead, keep track of users and postpone this flush until the last reference to a user is dropped. As this only concerns a subset of references (e.g. a prog array could hold a program that itself has a reference on the prog array holding it, etc.), we need to track them separately. Short analysis on the refcounting: on map creation time usercnt will be one, so there's no change in behaviour for bpf_map_release(), if unpinned. If we already fail in map_create(), we are immediately freed, and no file descriptor has been made public yet. In bpf_obj_pin_user(), we need to probe for a possible map in bpf_fd_probe_obj() already with a usercnt reference, i.e. before we drop the reference on the fd with fdput(). Therefore, if actual pinning fails, we need to drop that reference again in bpf_any_put(), otherwise we keep holding it. When the last reference drops on the inode, the bpf_any_put() in bpf_evict_inode() will take care of dropping the usercnt again.
In the bpf_obj_get_user() case, the bpf_any_get() will grab a reference on the usercnt, still at a time when we have the reference on the path. Should we later on fail to grab a new file descriptor, bpf_any_put() will drop it, otherwise we hold it until bpf_map_release() time. Joint work with Alexei. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index de464e6683b6..83d1926c61e4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,7 @@ struct bpf_map { struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; + atomic_t usercnt; }; struct bpf_map_type_list { @@ -167,8 +168,10 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); -struct bpf_map *bpf_map_get(u32 ufd); +struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); +void bpf_map_inc(struct bpf_map *map, bool uref); +void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From 7c7a0e945349a3d0d497d7f32db6ed33d4031110 Mon Sep 17 00:00:00 2001 From: Gabriele Paoloni Date: Wed, 11 Nov 2015 09:12:25 +0800 Subject: ARM/PCI: Move align_resource function pointer to pci_host_bridge structure Commit b3a72384fe29 ("ARM/PCI: Replace pci_sys_data->align_resource with global function pointer") introduced an ARM-specific align_resource() function pointer. This is not portable to other arches and doesn't work for platforms with two different PCIe host bridge controllers. Move the function pointer to the pci_host_bridge structure so each host bridge driver can specify its own align_resource() function. 
Signed-off-by: Gabriele Paoloni Signed-off-by: Bjorn Helgaas Reviewed-by: Arnd Bergmann --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index e828e7b4afec..6ae25aae88fd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -412,9 +412,18 @@ struct pci_host_bridge { void (*release_fn)(struct pci_host_bridge *); void *release_data; unsigned int ignore_reset_delay:1; /* for entire hierarchy */ + /* Resource alignment requirements */ + resource_size_t (*align_resource)(struct pci_dev *dev, + const struct resource *res, + resource_size_t start, + resource_size_t size, + resource_size_t align); }; #define to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev) + +struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus); + void pci_set_host_bridge_release(struct pci_host_bridge *bridge, void (*release_fn)(struct pci_host_bridge *), void *release_data); -- cgit v1.2.3 From ca369d51b3e1649be4a72addd6d6a168cfb3f537 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 13 Nov 2015 16:46:48 -0500 Subject: block/sd: Fix device-imposed transfer length limits Commit 4f258a46346c ("sd: Fix maximum I/O size for BLOCK_PC requests") had the unfortunate side-effect of removing an implicit clamp to BLK_DEF_MAX_SECTORS for REQ_TYPE_FS requests in the block layer code. This caused problems for some SMR drives. Debugging this issue revealed a few problems with the existing infrastructure since the block layer didn't know how to deal with device-imposed limits, only limits set by the I/O controller. - Introduce a new queue limit, max_dev_sectors, which is used by the ULD to signal the maximum sectors for a REQ_TYPE_FS request. - Ensure that max_dev_sectors is correctly stacked and taken into account when overriding max_sectors through sysfs. - Rework sd_read_block_limits() so it saves the max_xfer and opt_xfer values for later processing. 
- In sd_revalidate() set the queue's max_dev_sectors based on the MAXIMUM TRANSFER LENGTH value in the Block Limits VPD. If this value is not reported, fall back to a cap based on the CDB TRANSFER LENGTH field size. - In sd_revalidate(), use OPTIMAL TRANSFER LENGTH from the Block Limits VPD--if reported and sane--to signal the preferred device transfer size for FS requests. Otherwise use BLK_DEF_MAX_SECTORS. - blk_limits_max_hw_sectors() is no longer used and can be removed. Signed-off-by: Martin K. Petersen Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=93581 Reviewed-by: Christoph Hellwig Tested-by: sweeneygj@gmx.com Tested-by: Arzeets Tested-by: David Eisner Tested-by: Mario Kicherer Signed-off-by: Martin K. Petersen --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 38a5ff772a37..9dacb745fa96 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -253,6 +253,7 @@ struct queue_limits { unsigned long virt_boundary_mask; unsigned int max_hw_sectors; + unsigned int max_dev_sectors; unsigned int chunk_sectors; unsigned int max_sectors; unsigned int max_segment_size; @@ -948,7 +949,6 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *, extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); -extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); -- cgit v1.2.3 From 057085e522f8bf94c2e691a5b76880f68060f8ba Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 5 Nov 2015 23:37:59 -0800 Subject: target: Fix race for 
SCF_COMPARE_AND_WRITE_POST checking This patch addresses a race + use after free where the first stage of COMPARE_AND_WRITE in compare_and_write_callback() is rescheduled after the backend sends the secondary WRITE, resulting in second stage compare_and_write_post() callback completing in target_complete_ok_work() before the first can return. Because current code depends on checking se_cmd->se_cmd_flags after return from se_cmd->transport_complete_callback(), this results in first stage having SCF_COMPARE_AND_WRITE_POST set, which incorrectly falls through into second stage CAW processing code, eventually triggering a NULL pointer dereference due to use after free. To address this bug, pass in a new *post_ret parameter into se_cmd->transport_complete_callback(), and depend upon this value instead of ->se_cmd_flags to determine when to return or fall through into ->queue_status() code for CAW. Cc: Sagi Grimberg Cc: # v3.12+ Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 0a2c74008e53..aabf0aca0171 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -474,7 +474,7 @@ struct se_cmd { struct completion cmd_wait_comp; const struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); - sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool); + sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *); void *protocol_data; unsigned char *t_task_cdb; -- cgit v1.2.3 From 3a66d7dca186ebdef9b0bf55e216778fa598062c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Oct 2015 16:02:14 -0700 Subject: kref: Remove kref_put_spinlock_irqsave() The last user is gone. Hence remove this function. 
Signed-off-by: Bart Van Assche Cc: Greg Kroah-Hartman Cc: Christoph Hellwig Cc: Joern Engel Signed-off-by: Nicholas Bellinger --- include/linux/kref.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include') diff --git a/include/linux/kref.h b/include/linux/kref.h index 484604d184be..e15828fd71f1 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -19,7 +19,6 @@ #include #include #include -#include struct kref { atomic_t refcount; @@ -99,38 +98,6 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) return kref_sub(kref, 1, release); } -/** - * kref_put_spinlock_irqsave - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the - * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. - * @lock: lock to take in release case - * - * Behaves identical to kref_put with one exception. If the reference count - * drops to zero, the lock will be taken atomically wrt dropping the reference - * count. The release function has to call spin_unlock() without _irqrestore. 
- */ -static inline int kref_put_spinlock_irqsave(struct kref *kref, - void (*release)(struct kref *kref), - spinlock_t *lock) -{ - unsigned long flags; - - WARN_ON(release == NULL); - if (atomic_add_unless(&kref->refcount, -1, 1)) - return 0; - spin_lock_irqsave(lock, flags); - if (atomic_dec_and_test(&kref->refcount)) { - release(kref); - local_irq_restore(flags); - return 1; - } - spin_unlock_irqrestore(lock, flags); - return 0; -} - static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *lock) -- cgit v1.2.3 From 08236c6bb2980561fba657c58fdc76f2865f236c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sat, 28 Nov 2015 16:49:27 +0100 Subject: lightnvm: unconverted ppa returned in get_bb_tbl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get_bb_tbl function takes ppa as a generic address, which is converted to the ppa device address within the device driver. When the update_bbtbl callback is called from get_bb_tbl, the device specific ppa is used, instead of the generic ppa. Make sure to pass the generic ppa. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 3db5552b17d5..c6916aec43b6 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -179,7 +179,7 @@ typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int, +typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, nvm_bb_update_fn *, void *); typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); -- cgit v1.2.3 From bf4e6b4e757488dee1b6a581f49c7ac34cd217f8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 26 Nov 2015 08:46:57 +0100 Subject: block: Always check queue limits for cloned requests When a cloned request is retried on other queues it always needs to be checked against the queue limits of that queue. Otherwise the calculations for nr_phys_segments might be wrong, leading to a crash in scsi_init_sgtable(). To clarify this the patch renames blk_rq_check_limits() to blk_cloned_rq_check_limits() and removes the symbol export, as the new function should only be used for cloned requests and never exported. 
Cc: Mike Snitzer Cc: Ewan Milne Cc: Jeff Moyer Signed-off-by: Hannes Reinecke Fixes: e2a60da74 ("block: Clean up special command handling logic") Cc: stable@vger.kernel.org # 3.7+ Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c0d2b7927c1f..c06f8eaa42ff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -773,7 +773,6 @@ extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); -extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, -- cgit v1.2.3 From 880621c2605b82eb5af91a2c94223df6f5a3fb64 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Nov 2015 17:46:09 +0100 Subject: packet: Allow packets with only a header (but no payload) Commit 9c7077622dd91 ("packet: make packet_snd fail on len smaller than l2 header") added validation for the packet size in packet_snd. This change enforces that every packet needs a header (with at least hard_header_len bytes) plus a payload with at least one byte. Before this change the payload was optional. This fixes PPPoE connections which do not have a "Service" or "Host-Uniq" configured (which is violating the spec, but is still widely used in real-world setups). Those are currently failing with the following message: "pppd: packet size is too short (24 <= 24)" Signed-off-by: Martin Blumenstingl Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bfac1abfc1..3b5d134e945a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1398,7 +1398,8 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length + * @hard_header_len: Hardware header length, which means that this is the + * minimum size of a packet. * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed -- cgit v1.2.3 From 304d888b29cf96f1dd53511ee686499cd8cdf249 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 27 Nov 2015 18:17:05 +0100 Subject: Revert "ipv6: ndisc: inherit metadata dst when creating ndisc requests" This reverts commit ab450605b35caa768ca33e86db9403229bf42be4. In IPv6, we cannot inherit the dst of the original dst. ndisc packets are IPv6 packets and may take another route than the original packet. This patch breaks the following scenario: a packet comes from eth0 and is forwarded through vxlan1. The encapsulated packet triggers an NS which cannot be sent because of the wrong route. CC: Jiri Benc CC: Thomas Graf Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/net/ndisc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index bf3937431030..2d8edaad29cb 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -181,8 +181,7 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - struct sk_buff *oskb); + const struct in6_addr *daddr, const struct in6_addr *saddr); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); -- cgit v1.2.3 From 9cd3e072b0be17446e37d7414eac8a3499e0601e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:10 -0800 Subject: net: rename SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA This patch is a cleanup to make following patch easier to review. Goal is to move SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA from (struct socket)->flags to a (struct socket_wq)->flags to benefit from RCU protection in sock_wake_async() To ease backports, we rename both constants. Two new helpers, sk_set_bit(int nr, struct sock *sk) and sk_clear_bit(int net, struct sock *sk) are added so that following patch can change their implementation. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/net.h | 6 +++--- include/net/sock.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 70ac5e28e6b7..f514e4dd5521 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,8 +34,8 @@ struct inode; struct file; struct net; -#define SOCK_ASYNC_NOSPACE 0 -#define SOCK_ASYNC_WAITDATA 1 +#define SOCKWQ_ASYNC_NOSPACE 0 +#define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 @@ -96,7 +96,7 @@ struct socket_wq { * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) * @type: socket type (%SOCK_STREAM, etc) - * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) + * @flags: socket flags (%SOCK_NOSPACE, etc) * @ops: protocol specific socket operations * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation diff --git a/include/net/sock.h b/include/net/sock.h index 7f89e4ba18d1..c155d09d8af4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2005,6 +2005,16 @@ static inline unsigned long sock_wspace(struct sock *sk) return amt; } +static inline void sk_set_bit(int nr, struct sock *sk) +{ + set_bit(nr, &sk->sk_socket->flags); +} + +static inline void sk_clear_bit(int nr, struct sock *sk) +{ + clear_bit(nr, &sk->sk_socket->flags); +} + static inline void sk_wake_async(struct sock *sk, int how, int band) { if (sock_flag(sk, SOCK_FASYNC)) -- cgit v1.2.3 From ceb5d58b217098a657f3850b7a2640f995032e62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 20:03:11 -0800 Subject: net: fix sock_wake_async() rcu protection Dmitry provided a syzkaller (http://github.com/google/syzkaller) triggering a fault in sock_wake_async() when async IO is requested. Said program stressed af_unix sockets, but the issue is generic and should be addressed in core networking stack. 
The problem is that by the time sock_wake_async() is called, we should not access the @flags field of 'struct socket', as the inode containing this socket might be freed without further notice, and without RCU grace period. We already maintain an RCU protected structure, "struct socket_wq" so moving SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA into it is the safe route. It also reduces number of cache lines needing dirtying, so might provide a performance improvement anyway. In followup patches, we might move remaining flags (SOCK_NOSPACE, SOCK_PASSCRED, SOCK_PASSSEC) to save 8 bytes and let 'struct socket' being mostly read and let it being shared between cpus. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/net.h | 7 ++++++- include/net/sock.h | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index f514e4dd5521..0b4ac7da583a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,6 +34,10 @@ struct inode; struct file; struct net; +/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located + * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. + * Eventually all flags will be in sk->sk_wq_flags. 
+ */ #define SOCKWQ_ASYNC_NOSPACE 0 #define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 @@ -89,6 +93,7 @@ struct socket_wq { /* Note: wait MUST be first field of socket_wq */ wait_queue_head_t wait; struct fasync_struct *fasync_list; + unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */ struct rcu_head rcu; } ____cacheline_aligned_in_smp; @@ -202,7 +207,7 @@ enum { SOCK_WAKE_URG, }; -int sock_wake_async(struct socket *sk, int how, int band); +int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, diff --git a/include/net/sock.h b/include/net/sock.h index c155d09d8af4..0434138c5f95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -384,8 +384,10 @@ struct sock { int sk_rcvbuf; struct sk_filter __rcu *sk_filter; - struct socket_wq __rcu *sk_wq; - + union { + struct socket_wq __rcu *sk_wq; + struct socket_wq *sk_wq_raw; + }; #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; #endif @@ -2005,20 +2007,27 @@ static inline unsigned long sock_wspace(struct sock *sk) return amt; } +/* Note: + * We use sk->sk_wq_raw, from contexts knowing this + * pointer is not NULL and cannot disappear/change. 
+ */ static inline void sk_set_bit(int nr, struct sock *sk) { - set_bit(nr, &sk->sk_socket->flags); + set_bit(nr, &sk->sk_wq_raw->flags); } static inline void sk_clear_bit(int nr, struct sock *sk) { - clear_bit(nr, &sk->sk_socket->flags); + clear_bit(nr, &sk->sk_wq_raw->flags); } -static inline void sk_wake_async(struct sock *sk, int how, int band) +static inline void sk_wake_async(const struct sock *sk, int how, int band) { - if (sock_flag(sk, SOCK_FASYNC)) - sock_wake_async(sk->sk_socket, how, band); + if (sock_flag(sk, SOCK_FASYNC)) { + rcu_read_lock(); + sock_wake_async(rcu_dereference(sk->sk_wq), how, band); + rcu_read_unlock(); + } } /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might -- cgit v1.2.3 From 64031e3e8a5c042840c5123af695eec89f9e6a24 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Wed, 2 Dec 2015 15:44:22 +0800 Subject: ACPI / property: fix compile error for acpi_node_get_property_reference() when CONFIG_ACPI=n In commit 60ba032ed76e ("ACPI / property: Drop size_prop from acpi_dev_get_property_reference()"), the argument "const char *cells_name" was dropped, but the stub function for the no-ACPI case was not updated, which leads to a compile error when CONFIG_ACPI=n. Simply remove "const char *cells_name" from the stub to fix it. Fixes: 60ba032ed76e "ACPI / property: Drop size_prop from acpi_dev_get_property_reference()" Reported-by: Kejian Yan Signed-off-by: Hanjun Guo Acked-by: Mika Westerberg Signed-off-by: Rafael J.
Wysocki --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 865d948c60e6..9e6f4bb4692f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -782,8 +782,8 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, } static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, - const char *name, const char *cells_name, - size_t index, struct acpi_reference_args *args) + const char *name, size_t index, + struct acpi_reference_args *args) { return -ENXIO; } -- cgit v1.2.3 From 69030dd1c3671625c6f766af0b64a4bb4409ac3b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 1 Dec 2015 16:52:14 -0800 Subject: cpufreq: use last policy after online for drivers with ->setpolicy For cpufreq drivers which use setpolicy interface, after offline->online the policy is set to default. This can be reproduced by setting the default policy of intel_pstate or longrun to ondemand and then change to "performance". After offline and online, the setpolicy will be called with the policy=ondemand. For drivers using governors this condition is handled by storing last_governor, during offline and restoring during online. The same should be done for drivers using setpolicy interface. Storing last_policy during offline and restoring during online. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ef4c5b1a860f..177c7680c1a8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -77,6 +77,7 @@ struct cpufreq_policy { unsigned int suspend_freq; /* freq to set during suspend */ unsigned int policy; /* see above */ + unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; bool governor_enabled; /* governor start/stop flag */ -- cgit v1.2.3 From 45f6fad84cc305103b28d73482b344d7f5b76f39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 19:37:57 -0800 Subject: ipv6: add complete rcu protection around np->opt This patch addresses multiple problems : UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions while socket is not locked : Other threads can change np->opt concurrently. Dmitry posted a syzkaller (http://github.com/google/syzkaller) program demonstrating use-after-free. Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock() and dccp_v6_request_recv_sock() also need to use RCU protection to dereference np->opt once (before calling ipv6_dup_options()) This patch adds full RCU protection to np->opt Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 2 +- include/net/ipv6.h | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0ef2a97ccdb5..402753bccafa 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -227,7 +227,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ea5a13ef85a6..9a5c9f013784 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -217,7 +218,7 @@ struct ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. 
*/ }; @@ -252,6 +253,24 @@ struct ipv6_fl_socklist { struct rcu_head rcu; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, -- cgit v1.2.3 From 38ee8fb67c3457f36f5137073c4b8ac2436d2393 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 30 Nov 2015 12:17:06 -0200 Subject: sctp: convert sack_needed and sack_generation to bits They don't need to be any bigger than that and with this we start a new bitfield for tracking association runtime stuff, like zero window situation. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 495c87e367b3..7bbb71081aeb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -775,10 +775,10 @@ struct sctp_transport { hb_sent:1, /* Is the Path MTU update pending on this tranport */ - pmtu_pending:1; + pmtu_pending:1, - /* Has this transport moved the ctsn since we last sacked */ - __u32 sack_generation; + /* Has this transport moved the ctsn since we last sacked */ + sack_generation:1; u32 dst_cookie; struct flowi fl; @@ -1482,19 +1482,19 @@ struct sctp_association { prsctp_capable:1, /* Can peer do PR-SCTP? */ auth_capable:1; /* Is peer doing SCTP-AUTH? 
*/ - /* Ack State : This flag indicates if the next received + /* sack_needed : This flag indicates if the next received * : packet is to be responded to with a - * : SACK. This is initializedto 0. When a packet - * : is received it is incremented. If this value + * : SACK. This is initialized to 0. When a packet + * : is received sack_cnt is incremented. If this value * : reaches 2 or more, a SACK is sent and the * : value is reset to 0. Note: This is used only * : when no DATA chunks are received out of * : order. When DATA chunks are out of order, * : SACK's are not delayed (see Section 6). */ - __u8 sack_needed; /* Do we need to sack the peer? */ + __u8 sack_needed:1, /* Do we need to sack the peer? */ + sack_generation:1; __u32 sack_cnt; - __u32 sack_generation; __u32 adaptation_ind; /* Adaptation Code point. */ -- cgit v1.2.3 From 1f7dd3e5a6e4f093017fff12232572ee1aa4639b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Dec 2015 10:18:21 -0500 Subject: cgroup: fix handling of multi-destination migration from subtree_control enabling Consider the following v2 hierarchy. P0 (+memory) --- P1 (-memory) --- A \- B P0 has memory enabled in its subtree_control while P1 doesn't. If both A and B contain processes, they would belong to the memory css of P1. Now if memory is enabled on P1's subtree_control, memory csses should be created on both A and B and A's processes should be moved to the former and B's processes the latter. IOW, enabling controllers can cause atomic migrations into different csses. The core cgroup migration logic has been updated accordingly but the controller migration methods haven't and still assume that all tasks migrate to a single target css; furthermore, the methods were fed the css in which subtree_control was updated which is the parent of the target csses. 
pids controller depends on the migration methods to move charges and this made the controller attribute charges to the wrong csses often triggering the following warning by driving a counter negative. WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40() Modules linked in: CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29 ... ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000 ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00 ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8 Call Trace: [] dump_stack+0x4e/0x82 [] warn_slowpath_common+0x82/0xc0 [] warn_slowpath_null+0x1a/0x20 [] pids_cancel.constprop.6+0x31/0x40 [] pids_can_attach+0x6d/0xf0 [] cgroup_taskset_migrate+0x6c/0x330 [] cgroup_migrate+0xf5/0x190 [] cgroup_attach_task+0x176/0x200 [] __cgroup_procs_write+0x2ad/0x460 [] cgroup_procs_write+0x14/0x20 [] cgroup_file_write+0x35/0x1c0 [] kernfs_fop_write+0x141/0x190 [] __vfs_write+0x28/0xe0 [] vfs_write+0xac/0x1a0 [] SyS_write+0x49/0xb0 [] entry_SYSCALL_64_fastpath+0x12/0x76 This patch fixes the bug by removing @css parameter from the three migration methods, ->can_attach, ->cancel_attach() and ->attach() and updating cgroup_taskset iteration helpers to also return the destination css in addition to the task being migrated. All controllers are updated accordingly. * Controllers which don't care whether there are one or multiple target csses can be converted trivially. cpu, io, freezer, perf, netclassid and netprio fall in this category. * cpuset's current implementation assumes that there's single source and destination and thus doesn't support v2 hierarchy already. The only change made by this patchset is how that single destination css is obtained. * memory migration path already doesn't do anything on v2. How the single destination css is obtained is updated and the prep stage of mem_cgroup_can_attach() is reordered to accommodate the change.
* pids is the only controller which was affected by this bug. It now correctly handles multi-destination migrations and no longer causes counter underflow from incorrect accounting. Signed-off-by: Tejun Heo Reported-and-tested-by: Daniel Wagner Cc: Aleksa Sarai --- include/linux/cgroup-defs.h | 9 +++------ include/linux/cgroup.h | 33 ++++++++++++++++++++++----------- 2 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 869fd4a3d28e..06b77f9dd3f2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -422,12 +422,9 @@ struct cgroup_subsys { void (*css_reset)(struct cgroup_subsys_state *css); void (*css_e_css_changed)(struct cgroup_subsys_state *css); - int (*can_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); + int (*can_attach)(struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_taskset *tset); + void (*attach)(struct cgroup_taskset *tset); int (*can_fork)(struct task_struct *task, void **priv_p); void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f64083030ad5..cb91b44f5f78 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -120,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); -struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); -struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); +struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, + struct cgroup_subsys_state **dst_cssp); +struct 
task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, + struct cgroup_subsys_state **dst_cssp); void css_task_iter_start(struct cgroup_subsys_state *css, struct css_task_iter *it); @@ -236,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it); /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor + * @dst_css: the destination css * @tset: taskset to iterate * * @tset may contain multiple tasks and they may belong to multiple - * processes. When there are multiple tasks in @tset, if a task of a - * process is in @tset, all tasks of the process are in @tset. Also, all - * are guaranteed to share the same source and destination csses. + * processes. + * + * On the v2 hierarchy, there may be tasks from multiple processes and they + * may not share the source or destination csses. + * + * On traditional hierarchies, when there are multiple tasks in @tset, if a + * task of a process is in @tset, all tasks of the process are in @tset. + * Also, all are guaranteed to share the same source and destination csses. * * Iteration is not in any specific order. */ -#define cgroup_taskset_for_each(task, tset) \ - for ((task) = cgroup_taskset_first((tset)); (task); \ - (task) = cgroup_taskset_next((tset))) +#define cgroup_taskset_for_each(task, dst_css, tset) \ + for ((task) = cgroup_taskset_first((tset), &(dst_css)); \ + (task); \ + (task) = cgroup_taskset_next((tset), &(dst_css))) /** * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset * @leader: the loop cursor + * @dst_css: the destination css * @tset: takset to iterate * * Iterate threadgroup leaders of @tset. For single-task migrations, @tset * may not contain any. 
*/ -#define cgroup_taskset_for_each_leader(leader, tset) \ - for ((leader) = cgroup_taskset_first((tset)); (leader); \ - (leader) = cgroup_taskset_next((tset))) \ +#define cgroup_taskset_for_each_leader(leader, dst_css, tset) \ + for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \ + (leader); \ + (leader) = cgroup_taskset_next((tset), &(dst_css))) \ if ((leader) != (leader)->group_leader) \ ; \ else -- cgit v1.2.3 From 6bd4f355df2eae80b8a5c7b097371cd1e05f20d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Dec 2015 21:53:57 -0800 Subject: ipv6: kill sk_dst_lock While testing the np->opt RCU conversion, I found that UDP/IPv6 was using a mixture of xchg() and sk_dst_lock to protect concurrent changes to sk->sk_dst_cache, leading to possible corruptions and crashes. ip6_sk_dst_lookup_flow() uses sk_dst_check() anyway, so the simplest way to fix the mess is to remove sk_dst_lock completely, as we did for IPv4. __ip6_dst_store() and ip6_dst_store() share same implementation. sk_setup_caps() being called with socket lock being held or not, we have to use sk_dst_set() instead of __sk_dst_set() Note that I had to move the "np->dst_cookie = rt6_get_cookie(rt);" in ip6_dst_store() before the sk_setup_caps(sk, dst) call. This is because ip6_dst_store() can be called from process context, without any lock held. As soon as the dst is installed in sk->sk_dst_cache, dst can be freed from another cpu doing a concurrent ip6_dst_store() Doing the dst dereference before doing the install is needed to make sure no use after free would trigger. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. 
Miller --- include/net/ip6_route.h | 17 ++++------------- include/net/sock.h | 3 +-- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2bfb2ad2fab1..877f682989b8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -133,27 +133,18 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); /* * Store a destination cache entry in a socket */ -static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, - const struct in6_addr *daddr, - const struct in6_addr *saddr) +static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct rt6_info *rt = (struct rt6_info *) dst; + np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif - np->dst_cookie = rt6_get_cookie(rt); -} - -static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) -{ - spin_lock(&sk->sk_dst_lock); - __ip6_dst_store(sk, dst, daddr, saddr); - spin_unlock(&sk->sk_dst_lock); } static inline bool ipv6_unicast_destination(const struct sk_buff *skb) diff --git a/include/net/sock.h b/include/net/sock.h index 0434138c5f95..52d27ee924f4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -254,7 +254,6 @@ struct cg_proto; * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_dst_cache: destination cache - * @sk_dst_lock: destination cache lock * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed @@ -393,7 +392,7 @@ struct sock { #endif struct dst_entry *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; - spinlock_t sk_dst_lock; + /* Note: 32bit hole on 64bit arches */ atomic_t 
sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; -- cgit v1.2.3 From 4eaf3b84f2881c9c028f1d5e76c52ab575fe3a66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2015 20:08:51 -0800 Subject: net_sched: fix qdisc_tree_decrease_qlen() races qdisc_tree_decrease_qlen() suffers from two problems on multiqueue devices. One problem is that it updates sch->q.qlen and sch->qstats.drops on the mq/mqprio root qdisc, while it should not : Daniele reported underflows errors : [ 681.774821] PAX: sch->q.qlen: 0 n: 1 [ 681.774825] PAX: size overflow detected in function qdisc_tree_decrease_qlen net/sched/sch_api.c:769 cicus.693_49 min, count: 72, decl: qlen; num: 0; context: sk_buff_head; [ 681.774954] CPU: 2 PID: 19 Comm: ksoftirqd/2 Tainted: G O 4.2.6.201511282239-1-grsec #1 [ 681.774955] Hardware name: ASUSTeK COMPUTER INC. X302LJ/X302LJ, BIOS X302LJ.202 03/05/2015 [ 681.774956] ffffffffa9a04863 0000000000000000 0000000000000000 ffffffffa990ff7c [ 681.774959] ffffc90000d3bc38 ffffffffa95d2810 0000000000000007 ffffffffa991002b [ 681.774960] ffffc90000d3bc68 ffffffffa91a44f4 0000000000000001 0000000000000001 [ 681.774962] Call Trace: [ 681.774967] [] dump_stack+0x4c/0x7f [ 681.774970] [] report_size_overflow+0x34/0x50 [ 681.774972] [] qdisc_tree_decrease_qlen+0x152/0x160 [ 681.774976] [] fq_codel_dequeue+0x7b1/0x820 [sch_fq_codel] [ 681.774978] [] ? qdisc_peek_dequeued+0xa0/0xa0 [sch_fq_codel] [ 681.774980] [] __qdisc_run+0x4d/0x1d0 [ 681.774983] [] net_tx_action+0xc2/0x160 [ 681.774985] [] __do_softirq+0xf1/0x200 [ 681.774987] [] run_ksoftirqd+0x1e/0x30 [ 681.774989] [] smpboot_thread_fn+0x150/0x260 [ 681.774991] [] ? sort_range+0x40/0x40 [ 681.774992] [] kthread+0xe4/0x100 [ 681.774994] [] ? kthread_worker_fn+0x170/0x170 [ 681.774995] [] ret_from_fork+0x3e/0x70 mq/mqprio have their own ways to report qlen/drops by folding stats on all their queues, with appropriate locking. 
A second problem is that qdisc_tree_decrease_qlen() calls qdisc_lookup() without proper locking : concurrent qdisc updates could corrupt the list that qdisc_match_from_root() parses to find a qdisc given its handle. Fix first problem adding a TCQ_F_NOPARENT qdisc flag that qdisc_tree_decrease_qlen() can use to abort its tree traversal, as soon as it meets a mq/mqprio qdisc children. Second problem can be fixed by RCU protection. Qdisc are already freed after RCU grace period, so qdisc_list_add() and qdisc_list_del() simply have to use appropriate rcu list variants. A future patch will add a per struct netdev_queue list anchor, so that qdisc_tree_decrease_qlen() can have more efficient lookups. Reported-by: Daniele Fucini Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4c79ce8c1f92..b2a8e6338576 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,6 +61,9 @@ struct Qdisc { */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ +#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : + * qdisc_tree_decrease_qlen() should stop. + */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; -- cgit v1.2.3 From a0af2e538c80f3e47f1d6ddf120a153ad909e8ad Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 2 Dec 2015 09:24:46 -0800 Subject: drm: Fix an unwanted master inheritance v2 A client calling drmSetMaster() using a file descriptor that was opened when another client was master would inherit the latter client's master object and all its authenticated clients. This is unwanted behaviour, and when this happens, instead allocate a brand new master object for the client calling drmSetMaster(). Fixes a BUG() throw in vmw_master_set(). 
Cc: Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- include/drm/drmP.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 0b921ae06cd8..441b26e846d8 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -309,6 +309,11 @@ struct drm_file { unsigned universal_planes:1; /* true if client understands atomic properties */ unsigned atomic:1; + /* + * This client is allowed to gain master privileges for @master. + * Protected by struct drm_device::master_mutex. + */ + unsigned allowed_master:1; struct pid *pid; kuid_t uid; @@ -910,6 +915,7 @@ extern int drm_open(struct inode *inode, struct file *filp); extern ssize_t drm_read(struct file *filp, char __user *buffer, size_t count, loff_t *offset); extern int drm_release(struct inode *inode, struct file *filp); +extern int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv); /* Mapping support (drm_vm.h) */ extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); -- cgit v1.2.3 From bbc8764f80eb872d2b36302882ddfc9882de4b16 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 10 Nov 2015 17:37:31 +0100 Subject: drm/nouveau: Fix pre-nv50 pageflip events (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently pre-nv50 pageflip events happen before the actual vblank period. Therefore that functionality got semi-disabled in commit af4870e406126b7ac0ae7c7ce5751f25ebe60f28 Author: Mario Kleiner Date: Tue May 13 00:42:08 2014 +0200 drm/nouveau/kms/nv04-nv40: fix pageflip events via special case. Unfortunately that hack got uprooted in commit cc1ef118fc099295ae6aabbacc8af94d8d8885eb Author: Thierry Reding Date: Wed Aug 12 17:00:31 2015 +0200 drm/irq: Make pipe unsigned and name consistent Triggering a warning when trying to sample the vblank timestamp for a non-existing pipe. 
There's a few ways to fix this: - Open-code the old behaviour, which just enshrines this slight breakage of the userspace ABI. - Revert Mario's commit and again inflict broken timestamps, again not pretty. - Fix this for real by delaying the pageflip TS until the next vblank interrupt, thereby making it accurate. This patch implements the third option. Since having a page flip interrupt that happens when the pageflip gets armed and not when it completes in the next vblank seems to be fairly common (older i915 hw works very similarly) create a new helper to arm vblank events for such drivers. v2 (Mario Kleiner): - Fix function prototypes in drmP.h - Add missing vblank_put() for pageflip completion without pageflip event. - Initialize sequence number for queued pageflip event to avoid trouble in drm_handle_vblank_events(). - Remove dead code and spelling fix. v3 (Mario Kleiner): - Add a signed-off-by and cc stable tag per Ilja's advice. v4 (Thierry Reding): - Fix kerneldoc typo, discovered by Michel Dänzer - Rearrange tags and changelog Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=106431 Cc: Thierry Reding Cc: Mario Kleiner Acked-by: Ben Skeggs Cc: Ilia Mirkin Signed-off-by: Daniel Vetter Reviewed-by: Mario Kleiner Cc: stable@vger.kernel.org # v4.3 Signed-off-by: Mario Kleiner Signed-off-by: Thierry Reding Signed-off-by: Dave Airlie --- include/drm/drmP.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 441b26e846d8..0a271ca1f7c7 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -953,6 +953,10 @@ extern void drm_send_vblank_event(struct drm_device *dev, unsigned int pipe, struct drm_pending_vblank_event *e); extern void drm_crtc_send_vblank_event(struct drm_crtc *crtc, struct drm_pending_vblank_event *e); +extern void drm_arm_vblank_event(struct drm_device *dev, unsigned int pipe, + struct drm_pending_vblank_event *e); +extern void drm_crtc_arm_vblank_event(struct drm_crtc 
*crtc, + struct drm_pending_vblank_event *e); extern bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe); extern bool drm_crtc_handle_vblank(struct drm_crtc *crtc); extern int drm_vblank_get(struct drm_device *dev, unsigned int pipe); -- cgit v1.2.3 From ae5515d66362b9d96cdcfce504567f0b8b7bd83e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 4 Dec 2015 08:38:42 -0700 Subject: Revert: "vfio: Include No-IOMMU mode" Revert commit 033291eccbdb ("vfio: Include No-IOMMU mode") due to lack of a user. This was originally intended to fill a need for the DPDK driver, but uptake has been slow so rather than support an unproven kernel interface revert it and revisit when userspace catches up. Signed-off-by: Alex Williamson --- include/linux/vfio.h | 3 --- include/uapi/linux/vfio.h | 7 ------- 2 files changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 610a86a892b8..ddb440975382 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,9 +44,6 @@ struct vfio_device_ops { void (*request)(void *device_data, unsigned int count); }; -extern struct iommu_group *vfio_iommu_group_get(struct device *dev); -extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); - extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 751b69f858c8..9fd7b5d8df2f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -38,13 +38,6 @@ #define VFIO_SPAPR_TCE_v2_IOMMU 7 -/* - * The No-IOMMU IOMMU offers no translation or isolation for devices and - * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU - * code will taint the host kernel and should be used with extreme caution. 
- */ -#define VFIO_NOIOMMU_IOMMU 8 - /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between -- cgit v1.2.3 From 7c23b7c1996597dd9d60bb282fb5fa1be6ebd18b Mon Sep 17 00:00:00 2001 From: "Lu, Han" Date: Mon, 7 Dec 2015 15:59:13 +0800 Subject: ALSA: hda - Fix playback noise with 24/32 bit sample size on BXT In BXT-P A0, HD-Audio DMA requests are issued later than expected, which makes an audio stream sensitive to system latencies when 24/32 bits are playing. Adjusting threshold of DMA fifo to force the DMA request sooner to improve latency tolerance at the expense of power. v2: move Intel specific code to hda_intel.c Signed-off-by: Lu, Han Signed-off-by: Takashi Iwai --- include/sound/hda_register.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h index 2ae8812d7b1a..94dc6a9772e0 100644 --- a/include/sound/hda_register.h +++ b/include/sound/hda_register.h @@ -93,6 +93,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 }; #define AZX_REG_HSW_EM4 0x100c #define AZX_REG_HSW_EM5 0x1010 +/* Skylake/Broxton display HD-A controller Extended Mode registers */ +#define AZX_REG_SKL_EM4L 0x1040 + /* PCI space */ #define AZX_PCIREG_TCSEL 0x44 -- cgit v1.2.3 From ea013a9b205b47b1fcbc72522146fad560af0712 Mon Sep 17 00:00:00 2001 From: Andreas Werner Date: Fri, 4 Dec 2015 18:12:49 +0100 Subject: libata-eh.c: Introduce new ata port flag for controller which lockup on read log page Some controllers lock up on an ata_read_log_page. Add new ata port flag ATA_FLAG_NO_LOG_PAGE which can be used to blacklist a controller. If this flag is set, any attempt to read a log page returns an error without actually issuing the command.
Signed-off-by: Andreas Werner Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 83577f8fd15b..600c1e0626a5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -210,6 +210,7 @@ enum { ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ /* (doesn't imply presence) */ ATA_FLAG_SATA = (1 << 1), + ATA_FLAG_NO_LOG_PAGE = (1 << 5), /* do not issue log page read */ ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */ ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */ ATA_FLAG_PIO_LBA48 = (1 << 8), /* Host DMA engine is LBA28 only */ -- cgit v1.2.3 From 57b4bd06ff0372fe1e3617889c4b37fbd500364a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sun, 6 Dec 2015 11:25:47 +0100 Subject: lightnvm: comments on constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not obvious what NVM_IO_* and NVM_BLK_T_* are used for. Make sure to comment them appropriately as the other constants. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index c6916aec43b6..935ef3844c05 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -50,9 +50,16 @@ enum { NVM_IO_DUAL_ACCESS = 0x1, NVM_IO_QUAD_ACCESS = 0x2, + /* NAND Access Modes */ NVM_IO_SUSPEND = 0x80, NVM_IO_SLC_MODE = 0x100, NVM_IO_SCRAMBLE_DISABLE = 0x200, + + /* Block Types */ + NVM_BLK_T_FREE = 0x0, + NVM_BLK_T_BAD = 0x1, + NVM_BLK_T_DEV = 0x2, + NVM_BLK_T_HOST = 0x4, }; struct nvm_id_group { -- cgit v1.2.3 From 16f26c3aa9b9c36a9d1092ae3258461d1008481e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Sun, 6 Dec 2015 11:25:48 +0100 Subject: lightnvm: replace req queue with nvmdev for lld MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case where a request queue is passed to the low level lightnvm device driver integration, the device driver might pass its admin commands through another queue. Instead pass nvm_dev, and let the low level driver use the appropriate queue.
Reported-by: Christoph Hellwig Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 935ef3844c05..034117b3be5f 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -183,17 +183,17 @@ struct nvm_block; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); -typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); -typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, +typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); +typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, void *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, nvm_bb_update_fn *, void *); -typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); -typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); -typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *); -typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *); +typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); +typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); +typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); typedef void (nvm_destroy_dma_pool_fn)(void *); -typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t, +typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, dma_addr_t *); typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); -- cgit v1.2.3 From d144da8c6f51f48ec39d891ea9dff80169c45f3b Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 2 Nov 2015 12:13:25 -0500 Subject: IB/core: use RCU for uverbs id lookup The current 
implementation gets a spin_lock, and at any scale with qib and hfi1 post send, the lock contention grows exponentially with the number of QPs. idr_find() is RCU compatible, so read doesn't need the lock. Change to use rcu_read_lock() and rcu_read_unlock() in __idr_get_uobj(). kfree_rcu() is used to ensure a grace period between the idr removal and actual free. Reviewed-by: Ira Weiny Signed-off-by: Mike Marciniszyn Reviewed-By: Jason Gunthorpe Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9a68a19532ba..120da1d7f57e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1271,6 +1271,7 @@ struct ib_uobject { int id; /* index into kernel idr */ struct kref ref; struct rw_semaphore mutex; /* protects .live */ + struct rcu_head rcu; /* kfree_rcu() overhead */ int live; }; -- cgit v1.2.3 From 533708867dd6388f643f12c87465b59e732d729d Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Fri, 13 Nov 2015 15:22:22 -0500 Subject: IB/mad: Require CM send method for everything except ClassPortInfo Receipt of CM MAD with other than the Send method for an attribute other than the ClassPortInfo attribute is invalid. CM attributes other than ClassPortInfo only use the send method. The SRP initiator does not maintain a timeout policy for CM connect requests and relies on the CM layer to do that. The result was that the SRP initiator hung as the connect request never completed. A new SRP target has been observed to respond to Send CM REQ with GetResp of CM REQ with bad status. This is non conformant with IBA spec but exposes a vulnerability in the current MAD/CM code which will respond to the incoming GetResp of CM REQ as if it was a valid incoming Send of CM REQ rather than tossing this on the floor. It also causes the MAD layer not to retransmit the original REQ even though it has not received a REP.
Reviewed-by: Sagi Grimberg Signed-off-by: Hal Rosenstock Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/ib_mad.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 188df91d5851..ec9b44dd3d80 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -237,6 +237,8 @@ struct ib_vendor_mad { u8 data[IB_MGMT_VENDOR_DATA]; }; +#define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001) + struct ib_class_port_info { u8 base_version; u8 class_version; -- cgit v1.2.3 From a5e14ba334e202c58e45ef47414ec94c585c1a8c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 28 Oct 2015 13:28:15 +0200 Subject: mlx4: Expose correct max_sge_rd limit mlx4 devices (ConnectX-2, ConnectX-3) have a limitation where rdma read work queue entries cannot exceed 512 bytes. An rdma_read wqe needs to fit in 512 bytes: - wqe control segment (16 bytes) - rdma segment (16 bytes) - scatter elements (16 bytes each) So max_sge_rd should be: (512 - 16 - 16) / 16 = 30.
Signed-off-by: Sagi Grimberg Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7501626ab529..d3133be12d92 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -426,6 +426,17 @@ enum { MLX4_MAX_FAST_REG_PAGES = 511, }; +enum { + /* + * Max wqe size for rdma read is 512 bytes, so this + * limits our max_sge_rd as the wqe needs to fit: + * - ctrl segment (16 bytes) + * - rdma segment (16 bytes) + * - scatter elements (16 bytes each) + */ + MLX4_MAX_SGE_RD = (512 - 16 - 16) / 16 +}; + enum { MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, -- cgit v1.2.3 From 4c3141e09cfa6460bfcd5e90f73e498db654c917 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Tue, 1 Dec 2015 17:24:17 +0100 Subject: of/irq: Export of_irq_find_parent again of_irq_find_parent was made static since it had no users outside of of_irq.c. Export it again since we are going to use it again. 
Signed-off-by: Carlo Caione [robh: move of_irq_find_parent to correct ifdef section] Signed-off-by: Rob Herring --- include/linux/of_irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 039f2eec49ce..f648acf27ed7 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -46,6 +46,7 @@ extern int of_irq_get(struct device_node *dev, int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern struct device_node *of_irq_find_parent(struct device_node *child); extern struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token); @@ -70,6 +71,11 @@ static inline int of_irq_to_resource_table(struct device_node *dev, { return 0; } +static inline void *of_irq_find_parent(struct device_node *child) +{ + return NULL; +} + static inline struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token) -- cgit v1.2.3 From eaddb5725357e9f05ffe5d271630f8197d089da4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 9 Dec 2015 09:11:10 -0600 Subject: of/irq: move of_msi_map_rid declaration to the correct ifdef section In checking fixes for of_irq_find_parent declaration location, I found that of_msi_map_rid is also wrong. of_msi_map_rid is not implemented for Sparc, so it should not be in the Sparc specific section of the header. Move it to just depend on OF_IRQ. 
Cc: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of_irq.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index f648acf27ed7..1e0deb8e8494 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -53,6 +53,7 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev, extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid); extern void of_msi_configure(struct device *dev, struct device_node *np); +u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else static inline int of_irq_count(struct device_node *dev) { @@ -90,6 +91,11 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev static inline void of_msi_configure(struct device *dev, struct device_node *np) { } +static inline u32 of_msi_map_rid(struct device *dev, + struct device_node *msi_np, u32 rid_in) +{ + return rid_in; +} #endif #if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC) @@ -99,7 +105,6 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) * so declare it here regardless of the CONFIG_OF_IRQ setting. 
*/ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); -u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else /* !CONFIG_OF && !CONFIG_SPARC */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, @@ -107,12 +112,6 @@ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, { return 0; } - -static inline u32 of_msi_map_rid(struct device *dev, - struct device_node *msi_np, u32 rid_in) -{ - return rid_in; -} #endif /* !CONFIG_OF */ #endif /* __OF_IRQ_H */ -- cgit v1.2.3 From d7e35dfa2531b53618b9e6edcd8752ce988ac555 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 3 Dec 2015 22:04:01 -0500 Subject: bitops.h: correctly handle rol32 with 0 byte shift ROL on a 32 bit integer with a shift of 32 or more is undefined and the result is arch-dependent. Avoid this by handling the trivial case of rotating by 0 correctly. The trivial solution of checking if shift is 0 breaks gcc's detection of this code as a ROL instruction, which is unacceptable. This bug was reported and fixed in GCC (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57157): The standard rotate idiom, (x << n) | (x >> (32 - n)) is recognized by gcc (for concreteness, I discuss only the case that x is an uint32_t here). However, this is portable C only for n in the range 0 < n < 32. For n == 0, we get x >> 32 which gives undefined behaviour according to the C standard (6.5.7, Bitwise shift operators). To portably support n == 0, one has to write the rotate as something like (x << n) | (x >> ((-n) & 31)) And this is apparently not recognized by gcc. Note that this is broken on older GCCs and will result in slower ROL.
Acked-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2b8ed123ad36..defeaac0745f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -107,7 +107,7 @@ static inline __u64 ror64(__u64 word, unsigned int shift) */ static inline __u32 rol32(__u32 word, unsigned int shift) { - return (word << shift) | (word >> (32 - shift)); + return (word << shift) | (word >> ((-shift) & 31)); } /** -- cgit v1.2.3 From 059393c5bdd1420bdf1bed2972f33196dff263ae Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 7 Dec 2015 10:11:11 +0000 Subject: irqchip/gic-v3: Add missing struct device_node declaration When the GICv3 header file is used in a C file that doesn't include any of the OF stuff, we end up with a bunch of ugly warnings. Let's keep GCC quiet by adding a forward declaration. Signed-off-by: Marc Zyngier Cc: Cc: Jason Cooper Link: http://lkml.kernel.org/r/1449483072-17694-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c9ae0c6ec050..d5d798b35c1f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -330,6 +330,7 @@ struct rdists { }; struct irq_domain; +struct device_node; int its_cpu_init(void); int its_init(struct device_node *node, struct rdists *rdists, struct irq_domain *domain); -- cgit v1.2.3 From ad87e03213b552a5c33d5e1e7a19a73768397010 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 10 Dec 2015 15:27:21 -0500 Subject: USB: add quirk for devices with broken LPM Some USB device / host controller combinations seem to have problems with Link Power Management. 
For example, Steinar found that his xHCI controller wouldn't handle bandwidth calculations correctly for two video cards simultaneously when LPM was enabled, even though the bus had plenty of bandwidth available. This patch introduces a new quirk flag for devices that should remain disabled for LPM, and creates quirk entries for Steinar's devices. Signed-off-by: Alan Stern Reported-by: Steinar H. Gunderson Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 9948c874e3f1..1d0043dc34e4 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -47,4 +47,7 @@ /* device generates spurious wakeup, ignore remote wakeup capability */ #define USB_QUIRK_IGNORE_REMOTE_WAKEUP BIT(9) +/* device can't handle Link Power Management */ +#define USB_QUIRK_NO_LPM BIT(10) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From 98e89cf02aed11166698dd53c6f14865613babb3 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 11 Dec 2015 13:40:43 -0800 Subject: mm: kmemleak: mark kmemleak_init prototype as __init The kmemleak_init() definition in mm/kmemleak.c is marked __init but its prototype in include/linux/kmemleak.h is marked __ref since commit a6186d89c913 ("kmemleak: Mark the early log buffer as __initdata"). This causes a section mismatch which is reported as a warning when building with clang -Wsection, because kmemleak_init() is declared in section .ref.text but defined in .init.text. Fix this by marking kmemleak_init() prototype __init. 
Signed-off-by: Nicolas Iooss Signed-off-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemleak.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index d0a1f99e24e3..4894c6888bc6 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -25,7 +25,7 @@ #ifdef CONFIG_DEBUG_KMEMLEAK -extern void kmemleak_init(void) __ref; +extern void kmemleak_init(void) __init; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, -- cgit v1.2.3 From 86fffe4a61dd972d5a4e23260d530be6da02f614 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 Dec 2015 13:40:46 -0800 Subject: kernel: remove stop_machine() Kconfig dependency Currently the full stop_machine() routine is only enabled on SMP if module unloading is enabled, or if the CPUs are hotpluggable. This leads to configurations where stop_machine() is broken as it will then only run the callback on the local CPU with irqs disabled, and not stop the other CPUs or run the callback on them. For example, this breaks MTRR setup on x86 in certain configs since ea8596bb2d8d379 ("kprobes/x86: Remove unused text_poke_smp() and text_poke_smp_batch() functions") as the MTRR is only established on the boot CPU. This patch removes the Kconfig option for STOP_MACHINE and uses the SMP and HOTPLUG_CPU config options to compile the correct stop_machine() for the architecture, removing the false dependency on MODULE_UNLOAD in the process. Link: https://lkml.org/lkml/2014/10/8/124 References: https://bugs.freedesktop.org/show_bug.cgi?id=84794 Signed-off-by: Chris Wilson Acked-by: Ingo Molnar Cc: "Paul E. McKenney" Cc: Pranith Kumar Cc: Michal Hocko Cc: Vladimir Davydov Cc: Johannes Weiner Cc: H. 
Peter Anvin Cc: Tejun Heo Cc: Iulia Manda Cc: Andy Lutomirski Cc: Rusty Russell Cc: Peter Zijlstra Cc: Chuck Ebbert Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stop_machine.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 0adedca24c5b..0e1b1540597a 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -99,7 +99,7 @@ static inline int try_stop_cpus(const struct cpumask *cpumask, * grabbing every spinlock (and more). So the "read" side to such a * lock is anything which disables preemption. */ -#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) +#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU) /** * stop_machine: freeze the machine on all CPUs and run this function @@ -118,7 +118,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); -#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ +#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ static inline int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) @@ -137,5 +137,5 @@ static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, return stop_machine(fn, data, cpus); } -#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ +#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ #endif /* _LINUX_STOP_MACHINE */ -- cgit v1.2.3 From dfd01f026058a59a513f8a365b439a0681b803af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 13 Dec 2015 22:11:16 +0100 Subject: sched/wait: Fix the signal handling fix Jan Stancek reported that I wrecked things for him by fixing things for Vladimir :/ His report was due to an UNINTERRUPTIBLE wait getting -EINTR, which should not be possible, however my previous patch made this possible by unconditionally checking signal_pending(). 
We cannot use current->state as was done previously, because that variable can change as early as the instruction after the store to it. We must instead pass the initial state along and use that. Fixes: 68985633bccb ("sched/wait: Fix signal handling in bit wait helpers") Reported-by: Jan Stancek Reported-by: Chris Mason Tested-by: Jan Stancek Tested-by: Vladimir Murzin Tested-by: Chris Mason Reviewed-by: Paul Turner Cc: Ingo Molnar Cc: tglx@linutronix.de Cc: Oleg Nesterov Cc: hpa@zytor.com Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- include/linux/wait.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 1e1bf9f963a9..513b36f04dfd 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -145,7 +145,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) list_del(&old->task_list); } -typedef int wait_bit_action_f(struct wait_bit_key *); +typedef int wait_bit_action_f(struct wait_bit_key *, int mode); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); @@ -960,10 +960,10 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); } while (0) -extern int bit_wait(struct wait_bit_key *); -extern int bit_wait_io(struct wait_bit_key *); -extern int bit_wait_timeout(struct wait_bit_key *); -extern int bit_wait_io_timeout(struct wait_bit_key *); +extern int bit_wait(struct wait_bit_key *, int); +extern int bit_wait_io(struct wait_bit_key *, int); +extern int bit_wait_timeout(struct wait_bit_key *, int); +extern int bit_wait_io_timeout(struct wait_bit_key *, int); /** * wait_on_bit - wait for a bit to be cleared -- cgit v1.2.3 From c466595c416c04036e1ba36ecdc5fe9072c76228 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Nov
2015 08:26:04 +0000 Subject: irqdomain: Make irq_domain_alloc_irqs_recursive available We are soon going to need the MSI layer to call into the domain allocators. Instead of open coding this, make the standard irq_domain_alloc_irqs_recursive function available to the MSI layer. Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d5e5c5bef28c..cf96c6a326f7 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -367,6 +367,9 @@ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false); } +extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, + unsigned int irq_base, + unsigned int nr_irqs, void *arg); extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, -- cgit v1.2.3 From b2eba39bcab9d60a6c3b80c7fc2f3dacb77eeaae Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Nov 2015 08:26:05 +0000 Subject: genirq/msi: Make the .prepare callback reusable The .prepare callbacks are so far only called from msi_domain_alloc_irqs. In order to reuse that code, split that code and create a msi_domain_prepare_irqs function that the existing code can call into. 
Signed-off-by: Marc Zyngier --- include/linux/msi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index f71a25e5fd25..1c0bb2c0b211 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -279,6 +279,10 @@ struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, irq_write_msi_msg_t write_msi_msg); void platform_msi_domain_free_irqs(struct device *dev); + +/* When an MSI domain is used as an intermediate domain */ +int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *args); #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN -- cgit v1.2.3 From 2145ac9310b60c1c11294b7bea10fe154009be1d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Nov 2015 08:26:06 +0000 Subject: genirq/msi: Add msi_domain_populate_irqs To be able to allocate interrupts from the MSI layer down, add a new msi_domain_populate_irqs entry point. 
Signed-off-by: Marc Zyngier --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 1c0bb2c0b211..cee102b1916d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -283,6 +283,8 @@ void platform_msi_domain_free_irqs(struct device *dev); /* When an MSI domain is used as an intermediate domain */ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *args); +int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, + int virq, int nvec, msi_alloc_info_t *args); #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN -- cgit v1.2.3 From 552c494a7666c7fe490f179db1f52239a41fe734 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 23 Nov 2015 08:26:07 +0000 Subject: platform-msi: Allow creation of a MSI-based stacked irq domain We have almost all the bits required to create an irq domain on top of an MSI domain. For this, we enable a few things: - the virq is stored in the msi_desc - device, msi_alloc_info and domain-specific data are stored in the platform_priv_data structure - we introduce a new API for platform-msi: /* Create a MSI-based domain */ struct irq_domain * platform_msi_create_device_domain(struct device *dev, unsigned int nvec, irq_write_msi_msg_t write_msi_msg, const struct irq_domain_ops *ops, void *host_data); /* Allocate MSIs in an MSI domain */ int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); /* Free MSIs from an MSI domain */ void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); /* Obtain the host data passed to platform_msi_create_device_domain */ void *platform_msi_get_host_data(struct irq_domain *domain); platform_msi_create_device_domain() is a hybrid of irqdomain creation and interrupt allocation, creating a domain backed by the MSIs associated to a device.
IRQs can then be allocated in that domain using platform_msi_domain_alloc(). This now allows a wired irq to MSI bridge to be created. Signed-off-by: Marc Zyngier --- include/linux/msi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index cee102b1916d..1c6342ab8c0e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -174,6 +174,7 @@ struct msi_controller { #include struct irq_domain; +struct irq_domain_ops; struct irq_chip; struct device_node; struct fwnode_handle; @@ -285,6 +286,17 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *args); int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, int virq, int nvec, msi_alloc_info_t *args); +struct irq_domain * +platform_msi_create_device_domain(struct device *dev, + unsigned int nvec, + irq_write_msi_msg_t write_msi_msg, + const struct irq_domain_ops *ops, + void *host_data); +int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nvec); +void *platform_msi_get_host_data(struct irq_domain *domain); #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN -- cgit v1.2.3