From cd33dc9ac2977ebe30cecbf39d2992190fbac5b4 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 8 Sep 2015 14:51:12 +0100 Subject: thermal: Fix thermal_zone_of_sensor_register to match documentation thermal_zone_of_sensor_register is documented as returning a pointer to either a valid thermal_zone_device on success, or a corresponding ERR_PTR() value. In contrast, the function returns NULL when THERMAL_OF is configured off. Fix this. Signed-off-by: Punit Agrawal Acked-by: Guenter Roeck Cc: Eduardo Valentin Cc: Zhang Rui Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 17292fee8686..0c5518e13584 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -360,7 +360,7 @@ static inline struct thermal_zone_device * thermal_zone_of_sensor_register(struct device *dev, int id, void *data, const struct thermal_zone_of_device_ops *ops) { - return NULL; + return ERR_PTR(-ENODEV); } static inline -- cgit v1.2.3 From c973c3bcec3752455c4d7545edd42935cd7942d9 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 14 Sep 2015 14:23:50 +0100 Subject: thermal: Add a function to get the minimum power The thermal core already has a function to get the maximum power of a cooling device: power_actor_get_max_power(). Add a function to get the minimum power of a cooling device. Cc: Zhang Rui Cc: Eduardo Valentin Reviewed-by: Daniel Kurtz Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 0c5518e13584..157d366e761b 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -380,6 +380,8 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) int power_actor_get_max_power(struct thermal_cooling_device *, struct thermal_zone_device *tz, u32 *max_power); +int power_actor_get_min_power(struct thermal_cooling_device *, + struct thermal_zone_device *tz, u32 *min_power); int power_actor_set_power(struct thermal_cooling_device *, struct thermal_instance *, u32); struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, @@ -415,6 +417,10 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev, struct thermal_zone_device *tz, u32 *max_power) { return 0; } +static inline int power_actor_get_min_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + u32 *min_power) +{ return -ENODEV; } static inline int power_actor_set_power(struct thermal_cooling_device *cdev, struct thermal_instance *tz, u32 power) { return 0; } -- cgit v1.2.3 From 63cdbc06b357dcb3a7104a421ee4a4550d7fadfd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 14 Sep 2015 17:06:27 +0200 Subject: netfilter: bridge: fix routing of bridge frames with call-iptables=1 We can't re-use the physoutdev storage area. 1. When using NFQUEUE in PREROUTING, we attempt to bump a bogus refcnt since nf_bridge->physoutdev is garbage (ipv4/ipv6 address) 2. for same reason, we crash in physdev match in FORWARD or later if skb is routed instead of bridged. This increases nf_bridge_info to 40 bytes, but we have no other choice. Fixes: 72b1e5e4cac7 ("netfilter: bridge: reduce nf_bridge_info to 32 bytes again") Reported-by: Sander Eikelenboom Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2738d355cdf9..9987af080fa0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -179,6 +179,9 @@ struct nf_bridge_info { u8 bridged_dnat:1; __u16 frag_max_size; struct net_device *physindev; + + /* always valid & non-NULL from FORWARD on, for physdev match */ + struct net_device *physoutdev; union { /* prerouting: detect dnat in orig/reply direction */ __be32 ipv4_daddr; @@ -189,9 +192,6 @@ struct nf_bridge_info { * skb is out in neigh layer. */ char neigh_header[8]; - - /* always valid & non-NULL from FORWARD on, for physdev match */ - struct net_device *physoutdev; }; }; #endif -- cgit v1.2.3 From f230d1e891ba1da5953460516960894154f265db Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2015 14:30:06 -0700 Subject: ipv6: Rename the dst_cache helper functions in ip6_tunnel It is a prep work to fix the dst_entry refcnt bugs in ip6_tunnel. This patch rename: 1. ip6_tnl_dst_check() to ip6_tnl_dst_get() to better reflect that it will take a dst refcnt in the next patch. 2. ip6_tnl_dst_store() to ip6_tnl_dst_set() to have a more conventional name matching with ip6_tnl_dst_get(). Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index b8529aa1dae7..979b081a47e8 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -60,9 +60,9 @@ struct ipv6_tlv_tnl_enc_lim { __u8 encap_limit; /* tunnel encapsulation limit */ } __packed; -struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t); +struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t); void ip6_tnl_dst_reset(struct ip6_tnl *t); -void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst); +void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst); int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, -- cgit v1.2.3 From cdf3464e6c6bd764277cbbe992cd12da735b92fb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2015 14:30:07 -0700 Subject: ipv6: Fix dst_entry refcnt bugs in ip6_tunnel Problems in the current dst_entry cache in the ip6_tunnel: 1. ip6_tnl_dst_set is racy. There is no lock to protect it: - One major problem is that the dst refcnt gets messed up. F.e. the same dst_cache can be released multiple times and then triggering the infamous dst refcnt < 0 warning message. - Another issue is the inconsistency between dst_cache and dst_cookie. It can be reproduced by adding and removing the ip6gre tunnel while running a super_netperf TCP_CRR test. 2. ip6_tnl_dst_get does not take the dst refcnt before returning the dst. This patch: 1. Create a percpu dst_entry cache in ip6_tnl 2. Use a spinlock to protect the dst_cache operations 3. ip6_tnl_dst_get always takes the dst refcnt before returning Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 979b081a47e8..60b4f402f78c 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -32,6 +32,12 @@ struct __ip6_tnl_parm { __be32 o_key; }; +struct ip6_tnl_dst { + spinlock_t lock; + struct dst_entry *dst; + u32 cookie; +}; + /* IPv6 tunnel */ struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ @@ -39,8 +45,7 @@ struct ip6_tnl { struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ - struct dst_entry *dst_cache; /* cached dst */ - u32 dst_cookie; + struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */ int err_count; unsigned long err_time; @@ -61,6 +66,8 @@ struct ipv6_tlv_tnl_enc_lim { } __packed; struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t); +int ip6_tnl_dst_init(struct ip6_tnl *t); +void ip6_tnl_dst_destroy(struct ip6_tnl *t); void ip6_tnl_dst_reset(struct ip6_tnl *t); void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst); int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, -- cgit v1.2.3 From 70da5b5c532f0ec8aa76b4f46158da5f010f34b3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2015 14:30:09 -0700 Subject: ipv6: Replace spinlock with seqlock and rcu in ip6_tunnel This patch uses a seqlock to ensure consistency between idst->dst and idst->cookie. It also makes dst freeing from fib tree to undergo a rcu grace period. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 60b4f402f78c..65c2a9397b3c 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -33,8 +33,8 @@ struct __ip6_tnl_parm { }; struct ip6_tnl_dst { - spinlock_t lock; - struct dst_entry *dst; + seqlock_t lock; + struct dst_entry __rcu *dst; u32 cookie; }; -- cgit v1.2.3 From 0c986253b939cc14c69d4adbe2b4121bdf4aa220 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2015 11:51:12 -0400 Subject: Revert "sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem" This reverts commit d59cfc09c32a2ae31f1c3bc2983a0cd79afb3f14. d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify threadgroup locking") changed how cgroup synchronizes against task fork and exits so that it uses global percpu_rwsem instead of per-process rwsem; unfortunately, the write [un]lock paths of percpu_rwsem always involve synchronize_rcu_expedited() which turned out to be too expensive. Improvements for percpu_rwsem are scheduled to be merged in the coming v4.4-rc1 merge window which alleviates this issue. For now, revert the two commits to restore per-process rwsem. They will be re-applied for the v4.4-rc1 merge window. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com Reported-by: Christian Borntraeger Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Paolo Bonzini Cc: stable@vger.kernel.org # v4.2+ --- include/linux/cgroup-defs.h | 27 ++------------------------- include/linux/init_task.h | 8 ++++++++ include/linux/sched.h | 12 ++++++++++++ 3 files changed, 22 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4d8fcf2187dc..8492721b39be 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -473,31 +473,8 @@ struct cgroup_subsys { unsigned int depends_on; }; -extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; - -/** - * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups - * @tsk: target task - * - * Called from threadgroup_change_begin() and allows cgroup operations to - * synchronize against threadgroup changes using a percpu_rw_semaphore. - */ -static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) -{ - percpu_down_read(&cgroup_threadgroup_rwsem); -} - -/** - * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups - * @tsk: target task - * - * Called from threadgroup_change_end(). Counterpart of - * cgroup_threadcgroup_change_begin(). - */ -static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) -{ - percpu_up_read(&cgroup_threadgroup_rwsem); -} +void cgroup_threadgroup_change_begin(struct task_struct *tsk); +void cgroup_threadgroup_change_end(struct task_struct *tsk); #else /* CONFIG_CGROUPS */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d0b380ee7d67..e38681f4912d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,6 +25,13 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; +#ifdef CONFIG_CGROUPS +#define INIT_GROUP_RWSEM(sig) \ + .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), +#else +#define INIT_GROUP_RWSEM(sig) +#endif + #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -57,6 +64,7 @@ extern struct fs_struct init_fs; INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ + INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..b7b9501b41af 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -762,6 +762,18 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + /* + * group_rwsem prevents new tasks from entering the threadgroup and + * member tasks from exiting,a more specifically, setting of + * PF_EXITING. fork and exit paths are protected with this rwsem + * using threadgroup_change_begin/end(). Users which require + * threadgroup to remain stable should use threadgroup_[un]lock() + * which also takes care of exec path. Currently, cgroup is the + * only user. + */ + struct rw_semaphore group_rwsem; +#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ -- cgit v1.2.3 From 0243ed44ad4a25dbd2e92ad97e5e12a1a6c72d6c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 15 Sep 2015 04:59:21 -0700 Subject: spi: fix kernel-doc warnings in spi.h Fix the following 'make htmldocs' warnings: .//include/linux/spi/spi.h:71: warning: No description found for parameter 'lock' .//include/linux/spi/spi.h:71: warning: Excess struct/union/enum/typedef member 'clock' description in 'spi_statistics' Signed-off-by: Geliang Tang Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 269e8afd3e2a..6b00f18f5e6b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -34,7 +34,7 @@ extern struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers - * @clock: lock protecting this structure + * @lock: lock protecting this structure * * @messages: number of spi-messages handled * @transfers: number of spi_transfers handled -- cgit v1.2.3 From 37a1d3611c126fd9782ce5235791f898f053e763 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sun, 13 Sep 2015 10:18:33 -0700 Subject: ipv6: include NLM_F_REPLACE in route replace notifications This patch adds NLM_F_REPLACE flag to ipv6 route replace notifications. This makes nlm_flags in ipv6 replace notifications consistent with ipv4. Signed-off-by: Roopa Prabhu Acked-by: Nicolas Dichtel Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 063d30474cf6..aaf9700fc9e5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -275,7 +275,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, struct mx6_config *mxc); int fib6_del(struct rt6_info *rt, struct nl_info *info); -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, + unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); -- cgit v1.2.3 From 0fdea1e8a2853f79d39b8555cc9de16a7e0ab26f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Sep 2015 23:43:17 -0400 Subject: SUNRPC: Ensure that we wait for connections to complete before retrying Commit 718ba5b87343, moved the responsibility for unlocking the socket to xs_tcp_setup_socket, meaning that the socket will be unlocked before we know that it has finished trying to connect. The following patch is based on an initial patch by Russell King to ensure that we delay clearing the XPRT_CONNECTING flag until we either know that we failed to initiate a connection attempt, or the connection attempt itself failed. Fixes: 718ba5b87343 ("SUNRPC: Add helpers to prevent socket create from racing") Reported-by: Russell King Reported-by: Russell King Tested-by: Russell King Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7591788e9fbf..357e44c1a46b 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -42,6 +42,7 @@ struct sock_xprt { /* * Connection of transports */ + unsigned long sock_state; struct delayed_work connect_worker; struct sockaddr_storage srcaddr; unsigned short srcport; @@ -76,6 +77,8 @@ struct sock_xprt { */ #define TCP_RPC_REPLY (1UL << 6) +#define XPRT_SOCK_CONNECTING 1U + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ -- cgit v1.2.3 From 58189ca7b27411c3dc9a5cb9eeee0906da684c59 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 15 Sep 2015 15:10:50 -0700 Subject: net: Fix vti use case with oif in dst lookups Steffen reported that the recent change to add oif to dst lookups breaks the VTI use case. The problem is that with the oif set in the flow struct the comparison to the nh_oif is triggered. Fix by splitting the FLOWI_FLAG_VRFSRC into 2 flags -- one that triggers the vrf device cache bypass (FLOWI_FLAG_VRFSRC) and another telling the lookup to not compare nh oif (FLOWI_FLAG_SKIP_NH_OIF). Fixes: 42a7b32b73d6 ("xfrm: Add oif to dst lookups") Signed-off-by: David Ahern Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/flow.h | 1 + include/net/route.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index acd6a096250e..9b85db85f13c 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -35,6 +35,7 @@ struct flowi_common { #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_VRFSRC 0x04 +#define FLOWI_FLAG_SKIP_NH_OIF 0x08 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; }; diff --git a/include/net/route.h b/include/net/route.h index cc61cb95f059..f46af256880c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -255,7 +255,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; if (netif_index_is_vrf(sock_net(sk), oif)) - flow_flags |= FLOWI_FLAG_VRFSRC; + flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, protocol, flow_flags, dst, src, dport, sport); -- cgit v1.2.3 From b7f76ea2ef6739ee484a165ffbac98deb855d3d3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 18 Sep 2015 23:41:23 +0200 Subject: security: fix typo in security_task_prctl Signed-off-by: Jann Horn Reviewed-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 79d85ddf8093..2f4c1f7aa7db 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -946,7 +946,7 @@ static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg4, unsigned long arg5) { - return cap_task_prctl(option, arg2, arg3, arg3, arg5); + return cap_task_prctl(option, arg2, arg3, arg4, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) -- cgit v1.2.3 From 0315e382704817b279e5693dca8ab9d89aa20b3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Forr=C3=B3?= Date: Thu, 17 Sep 2015 16:01:32 +0200 Subject: net: Fix behaviour of unreachable, blackhole and prohibit routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Man page of ip-route(8) says following about route types: unreachable - these destinations are unreachable. Packets are dis‐ carded and the ICMP message host unreachable is generated. The local senders get an EHOSTUNREACH error. blackhole - these destinations are unreachable. Packets are dis‐ carded silently. The local senders get an EINVAL error. prohibit - these destinations are unreachable. Packets are discarded and the ICMP message communication administratively prohibited is generated. The local senders get an EACCES error. In the inet6 address family, this was correct, except the local senders got ENETUNREACH error instead of EHOSTUNREACH in case of unreachable route. In the inet address family, all three route types generated ICMP message net unreachable, and the local senders got ENETUNREACH error. In both address families all three route types now behave consistently with documentation. Signed-off-by: Nikola Forró Signed-off-by: David S. Miller --- include/net/ip_fib.h | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a37d0432bebd..727d6e9a9685 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -236,8 +236,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) - err = 0; + if (tb) + err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF); + + if (err == -EAGAIN) + err = -ENETUNREACH; rcu_read_unlock(); @@ -258,7 +261,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; - int err; + int err = -ENETUNREACH; flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) @@ -268,15 +271,20 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, res->tclassid = 0; - for (err = 0; !err; err = -ENETUNREACH) { - tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, flags)) - break; + tb = rcu_dereference_rtnl(net->ipv4.fib_main); + if (tb) + err = fib_table_lookup(tb, flp, res, flags); + + if (!err) + goto out; + + tb = rcu_dereference_rtnl(net->ipv4.fib_default); + if (tb) + err = fib_table_lookup(tb, flp, res, flags); - tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, flags)) - break; - } +out: + if (err == -EAGAIN) + err = -ENETUNREACH; rcu_read_unlock(); -- cgit v1.2.3 From 83cf9a2521b0934a5f9d04082c9bb4f554fddcd4 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 18 Sep 2015 11:47:41 +0200 Subject: ip6tunnel: make rx/tx bytes counters consistent Like the previous patch, which fixes ipv4 tunnels, here is the ipv6 part. Before the patch, the external ipv6 header + gre header were included on tx. After the patch: $ ping -c1 192.168.6.121 ; ip -s l ls dev ip6gre1 PING 192.168.6.121 (192.168.6.121) 56(84) bytes of data. 64 bytes from 192.168.6.121: icmp_req=1 ttl=64 time=1.92 ms --- 192.168.6.121 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 1.923/1.923/1.923/0.000 ms 7: ip6gre1@NONE: mtu 1440 qdisc noqueue state UNKNOWN mode DEFAULT group default link/gre6 20:01:06:60:30:08:c1:c3:00:00:00:00:00:00:01:23 peer 20:01:06:60:30:08:c1:c3:00:00:00:00:00:00:01:21 RX: bytes packets errors dropped overrun mcast 84 1 0 0 0 0 TX: bytes packets errors dropped carrier collsns 84 1 0 0 0 0 $ ping -c1 192.168.1.121 ; ip -s l ls dev ip6tnl1 PING 192.168.1.121 (192.168.1.121) 56(84) bytes of data. 64 bytes from 192.168.1.121: icmp_req=1 ttl=64 time=2.28 ms --- 192.168.1.121 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 2.288/2.288/2.288/0.000 ms 8: ip6tnl1@NONE: mtu 1452 qdisc noqueue state UNKNOWN mode DEFAULT group default link/tunnel6 2001:660:3008:c1c3::123 peer 2001:660:3008:c1c3::121 RX: bytes packets errors dropped overrun mcast 84 1 0 0 0 0 TX: bytes packets errors dropped carrier collsns 84 1 0 0 0 0 Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 65c2a9397b3c..fa915fa0f703 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -86,7 +86,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device_stats *stats = &dev->stats; int pkt_len, err; - pkt_len = skb->len; + pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out_sk(sk, skb); if (net_xmit_eval(err) == 0) { -- cgit v1.2.3 From ed2e923945892a8372ab70d2f61d364b0b6d9054 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Sep 2015 09:08:34 -0700 Subject: tcp/dccp: fix timewait races in timer handling When creating a timewait socket, we need to arm the timer before allowing other cpus to find it. The signal allowing cpus to find the socket is setting tw_refcnt to non zero value. As we set tw_refcnt in __inet_twsk_hashdance(), we therefore need to call inet_twsk_schedule() first. This also means we need to remove tw_refcnt changes from inet_twsk_schedule() and let the caller handle it. Note that because we use mod_timer_pinned(), we have the guarantee the timer wont expire before we set tw_refcnt as we run in BH context. To make things more readable I introduced inet_twsk_reschedule() helper. When rearming the timer, we can use mod_timer_pending() to make sure we do not rearm a canceled timer. Note: This bug can possibly trigger if packets of a flow can hit multiple cpus. This does not normally happen, unless flow steering is broken somehow. This explains this bug was spotted ~5 months after its introduction. A similar fix is needed for SYN_RECV sockets in reqsk_queue_hash_req(), but will be provided in a separate patch for proper tracking. Fixes: 789f558cfb36 ("tcp/dccp: get rid of central timewait timer") Signed-off-by: Eric Dumazet Reported-by: Ying Cai Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 879d6e5a973b..186f3a1e1b1f 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -110,7 +110,19 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); -void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); +void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, + bool rearm); + +static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, false); +} + +static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, true); +} + void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); void inet_twsk_purge(struct inet_hashinfo *hashinfo, -- cgit v1.2.3 From ac5be6b47e8bd25b62bed2c82cda7398999f59e9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 22 Sep 2015 14:58:49 -0700 Subject: userfaultfd: revert "userfaultfd: waitqueue: add nr wake parameter to __wake_up_locked_key" This reverts commit 51360155eccb907ff8635bd10fc7de876408c2e0 and adapts fs/userfaultfd.c to use the old version of that function. It didn't look robust to call __wake_up_common with "nr == 1" when we absolutely require wakeall semantics, but we've full control of what we insert in the two waitqueue heads of the blocked userfaults. No exclusive waitqueue risks to be inserted into those two waitqueue heads so we can as well stick to "nr == 1" of the old code and we can rely purely on the fact no waitqueue inserted in one of the two waitqueue heads we must enforce as wakeall, has wait->flags WQ_FLAG_EXCLUSIVE set. Signed-off-by: Andrea Arcangeli Cc: Dr. David Alan Gilbert Cc: Michael Ellerman Cc: Shuah Khan Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index d3d077228d4c..1e1bf9f963a9 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -147,8 +147,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) typedef int wait_bit_action_f(struct wait_bit_key *); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr, - void *key); +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); @@ -180,7 +179,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, (void *) (m)) #define wake_up_locked_poll(x, m) \ - __wake_up_locked_key((x), TASK_NORMAL, 1, (void *) (m)) + __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) #define wake_up_interruptible_poll(x, m) \ __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) #define wake_up_interruptible_sync_poll(x, m) \ -- cgit v1.2.3 From 09f7298100ea9767324298ab0c7979f6d7463183 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 22 Sep 2015 14:59:09 -0700 Subject: userfaultfd: register uapi generic syscall (aarch64) Add the userfaultfd syscalls to uapi asm-generic, it was tested with postcopy live migration on aarch64 with both 4k and 64k pagesize kernels. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Andrea Arcangeli Cc: Michael Ellerman Cc: Shuah Khan Cc: Thierry Reding Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/unistd.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 8da542a2874d..ee124009e12a 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -709,17 +709,19 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create) __SYSCALL(__NR_bpf, sys_bpf) #define __NR_execveat 281 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) -#define __NR_membarrier 282 +#define __NR_userfaultfd 282 +__SYSCALL(__NR_userfaultfd, sys_userfaultfd) +#define __NR_membarrier 283 __SYSCALL(__NR_membarrier, sys_membarrier) #undef __NR_syscalls -#define __NR_syscalls 283 +#define __NR_syscalls 284 /* * All syscalls below here should go away really, * these are provided for both review and as a porting * help for the C library version. -* + * * Last chance: are any of these important enough to * enable by default? */ -- cgit v1.2.3 From 2d8bff12699abc3a9bf886bb0b79f44d94d81496 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 23 Sep 2015 14:57:58 -0400 Subject: netpoll: Close race condition between poll_one_napi and napi_disable Drivers might call napi_disable while not holding the napi instance poll_lock. In those instances, its possible for a race condition to exist between poll_one_napi and napi_disable. That is to say, poll_one_napi only tests the NAPI_STATE_SCHED bit to see if there is work to do during a poll, and as such the following may happen: CPU0 CPU1 ndo_tx_timeout napi_poll_dev napi_disable poll_one_napi test_and_set_bit (ret 0) test_bit (ret 1) reset adapter napi_poll_routine If the adapter gets a tx timeout without a napi instance scheduled, its possible for the adapter to think it has exclusive access to the hardware (as the napi instance is now scheduled via the napi_disable call), while the netpoll code thinks there is simply work to do. The result is parallel hardware access leading to corrupt data structures in the driver, and a crash. Additionaly, there is another, more critical race between netpoll and napi_disable. The disabled napi state is actually identical to the scheduled state for a given napi instance. The implication being that, if a napi instance is disabled, a netconsole instance would see the napi state of the device as having been scheduled, and poll it, likely while the driver was dong something requiring exclusive access. In the case above, its fairly clear that not having the rings in a state ready to be polled will cause any number of crashes. The fix should be pretty easy. netpoll uses its own bit to indicate that that the napi instance is in a state of being serviced by netpoll (NAPI_STATE_NPSVC). We can just gate disabling on that bit as well as the sched bit. That should prevent netpoll from conducting a napi poll if we convert its set bit to a test_and_set_bit operation to provide mutual exclusion Change notes: V2) Remove a trailing whtiespace Resubmit with proper subject prefix V3) Clean up spacing nits Signed-off-by: Neil Horman CC: "David S. Miller" CC: jmaxwell@redhat.com Tested-by: jmaxwell@redhat.com Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 88a00694eda5..2d15e3831440 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -507,6 +507,7 @@ static inline void napi_enable(struct napi_struct *n) BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); smp_mb__before_atomic(); clear_bit(NAPI_STATE_SCHED, &n->state); + clear_bit(NAPI_STATE_NPSVC, &n->state); } #ifdef CONFIG_SMP -- cgit v1.2.3 From 9badce000e2ce68ba74838a3cd356dde58221c2f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Sep 2015 17:07:29 -0400 Subject: cgroup, writeback: don't enable cgroup writeback on traditional hierarchies inode_cgwb_enabled() gates cgroup writeback support. If it returns true, each inode is attached to the corresponding memory domain which gets mapped to io domain. It currently only tests whether the filesystem and bdi support cgroup writeback; however, cgroup writeback support doesn't work on traditional hierarchies and thus it should also test whether memcg and iocg are on the default hierarchy. This caused traditional hierarchy setups to hit the cgroup writeback path inadvertently and ended up creating separate writeback domains for each memcg and mapping them all to the root iocg uncovering a couple issues in the cgroup writeback path. cgroup writeback was never meant to be enabled on traditional hierarchies. Make inode_cgwb_enabled() test whether both memcg and iocg are on the default hierarchy. Signed-off-by: Tejun Heo Reported-by: Artem Bityutskiy Reported-by: Dexuan Cui Link: http://lkml.kernel.org/g/1443012552.19983.209.camel@gmail.com Link: http://lkml.kernel.org/g/f30d4a6aa8a546ff88f73021d026a453@SIXPR30MB031.064d.mgd.msft.net --- include/linux/backing-dev.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 5a5d79ee256f..d5eb4ad1c534 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -252,13 +253,19 @@ int inode_congested(struct inode *inode, int cong_bits); * @inode: inode of interest * * cgroup writeback requires support from both the bdi and filesystem. - * Test whether @inode has both. + * Also, both memcg and iocg have to be on the default hierarchy. Test + * whether all conditions are met. + * + * Note that the test result may change dynamically on the same inode + * depending on how memcg and iocg are configured. */ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); - return bdi_cap_account_dirty(bdi) && + return cgroup_on_dfl(mem_cgroup_root_css->cgroup) && + cgroup_on_dfl(blkcg_root_css->cgroup) && + bdi_cap_account_dirty(bdi) && (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } -- cgit v1.2.3 From 6ae459bdaaeebc632b16e54dcbabb490c6931d61 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 22 Sep 2015 12:57:53 -0700 Subject: skbuff: Fix skb checksum flag on skb pull VXLAN device can receive skb with checksum partial. But the checksum offset could be in outer header which is pulled on receive. This results in negative checksum offset for the skb. Such skb can cause the assert failure in skb_checksum_help(). Following patch fixes the bug by setting checksum-none while pulling outer header. Following is the kernel panic msg from old kernel hitting the bug. ------------[ cut here ]------------ kernel BUG at net/core/dev.c:1906! RIP: 0010:[] skb_checksum_help+0x144/0x150 Call Trace: [] queue_userspace_packet+0x408/0x470 [openvswitch] [] ovs_dp_upcall+0x5d/0x60 [openvswitch] [] ovs_dp_process_packet_with_key+0xe6/0x100 [openvswitch] [] ovs_dp_process_received_packet+0x4b/0x80 [openvswitch] [] ovs_vport_receive+0x2a/0x30 [openvswitch] [] vxlan_rcv+0x53/0x60 [openvswitch] [] vxlan_udp_encap_recv+0x8b/0xf0 [openvswitch] [] udp_queue_rcv_skb+0x2dc/0x3b0 [] __udp4_lib_rcv+0x1cf/0x6c0 [] udp_rcv+0x1a/0x20 [] ip_local_deliver_finish+0xdd/0x280 [] ip_local_deliver+0x88/0x90 [] ip_rcv_finish+0x10d/0x370 [] ip_rcv+0x235/0x300 [] __netif_receive_skb+0x55d/0x620 [] netif_receive_skb+0x80/0x90 [] virtnet_poll+0x555/0x6f0 [] net_rx_action+0x134/0x290 [] __do_softirq+0xa8/0x210 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x8e/0xb0 [] do_IRQ+0x63/0xe0 [] common_interrupt+0x6e/0x6e Reported-by: Anupam Chanda Signed-off-by: Pravin B Shelar Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9987af080fa0..2b0a30a6e31c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2707,6 +2707,9 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) <= len) + skb->ip_summed = CHECKSUM_NONE; } unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); -- cgit v1.2.3 From 63d008a4e9ee86614ca5671b7f3ba447df007190 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 22 Sep 2015 18:12:11 +0200 Subject: ipv4: send arp replies to the correct tunnel When using ip lwtunnels, the additional data for xmit (basically, the actual tunnel to use) are carried in ip_tunnel_info either in dst->lwtstate or in metadata dst. When replying to ARP requests, we need to send the reply to the same tunnel the request came from. This means we need to construct proper metadata dst for ARP replies. We could perform another route lookup to get a dst entry with the correct lwtstate. However, this won't always ensure that the outgoing tunnel is the same as the incoming one, and it won't work anyway for IPv4 duplicate address detection. The only thing to do is to "reverse" the ip_tunnel_info. Signed-off-by: Jiri Benc Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a3ba888e8..f6dafec9102c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -276,6 +276,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); +struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, + gfp_t flags); struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, int gso_type_mask); -- cgit v1.2.3 From b194f30c61efb0767a98f47a64530baa8b731670 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 22 Sep 2015 18:12:12 +0200 Subject: lwtunnel: remove source and destination UDP port config option The UDP tunnel config is asymmetric wrt. to the ports used. The source and destination ports from one direction of the tunnel are not related to the ports of the other direction. We need to be able to respond to ARP requests using the correct ports without involving routing. As the consequence, UDP ports need to be fixed property of the tunnel interface and cannot be set per route. Remove the ability to set ports per route. This is still okay to do, as no kernel has been released with these attributes yet. Note that the ability to specify source and destination ports is preserved for other users of the lwtunnel API which don't use routes for tunnel key specification (like openvswitch). If in the future we rework ARP handling to allow port specification, the attributes can be added back. Signed-off-by: Jiri Benc Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/lwtunnel.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 34141a5dfe74..f8b01887a495 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -21,8 +21,6 @@ enum lwtunnel_ip_t { LWTUNNEL_IP_SRC, LWTUNNEL_IP_TTL, LWTUNNEL_IP_TOS, - LWTUNNEL_IP_SPORT, - LWTUNNEL_IP_DPORT, LWTUNNEL_IP_FLAGS, __LWTUNNEL_IP_MAX, }; @@ -36,8 +34,6 @@ enum lwtunnel_ip6_t { LWTUNNEL_IP6_SRC, LWTUNNEL_IP6_HOPLIMIT, LWTUNNEL_IP6_TC, - LWTUNNEL_IP6_SPORT, - LWTUNNEL_IP6_DPORT, LWTUNNEL_IP6_FLAGS, __LWTUNNEL_IP6_MAX, }; -- cgit v1.2.3 From 3e3aaf649416988ca8be4ad2c52dc24d8be7b46e Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:02 +0100 Subject: phy: fix mdiobus module safety Re-implement the mdiobus module refcounting to ensure that we actually ensure that the mdiobus module code does not go away while we might call into it. The old scheme using bus->dev.driver was buggy, because bus->dev is a class device which never has a struct device_driver associated with it, and hence the associated code trying to obtain a refcount did nothing useful. Instead, take the approach that other subsystems do: pass the module when calling mdiobus_register(), and record that in the mii_bus struct. When we need to increment the module use count in the phy code, use this stored pointer. When the phy is deteched, drop the module refcount, remembering that the phy device might go away at that point. This doesn't stop the mii_bus going away while there are in-use phys - it merely stops the underlying code vanishing. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 962387a192f1..11bce44f6d65 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -153,6 +154,7 @@ struct sk_buff; * PHYs should register using this structure */ struct mii_bus { + struct module *owner; const char *name; char id[MII_BUS_ID_SIZE]; void *priv; @@ -198,7 +200,8 @@ static inline struct mii_bus *mdiobus_alloc(void) return mdiobus_alloc_size(0); } -int mdiobus_register(struct mii_bus *bus); +int __mdiobus_register(struct mii_bus *bus, struct module *owner); +#define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE) void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv); -- cgit v1.2.3 From 38737e490d4ea91660d3cec83ef88c4e6d360ae4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:28 +0100 Subject: phy: add phy_device_remove() Add a phy_device_remove() function to complement phy_device_register(), which undoes the effects of phy_device_register() by removing the phy device from visibility, but not freeing it. This allows these details to be moved out of the mdio bus code into the phy code where this action belongs. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 11bce44f6d65..4a4e3a092337 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -745,6 +745,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, struct phy_c45_device_ids *c45_ids); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); +void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); -- cgit v1.2.3 From eeeb9522231118138be418ff527dc8c9050f4707 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 15 Sep 2015 17:27:35 -0700 Subject: target: Propigate backend read-only to core_tpg_add_lun This patch adds a DF_READ_ONLY flag that is used by IBLOCK to signal when a backend has been set to read-only mode, in order to propigate read-only status up to core_tpg_add_lun() for all future LUN fabric exports. With this is place, existing emulation for reporting read-only in spc_emulate_modesense() and normal transport_lookup_cmd_lun() TCM_WRITE_PROTECTED status checking just works as expected. Reported-by: Joeue Deng Reported-by: Andy Grover Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index ac9bf1c0e42d..5f48754dc36a 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -730,6 +730,7 @@ struct se_device { #define DF_EMULATED_VPD_UNIT_SERIAL 0x00000004 #define DF_USING_UDEV_PATH 0x00000008 #define DF_USING_ALIAS 0x00000010 +#define DF_READ_ONLY 0x00000020 /* Physical device queue depth */ u32 queue_depth; /* Used for SPC-2 reservations enforce of ISIDs */ -- cgit v1.2.3