From a57de0b4336e48db2811a2030bb68dba8dd09d88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Jul 2009 12:09:13 +0000 Subject: net: adding memory barrier to the poll and receive callbacks Adding memory barrier after the poll_wait function, paired with receive callbacks. Adding fuctions sock_poll_wait and sk_has_sleeper to wrap the memory barrier. Without the memory barrier, following race can happen. The race fires, when following code paths meet, and the tp->rcv_nxt and __add_wait_queue updates stay in CPU caches. CPU1 CPU2 sys_select receive packet ... ... __add_wait_queue update tp->rcv_nxt ... ... tp->rcv_nxt check sock_def_readable ... { schedule ... if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep) ... } If there was no cache the code would work ok, since the wait_queue and rcv_nxt are opposit to each other. Meaning that once tp->rcv_nxt is updated by CPU2, the CPU1 either already passed the tp->rcv_nxt check and sleeps, or will get the new value for tp->rcv_nxt and will return with new data mask. In both cases the process (CPU1) is being added to the wait queue, so the waitqueue_active (CPU2) call cannot miss and will wake up CPU1. The bad case is when the __add_wait_queue changes done by CPU1 stay in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 will then endup calling schedule and sleep forever if there are no more data on the socket. Calls to poll_wait in following modules were ommited: net/bluetooth/af_bluetooth.c net/irda/af_irda.c net/irda/irnet/irnet_ppp.c net/mac80211/rc80211_pid_debugfs.c net/phonet/socket.c net/rds/af_rds.c net/rfkill/core.c net/sunrpc/cache.c net/sunrpc/rpc_pipe.c net/tipc/socket.c Signed-off-by: Jiri Olsa Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 352f06bbd7a9..4eb8409249f6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -54,6 +54,7 @@ #include #include +#include #include #include @@ -1241,6 +1242,71 @@ static inline int sk_has_allocations(const struct sock *sk) return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); } +/** + * sk_has_sleeper - check if there are any waiting processes + * @sk: socket + * + * Returns true if socket has waiting processes + * + * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory + * barrier call. They were added due to the race found within the tcp code. + * + * Consider following tcp code paths: + * + * CPU1 CPU2 + * + * sys_select receive packet + * ... ... + * __add_wait_queue update tp->rcv_nxt + * ... ... + * tp->rcv_nxt check sock_def_readable + * ... { + * schedule ... + * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + * wake_up_interruptible(sk->sk_sleep) + * ... + * } + * + * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay + * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 + * could then endup calling schedule and sleep forever if there are no more + * data on the socket. + */ +static inline int sk_has_sleeper(struct sock *sk) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier is paired in the sock_poll_wait. + */ + smp_mb(); + return sk->sk_sleep && waitqueue_active(sk->sk_sleep); +} + +/** + * sock_poll_wait - place memory barrier behind the poll_wait call. + * @filp: file + * @wait_address: socket wait queue + * @p: poll_table + * + * See the comments in the sk_has_sleeper function. + */ +static inline void sock_poll_wait(struct file *filp, + wait_queue_head_t *wait_address, poll_table *p) +{ + if (p && wait_address) { + poll_wait(filp, wait_address, p); + /* + * We need to be sure we are in sync with the + * socket flags modification. + * + * This memory barrier is paired in the sk_has_sleeper. + */ + smp_mb(); + } +} + /* * Queue a received datagram if it will fit. Stream and sequenced * protocols can't normally use this as they need to fit buffers in -- cgit v1.2.3 From ad46276952f1af34cd91d46d49ba13d347d56367 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Jul 2009 12:10:31 +0000 Subject: memory barrier: adding smp_mb__after_lock Adding smp_mb__after_lock define to be used as a smp_mb call after a lock. Making it nop for x86, since {read|write|spin}_lock() on x86 are full memory barriers. Signed-off-by: Jiri Olsa Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 4eb8409249f6..2c0da9239b95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1271,6 +1271,9 @@ static inline int sk_has_allocations(const struct sock *sk) * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 * could then endup calling schedule and sleep forever if there are no more * data on the socket. + * + * The sk_has_sleeper is always called right after a call to read_lock, so we + * can use smp_mb__after_lock barrier. */ static inline int sk_has_sleeper(struct sock *sk) { @@ -1280,7 +1283,7 @@ static inline int sk_has_sleeper(struct sock *sk) * * This memory barrier is paired in the sock_poll_wait. */ - smp_mb(); + smp_mb__after_lock(); return sk->sk_sleep && waitqueue_active(sk->sk_sleep); } -- cgit v1.2.3 From 4dc6dc7162c08b9965163c9ab3f9375d4adff2c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jul 2009 23:13:10 +0000 Subject: net: sock_copy() fixes Commit e912b1142be8f1e2c71c71001dc992c6e5eb2ec1 (net: sk_prot_alloc() should not blindly overwrite memory) took care of not zeroing whole new socket at allocation time. sock_copy() is another spot where we should be very careful. We should not set refcnt to a non null value, until we are sure other fields are correctly setup, or a lockless reader could catch this socket by mistake, while not fully (re)initialized. This patch puts sk_node & sk_refcnt to the very beginning of struct sock to ease sock_copy() & sk_prot_alloc() job. We add appropriate smp_wmb() before sk_refcnt initializations to match our RCU requirements (changes to sock keys should be committed to memory before sk_refcnt setting) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 2c0da9239b95..950409dcec3d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -104,15 +104,15 @@ struct net; /** * struct sock_common - minimal network layer representation of sockets + * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol + * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 - * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_bind_node: bind hash linkage for various protocol lookup tables - * @skc_refcnt: reference count - * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @@ -120,17 +120,21 @@ struct net; * for struct sock and struct inet_timewait_sock. */ struct sock_common { - unsigned short skc_family; - volatile unsigned char skc_state; - unsigned char skc_reuse; - int skc_bound_dev_if; + /* + * first fields are not copied in sock_copy() + */ union { struct hlist_node skc_node; struct hlist_nulls_node skc_nulls_node; }; - struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; + unsigned short skc_family; + volatile unsigned char skc_state; + unsigned char skc_reuse; + int skc_bound_dev_if; + struct hlist_node skc_bind_node; struct proto *skc_prot; #ifdef CONFIG_NET_NS struct net *skc_net; @@ -208,15 +212,17 @@ struct sock { * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; +#define sk_node __sk_common.skc_node +#define sk_nulls_node __sk_common.skc_nulls_node +#define sk_refcnt __sk_common.skc_refcnt + +#define sk_copy_start __sk_common.skc_hash +#define sk_hash __sk_common.skc_hash #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse #define sk_bound_dev_if __sk_common.skc_bound_dev_if -#define sk_node __sk_common.skc_node -#define sk_nulls_node __sk_common.skc_nulls_node #define sk_bind_node __sk_common.skc_bind_node -#define sk_refcnt __sk_common.skc_refcnt -#define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net kmemcheck_bitfield_begin(flags); -- cgit v1.2.3 From e3afe7b75ed8f809c1473ea9b39267487c187ccb Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Thu, 16 Jul 2009 05:04:51 +0000 Subject: tcp: Fix MD5 signature checking on IPv4 mapped sockets Fix MD5 signature checking so that an IPv4 active open to an IPv6 socket can succeed. In particular, use the correct address family's signature generation function for the SYN/ACK. Reported-by: Stephen Hemminger Signed-off-by: John Dykstra Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 19f4150f4d4d..88af84306471 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1425,6 +1425,11 @@ struct tcp_request_sock_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, struct request_sock *req); + int (*calc_md5_hash) (char *location, + struct tcp_md5sig_key *md5, + struct sock *sk, + struct request_sock *req, + struct sk_buff *skb); #endif }; -- cgit v1.2.3 From dcf777f6ed9799c5ac90ac17a5c369e6b73ca92e Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 26 Jul 2009 19:11:14 -0700 Subject: NET: ROSE: Don't use static buffer. The use of a static buffer in rose2asc() to return its result is not threadproof and can result in corruption if multiple threads are trying to use one of the procfs files based on rose2asc(). Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- include/net/rose.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/rose.h b/include/net/rose.h index cbd5364b2c8a..5ba9f02731eb 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -156,7 +156,7 @@ extern int sysctl_rose_maximum_vcs; extern int sysctl_rose_window_size; extern int rosecmp(rose_address *, rose_address *); extern int rosecmpm(rose_address *, rose_address *, unsigned short); -extern const char *rose2asc(const rose_address *); +extern char *rose2asc(char *buf, const rose_address *); extern struct sock *rose_find_socket(unsigned int, struct rose_neigh *); extern void rose_kill_by_neigh(struct rose_neigh *); extern unsigned int rose_new_lci(struct rose_neigh *); -- cgit v1.2.3 From af0d3b103bcfa877343ee338de12002cd50c9ee5 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 3 Aug 2009 04:26:16 +0000 Subject: bluetooth: rfcomm_init bug fix rfcomm tty may be used before rfcomm_tty_driver initilized, The problem is that now socket layer init before tty layer, if userspace program do socket callback right here then oops will happen. reporting in: http://marc.info/?l=linux-bluetooth&m=124404919324542&w=2 make 3 changes: 1. remove #ifdef in rfcomm/core.c, make it blank function when rfcomm tty not selected in rfcomm.h 2. tune the rfcomm_init error patch to ensure tty driver initilized before rfcomm socket usage. 3. remove __exit for rfcomm_cleanup_sockets because above change need call it in a __init function. Reported-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Signed-off-by: Dave Young Signed-off-by: David S. Miller --- include/net/bluetooth/rfcomm.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 80072611d26a..c274993234e3 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -355,7 +355,17 @@ struct rfcomm_dev_list_req { }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); + +#ifdef CONFIG_BT_RFCOMM_TTY int rfcomm_init_ttys(void); void rfcomm_cleanup_ttys(void); - +#else +static inline int rfcomm_init_ttys(void) +{ + return 0; +} +static inline void rfcomm_cleanup_ttys(void) +{ +} +#endif #endif /* __RFCOMM_H */ -- cgit v1.2.3 From 371842448c05b42d11a4be1c8e4e81d62ecc7534 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 30 Jul 2009 17:43:48 -0700 Subject: cfg80211: fix regression on beacon world roaming feature A regression was added through patch a4ed90d6: "cfg80211: respect API on orig_flags on channel for beacon hint" We did indeed respect _orig flags but the intention was not clearly stated in the commit log. This patch fixes firmware issues picked up by iwlwifi when we lift passive scan of beaconing restrictions on channels its EEPROM has been configured to always enable. By doing so though we also disallowed beacon hints on devices registering their wiphy with custom world regulatory domains enabled, this happens to be currently ath5k, ath9k and ar9170. The passive scan and beacon restrictions on those devices would never be lifted even if we did find a beacon and the hardware did support such enhancements when world roaming. Since Johannes indicates iwlwifi firmware cannot be changed to allow beacon hinting we set up a flag now to specifically allow drivers to disable beacon hints for devices which cannot use them. We enable the flag on iwlwifi to disable beacon hints and by default enable it for all other drivers. It should be noted beacon hints lift passive scan flags and beacon restrictions when we receive a beacon from an AP on any 5 GHz non-DFS channels, and channels 12-14 on the 2.4 GHz band. We don't bother with channels 1-11 as those channels are allowed world wide. This should fix world roaming for ath5k, ath9k and ar9170, thereby improving scan time when we receive the first beacon from any AP, and also enabling beaconing operation (AP/IBSS/Mesh) on cards which would otherwise not be allowed to do so. Drivers not using custom regulatory stuff (wiphy_apply_custom_regulatory()) were not affected by this as the orig_flags for the channels would have been cleared upon wiphy registration. I tested this with a world roaming ath5k card. Cc: Jouni Malinen Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1a21895b732b..d1892d66701a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -979,6 +979,10 @@ struct cfg80211_ops { * channels at a later time. This can be used for devices which do not * have calibration information gauranteed for frequencies or settings * outside of its regulatory domain. + * @disable_beacon_hints: enable this if your driver needs to ensure that + * passive scan flags and beaconing flags may not be lifted by cfg80211 + * due to regulatory beacon hints. For more information on beacon + * hints read the documenation for regulatory_hint_found_beacon() * @reg_notifier: the driver's regulatory notification callback * @regd: the driver's regulatory domain, if one was requested via * the regulatory_hint() API. This can be used by the driver @@ -1004,6 +1008,7 @@ struct wiphy { bool custom_regulatory; bool strict_regulatory; + bool disable_beacon_hints; enum cfg80211_signal_type signal_type; -- cgit v1.2.3