From fe42b170afae5978dc90641f29f2b39aefaa47fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Feb 2014 10:09:18 -0800 Subject: net-tcp: fastopen: fix high order allocations [ Upstream commit f5ddcbbb40aa0ba7fbfe22355d287603dbeeaaac ] This patch fixes two bugs in fastopen : 1) The tcp_sendmsg(..., @size) argument was ignored. Code was relying on user not fooling the kernel with iovec mismatches 2) When MTU is about 64KB, tcp_send_syn_data() attempts order-5 allocations, which are likely to fail when memory gets fragmented. Fixes: 783237e8daf13 ("net-tcp: Fast Open client - sending SYN-data") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Tested-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 8 +++++--- net/ipv4/tcp_output.c | 7 ++++++- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1a2e249cef49..39bdb14b3214 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1001,7 +1001,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } } -static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) +static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + int *copied, size_t size) { struct tcp_sock *tp = tcp_sk(sk); int err, flags; @@ -1016,11 +1017,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) if (unlikely(tp->fastopen_req == NULL)) return -ENOBUFS; tp->fastopen_req->data = msg; + tp->fastopen_req->size = size; flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags); - *size = tp->fastopen_req->copied; + *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); return err; } @@ -1040,7 +1042,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flags = msg->msg_flags; if (flags & MSG_FASTOPEN) { - err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) goto out; else if (err) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d2df17940e07..6da3d94a114b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2892,7 +2892,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; - syn_data = skb_copy_expand(syn, skb_headroom(syn), space, + space = min_t(size_t, space, fo->size); + + /* limit to order-0 allocations */ + space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); + + syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, sk->sk_allocation); if (syn_data == NULL) goto fallback; -- cgit v1.2.3 From 913cfa947d296bd0a0671e87f102a49c24683fd8 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Thu, 27 Feb 2014 17:14:41 +0800 Subject: neigh: recompute reachabletime before returning from neigh_periodic_work() [ Upstream commit feff9ab2e7fa773b6a3965f77375fe89f7fd85cf ] If the neigh table's entries is less than gc_thresh1, the function will return directly, and the reachabletime will not be recompute, so the reachabletime can be guessed. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 49aeab86f317..b49e8bafab17 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -764,9 +764,6 @@ static void neigh_periodic_work(struct work_struct *work) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) < tbl->gc_thresh1) - goto out; - /* * periodically recompute ReachableTime from random function */ @@ -779,6 +776,9 @@ static void neigh_periodic_work(struct work_struct *work) neigh_rand_reach_time(p->base_reachable_time); } + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; + for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; -- cgit v1.2.3 From 150d6bdb9d2c1bbdf95ba9502c35b5f4ddc4bd7e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 21 Feb 2014 02:55:35 +0100 Subject: ipv6: reuse ip6_frag_id from ip6_ufo_append_data [ Upstream commit 916e4cf46d0204806c062c8c6c4d1f633852c5b6 ] Currently we generate a new fragmentation id on UFO segmentation. It is pretty hairy to identify the correct net namespace and dst there. Especially tunnels use IFF_XMIT_DST_RELEASE and thus have no skb_dst available at all. This causes unreliable or very predictable ipv6 fragmentation id generation while segmentation. Luckily we already have pregenerated the ip6_frag_id in ip6_ufo_append_data and can use it here. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/udp_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 3696aa28784a..2f65b022627b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -108,7 +108,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() -- cgit v1.2.3 From d1f7dd4e9b53146b6049de3a6b4c2b74472e26e5 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 27 Feb 2014 12:57:58 +0100 Subject: ipv6: ipv6_find_hdr restore prev functionality [ Upstream commit accfe0e356327da5bd53da8852b93fc22de9b5fc ] The commit 9195bb8e381d81d5a315f911904cdf0cfcc919b8 ("ipv6: improve ipv6_find_hdr() to skip empty routing headers") broke ipv6_find_hdr(). When a target is specified like IPPROTO_ICMPV6 ipv6_find_hdr() returns -ENOENT when it's found, not the header as expected. A part of IPVS is broken and possible also nft_exthdr_eval(). When target is -1 which it is most cases, it works. This patch exits the do while loop if the specific header is found so the nexthdr could be returned as expected. Reported-by: Art -kwaak- van Breemen Signed-off-by: Hans Schillstrom CC:Ansis Atteka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/exthdrs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index c5e83fae4df4..51af9d0d019a 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { - if (target < 0) + if (target < 0 || found) break; return -ENOENT; } -- cgit v1.2.3 From 892b46a6a68fa1ae1f85f3c2ba475382b7b6acc5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 3 Mar 2014 17:23:04 +0100 Subject: net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable [ Upstream commit ec0223ec48a90cb605244b45f7c62de856403729 ] RFC4895 introduced AUTH chunks for SCTP; during the SCTP handshake RANDOM; CHUNKS; HMAC-ALGO are negotiated (CHUNKS being optional though): ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- -------------------- COOKIE-ECHO --------------------> <-------------------- COOKIE-ACK --------------------- A special case is when an endpoint requires COOKIE-ECHO chunks to be authenticated: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- ------------------ AUTH; COOKIE-ECHO ----------------> <-------------------- COOKIE-ACK --------------------- RFC4895, section 6.3. Receiving Authenticated Chunks says: The receiver MUST use the HMAC algorithm indicated in the HMAC Identifier field. If this algorithm was not specified by the receiver in the HMAC-ALGO parameter in the INIT or INIT-ACK chunk during association setup, the AUTH chunk and all the chunks after it MUST be discarded and an ERROR chunk SHOULD be sent with the error cause defined in Section 4.1. [...] If no endpoint pair shared key has been configured for that Shared Key Identifier, all authenticated chunks MUST be silently discarded. [...] When an endpoint requires COOKIE-ECHO chunks to be authenticated, some special procedures have to be followed because the reception of a COOKIE-ECHO chunk might result in the creation of an SCTP association. If a packet arrives containing an AUTH chunk as a first chunk, a COOKIE-ECHO chunk as the second chunk, and possibly more chunks after them, and the receiver does not have an STCB for that packet, then authentication is based on the contents of the COOKIE-ECHO chunk. In this situation, the receiver MUST authenticate the chunks in the packet by using the RANDOM parameters, CHUNKS parameters and HMAC_ALGO parameters obtained from the COOKIE-ECHO chunk, and possibly a local shared secret as inputs to the authentication procedure specified in Section 6.3. If authentication fails, then the packet is discarded. If the authentication is successful, the COOKIE-ECHO and all the chunks after the COOKIE-ECHO MUST be processed. If the receiver has an STCB, it MUST process the AUTH chunk as described above using the STCB from the existing association to authenticate the COOKIE-ECHO chunk and all the chunks after it. [...] Commit bbd0d59809f9 introduced the possibility to receive and verification of AUTH chunk, including the edge case for authenticated COOKIE-ECHO. On reception of COOKIE-ECHO, the function sctp_sf_do_5_1D_ce() handles processing, unpacks and creates a new association if it passed sanity checks and also tests for authentication chunks being present. After a new association has been processed, it invokes sctp_process_init() on the new association and walks through the parameter list it received from the INIT chunk. It checks SCTP_PARAM_RANDOM, SCTP_PARAM_HMAC_ALGO and SCTP_PARAM_CHUNKS, and copies them into asoc->peer meta data (peer_random, peer_hmacs, peer_chunks) in case sysctl -w net.sctp.auth_enable=1 is set. If in INIT's SCTP_PARAM_SUPPORTED_EXT parameter SCTP_CID_AUTH is set, peer_random != NULL and peer_hmacs != NULL the peer is to be assumed asoc->peer.auth_capable=1, in any other case asoc->peer.auth_capable=0. Now, if in sctp_sf_do_5_1D_ce() chunk->auth_chunk is available, we set up a fake auth chunk and pass that on to sctp_sf_authenticate(), which at latest in sctp_auth_calculate_hmac() reliably dereferences a NULL pointer at position 0..0008 when setting up the crypto key in crypto_hash_setkey() by using asoc->asoc_shared_key that is NULL as condition key_id == asoc->active_key_id is true if the AUTH chunk was injected correctly from remote. This happens no matter what net.sctp.auth_enable sysctl says. The fix is to check for net->sctp.auth_enable and for asoc->peer.auth_capable before doing any operations like sctp_sf_authenticate() as no key is activated in sctp_auth_asoc_init_active_key() for each case. Now as RFC4895 section 6.3 states that if the used HMAC-ALGO passed from the INIT chunk was not used in the AUTH chunk, we SHOULD send an error; however in this case it would be better to just silently discard such a maliciously prepared handshake as we didn't even receive a parameter at all. Also, as our endpoint has no shared key configured, section 6.3 says that MUST silently discard, which we are doing from now onwards. Before calling sctp_sf_pdiscard(), we need not only to free the association, but also the chunk->auth_chunk skb, as commit bbd0d59809f9 created a skb clone in that case. I have tested this locally by using netfilter's nfqueue and re-injecting packets into the local stack after maliciously modifying the INIT chunk (removing RANDOM; HMAC-ALGO param) and the SCTP packet containing the COOKIE_ECHO (injecting AUTH chunk before COOKIE_ECHO). Fixed with this patch applied. Fixes: bbd0d59809f9 ("[SCTP]: Implement the receive and verification of AUTH chunk") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Cc: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_statefuns.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index de1a0138317f..7ceb25ba85b8 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -765,6 +765,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, struct sctp_chunk auth; sctp_ierror_t ret; + /* Make sure that we and the peer are AUTH capable */ + if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { + kfree_skb(chunk->auth_chunk); + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + /* set-up our fake chunk so that we can process it */ auth.skb = chunk->auth_chunk; auth.asoc = chunk->asoc; -- cgit v1.2.3 From b59ed9d77e700c22ca6ab042004a48f078de5989 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 11 Feb 2014 16:02:47 +0100 Subject: mac80211: send control port protocol frames to the VO queue commit 1bf4bbb4024dcdab5e57634dd8ae1072d42a53ac upstream. Improves reliability of wifi connections with WPA, since authentication frames are prioritized over normal traffic and also typically exempt from aggregation. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/wme.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index afba19cb6f87..a282fddf8b00 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -153,6 +153,11 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, return IEEE80211_AC_BE; } + if (skb->protocol == sdata->control_port_protocol) { + skb->priority = 7; + return ieee80211_downgrade_queue(sdata, skb); + } + /* use the data classifier to determine what 802.1d tag the * data frame has */ skb->priority = cfg80211_classify8021d(skb); -- cgit v1.2.3 From a7ee1a84a81555b19ec3d02f104bfd70cf0b668a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 20 Feb 2014 09:22:11 +0200 Subject: mac80211: fix AP powersave TX vs. wakeup race commit 1d147bfa64293b2723c4fec50922168658e613ba upstream. There is a race between the TX path and the STA wakeup: while a station is sleeping, mac80211 buffers frames until it wakes up, then the frames are transmitted. However, the RX and TX path are concurrent, so the packet indicating wakeup can be processed while a packet is being transmitted. This can lead to a situation where the buffered frames list is emptied on the one side, while a frame is being added on the other side, as the station is still seen as sleeping in the TX path. As a result, the newly added frame will not be send anytime soon. It might be sent much later (and out of order) when the station goes to sleep and wakes up the next time. Additionally, it can lead to the crash below. Fix all this by synchronising both paths with a new lock. Both path are not fastpath since they handle PS situations. In a later patch we'll remove the extra skb queue locks to reduce locking overhead. BUG: unable to handle kernel NULL pointer dereference at 000000b0 IP: [] ieee80211_report_used_skb+0x11/0x3e0 [mac80211] *pde = 00000000 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC EIP: 0060:[] EFLAGS: 00210282 CPU: 1 EIP is at ieee80211_report_used_skb+0x11/0x3e0 [mac80211] EAX: e5900da0 EBX: 00000000 ECX: 00000001 EDX: 00000000 ESI: e41d00c0 EDI: e5900da0 EBP: ebe458e4 ESP: ebe458b0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 000000b0 CR3: 25a78000 CR4: 000407d0 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff0ff0 DR7: 00000400 Process iperf (pid: 3934, ti=ebe44000 task=e757c0b0 task.ti=ebe44000) iwlwifi 0000:02:00.0: I iwl_pcie_enqueue_hcmd Sending command LQ_CMD (#4e), seq: 0x0903, 92 bytes at 3[3]:9 Stack: e403b32c ebe458c4 00200002 00200286 e403b338 ebe458cc c10960bb e5900da0 ff76a6ec ebe458d8 00000000 e41d00c0 e5900da0 ebe458f0 ff6f1b75 e403b210 ebe4598c ff723dc1 00000000 ff76a6ec e597c978 e403b758 00000002 00000002 Call Trace: [] ieee80211_free_txskb+0x15/0x20 [mac80211] [] invoke_tx_handlers+0x1661/0x1780 [mac80211] [] ieee80211_tx+0x75/0x100 [mac80211] [] ieee80211_xmit+0x8f/0xc0 [mac80211] [] ieee80211_subif_start_xmit+0x4fe/0xe20 [mac80211] [] dev_hard_start_xmit+0x450/0x950 [] sch_direct_xmit+0xa9/0x250 [] __qdisc_run+0x4b/0x150 [] dev_queue_xmit+0x2c2/0xca0 Reported-by: Yaara Rozenblum Signed-off-by: Emmanuel Grumbach Reviewed-by: Stanislaw Gruszka [reword commit log, use a separate lock] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 4 ++++ net/mac80211/sta_info.h | 7 +++---- net/mac80211/tx.c | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 11216bc13b27..a66d0068a664 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -339,6 +339,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return NULL; spin_lock_init(&sta->lock); + spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -1045,6 +1046,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) skb_queue_head_init(&pending); + /* sync with ieee80211_tx_h_unicast_ps_buf */ + spin_lock(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; @@ -1064,6 +1067,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) } ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta); + spin_unlock(&sta->ps_lock); local->total_ps_buffered -= buffered; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index adc30045f99e..3184b2b2853c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -244,6 +244,7 @@ struct sta_ampdu_mlme { * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly + * @ps_lock: used for powersave (when mac80211 is the AP) related locking * @ps_tx_buf: buffers (per AC) of frames to transmit to this station * when it leaves power saving state or polls * @tx_filtered: buffers (per AC) of frames we already tried to @@ -324,10 +325,8 @@ struct sta_info { /* use the accessors defined below */ unsigned long _flags; - /* - * STA powersave frame queues, no more than the internal - * locking required. - */ + /* STA powersave lock and frame queues */ + spinlock_t ps_lock; struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; unsigned long driver_buffered_tids; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fe9d6e7b904b..6d5791d735f3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -461,6 +461,20 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); + + /* sync with ieee80211_sta_ps_deliver_wakeup */ + spin_lock(&sta->ps_lock); + /* + * STA woke up the meantime and all the frames on ps_tx_buf have + * been queued to pending queue. No reordering can happen, go + * ahead and Tx the packet. + */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + spin_unlock(&sta->ps_lock); + return TX_CONTINUE; + } + if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); ps_dbg(tx->sdata, @@ -474,6 +488,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb); + spin_unlock(&sta->ps_lock); if (!timer_pending(&local->sta_cleanup)) mod_timer(&local->sta_cleanup, -- cgit v1.2.3 From e713fe3d4bde100807377a2699587d05721f909b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2014 20:34:34 +0100 Subject: mac80211: don't validate unchanged AP bandwidth while tracking commit 963a1852fbac4f75a2d938fa2e734ef1e6d4c044 upstream. The MLME code in mac80211 must track whether or not the AP changed bandwidth, but if there's no change while tracking it shouldn't do anything, otherwise regulatory updates can make it impossible to connect to certain APs if the regulatory database doesn't match the information from the AP. See the precise scenario described in the code. This still leaves some possible problems with CSA or if the AP actually changed bandwidth, but those cases are less common and won't completely prevent using it. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=70881 Reported-and-tested-by: Nate Carlson Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5b4328dcbe4e..400b219ca090 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -359,6 +359,28 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = 0; out: + /* + * When tracking the current AP, don't do any further checks if the + * new chandef is identical to the one we're currently using for the + * connection. This keeps us from playing ping-pong with regulatory, + * without it the following can happen (for example): + * - connect to an AP with 80 MHz, world regdom allows 80 MHz + * - AP advertises regdom US + * - CRDA loads regdom US with 80 MHz prohibited (old database) + * - the code below detects an unsupported channel, downgrades, and + * we disconnect from the AP in the caller + * - disconnect causes CRDA to reload world regdomain and the game + * starts anew. + * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881) + * + * It seems possible that there are still scenarios with CSA or real + * bandwidth changes where a this could happen, but those cases are + * less common and wouldn't completely prevent using the AP. + */ + if (tracking && + cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef)) + return ret; + /* don't print the message below for VHT mismatch if VHT is disabled */ if (ret & IEEE80211_STA_DISABLE_VHT) vht_chandef = *chandef; -- cgit v1.2.3 From c02d9a9697134e6fd6527c33ae0d78a8faad6e41 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Feb 2014 20:47:53 +0100 Subject: mac80211: fix association to 20/40 MHz VHT networks commit cb664981607a6b5b3d670ad57bbda893b2528d96 upstream. When a VHT network uses 20 or 40 MHz as per the HT operation information, the channel center frequency segment 0 field in the VHT operation information is reserved, so ignore it. This fixes association with such networks when the AP puts 0 into the field, previously we'd disconnect due to an invalid channel with the message wlan0: AP VHT information is invalid, disable VHT Fixes: f2d9d270c15ae ("mac80211: support VHT association") Reported-by: Tim Nelson Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 400b219ca090..49bc2246bd86 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -310,6 +310,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, switch (vht_oper->chan_width) { case IEEE80211_VHT_CHANWIDTH_USE_HT: vht_chandef.width = chandef->width; + vht_chandef.center_freq1 = chandef->center_freq1; break; case IEEE80211_VHT_CHANWIDTH_80MHZ: vht_chandef.width = NL80211_CHAN_WIDTH_80; -- cgit v1.2.3 From 30f754e9d9e0508024e6f872dfc888551102875f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Mar 2014 13:46:53 +0100 Subject: mac80211: clear sequence/fragment number in QoS-null frames commit 864a6040f395464003af8dd0d8ca86fed19866d4 upstream. Avoid leaking data by sending uninitialized memory and setting an invalid (non-zero) fragment number (the sequence number is ignored anyway) by setting the seq_ctrl field to zero. Fixes: 3f52b7e328c5 ("mac80211: mesh power save basics") Fixes: ce662b44ce22 ("mac80211: send (QoS) Null if no buffered frames") Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh_ps.c | 1 + net/mac80211/sta_info.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 3b7bfc01ee36..ddda201832b3 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -36,6 +36,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta) sdata->vif.addr); nullfunc->frame_control = fc; nullfunc->duration_id = 0; + nullfunc->seq_ctrl = 0; /* no address resolution for this frame -> set addr 1 immediately */ memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a66d0068a664..0418777c361f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1114,6 +1114,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); -- cgit v1.2.3 From 83bb80f4ade8b6fa1e62a46aa67d52cc9fa12f0b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 5 Mar 2014 14:29:58 +1100 Subject: net: unix socket code abuses csum_partial commit 0a13404dd3bf4ea870e3d96270b5a382edca85c0 upstream. The unix socket code is using the result of csum_partial to hash into a lookup table: unix_hash_fold(csum_partial(sunaddr, len, 0)); csum_partial is only guaranteed to produce something that can be folded into a checksum, as its prototype explains: * returns a 32-bit number suitable for feeding into itself * or csum_tcpudp_magic The 32bit value should not be used directly. Depending on the alignment, the ppc64 csum_partial will return different 32bit partial checksums that will fold into the same 16bit checksum. This difference causes the following testcase (courtesy of Gustavo) to sometimes fail: #include #include int main() { int fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); int i = 1; setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &i, 4); struct sockaddr addr; addr.sa_family = AF_LOCAL; bind(fd, &addr, 2); listen(fd, 128); struct sockaddr_storage ss; socklen_t sslen = (socklen_t)sizeof(ss); getsockname(fd, (struct sockaddr*)&ss, &sslen); fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); if (connect(fd, (struct sockaddr*)&ss, sslen) == -1){ perror(NULL); return 1; } printf("OK\n"); return 0; } As suggested by davem, fix this by using csum_fold to fold the partial 32bit checksum into a 16bit checksum before using it. Signed-off-by: Anton Blanchard Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3ca7927520b0..94d334781554 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -160,9 +160,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline unsigned int unix_hash_fold(__wsum n) { - unsigned int hash = (__force unsigned int)n; + unsigned int hash = (__force unsigned int)csum_fold(n); - hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); } -- cgit v1.2.3 From 4892ed8deb6989cd9c831156920bae490d6ad4d1 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 2 Dec 2013 19:11:48 -0800 Subject: libceph: block I/O when PAUSE or FULL osd map flags are set commit d29adb34a94715174c88ca93e8aba955850c9bde upstream. The PAUSEWR and PAUSERD flags are meant to stop the cluster from processing writes and reads, respectively. The FULL flag is set when the cluster determines that it is out of space, and will no longer process writes. PAUSEWR and PAUSERD are purely client-side settings already implemented in userspace clients. The osd does nothing special with these flags. When the FULL flag is set, however, the osd responds to all writes with -ENOSPC. For cephfs, this makes sense, but for rbd the block layer translates this into EIO. If a cluster goes from full to non-full quickly, a filesystem on top of rbd will not behave well, since some writes succeed while others get EIO. Fix this by blocking any writes when the FULL flag is set in the osd client. This is the same strategy used by userspace, so apply it by default. A follow-on patch makes this configurable. __map_request() is called to re-target osd requests in case the available osds changed. Add a paused field to a ceph_osd_request, and set it whenever an appropriate osd map flag is set. Avoid queueing paused requests in __map_request(), but force them to be resent if they become unpaused. Also subscribe to the next osd map from the monitor if any of these flags are set, so paused requests can be unblocked as soon as possible. Fixes: http://tracker.ceph.com/issues/6079 Reviewed-by: Sage Weil Signed-off-by: Josh Durgin Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index bc0016e3e5ac..49486f35159f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1224,6 +1224,22 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, } EXPORT_SYMBOL(ceph_osdc_set_request_linger); +/* + * Returns whether a request should be blocked from being sent + * based on the current osdmap and osd_client settings. + * + * Caller should hold map_sem for read. + */ +static bool __req_should_be_paused(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) +{ + bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD); + bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) || + ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL); + return (req->r_flags & CEPH_OSD_FLAG_READ && pauserd) || + (req->r_flags & CEPH_OSD_FLAG_WRITE && pausewr); +} + /* * Pick an osd (the first 'up' osd in the pg), allocate the osd struct * (as needed), and set the request r_osd appropriately. If there is @@ -1241,6 +1257,7 @@ static int __map_request(struct ceph_osd_client *osdc, int acting[CEPH_PG_MAX_SIZE]; int o = -1, num = 0; int err; + bool was_paused; dout("map_request %p tid %lld\n", req, req->r_tid); err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap, @@ -1257,12 +1274,18 @@ static int __map_request(struct ceph_osd_client *osdc, num = err; } + was_paused = req->r_paused; + req->r_paused = __req_should_be_paused(osdc, req); + if (was_paused && !req->r_paused) + force_resend = 1; + if ((!force_resend && req->r_osd && req->r_osd->o_osd == o && req->r_sent >= req->r_osd->o_incarnation && req->r_num_pg_osds == num && memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || - (req->r_osd == NULL && o == -1)) + (req->r_osd == NULL && o == -1) || + req->r_paused) return 0; /* no change */ dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n", @@ -1797,7 +1820,9 @@ done: * we find out when we are no longer full and stop returning * ENOSPC. */ - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) + if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD) || + ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR)) ceph_monc_request_next_osdmap(&osdc->client->monc); mutex_lock(&osdc->request_mutex); -- cgit v1.2.3 From 27ff9f136acde1c1b5480f5fc3a874a7ca046f71 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Tue, 10 Dec 2013 09:35:13 -0800 Subject: libceph: resend all writes after the osdmap loses the full flag commit 9a1ea2dbff11547a8e664f143c1ffefc586a577a upstream. With the current full handling, there is a race between osds and clients getting the first map marked full. If the osd wins, it will return -ENOSPC to any writes, but the client may already have writes in flight. This results in the client getting the error and propagating it up the stack. For rbd, the block layer turns this into EIO, which can cause corruption in filesystems above it. To avoid this race, osds are being changed to drop writes that came from clients with an osdmap older than the last osdmap marked full. In order for this to work, clients must resend all writes after they encounter a full -> not full transition in the osdmap. osds will wait for an updated map instead of processing a request from a client with a newer map, so resent writes will not be dropped by the osd unless there is another not full -> full transition. This approach requires both osds and clients to be fixed to avoid the race. Old clients talking to osds with this fix may hang instead of returning EIO and potentially corrupting an fs. New clients talking to old osds have the same behavior as before if they encounter this race. Fixes: http://tracker.ceph.com/issues/6938 Reviewed-by: Sage Weil Signed-off-by: Josh Durgin Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 49486f35159f..3663a305daf7 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1629,14 +1629,17 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) * * Caller should hold map_sem for read. */ -static void kick_requests(struct ceph_osd_client *osdc, int force_resend) +static void kick_requests(struct ceph_osd_client *osdc, bool force_resend, + bool force_resend_writes) { struct ceph_osd_request *req, *nreq; struct rb_node *p; int needmap = 0; int err; + bool force_resend_req; - dout("kick_requests %s\n", force_resend ? " (force resend)" : ""); + dout("kick_requests %s %s\n", force_resend ? " (force resend)" : "", + force_resend_writes ? " (force resend writes)" : ""); mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; ) { req = rb_entry(p, struct ceph_osd_request, r_node); @@ -1661,7 +1664,10 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) continue; } - err = __map_request(osdc, req, force_resend); + force_resend_req = force_resend || + (force_resend_writes && + req->r_flags & CEPH_OSD_FLAG_WRITE); + err = __map_request(osdc, req, force_resend_req); if (err < 0) continue; /* error */ if (req->r_osd == NULL) { @@ -1681,7 +1687,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) r_linger_item) { dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); - err = __map_request(osdc, req, force_resend); + err = __map_request(osdc, req, + force_resend || force_resend_writes); dout("__map_request returned %d\n", err); if (err == 0) continue; /* no change and no osd was specified */ @@ -1723,6 +1730,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) struct ceph_osdmap *newmap = NULL, *oldmap; int err; struct ceph_fsid fsid; + bool was_full; dout("handle_map have %u\n", osdc->osdmap ? osdc->osdmap->epoch : 0); p = msg->front.iov_base; @@ -1736,6 +1744,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) down_write(&osdc->map_sem); + was_full = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL); + /* incremental maps */ ceph_decode_32_safe(&p, end, nr_maps, bad); dout(" %d inc maps\n", nr_maps); @@ -1760,7 +1770,10 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = newmap; } - kick_requests(osdc, 0); + was_full = was_full || + ceph_osdmap_flag(osdc->osdmap, + CEPH_OSDMAP_FULL); + kick_requests(osdc, 0, was_full); } else { dout("ignoring incremental map %u len %d\n", epoch, maplen); @@ -1803,7 +1816,10 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) skipped_map = 1; ceph_osdmap_destroy(oldmap); } - kick_requests(osdc, skipped_map); + was_full = was_full || + ceph_osdmap_flag(osdc->osdmap, + CEPH_OSDMAP_FULL); + kick_requests(osdc, skipped_map, was_full); } p += maplen; nr_maps--; -- cgit v1.2.3 From b086eb683c73fb6a506eae277f65eeac597a6f16 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 6 Jan 2014 00:57:54 +0100 Subject: netfilter: nf_conntrack_dccp: fix skb_header_pointer API usages commit b22f5126a24b3b2f15448c3f2a254fc10cbc2b92 upstream. Some occurences in the netfilter tree use skb_header_pointer() in the following way ... struct dccp_hdr _dh, *dh; ... skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); ... where dh itself is a pointer that is being passed as the copy buffer. Instead, we need to use &_dh as the forth argument so that we're copying the data into an actual buffer that sits on the stack. Currently, we probably could overwrite memory on the stack (e.g. with a possibly mal-formed DCCP packet), but unintentionally, as we only want the buffer to be placed into _dh variable. Fixes: 2bc780499aa3 ("[NETFILTER]: nf_conntrack: add DCCP protocol support") Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_proto_dccp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a99b6c3427b0..59359bec328a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -428,7 +428,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, const char *msg; u_int8_t state; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; @@ -486,7 +486,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, u_int8_t type, old_state, new_state; enum ct_dccp_roles role; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); type = dh->dccph_type; @@ -577,7 +577,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, unsigned int cscov; const char *msg; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); if (dh == NULL) { msg = "nf_ct_dccp: short packet "; goto out_invalid; -- cgit v1.2.3 From f7a2e253a883ddea610b5d465f95d56a41fac325 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 3 Mar 2014 23:19:18 +0100 Subject: net: fix for a race condition in the inet frag code [ Upstream commit 24b9bf43e93e0edd89072da51cf1fab95fc69dec ] I stumbled upon this very serious bug while hunting for another one, it's a very subtle race condition between inet_frag_evictor, inet_frag_intern and the IPv4/6 frag_queue and expire functions (basically the users of inet_frag_kill/inet_frag_put). What happens is that after a fragment has been added to the hash chain but before it's been added to the lru_list (inet_frag_lru_add) in inet_frag_intern, it may get deleted (either by an expired timer if the system load is high or the timer sufficiently low, or by the fraq_queue function for different reasons) before it's added to the lru_list, then after it gets added it's a matter of time for the evictor to get to a piece of memory which has been freed leading to a number of different bugs depending on what's left there. I've been able to trigger this on both IPv4 and IPv6 (which is normal as the frag code is the same), but it's been much more difficult to trigger on IPv4 due to the protocol differences about how fragments are treated. The setup I used to reproduce this is: 2 machines with 4 x 10G bonded in a RR bond, so the same flow can be seen on multiple cards at the same time. Then I used multiple instances of ping/ping6 to generate fragmented packets and flood the machines with them while running other processes to load the attacked machine. *It is very important to have the _same flow_ coming in on multiple CPUs concurrently. Usually the attacked machine would die in less than 30 minutes, if configured properly to have many evictor calls and timeouts it could happen in 10 minutes or so. An important point to make is that any caller (frag_queue or timer) of inet_frag_kill will remove both the timer refcount and the original/guarding refcount thus removing everything that's keeping the frag from being freed at the next inet_frag_put. All of this could happen before the frag was ever added to the LRU list, then it gets added and the evictor uses a freed fragment. An example for IPv6 would be if a fragment is being added and is at the stage of being inserted in the hash after the hash lock is released, but before inet_frag_lru_add executes (or is able to obtain the lru lock) another overlapping fragment for the same flow arrives at a different CPU which finds it in the hash, but since it's overlapping it drops it invoking inet_frag_kill and thus removing all guarding refcounts, and afterwards freeing it by invoking inet_frag_put which removes the last refcount added previously by inet_frag_find, then inet_frag_lru_add gets executed by inet_frag_intern and we have a freed fragment in the lru_list. The fix is simple, just move the lru_add under the hash chain locked region so when a removing function is called it'll have to wait for the fragment to be added to the lru_list, and then it'll remove it (it works because the hash chain removal is done before the lru_list one and there's no window between the two list adds when the frag can get dropped). With this fix applied I couldn't kill the same machine in 24 hours with the same setup. Fixes: 3ef0eb0db4bf ("net: frag, move LRU list maintenance outside of rwlock") CC: Florian Westphal CC: Jesper Dangaard Brouer CC: David S. Miller Signed-off-by: Nikolay Aleksandrov Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7e06641e36ae..f7f8af9a5cc4 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -283,9 +283,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); + inet_frag_lru_add(nf, qp); spin_unlock(&hb->chain_lock); read_unlock(&f->lock); - inet_frag_lru_add(nf, qp); + return qp; } -- cgit v1.2.3 From ec494e10043dc71f30b9f32a9db15a6c2a9ae051 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 4 Mar 2014 16:35:51 +0100 Subject: net: sctp: fix skb leakage in COOKIE ECHO path of chunk->auth_chunk [ Upstream commit c485658bae87faccd7aed540fd2ca3ab37992310 ] While working on ec0223ec48a9 ("net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable"), we noticed that there's a skb memory leakage in the error path. Running the same reproducer as in ec0223ec48a9 and by unconditionally jumping to the error label (to simulate an error condition) in sctp_sf_do_5_1D_ce() receive path lets kmemleak detector bark about the unfreed chunk->auth_chunk skb clone: Unreferenced object 0xffff8800b8f3a000 (size 256): comm "softirq", pid 0, jiffies 4294769856 (age 110.757s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 89 ab 75 5e d4 01 58 13 00 00 00 00 00 00 00 00 ..u^..X......... backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0xc8/0x210 [] skb_clone+0x49/0xb0 [] sctp_endpoint_bh_rcv+0x1d9/0x230 [sctp] [] sctp_inq_push+0x4c/0x70 [sctp] [] sctp_rcv+0x82e/0x9a0 [sctp] [] ip_local_deliver_finish+0xa8/0x210 [] nf_reinject+0xbf/0x180 [] nfqnl_recv_verdict+0x1d2/0x2b0 [nfnetlink_queue] [] nfnetlink_rcv_msg+0x14b/0x250 [nfnetlink] [] netlink_rcv_skb+0xa9/0xc0 [] nfnetlink_rcv+0x23f/0x408 [nfnetlink] [] netlink_unicast+0x168/0x250 [] netlink_sendmsg+0x2e1/0x3f0 [] sock_sendmsg+0x8b/0xc0 [] ___sys_sendmsg+0x369/0x380 What happens is that commit bbd0d59809f9 clones the skb containing the AUTH chunk in sctp_endpoint_bh_rcv() when having the edge case that an endpoint requires COOKIE-ECHO chunks to be authenticated: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- ------------------ AUTH; COOKIE-ECHO ----------------> <-------------------- COOKIE-ACK --------------------- When we enter sctp_sf_do_5_1D_ce() and before we actually get to the point where we process (and subsequently free) a non-NULL chunk->auth_chunk, we could hit the "goto nomem_init" path from an error condition and thus leave the cloned skb around w/o freeing it. The fix is to centrally free such clones in sctp_chunk_destroy() handler that is invoked from sctp_chunk_free() after all refs have dropped; and also move both kfree_skb(chunk->auth_chunk) there, so that chunk->auth_chunk is either NULL (since sctp_chunkify() allocs new chunks through kmem_cache_zalloc()) or non-NULL with a valid skb pointer. chunk->skb and chunk->auth_chunk are the only skbs in the sctp_chunk structure that need to be handeled. While at it, we should use consume_skb() for both. It is the same as dev_kfree_skb() but more appropriately named as we are not a device but a protocol. Also, this effectively replaces the kfree_skb() from both invocations into consume_skb(). Functions are the same only that kfree_skb() assumes that the frame was being dropped after a failure (e.g. for tools like drop monitor), usage of consume_skb() seems more appropriate in function sctp_chunk_destroy() though. Fixes: bbd0d59809f9 ("[SCTP]: Implement the receive and verification of AUTH chunk") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Cc: Neil Horman Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_make_chunk.c | 4 ++-- net/sctp/sm_statefuns.c | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index cf579e71cff0..673921cfb97e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1403,8 +1403,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk) BUG_ON(!list_empty(&chunk->list)); list_del_init(&chunk->transmitted_list); - /* Free the chunk skb data and the SCTP_chunk stub itself. */ - dev_kfree_skb(chunk->skb); + consume_skb(chunk->skb); + consume_skb(chunk->auth_chunk); SCTP_DBG_OBJCNT_DEC(chunk); kmem_cache_free(sctp_chunk_cachep, chunk); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7ceb25ba85b8..9973079401c4 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -767,7 +767,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, /* Make sure that we and the peer are AUTH capable */ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { - kfree_skb(chunk->auth_chunk); sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } @@ -782,10 +781,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, auth.transport = chunk->transport; ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); - - /* We can now safely free the auth_chunk clone */ - kfree_skb(chunk->auth_chunk); - if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); -- cgit v1.2.3 From 47c044663922c0671a04df426c48b444a9a3a50d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 4 Mar 2014 03:57:35 +0100 Subject: bridge: multicast: add sanity check for query source addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6565b9eeef194afbb3beec80d6dd2447f4091f8c ] MLD queries are supposed to have an IPv6 link-local source address according to RFC2710, section 4 and RFC3810, section 5.1.14. This patch adds a sanity check to ignore such broken MLD queries. Without this check, such malformed MLD queries can result in a denial of service: The queries are ignored by any MLD listener therefore they will not respond with an MLD report. However, without this patch these malformed MLD queries would enable the snooping part in the bridge code, potentially shutting down the according ports towards these hosts for multicast traffic as the bridge did not learn about these listeners. Reported-by: Jan Stancek Signed-off-by: Linus Lüssing Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2a180a380181..81de0106528b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1176,6 +1176,12 @@ static int br_ip6_multicast_query(struct net_bridge *br, br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { + err = -EINVAL; + goto out; + } + if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; -- cgit v1.2.3 From 458b05e1e8b8034e2c9fee5f8170545343e08a3d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Mar 2014 18:06:41 +0100 Subject: inet: frag: make sure forced eviction removes all frags [ Upstream commit e588e2f286ed7da011ed357c24c5b9a554e26595 ] Quoting Alexander Aring: While fragmentation and unloading of 6lowpan module I got this kernel Oops after few seconds: BUG: unable to handle kernel paging request at f88bbc30 [..] Modules linked in: ipv6 [last unloaded: 6lowpan] Call Trace: [] ? call_timer_fn+0x54/0xb3 [] ? process_timeout+0xa/0xa [] run_timer_softirq+0x140/0x15f Problem is that incomplete frags are still around after unload; when their frag expire timer fires, we get crash. When a netns is removed (also done when unloading module), inet_frag calls the evictor with 'force' argument to purge remaining frags. The evictor loop terminates when accounted memory ('work') drops to 0 or the lru-list becomes empty. However, the mem accounting is done via percpu counters and may not be accurate, i.e. loop may terminate prematurely. Alter evictor to only stop once the lru list is empty when force is requested. Reported-by: Phoebe Buckheister Reported-by: Alexander Aring Tested-by: Alexander Aring Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index f7f8af9a5cc4..af02a39175e3 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -211,7 +211,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) } work = frag_mem_limit(nf) - nf->low_thresh; - while (work > 0) { + while (work > 0 || force) { spin_lock(&nf->lru_lock); if (list_empty(&nf->lru_list)) { -- cgit v1.2.3 From f423fefea41979114d6895a38f112505797aeb48 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Mar 2014 18:42:27 -0700 Subject: net: unix: non blocking recvmsg() should not return -EINTR [ Upstream commit de1443916791d75fdd26becb116898277bb0273f ] Some applications didn't expect recvmsg() on a non blocking socket could return -EINTR. This possibility was added as a side effect of commit b3ca9b02b00704 ("net: fix multithreaded signal handling in unix recv routines"). To hit this bug, you need to be a bit unlucky, as the u->readlock mutex is usually held for very small periods. Fixes: b3ca9b02b00704 ("net: fix multithreaded signal handling in unix recv routines") Signed-off-by: Eric Dumazet Cc: Rainer Weikusat Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 94d334781554..75e198d029d2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1792,8 +1792,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } @@ -1913,6 +1916,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = msg->msg_name; int copied = 0; + int noblock = flags & MSG_DONTWAIT; int check_creds = 0; int target; int err = 0; @@ -1928,7 +1932,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); - timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, noblock); /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg @@ -1940,8 +1944,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(timeo); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } -- cgit v1.2.3 From cb82e2ab0ee9944b55d97e42f66d07ce417c7119 Mon Sep 17 00:00:00 2001 From: Anton Nayshtut Date: Wed, 5 Mar 2014 08:30:08 +0200 Subject: ipv6: Fix exthdrs offload registration. [ Upstream commit d2d273ffabd315eecefce21a4391d44b6e156b73 ] Without this fix, ipv6_exthdrs_offload_init doesn't register IPPROTO_DSTOPTS offload, but returns 0 (as the IPPROTO_ROUTING registration actually succeeds). This then causes the ipv6_gso_segment to drop IPv6 packets with IPPROTO_DSTOPTS header. The issue detected and the fix verified by running MS HCK Offload LSO test on top of QEMU Windows guests, as this test sends IPv6 packets with IPPROTO_DSTOPTS. Signed-off-by: Anton Nayshtut Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/exthdrs_offload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index cf77f3abfd06..447a7fbd1bb6 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void) int ret; ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); - if (!ret) + if (ret) goto out; ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); - if (!ret) + if (ret) goto out_rt; out: -- cgit v1.2.3 From 8f41e6a828ce53021ebbf2594195a7ec8ee0628e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 6 Mar 2014 17:51:57 +0100 Subject: ipv6: don't set DST_NOCOUNT for remotely added routes [ Upstream commit c88507fbad8055297c1d1e21e599f46960cbee39 ] DST_NOCOUNT should only be used if an authorized user adds routes locally. In case of routes which are added on behalf of router advertisments this flag must not get used as it allows an unlimited number of routes getting added remotely. Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6c389300f4e9..3fde3e977862 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1428,7 +1428,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; -- cgit v1.2.3 From 471e50f30749fbd9a410b29b6549aa26f76fc021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Bostr=C3=B6m?= Date: Mon, 10 Mar 2014 16:17:15 +0100 Subject: vlan: Set correct source MAC address with TX VLAN offload enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dd38743b4cc2f86be250eaf156cf113ba3dd531a ] With TX VLAN offload enabled the source MAC address for frames sent using the VLAN interface is currently set to the address of the real interface. This is wrong since the VLAN interface may be configured with a different address. The bug was introduced in commit 2205369a314e12fcec4781cc73ac9c08fc2b47de ("vlan: Fix header ops passthru when doing TX VLAN offload."). This patch sets the source address before calling the create function of the real interface. Signed-off-by: Peter Boström Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan_dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4af64afc7022..48db8587cccf 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -557,6 +557,9 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; + if (saddr == NULL) + saddr = dev->dev_addr; + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); } -- cgit v1.2.3 From cbbb5a252da5584e9a48e412e69ea09b4dd34cac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Mar 2014 09:50:11 -0700 Subject: tcp: tcp_release_cb() should release socket ownership [ Upstream commit c3f9b01849ef3bc69024990092b9f42e20df7797 ] Lars Persson reported following deadlock : -000 |M:0x0:0x802B6AF8(asm) <-- arch_spin_lock -001 |tcp_v4_rcv(skb = 0x8BD527A0) <-- sk = 0x8BE6B2A0 -002 |ip_local_deliver_finish(skb = 0x8BD527A0) -003 |__netif_receive_skb_core(skb = 0x8BD527A0, ?) -004 |netif_receive_skb(skb = 0x8BD527A0) -005 |elk_poll(napi = 0x8C770500, budget = 64) -006 |net_rx_action(?) -007 |__do_softirq() -008 |do_softirq() -009 |local_bh_enable() -010 |tcp_rcv_established(sk = 0x8BE6B2A0, skb = 0x87D3A9E0, th = 0x814EBE14, ?) -011 |tcp_v4_do_rcv(sk = 0x8BE6B2A0, skb = 0x87D3A9E0) -012 |tcp_delack_timer_handler(sk = 0x8BE6B2A0) -013 |tcp_release_cb(sk = 0x8BE6B2A0) -014 |release_sock(sk = 0x8BE6B2A0) -015 |tcp_sendmsg(?, sk = 0x8BE6B2A0, ?, ?) -016 |sock_sendmsg(sock = 0x8518C4C0, msg = 0x87D8DAA8, size = 4096) -017 |kernel_sendmsg(?, ?, ?, ?, size = 4096) -018 |smb_send_kvec() -019 |smb_send_rqst(server = 0x87C4D400, rqst = 0x87D8DBA0) -020 |cifs_call_async() -021 |cifs_async_writev(wdata = 0x87FD6580) -022 |cifs_writepages(mapping = 0x852096E4, wbc = 0x87D8DC88) -023 |__writeback_single_inode(inode = 0x852095D0, wbc = 0x87D8DC88) -024 |writeback_sb_inodes(sb = 0x87D6D800, wb = 0x87E4A9C0, work = 0x87D8DD88) -025 |__writeback_inodes_wb(wb = 0x87E4A9C0, work = 0x87D8DD88) -026 |wb_writeback(wb = 0x87E4A9C0, work = 0x87D8DD88) -027 |wb_do_writeback(wb = 0x87E4A9C0, force_wait = 0) -028 |bdi_writeback_workfn(work = 0x87E4A9CC) -029 |process_one_work(worker = 0x8B045880, work = 0x87E4A9CC) -030 |worker_thread(__worker = 0x8B045880) -031 |kthread(_create = 0x87CADD90) -032 |ret_from_kernel_thread(asm) Bug occurs because __tcp_checksum_complete_user() enables BH, assuming it is running from softirq context. Lars trace involved a NIC without RX checksum support but other points are problematic as well, like the prequeue stuff. Problem is triggered by a timer, that found socket being owned by user. tcp_release_cb() should call tcp_write_timer_handler() or tcp_delack_timer_handler() in the appropriate context : BH disabled and socket lock held, but 'owned' field cleared, as if they were running from timer handlers. Fixes: 6f458dfb4092 ("tcp: improve latencies of timer triggered events") Reported-by: Lars Persson Tested-by: Lars Persson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 5 ++++- net/ipv4/tcp_output.c | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 3ba527074f7f..d743099250f4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2309,10 +2309,13 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); + /* Warning : release_cb() might need to release sk ownership, + * ie call sock_release_ownership(sk) before us. + */ if (sk->sk_prot->release_cb) sk->sk_prot->release_cb(sk); - sk->sk_lock.owned = 0; + sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6da3d94a114b..4a4e8746d1b2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -755,6 +755,17 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); + /* Here begins the tricky part : + * We are called from release_sock() with : + * 1) BH disabled + * 2) sk_lock.slock spinlock held + * 3) socket owned by us (sk->sk_lock.owned == 1) + * + * But following code is meant to be called from BH handlers, + * so we should keep BH disabled, but early release socket ownership + */ + sock_release_ownership(sk); + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); __sock_put(sk); -- cgit v1.2.3 From 29a322f4572bcb20bc800e40b7f156f4ce0766fb Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Tue, 11 Mar 2014 11:58:27 +0000 Subject: net: socket: error on a negative msg_namelen [ Upstream commit dbb490b96584d4e958533fb637f08b557f505657 ] When copying in a struct msghdr from the user, if the user has set the msg_namelen parameter to a negative value it gets clamped to a valid size due to a comparison between signed and unsigned values. Ensure the syscall errors when the user passes in a negative value. Signed-off-by: Matthew Leach Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index ac72efc3d965..fc90b4f0da3c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1964,6 +1964,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, { if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; -- cgit v1.2.3 From 9b12db3dd5abcf63b452296f34440dbdff6c16e5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 12 Mar 2014 22:13:19 +0100 Subject: ipv6: Avoid unnecessary temporary addresses being generated [ Upstream commit ecab67015ef6e3f3635551dcc9971cf363cc1cd5 ] tmp_prefered_lft is an offset to ifp->tstamp, not now. Therefore age needs to be added to the condition. Age calculation in ipv6_create_tempaddr is different from the one in addrconf_verify and doesn't consider ADDRCONF_TIMER_FUZZ_MINUS. This can cause age in ipv6_create_tempaddr to be less than the one in addrconf_verify and therefore unnecessary temporary address to be generated. Use age calculation as in addrconf_modify to avoid this. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b78a3ee93d52..7bcdd0df68db 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1111,8 +1111,11 @@ retry: * Lifetime is greater than REGEN_ADVANCE time units. In particular, * an implementation must not create a temporary address with a zero * Preferred Lifetime. + * Use age calculation as in addrconf_verify to avoid unnecessary + * temporary addresses being generated. */ - if (tmp_prefered_lft <= regen_advance) { + age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + if (tmp_prefered_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; -- cgit v1.2.3 From 8e5612a299431024c3b8c136ba4cb7c523dec934 Mon Sep 17 00:00:00 2001 From: lucien Date: Mon, 17 Mar 2014 12:51:01 +0800 Subject: ipv6: ip6_append_data_mtu do not handle the mtu of the second fragment properly [ Upstream commit e367c2d03dba4c9bcafad24688fadb79dd95b218 ] In ip6_append_data_mtu(), when the xfrm mode is not tunnel(such as transport),the ipsec header need to be added in the first fragment, so the mtu will decrease to reserve space for it, then the second fragment come, the mtu should be turn back, as the commit 0c1833797a5a6ec23ea9261d979aa18078720b74 said. however, in the commit a493e60ac4bbe2e977e7129d6d8cbb0dd236be, it use *mtu = min(*mtu, ...) to change the mtu, which lead to the new mtu is alway equal with the first fragment's. and cannot turn back. when I test through ping6 -c1 -s5000 $ip (mtu=1280): ...frag (0|1232) ESP(spi=0x00002000,seq=0xb), length 1232 ...frag (1232|1216) ...frag (2448|1216) ...frag (3664|1216) ...frag (4880|164) which should be: ...frag (0|1232) ESP(spi=0x00001000,seq=0x1), length 1232 ...frag (1232|1232) ...frag (2464|1232) ...frag (3696|1232) ...frag (4928|116) so delete the min() when change back the mtu. Signed-off-by: Xin Long Fixes: 75a493e60ac4bb ("ipv6: ip6_append_data_mtu did not care about pmtudisc and frag_size") Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 98a262b759ae..390b05d488b8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1108,21 +1108,19 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int fragheaderlen, struct sk_buff *skb, struct rt6_info *rt, - bool pmtuprobe) + unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { /* first fragment, reserve header_len */ - *mtu = *mtu - rt->dst.header_len; + *mtu = orig_mtu - rt->dst.header_len; } else { /* * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = min(*mtu, pmtuprobe ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path)); + *mtu = orig_mtu; } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1139,7 +1137,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; int exthdrlen; int dst_exthdrlen; int hh_len; @@ -1221,6 +1219,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, dst_exthdrlen = 0; mtu = cork->fragsize; } + orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1300,8 +1299,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc == - IPV6_PMTUDISC_PROBE); + orig_mtu); skb_prev = skb; -- cgit v1.2.3 From c0669e49cf6fdee800568b7085af0bcaaf27b9b3 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:49 +0100 Subject: rtnetlink: fix fdb notification flags [ Upstream commit 1c104a6bebf3c16b6248408b84f91d09ac8a26b6 ] Commit 3ff661c38c84 ("net: rtnetlink notify events for FDB NTF_SELF adds and deletes") reuses the function nlmsg_populate_fdb_fill() to notify fdb events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fd01eca52a13..4c3087dffe78 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1973,12 +1973,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u32 pid, u32 seq, - int type, unsigned int flags) + int type, unsigned int flags, + int nlflags) { struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags); if (!nlh) return -EMSGSIZE; @@ -2016,7 +2017,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) if (!skb) goto errout; - err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF); + err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2249,7 +2250,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, portid, seq, - RTM_NEWNEIGH, NTF_SELF); + RTM_NEWNEIGH, NTF_SELF, + NLM_F_MULTI); if (err < 0) return err; skip: -- cgit v1.2.3 From 38c50cd4421b6d83ce58af63fdf105cf3fdc37e9 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:50 +0100 Subject: ipmr: fix mfc notification flags [ Upstream commit 65886f439ab0fdc2dff20d1fa87afb98c6717472 ] Commit 8cd3ac9f9b7b ("ipmr: advertise new mfc entries via rtnl") reuses the function ipmr_fill_mroute() to notify mfc events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7dbad6835843..49797ed0917c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2255,13 +2255,14 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc_cache *c, int cmd) + u32 portid, u32 seq, struct mfc_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2329,7 +2330,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, if (skb == NULL) goto errout; - err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2368,7 +2369,8 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ipmr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2382,7 +2384,8 @@ next_entry: if (ipmr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } -- cgit v1.2.3 From 29466c9fad0e72e57ded8dffabc68575d4399a4f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:51 +0100 Subject: ip6mr: fix mfc notification flags [ Upstream commit f518338b16038beeb73e155e60d0f70beb9379f4 ] Commit 812e44dd1829 ("ip6mr: advertise new mfc entries via rtnl") reuses the function ip6mr_fill_mroute() to notify mfc events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9f44ebc17759..2c84072b1da7 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2351,13 +2351,14 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc6_cache *c, int cmd) + u32 portid, u32 seq, struct mfc6_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2425,7 +2426,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, if (skb == NULL) goto errout; - err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2464,7 +2465,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2478,7 +2480,8 @@ next_entry: if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } -- cgit v1.2.3 From debd53455f49ecd18b4cb1198e20416bd25bf46e Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 21 Mar 2014 20:53:57 +0800 Subject: netpoll: fix the skb check in pkt_is_ns [ Not applicable upstream commit, the code here has been removed upstream. ] Neighbor Solicitation is ipv6 protocol, so we should check skb->protocol with ETH_P_IPV6 Signed-off-by: Li RongQing Cc: WANG Cong Signed-off-by: Greg Kroah-Hartman --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 433a1051d323..e861438d5454 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -745,7 +745,7 @@ static bool pkt_is_ns(struct sk_buff *skb) struct nd_msg *msg; struct ipv6hdr *hdr; - if (skb->protocol != htons(ETH_P_ARP)) + if (skb->protocol != htons(ETH_P_IPV6)) return false; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) return false; -- cgit v1.2.3 From 48f77355b32ab30456a0d6fda1ee2d367b9a3d25 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 26 Mar 2014 11:47:56 -0400 Subject: vlan: Set hard_header_len according to available acceleration [ Upstream commit fc0d48b8fb449ca007b2057328abf736cb516168 ] Currently, if the card supports CTAG acceleration we do not account for the vlan header even if we are configuring an 8021AD vlan. This may not be best since we'll do software tagging for 8021AD which will cause data copy on skb head expansion Configure the length based on available hw offload capabilities and vlan protocol. CC: Patrick McHardy Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan.c | 4 +++- net/8021q/vlan_dev.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9424f3718ea7..86abb2e59aea 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -305,9 +305,11 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); + vlandev->gso_max_size = dev->gso_max_size; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 48db8587cccf..cf35f383db4c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -611,7 +611,8 @@ static int vlan_dev_init(struct net_device *dev) #endif dev->needed_headroom = real_dev->needed_headroom; - if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { + if (vlan_hw_offload_capable(real_dev->features, + vlan_dev_priv(dev)->vlan_proto)) { dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { -- cgit v1.2.3 From 2e6f312e108b9f5609d601c4988c479e5cc9330d Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 31 Mar 2014 20:14:10 +0200 Subject: ipv6: some ipv6 statistic counters failed to disable bh [ Upstream commit 43a43b6040165f7b40b5b489fe61a4cb7f8c4980 ] After commit c15b1ccadb323ea ("ipv6: move DAD and addrconf_verify processing to workqueue") some counters are now updated in process context and thus need to disable bh before doing so, otherwise deadlocks can happen on 32-bit archs. Fabio Estevam noticed this while while mounting a NFS volume on an ARM board. As a compensation for missing this I looked after the other *_STATS_BH and found three other calls which need updating: 1) icmp6_send: ip6_fragment -> icmpv6_send -> icmp6_send (error handling) 2) ip6_push_pending_frames: rawv6_sendmsg -> rawv6_push_pending_frames -> ... (only in case of icmp protocol with raw sockets in error handling) 3) ping6_v6_sendmsg (error handling) Fixes: c15b1ccadb323ea ("ipv6: move DAD and addrconf_verify processing to workqueue") Reported-by: Fabio Estevam Tested-by: Fabio Estevam Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/mcast.c | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 70e704d49007..2dee1d9d7305 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -508,7 +508,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) np->tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 390b05d488b8..32fb114b86bb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1554,8 +1554,8 @@ int ip6_push_pending_frames(struct sock *sk) if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 952eaed38808..734aec059ffd 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1439,11 +1439,12 @@ static void mld_sendpack(struct sk_buff *skb) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); - } else - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); + ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); + } else { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + } rcu_read_unlock(); return; -- cgit v1.2.3 From 752e4086d0b553125f416ebb1563f5e72febc01c Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 29 Mar 2014 20:39:35 -0400 Subject: rds: prevent dereference of a NULL device in rds_iw_laddr_check [ Upstream commit bf39b4247b8799935ea91d90db250ab608a58e50 ] Binding might result in a NULL device which is later dereferenced without checking. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/iw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/iw.c b/net/rds/iw.c index 7826d46baa70..589935661d66 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -239,7 +239,8 @@ static int rds_iw_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support IB devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_RNIC) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_RNIC) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", -- cgit v1.2.3 From 2dbecc73f3b8aa4a6d4c0465eb276ba08161e79d Mon Sep 17 00:00:00 2001 From: Claudio Takahasi Date: Thu, 25 Jul 2013 16:34:24 -0300 Subject: Bluetooth: Fix removing Long Term Key commit 5981a8821b774ada0be512fd9bad7c241e17657e upstream. This patch fixes authentication failure on LE link re-connection when BlueZ acts as slave (peripheral). LTK is removed from the internal list after its first use causing PIN or Key missing reply when re-connecting the link. The LE Long Term Key Request event indicates that the master is attempting to encrypt or re-encrypt the link. Pre-condition: BlueZ host paired and running as slave. How to reproduce(master): 1) Establish an ACL LE encrypted link 2) Disconnect the link 3) Try to re-establish the ACL LE encrypted link (fails) > HCI Event: LE Meta Event (0x3e) plen 19 LE Connection Complete (0x01) Status: Success (0x00) Handle: 64 Role: Slave (0x01) ... @ Device Connected: 00:02:72:DC:29:C9 (1) flags 0x0000 > HCI Event: LE Meta Event (0x3e) plen 13 LE Long Term Key Request (0x05) Handle: 64 Random number: 875be18439d9aa37 Encryption diversifier: 0x76ed < HCI Command: LE Long Term Key Request Reply (0x08|0x001a) plen 18 Handle: 64 Long term key: 2aa531db2fce9f00a0569c7d23d17409 > HCI Event: Command Complete (0x0e) plen 6 LE Long Term Key Request Reply (0x08|0x001a) ncmd 1 Status: Success (0x00) Handle: 64 > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 64 Encryption: Enabled with AES-CCM (0x01) ... @ Device Disconnected: 00:02:72:DC:29:C9 (1) reason 3 < HCI Command: LE Set Advertise Enable (0x08|0x000a) plen 1 Advertising: Enabled (0x01) > HCI Event: Command Complete (0x0e) plen 4 LE Set Advertise Enable (0x08|0x000a) ncmd 1 Status: Success (0x00) > HCI Event: LE Meta Event (0x3e) plen 19 LE Connection Complete (0x01) Status: Success (0x00) Handle: 64 Role: Slave (0x01) ... @ Device Connected: 00:02:72:DC:29:C9 (1) flags 0x0000 > HCI Event: LE Meta Event (0x3e) plen 13 LE Long Term Key Request (0x05) Handle: 64 Random number: 875be18439d9aa37 Encryption diversifier: 0x76ed < HCI Command: LE Long Term Key Request Neg Reply (0x08|0x001b) plen 2 Handle: 64 > HCI Event: Command Complete (0x0e) plen 6 LE Long Term Key Request Neg Reply (0x08|0x001b) ncmd 1 Status: Success (0x00) Handle: 64 > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 64 Reason: Authentication Failure (0x05) @ Device Disconnected: 00:02:72:DC:29:C9 (1) reason 0 Signed-off-by: Claudio Takahasi Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_event.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index dcaa6dbbab2c..cfca44f8d048 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3619,7 +3619,13 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp); - if (ltk->type & HCI_SMP_STK) { + /* Ref. Bluetooth Core SPEC pages 1975 and 2004. STK is a + * temporary key used to encrypt a connection following + * pairing. It is used during the Encrypted Session Setup to + * distribute the keys. Later, security can be re-established + * using a distributed LTK. + */ + if (ltk->type == HCI_SMP_STK_SLAVE) { list_del(<k->list); kfree(ltk); } -- cgit v1.2.3 From 344f3a6bce11dd3d483d7e36dce33b22427ff621 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 26 Feb 2014 16:50:01 +0300 Subject: nfsd: check passed socket's net matches NFSd superblock's one commit 3064639423c48d6e0eb9ecc27c512a58e38c6c57 upstream. There could be a case, when NFSd file system is mounted in network, different to socket's one, like below: "ip netns exec" creates new network and mount namespace, which duplicates NFSd mount point, created in init_net context. And thus NFS server stop in nested network context leads to RPCBIND client destruction in init_net. Then, on NFSd start in nested network context, rpc.nfsd process creates socket in nested net and passes it into "write_ports", which leads to RPCBIND sockets creation in init_net context because of the same reason (NFSd monut point was created in init_net context). An attempt to register passed socket in nested net leads to panic, because no RPCBIND client present in nexted network namespace. This patch add check that passed socket's net matches NFSd superblock's one. And returns -EINVAL error to user psace otherwise. v2: Put socket on exit. Reported-by: Weng Meiling Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcsock.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 305374d4fb98..422759bae60d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1393,6 +1393,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } +bool svc_alien_sock(struct net *net, int fd) +{ + int err; + struct socket *sock = sockfd_lookup(fd, &err); + bool ret = false; + + if (!sock) + goto out; + if (sock_net(sock->sk) != net) + ret = true; + sockfd_put(sock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(svc_alien_sock); + /** * svc_addsock - add a listener socket to an RPC service * @serv: pointer to RPC service to which to add a new listener -- cgit v1.2.3 From ea6dbcd027542fb255fa6cd34f88f8dfb697646b Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Thu, 6 Mar 2014 15:08:43 +0100 Subject: mac80211: fix WPA with VLAN on AP side with ps-sta again commit 112c44b2df0984121a52fbda89425843b8e1a457 upstream. commit de74a1d9032f4d37ea453ad2a647e1aff4cd2591 "mac80211: fix WPA with VLAN on AP side with ps-sta" fixed an issue where queued multicast packets would be sent out encrypted with the key of an other bss. commit "7cbf9d017dbb5e3276de7d527925d42d4c11e732" "mac80211: fix oops on mesh PS broadcast forwarding" essentially reverted it, because vif.type cannot be AP_VLAN due to the check to vif.type in ieee80211_get_buffered_bc before. As the later commit intended to fix the MESH case, fix it by checking for IFTYPE_AP instead of IFTYPE_AP_VLAN. Fixes: 7cbf9d017dbb ("mac80211: fix oops on mesh PS broadcast forwarding") Signed-off-by: Michael Braun Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6d5791d735f3..d566cdba24ec 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2711,7 +2711,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP) sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, skb)) break; -- cgit v1.2.3 From 007058ac423d001c996682baf10fed481186df37 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Mar 2014 15:39:20 +0100 Subject: mac80211: fix software remain-on-channel implementation commit 115b943a6ea12656088fa1ff6634c0d30815e55b upstream. Jouni reported that when doing off-channel transmissions mixed with on-channel transmissions, the on-channel ones ended up on the off-channel in some cases. The reason for that is that during the refactoring of the off- channel code, I lost the part that stopped all activity and as a consequence the on-channel frames (including data frames) were no longer queued but would be transmitted on the temporary channel. Fix this by simply restoring the lost activity stop call. Fixes: 2eb278e083549 ("mac80211: unify SW/offload remain-on-channel") Reported-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/offchannel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index acd1f71adc03..11d3f227e11e 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -355,6 +355,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) struct ieee80211_roc_work *dep; /* start this ROC */ + ieee80211_offchannel_stop_vifs(local); /* switch channel etc */ ieee80211_recalc_idle(local); -- cgit v1.2.3 From 4dc15ca34591231e91a50b10320b6265e10aa127 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 10 Apr 2014 15:06:48 +0200 Subject: mac80211: exclude AP_VLAN interfaces from tx power calculation commit 764152ff66f4a8be1f9d7981e542ffdaa5bd7aff upstream. Their power value is initialized to zero. This patch fixes an issue where the configured power drops to the minimum value when AP_VLAN interfaces are created/removed. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8a7bfc47d577..6658c5809353 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -157,6 +157,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!rcu_access_pointer(sdata->vif.chanctx_conf)) continue; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + continue; power = min(power, sdata->vif.bss_conf.txpower); } rcu_read_unlock(); -- cgit v1.2.3