From 33cf71cee14743185305c61625c4544885055733 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Fri, 21 Nov 2008 16:42:58 -0800 Subject: tcp: Do not use TSO/GSO when there is urgent data This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=12014 Since most (if not all) implementations of TSO and even the in-kernel software GSO do not update the urgent pointer when splitting a large segment, it is necessary to turn off TSO/GSO for all outgoing traffic with the URG pointer set. Looking at tcp_current_mss (and the preceding comment) I even think this was the original intention. However, this approach is insufficient, because TSO/GSO is turned off only for newly created frames, not for frames which were already pending at the arrival of a message with MSG_OOB set. These frames were created when TSO/GSO was enabled, so they may be large, and they will have the urgent pointer set in tcp_transmit_skb(). With this patch, such large packets will be fragmented again before going to the transmit routine. As a side note, at least the following NICs are known to screw up the urgent pointer in the TCP header when doing TSO: Intel 82566MM (PCI ID 8086:1049) Intel 82566DC (PCI ID 8086:104b) Intel 82541GI (PCI ID 8086:1076) Broadcom NetXtreme II BCM5708 (PCI ID 14e4:164c) Signed-off-by: Petr Tesarik Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba85d8831893..85b07eba1879 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -722,7 +722,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + tcp_urg_mode(tcp_sk(sk))) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1163,7 +1164,9 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { + if (!tso_segs || + (tso_segs > 1 && (tcp_skb_mss(skb) != mss_now || + tcp_urg_mode(tcp_sk(sk))))) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } -- cgit v1.2.3 From 7e56b5d698707a9934833c47b24d78fb0bcaf764 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 21 Nov 2008 16:45:22 -0800 Subject: net: Fix memory leak in the proto_register function If the slub allocator is used, kmem_cache_create() may merge two or more kmem_cache's into one but the cache name pointer is not updated and kmem_cache_name() is no longer guaranteed to return the pointer passed to the former function. This patch stores the kmalloc'ed pointers in the corresponding request_sock_ops and timewait_sock_ops structures. Signed-off-by: Catalin Marinas Acked-by: Arnaldo Carvalho de Melo Reviewed-by: Christoph Lameter Signed-off-by: David S. Miller --- net/core/sock.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 341e39456952..edf7220889a4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2035,9 +2035,6 @@ static inline void release_proto_idx(struct proto *prot) int proto_register(struct proto *prot, int alloc_slab) { - char *request_sock_slab_name = NULL; - char *timewait_sock_slab_name; - if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2051,12 +2048,12 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->rsk_prot != NULL) { static const char mask[] = "request_sock_%s"; - request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); - if (request_sock_slab_name == NULL) + prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + if (prot->rsk_prot->slab_name == NULL) goto out_free_sock_slab; - sprintf(request_sock_slab_name, mask, prot->name); - prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, + sprintf(prot->rsk_prot->slab_name, mask, prot->name); + prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, prot->rsk_prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2070,14 +2067,14 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->twsk_prot != NULL) { static const char mask[] = "tw_sock_%s"; - timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); - if (timewait_sock_slab_name == NULL) + if (prot->twsk_prot->twsk_slab_name == NULL) goto out_free_request_sock_slab; - sprintf(timewait_sock_slab_name, mask, prot->name); + sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name); prot->twsk_prot->twsk_slab = - kmem_cache_create(timewait_sock_slab_name, + kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2093,14 +2090,14 @@ int proto_register(struct proto *prot, int alloc_slab) return 0; out_free_timewait_sock_slab_name: - kfree(timewait_sock_slab_name); + kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: if (prot->rsk_prot && prot->rsk_prot->slab) { kmem_cache_destroy(prot->rsk_prot->slab); prot->rsk_prot->slab = NULL; } out_free_request_sock_slab_name: - kfree(request_sock_slab_name); + kfree(prot->rsk_prot->slab_name); out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; @@ -2123,18 +2120,14 @@ void proto_unregister(struct proto *prot) } if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { - const char *name = kmem_cache_name(prot->rsk_prot->slab); - kmem_cache_destroy(prot->rsk_prot->slab); - kfree(name); + kfree(prot->rsk_prot->slab_name); prot->rsk_prot->slab = NULL; } if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab); - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(name); + kfree(prot->twsk_prot->twsk_slab_name); prot->twsk_prot->twsk_slab = NULL; } } -- cgit v1.2.3 From b54ad409fd09a395b839fb81f300880d76861c0e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 24 Nov 2008 15:56:17 -0800 Subject: netfilter: ctnetlink: fix conntrack creation race Conntrack creation through ctnetlink has two races: - the timer may expire and free the conntrack concurrently, causing an invalid memory access when attempting to put it in the hash tables - an identical conntrack entry may be created in the packet processing path in the time between the lookup and hash insertion Hold the conntrack lock between the lookup and insertion to avoid this. Reported-by: Zoltan Borbely Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 2 -- net/netfilter/nf_conntrack_netlink.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 622d7c671cb7..233fdd2d7d21 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -305,9 +305,7 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - spin_lock_bh(&nf_conntrack_lock); __nf_conntrack_hash_insert(ct, hash, repl_hash); - spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a040d46f85d6..3b009a3e8549 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1090,7 +1090,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conn_help *help; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); + ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; @@ -1212,13 +1212,14 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, atomic_inc(&master_ct->ct_general.use); } - spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) err = ctnetlink_create_conntrack(cda, &otuple, &rtuple, master_ct); + spin_unlock_bh(&nf_conntrack_lock); + if (err < 0 && master_ct) nf_ct_put(master_ct); -- cgit v1.2.3 From 631339f1e544a4d39a63cfe6708c5bddcd5a2c48 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 Nov 2008 16:06:50 -0800 Subject: bridge: netfilter: fix update_pmtu crash with GRE As GRE tries to call the update_pmtu function on skb->dst and bridge supplies an skb->dst that has a NULL ops field, all is not well. This patch fixes this by giving the bridge device an ops field with an update_pmtu function. For the moment I've left all other fields blank but we can fill them in later should the need arise. Based on report and patch by Philip Craig. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fa5cda4e552a..45f61c348e36 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -101,6 +101,18 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) +static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +{ +} + +static struct dst_ops fake_dst_ops = { + .family = AF_INET, + .protocol = __constant_htons(ETH_P_IP), + .update_pmtu = fake_update_pmtu, + .entry_size = sizeof(struct rtable), + .entries = ATOMIC_INIT(0), +}; + /* * Initialize bogus route table used to keep netfilter happy. * Currently, we fill in the PMTU entry because netfilter @@ -117,6 +129,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->u.dst.path = &rt->u.dst; rt->u.dst.metrics[RTAX_MTU - 1] = 1500; rt->u.dst.flags = DST_NOXFRM; + rt->u.dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) -- cgit v1.2.3 From 244f46ae6e9e18f6fc0be7d1f49febde4762c34b Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Mon, 24 Nov 2008 11:49:40 +0000 Subject: rose: zero length frame filtering in af_rose.c Since changeset e79ad711a0108475c1b3a03815527e7237020b08 from mainline, >From David S. Miller, empty packet can be transmitted on connected socket for datagram protocols. However, this patch broke a high level application using ROSE network protocol with connected datagram. Bulletin Board Stations perform bulletins forwarding between BBS stations via ROSE network using a forward protocol. Now, if for some reason, a buffer in the application software happens to be empty at a specific moment, ROSE sends an empty packet via unfiltered packet socket. When received, this ROSE packet introduces perturbations of data exchange of BBS forwarding, for the application message forwarding protocol is waiting for something else. We agree that a more careful programming of the application protocol would avoid this situation and we are willing to debug it. But, as an empty frame is no use and does not have any meaning for ROSE protocol, we may consider filtering zero length data both when sending and receiving socket data. The proposed patch repaired BBS data exchange through ROSE network that were broken since 2.6.22.11 kernel. Signed-off-by: Bernard Pidoux Signed-off-by: David S. Miller --- net/rose/af_rose.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index a7f1ce11bc22..0c1cc7612800 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1072,6 +1072,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int n, size, qbit = 0; + /* ROSE empty frame has no meaning : don't send */ + if (len == 0) + return 0; + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1265,6 +1269,12 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; + /* ROSE empty frame has no meaning : ignore it */ + if (copied == 0) { + skb_free_datagram(sk, skb); + return copied; + } + if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; -- cgit v1.2.3 From 3dd3b79aeadc6f6abc5cc78724d7df3dfcc1bd0b Mon Sep 17 00:00:00 2001 From: Abhijeet Kolekar Date: Thu, 20 Nov 2008 10:20:31 -0800 Subject: mac80211 : Fix setting ad-hoc mode and non-ibss channel Patch fixes the kernel trace when user tries to set ad-hoc mode on non IBSS channel. e.g iwconfig wlan0 chan 36 mode ad-hoc Signed-off-by: Abhijeet Kolekar Signed-off-by: John W. Linville --- net/mac80211/wext.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 742f811ca416..ab4ddba874be 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -271,6 +271,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, __u32 *mode, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int type; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -281,6 +282,13 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: + /* Setting ad-hoc mode on non ibss channel is not + * supported. + */ + if (local->oper_channel && + (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS)) + return -EOPNOTSUPP; + type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_REPEAT: -- cgit v1.2.3 From 020cf6ba7a91ccc5db359f91e9abba175fd3a0aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 23 Nov 2008 20:09:54 +0100 Subject: net/wireless/reg.c: fix bad WARN_ON in if statement fix: net/wireless/reg.c:348:29: error: macro "if" passed 2 arguments, but takes just 1 triggered by the branch-tracer. Signed-off-by: Ingo Molnar Signed-off-by: John W. Linville --- net/wireless/reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 626dbb688499..eb3b1a9f9b12 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -343,9 +343,9 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return 0; return -EALREADY; } - if (WARN_ON(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2)), + if (WARN(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2), "Invalid Country IE regulatory hint passed " - "to the wireless core\n") + "to the wireless core\n")) return -EINVAL; /* We ignore Country IE hints for now, as we haven't yet * added the dot11MultiDomainCapabilityEnabled flag -- cgit v1.2.3 From 8f480c0e4e120911a673ed7385359bf76ae01963 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 25 Nov 2008 21:08:13 -0800 Subject: net: make skb_truesize_bug() call WARN() The truesize message check is important enough to make it print "BUG" to the user console... lets also make it important enough to spit a backtrace/module list etc so that kerneloops.org can track them. Signed-off-by: Arjan van de Ven Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d49ef8301b5b..65f7757465bd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -149,7 +149,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) void skb_truesize_bug(struct sk_buff *skb) { - printk(KERN_ERR "SKB BUG: Invalid truesize (%u) " + WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) " "len=%u, sizeof(sk_buff)=%Zd\n", skb->truesize, skb->len, sizeof(struct sk_buff)); } -- cgit v1.2.3 From 3ec192559033ed457f0d7856838654c100fc659f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 26 Nov 2008 03:57:44 -0800 Subject: netfilter: ctnetlink: fix GFP_KERNEL allocation under spinlock The previous fix for the conntrack creation race (netfilter: ctnetlink: fix conntrack creation race) missed a GFP_KERNEL allocation that is now performed while holding a spinlock. Switch to GFP_ATOMIC. Reported-and-tested-by: Zoltan Borbely Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3b009a3e8549..5f4a6516b3b6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1138,7 +1138,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } } - nf_ct_acct_ext_add(ct, GFP_KERNEL); + nf_ct_acct_ext_add(ct, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) -- cgit v1.2.3