From db048b69037e7fa6a7d9e95a1271a50dc08ae233 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 28 Jun 2010 08:44:07 +0000 Subject: ethtool: Fix potential kernel buffer overflow in ETHTOOL_GRXCLSRLALL On a 32-bit machine, info.rule_cnt >= 0x40000000 leads to integer overflow and the buffer may be smaller than needed. Since ETHTOOL_GRXCLSRLALL is unprivileged, this can presumably be used for at least denial of service. Signed-off-by: Ben Hutchings Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/core/ethtool.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a0f4964033d2..a3a7e9a48dff 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -347,8 +347,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { - rule_buf = kmalloc(info.rule_cnt * sizeof(u32), - GFP_USER); + if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) + rule_buf = kmalloc(info.rule_cnt * sizeof(u32), + GFP_USER); if (!rule_buf) return -ENOMEM; } -- cgit v1.2.3 From bf988435bd5b53529f4408a8efb1f433f6ddfda9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 28 Jun 2010 08:45:58 +0000 Subject: ethtool: Fix potential user buffer overflow for ETHTOOL_{G, S}RXFH struct ethtool_rxnfc was originally defined in 2.6.27 for the ETHTOOL_{G,S}RXFH command with only the cmd, flow_type and data fields. It was then extended in 2.6.30 to support various additional commands. These commands should have been defined to use a new structure, but it is too late to change that now. Since user-space may still be using the old structure definition for the ETHTOOL_{G,S}RXFH commands, and since they do not need the additional fields, only copy the originally defined fields to and from user-space. Signed-off-by: Ben Hutchings Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/core/ethtool.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a3a7e9a48dff..75e4ffeb8cc9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -318,23 +318,33 @@ out: } static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, - void __user *useraddr) + u32 cmd, void __user *useraddr) { - struct ethtool_rxnfc cmd; + struct ethtool_rxnfc info; + size_t info_size = sizeof(info); if (!dev->ethtool_ops->set_rxnfc) return -EOPNOTSUPP; - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + /* struct ethtool_rxnfc was originally defined for + * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data + * members. User-space might still be using that + * definition. */ + if (cmd == ETHTOOL_SRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; - return dev->ethtool_ops->set_rxnfc(dev, &cmd); + return dev->ethtool_ops->set_rxnfc(dev, &info); } static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, - void __user *useraddr) + u32 cmd, void __user *useraddr) { struct ethtool_rxnfc info; + size_t info_size = sizeof(info); const struct ethtool_ops *ops = dev->ethtool_ops; int ret; void *rule_buf = NULL; @@ -342,7 +352,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (!ops->get_rxnfc) return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) + /* struct ethtool_rxnfc was originally defined for + * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data + * members. User-space might still be using that + * definition. */ + if (cmd == ETHTOOL_GRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; if (info.cmd == ETHTOOL_GRXCLSRLALL) { @@ -360,7 +378,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, goto err_out; ret = -EFAULT; - if (copy_to_user(useraddr, &info, sizeof(info))) + if (copy_to_user(useraddr, &info, info_size)) goto err_out; if (rule_buf) { @@ -1517,12 +1535,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: - rc = ethtool_get_rxnfc(dev, useraddr); + rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_SRXFH: case ETHTOOL_SRXCLSRLDEL: case ETHTOOL_SRXCLSRLINS: - rc = ethtool_set_rxnfc(dev, useraddr); + rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_GGRO: rc = ethtool_get_gro(dev, useraddr); -- cgit v1.2.3 From f0796d5c73e59786d09a1e617689d1d415f2db44 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 1 Jul 2010 13:21:57 +0000 Subject: net: decreasing real_num_tx_queues needs to flush qdisc Reducing real_num_queues needs to flush the qdisc otherwise skbs with queue_mappings greater then real_num_tx_queues can be sent to the underlying driver. The flow for this is, dev_queue_xmit() dev_pick_tx() skb_tx_hash() => hash using real_num_tx_queues skb_set_queue_mapping() ... qdisc_enqueue_root() => enqueue skb on txq from hash ... dev->real_num_tx_queues -= n ... sch_direct_xmit() dev_hard_start_xmit() ndo_start_xmit(skb,dev) => skb queue set with old hash skbs are enqueued on the qdisc with skb->queue_mapping set 0 < queue_mappings < real_num_tx_queues. When the driver decreases real_num_tx_queues skb's may be dequeued from the qdisc with a queue_mapping greater then real_num_tx_queues. This fixes a case in ixgbe where this was occurring with DCB and FCoE. Because the driver is using queue_mapping to map skbs to tx descriptor rings we can potentially map skbs to rings that no longer exist. Signed-off-by: John Fastabend Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/dev.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 2b3bf53bc687..723a34710ad4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1553,6 +1553,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } +/* + * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues + * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. + */ +void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) +{ + unsigned int real_num = dev->real_num_tx_queues; + + if (unlikely(txq > dev->num_tx_queues)) + ; + else if (txq > real_num) + dev->real_num_tx_queues = txq; + else if (txq < real_num) { + dev->real_num_tx_queues = txq; + qdisc_reset_all_tx_gt(dev, txq); + } +} +EXPORT_SYMBOL(netif_set_real_num_tx_queues); static inline void __netif_reschedule(struct Qdisc *q) { -- cgit v1.2.3 From 87fd308cfc6b2e880bf717a740bd5c58d2aed10c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Jul 2010 05:24:20 +0000 Subject: net: skb_tx_hash() fix relative to skb_orphan_try() commit fc6055a5ba31e2 (net: Introduce skb_orphan_try()) added early orphaning of skbs. This unfortunately added a performance regression in skb_tx_hash() in case of stacked devices (bonding, vlans, ...) Since skb->sk is now NULL, we cannot access sk->sk_hash anymore to spread tx packets to multiple NIC queues on multiqueue devices. skb_tx_hash() in this case only uses skb->protocol, same value for all flows. skb_orphan_try() can copy sk->sk_hash into skb->rxhash and skb_tx_hash() can use this saved sk_hash value to compute its internal hash value. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 723a34710ad4..4b05fdf762ab 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1911,8 +1911,16 @@ static int dev_gso_segment(struct sk_buff *skb) */ static inline void skb_orphan_try(struct sk_buff *skb) { - if (!skb_tx(skb)->flags) + struct sock *sk = skb->sk; + + if (sk && !skb_tx(skb)->flags) { + /* skb_tx_hash() wont be able to get sk. + * We copy sk_hash into skb->rxhash + */ + if (!skb->rxhash) + skb->rxhash = sk->sk_hash; skb_orphan(skb); + } } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -1998,8 +2006,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = (__force u16) skb->protocol; - + hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); -- cgit v1.2.3 From 91a72a70594e5212c97705ca6a694bd307f7a26b Mon Sep 17 00:00:00 2001 From: Doug Kehn Date: Wed, 14 Jul 2010 18:02:16 -0700 Subject: net/core: neighbour update Oops When configuring DMVPN (GRE + openNHRP) and a GRE remote address is configured a kernel Oops is observed. The obserseved Oops is caused by a NULL header_ops pointer (neigh->dev->header_ops) in neigh_update_hhs() when void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) = neigh->dev->header_ops->cache_update; is executed. The dev associated with the NULL header_ops is the GRE interface. This patch guards against the possibility that header_ops is NULL. This Oops was first observed in kernel version 2.6.26.8. Signed-off-by: Doug Kehn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6ba1c0eece03..a4e0a7482c2b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -949,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) - = neigh->dev->header_ops->cache_update; + = NULL; + + if (neigh->dev->header_ops) + update = neigh->dev->header_ops->cache_update; if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { -- cgit v1.2.3 From b0f77d0eae0c58a5a9691a067ada112ceeae2d00 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 14 Jul 2010 20:50:29 -0700 Subject: net: fix problem in reading sock TX queue Fix problem in reading the tx_queue recorded in a socket. In dev_pick_tx, the TX queue is read by doing a check with sk_tx_queue_recorded on the socket, followed by a sk_tx_queue_get. The problem is that there is not mutual exclusion across these calls in the socket so it it is possible that the queue in the sock can be invalidated after sk_tx_queue_recorded is called so that sk_tx_queue get returns -1, which sets 65535 in queue_index and thus dev_pick_tx returns 65536 which is a bogus queue and can cause crash in dev_queue_xmit. We fix this by only calling sk_tx_queue_get which does the proper checks. The interface is that sk_tx_queue_get returns the TX queue if the sock argument is non-NULL and TX queue is recorded, else it returns -1. sk_tx_queue_recorded is no longer used so it can be completely removed. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 4b05fdf762ab..0ea10f849be8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2029,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { - u16 queue_index; + int queue_index; struct sock *sk = skb->sk; - if (sk_tx_queue_recorded(sk)) { - queue_index = sk_tx_queue_get(sk); - } else { + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) { -- cgit v1.2.3 From 00c5a9834b476a138158fb17d576da751727a9f1 Mon Sep 17 00:00:00 2001 From: Andrea Shepard Date: Thu, 22 Jul 2010 09:12:35 +0000 Subject: net: Fix corruption of skb csum field in pskb_expand_head() of net/core/skbuff.c Make pskb_expand_head() check ip_summed to make sure csum_start is really csum_start and not csum before adjusting it. This fixes a bug I encountered using a Sun Quad-Fast Ethernet card and VLANs. On my configuration, the sunhme driver produces skbs with differing amounts of headroom on receive depending on the packet size. See line 2030 of drivers/net/sunhme.c; packets smaller than RX_COPY_THRESHOLD have 52 bytes of headroom but packets larger than that cutoff have only 20 bytes. When these packets reach the VLAN driver, vlan_check_reorder_header() calls skb_cow(), which, if the packet has less than NET_SKB_PAD (== 32) bytes of headroom, uses pskb_expand_head() to make more. Then, pskb_expand_head() needs to adjust a lot of offsets into the skb, including csum_start. Since csum_start is a union with csum, if the packet has a valid csum value this will corrupt it, which was the effect I observed. The sunhme hardware computes receive checksums, so the skbs would be created by the driver with ip_summed == CHECKSUM_COMPLETE and a valid csum field, and then pskb_expand_head() would corrupt the csum field, leading to an "hw csum error" message later on, for example in icmp_rcv() for pings larger than the sunhme RX_COPY_THRESHOLD. On the basis of the comment at the beginning of include/linux/skbuff.h, I believe that the csum_start skb field is only meaningful if ip_csummed is CSUM_PARTIAL, so this patch makes pskb_expand_head() adjust it only in that case to avoid corrupting a valid csum value. Please see my more in-depth disucssion of tracking down this bug for more details if you like: http://puellavulnerata.livejournal.com/112186.html http://puellavulnerata.livejournal.com/112567.html http://puellavulnerata.livejournal.com/112891.html http://puellavulnerata.livejournal.com/113096.html http://puellavulnerata.livejournal.com/113591.html I am not subscribed to this list, so please CC me on replies. Signed-off-by: Andrea Shepard Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 34432b4e96bb..c699159b3ede 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -843,7 +843,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; - skb->csum_start += nhead; + /* Only adjust this if it actually is csum_start rather than csum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; -- cgit v1.2.3 From be2b6e62357dd7ee56bdcb05e54002afb4830292 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 22 Jul 2010 13:27:09 -0700 Subject: net: Fix skb_copy_expand() handling of ->csum_start It should only be adjusted if ip_summed == CHECKSUM_PARTIAL. Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c699159b3ede..ce88293a34e2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -932,7 +932,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, copy_skb_header(n, skb); off = newheadroom - oldheadroom; - n->csum_start += off; + if (n->ip_summed == CHECKSUM_PARTIAL) + n->csum_start += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; -- cgit v1.2.3 From c736eefadb71a01a5e61e0de700f28f6952b4444 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 22 Jul 2010 09:54:47 +0000 Subject: net: dev_forward_skb should call nf_reset With conn-track zones and probably with different network namespaces, the netfilter logic needs to be re-calculated on packet receive. If the netfilter logic is not reset, it will not be recalculated properly. This patch adds the nf_reset logic to dev_forward_skb. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 0ea10f849be8..1f466e82ac33 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1488,6 +1488,7 @@ static inline void net_timestamp_check(struct sk_buff *skb) int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { skb_orphan(skb); + nf_reset(skb); if (!(dev->flags & IFF_UP) || (skb->len > (dev->mtu + dev->hard_header_len))) { -- cgit v1.2.3