From 5b9e12dbf92b441b37136ea71dac59f05f2673a9 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 13 Mar 2013 00:24:15 +0000 Subject: ipv4: fix definition of FIB_TABLE_HASHSZ a long time ago by the commit commit 93456b6d7753def8760b423ac6b986eb9d5a4a95 Author: Denis V. Lunev Date: Thu Jan 10 03:23:38 2008 -0800 [IPV4]: Unify access to the routing tables. the defenition of FIB_HASH_TABLE size has obtained wrong dependency: it should depend upon CONFIG_IP_MULTIPLE_TABLES (as was in the original code) but it was depended from CONFIG_IP_ROUTE_MULTIPATH This patch returns the situation to the original state. The problem was spotted by Tingwei Liu. Signed-off-by: Denis V. Lunev CC: Tingwei Liu CC: Alexey Kuznetsov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9497be1ad4c0..e49db91593a9 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -152,18 +152,16 @@ struct fib_result_nl { }; #ifdef CONFIG_IP_ROUTE_MULTIPATH - #define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) - -#define FIB_TABLE_HASHSZ 2 - #else /* CONFIG_IP_ROUTE_MULTIPATH */ - #define FIB_RES_NH(res) ((res).fi->fib_nh[0]) +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +#ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 - -#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +#else +#define FIB_TABLE_HASHSZ 2 +#endif extern __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); -- cgit v1.2.3 From 16fad69cfe4adbbfa813de516757b87bcae36d93 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2013 05:40:32 +0000 Subject: tcp: fix skb_availroom() Chrome OS team reported a crash on a Pixel ChromeBook in TCP stack : https://code.google.com/p/chromium/issues/detail?id=182056 commit a21d45726acac (tcp: avoid order-1 allocations on wifi and tx path) did a poor choice adding an 'avail_size' field to skb, while what we really needed was a 'reserved_tailroom' one. It would have avoided commit 22b4a4f22da (tcp: fix retransmit of partially acked frames) and this commit. Crash occurs because skb_split() is not aware of the 'avail_size' management (and should not be aware) Signed-off-by: Eric Dumazet Reported-by: Mukesh Agrawal Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 821c7f45d2a7..6f2bb860e051 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -500,7 +500,7 @@ struct sk_buff { union { __u32 mark; __u32 dropcount; - __u32 avail_size; + __u32 reserved_tailroom; }; sk_buff_data_t inner_transport_header; @@ -1447,7 +1447,10 @@ static inline int skb_tailroom(const struct sk_buff *skb) */ static inline int skb_availroom(const struct sk_buff *skb) { - return skb_is_nonlinear(skb) ? 0 : skb->avail_size - skb->len; + if (skb_is_nonlinear(skb)) + return 0; + + return skb->end - skb->tail - skb->reserved_tailroom; } /** -- cgit v1.2.3 From cca7af3889bfa343d33d5e657a38d876abd10e58 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 14 Mar 2013 03:29:40 +0000 Subject: skb: Propagate pfmemalloc on skb from head page only Hi. I'm trying to send big chunks of memory from application address space via TCP socket using vmsplice + splice like this mem = mmap(128Mb); vmsplice(pipe[1], mem); /* splice memory into pipe */ splice(pipe[0], tcp_socket); /* send it into network */ When I'm lucky and a huge page splices into the pipe and then into the socket _and_ client and server ends of the TCP connection are on the same host, communicating via lo, the whole connection gets stuck! The sending queue becomes full and app stops writing/splicing more into it, but the receiving queue remains empty, and that's why. The __skb_fill_page_desc observes a tail page of a huge page and erroneously propagates its page->pfmemalloc value onto socket (the pfmemalloc on tail pages contain garbage). Then this skb->pfmemalloc leaks through lo and due to the tcp_v4_rcv sk_filter if (skb->pfmemalloc && !sock_flag(sk, SOCK_MEMALLOC)) /* true */ return -ENOMEM goto release_and_discard; no packets reach the socket. Even TCP re-transmits are dropped by this, as skb cloning clones the pfmemalloc flag as well. That said, here's the proper page->pfmemalloc propagation onto socket: we must check the huge-page's head page only, other pages' pfmemalloc and mapping values do not contain what is expected in this place. However, I'm not sure whether this fix is _complete_, since pfmemalloc propagation via lo also oesn't look great. Both, bit propagation from page to skb and this check in sk_filter, were introduced by c48a11c7 (netvm: propagate page->pfmemalloc to skb), in v3.5 so Mel and stable@ are in Cc. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Acked-by: Mel Gorman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f2bb860e051..441f5bfdab8e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1288,11 +1288,13 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, * do not lose pfmemalloc information as the pages would not be * allocated using __GFP_MEMALLOC. */ - if (page->pfmemalloc && !page->mapping) - skb->pfmemalloc = true; frag->page.p = page; frag->page_offset = off; skb_frag_size_set(frag, size); + + page = compound_head(page); + if (page->pfmemalloc && !page->mapping) + skb->pfmemalloc = true; } /** -- cgit v1.2.3 From aaa0c23cb90141309f5076ba5e3bfbd39544b985 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Thu, 14 Mar 2013 17:21:50 +0000 Subject: Fix dst_neigh_lookup/dst_neigh_lookup_skb return value handling bug When neighbour table is full, dst_neigh_lookup/dst_neigh_lookup_skb will return -ENOBUFS which is absolutely non zero, while all the code in kernel which use above functions assume failure only on zero return which will cause panic. (for example: : https://bugzilla.kernel.org/show_bug.cgi?id=54731). This patch corrects above error with smallest changes to kernel source code and also correct two return value check missing bugs in drivers/infiniband/hw/cxgb4/cm.c Tested on my x86_64 SMP machine Reported-by: Zhouyi Zhou Tested-by: Zhouyi Zhou Signed-off-by: Zhouyi Zhou Signed-off-by: David S. Miller --- include/net/dst.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 853cda11e518..1f8fd109e225 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -413,13 +413,15 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return dst->ops->neigh_lookup(dst, NULL, daddr); + struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr); + return IS_ERR(n) ? NULL : n; } static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { - return dst->ops->neigh_lookup(dst, skb, NULL); + struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL); + return IS_ERR(n) ? NULL : n; } static inline void dst_link_failure(struct sk_buff *skb) -- cgit v1.2.3 From 1e8bbe6cd02fc300c88bd48244ce61ad9c7d1776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 14 Mar 2013 01:05:13 +0000 Subject: net: cdc_ncm, cdc_mbim: allow user to prefer NCM for backwards compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bd329e1 ("net: cdc_ncm: do not bind to NCM compatible MBIM devices") introduced a new policy, preferring MBIM for dual NCM/MBIM functions if the cdc_mbim driver was enabled. This caused a regression for users wanting to use NCM. Devices implementing NCM backwards compatibility according to section 3.2 of the MBIM v1.0 specification allow either NCM or MBIM on a single USB function, using different altsettings. The cdc_ncm and cdc_mbim drivers will both probe such functions, and must agree on a common policy for selecting either MBIM or NCM. Until now, this policy has been set at build time based on CONFIG_USB_NET_CDC_MBIM. Use a module parameter to set the system policy at runtime, allowing the user to prefer NCM on systems with the cdc_mbim driver. Cc: Greg Suarez Cc: Alexey Orishko Reported-by: Geir Haatveit Reported-by: Tommi Kyntola Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=54791 Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 3b8f9d4fc3fe..cc25b70af33c 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -127,6 +127,7 @@ struct cdc_ncm_ctx { u16 connected; }; +extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign); -- cgit v1.2.3 From 5a3da1fe9561828d0ca7eca664b16ec2b9bf0055 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 15 Mar 2013 11:32:30 +0000 Subject: inet: limit length of fragment queue hash table bucket lists This patch introduces a constant limit of the fragment queue hash table bucket list lengths. Currently the limit 128 is choosen somewhat arbitrary and just ensures that we can fill up the fragment cache with empty packets up to the default ip_frag_high_thresh limits. It should just protect from list iteration eating considerable amounts of cpu. If we reach the maximum length in one hash bucket a warning is printed. This is implemented on the caller side of inet_frag_find to distinguish between the different users of inet_fragment.c. I dropped the out of memory warning in the ipv4 fragment lookup path, because we already get a warning by the slab allocator. Cc: Eric Dumazet Cc: Jesper Dangaard Brouer Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_frag.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 76c3fe5ecc2e..0a1dcc2fa2f5 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,6 +43,13 @@ struct inet_frag_queue { #define INETFRAGS_HASHSZ 64 +/* averaged: + * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / + * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or + * struct frag_queue)) + */ +#define INETFRAGS_MAXDEPTH 128 + struct inet_frags { struct hlist_head hash[INETFRAGS_HASHSZ]; /* This rwlock is a global lock (seperate per IPv4, IPv6 and @@ -76,6 +83,8 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock); +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { -- cgit v1.2.3