From 5eb2471ef43ea672cef90c7c8d66313aae8745f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Oct 2018 12:29:49 -0700 Subject: inet: frags: change inet_frags_init_net() return value commit 787bea7748a76130566f881c2342a0be4127d182 upstream. We will soon initialize one rhashtable per struct netns_frags in inet_frags_init_net(). This patch changes the return value to eventually propagate an error. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net/ieee802154/6lowpan/reassembly.c') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 12e8cf4bda9f..e14962c1fca2 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -580,14 +580,19 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&ieee802154_lowpan->frags); - - return lowpan_frags_ns_sysctl_register(net); + res = inet_frags_init_net(&ieee802154_lowpan->frags); + if (res < 0) + return res; + res = lowpan_frags_ns_sysctl_register(net); + if (res < 0) + inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); + return res; } static void __net_exit lowpan_frags_exit_net(struct net *net) -- cgit v1.2.3 From 9c6727de82e47238f389b415e25443ac46a06de5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Oct 2018 12:29:50 -0700 Subject: inet: frags: add a pointer to struct netns_frags commit 093ba72914b696521e4885756a68a3332782c8de upstream. In order to simplify the API, add a pointer to struct inet_frags. This will allow us to make things less complex. These functions no longer have a struct inet_frags parameter : inet_frag_destroy(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frag_put(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frag_kill(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frags_exit_net(struct netns_frags *nf /*, struct inet_frags *f */) ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller [bwh: Backported to 4.4: inet_frag_{kill,put}() are called in some different places; update all calls] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net/ieee802154/6lowpan/reassembly.c') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index e14962c1fca2..8c532743e917 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -93,10 +93,10 @@ static void lowpan_frag_expire(unsigned long data) if (fq->q.flags & INET_FRAG_COMPLETE) goto out; - inet_frag_kill(&fq->q, &lowpan_frags); + inet_frag_kill(&fq->q); out: spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &lowpan_frags); + inet_frag_put(&fq->q); } static inline struct lowpan_frag_queue * @@ -229,7 +229,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, struct sk_buff *fp, *head = fq->q.fragments; int sum_truesize; - inet_frag_kill(&fq->q, &lowpan_frags); + inet_frag_kill(&fq->q); /* Make the one we just received the head. */ if (prev) { @@ -437,7 +437,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) ret = lowpan_frag_queue(fq, skb, frag_type); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &lowpan_frags); + inet_frag_put(&fq->q); return ret; } @@ -585,13 +585,14 @@ static int __net_init lowpan_frags_init_net(struct net *net) ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + ieee802154_lowpan->frags.f = &lowpan_frags; res = inet_frags_init_net(&ieee802154_lowpan->frags); if (res < 0) return res; res = lowpan_frags_ns_sysctl_register(net); if (res < 0) - inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags); return res; } @@ -601,7 +602,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net) net_ieee802154_lowpan(net); lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags); } static struct pernet_operations lowpan_frags_ops = { -- cgit v1.2.3 From a7fb573c164488b36aeafe63a1531b9e7cf27fad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Oct 2018 12:29:53 -0700 Subject: inet: frags: refactor lowpan_net_frag_init() commit 807f1844df4ac23594268fa9f41902d0549e92aa upstream. We want to call lowpan_net_frag_init() earlier. Similar to commit "inet: frags: refactor ipv6_frag_init()" This is a prereq to "inet: frags: use rhashtables for reassembly units" Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net/ieee802154/6lowpan/reassembly.c') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 8c532743e917..dba0a34cf92a 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -614,14 +614,6 @@ int __init lowpan_net_frag_init(void) { int ret; - ret = lowpan_frags_sysctl_register(); - if (ret) - return ret; - - ret = register_pernet_subsys(&lowpan_frags_ops); - if (ret) - goto err_pernet; - lowpan_frags.hashfn = lowpan_hashfn; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; @@ -632,11 +624,21 @@ int __init lowpan_net_frag_init(void) lowpan_frags.frags_cache_name = lowpan_frags_cache_name; ret = inet_frags_init(&lowpan_frags); if (ret) - goto err_pernet; + goto out; + ret = lowpan_frags_sysctl_register(); + if (ret) + goto err_sysctl; + + ret = register_pernet_subsys(&lowpan_frags_ops); + if (ret) + goto err_pernet; +out: return ret; err_pernet: lowpan_frags_sysctl_unregister(); +err_sysctl: + inet_frags_fini(&lowpan_frags); return ret; } -- cgit v1.2.3 From 493107105843f299662b3b664a83804645564f12 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Oct 2018 12:29:56 -0700 Subject: inet: frags: use rhashtables for reassembly units commit 648700f76b03b7e8149d13cc2bdb3355035258a9 upstream. Some applications still rely on IP fragmentation, and to be fair linux reassembly unit is not working under any serious load. It uses static hash tables of 1024 buckets, and up to 128 items per bucket (!!!) A work queue is supposed to garbage collect items when host is under memory pressure, and doing a hash rebuild, changing seed used in hash computations. This work queue blocks softirqs for up to 25 ms when doing a hash rebuild, occurring every 5 seconds if host is under fire. Then there is the problem of sharing this hash table for all netns. It is time to switch to rhashtables, and allocate one of them per netns to speedup netns dismantle, since this is a critical metric these days. Lookup is now using RCU. A followup patch will even remove the refcount hold/release left from prior implementation and save a couple of atomic operations. Before this patch, 16 cpus (16 RX queue NIC) could not handle more than 1 Mpps frags DDOS. After the patch, I reach 9 Mpps without any tuning, and can use up to 2GB of storage for the fragments (exact number depends on frags being evicted after timeout) $ grep FRAG /proc/net/sockstat FRAG: inuse 1966916 memory 2140004608 A followup patch will change the limits for 64bit arches. Signed-off-by: Eric Dumazet Cc: Kirill Tkhai Cc: Herbert Xu Cc: Florian Westphal Cc: Jesper Dangaard Brouer Cc: Alexander Aring Cc: Stefan Schmidt Signed-off-by: David S. Miller [bwh: Backported to 4.4: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 91 +++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 49 deletions(-) (limited to 'net/ieee802154/6lowpan/reassembly.c') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index dba0a34cf92a..65c0b7349f9d 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags; static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, struct net_device *ldev); -static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, - const struct ieee802154_addr *saddr, - const struct ieee802154_addr *daddr) -{ - net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd)); - return jhash_3words(ieee802154_addr_hash(saddr), - ieee802154_addr_hash(daddr), - (__force u32)(tag + (d_size << 16)), - lowpan_frags.rnd); -} - -static unsigned int lowpan_hashfn(const struct inet_frag_queue *q) -{ - const struct lowpan_frag_queue *fq; - - fq = container_of(q, struct lowpan_frag_queue, q); - return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr); -} - -static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a) -{ - const struct lowpan_frag_queue *fq; - const struct lowpan_create_arg *arg = a; - - fq = container_of(q, struct lowpan_frag_queue, q); - return fq->tag == arg->tag && fq->d_size == arg->d_size && - ieee802154_addr_equal(&fq->saddr, arg->src) && - ieee802154_addr_equal(&fq->daddr, arg->dst); -} - static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { - const struct lowpan_create_arg *arg = a; + const struct frag_lowpan_compare_key *key = a; struct lowpan_frag_queue *fq; fq = container_of(q, struct lowpan_frag_queue, q); - fq->tag = arg->tag; - fq->d_size = arg->d_size; - fq->saddr = *arg->src; - fq->daddr = *arg->dst; + BUILD_BUG_ON(sizeof(*key) > sizeof(q->key)); + memcpy(&q->key, key, sizeof(*key)); } static void lowpan_frag_expire(unsigned long data) @@ -104,21 +72,17 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, const struct ieee802154_addr *src, const struct ieee802154_addr *dst) { - struct inet_frag_queue *q; - struct lowpan_create_arg arg; - unsigned int hash; struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + struct frag_lowpan_compare_key key = { + .tag = cb->d_tag, + .d_size = cb->d_size, + .src = *src, + .dst = *dst, + }; + struct inet_frag_queue *q; - arg.tag = cb->d_tag; - arg.d_size = cb->d_size; - arg.src = src; - arg.dst = dst; - - hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst); - - q = inet_frag_find(&ieee802154_lowpan->frags, - &lowpan_frags, &arg, hash); + q = inet_frag_find(&ieee802154_lowpan->frags, &key); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; @@ -610,18 +574,47 @@ static struct pernet_operations lowpan_frags_ops = { .exit = lowpan_frags_exit_net, }; +static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed) +{ + return jhash2(data, + sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); +} + +static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct inet_frag_queue *fq = data; + + return jhash2((const u32 *)&fq->key, + sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); +} + +static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct frag_lowpan_compare_key *key = arg->key; + const struct inet_frag_queue *fq = ptr; + + return !!memcmp(&fq->key, key, sizeof(*key)); +} + +static const struct rhashtable_params lowpan_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .hashfn = lowpan_key_hashfn, + .obj_hashfn = lowpan_obj_hashfn, + .obj_cmpfn = lowpan_obj_cmpfn, + .automatic_shrinking = true, +}; + int __init lowpan_net_frag_init(void) { int ret; - lowpan_frags.hashfn = lowpan_hashfn; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; lowpan_frags.skb_free = NULL; lowpan_frags.qsize = sizeof(struct frag_queue); - lowpan_frags.match = lowpan_frag_match; lowpan_frags.frag_expire = lowpan_frag_expire; lowpan_frags.frags_cache_name = lowpan_frags_cache_name; + lowpan_frags.rhash_params = lowpan_rhash_params; ret = inet_frags_init(&lowpan_frags); if (ret) goto out; -- cgit v1.2.3 From 24641fb6453c4cc642f548e1f11502c7c0caaa54 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 20 Apr 2018 14:54:13 -0400 Subject: net: ieee802154: 6lowpan: fix frag reassembly commit f18fa5de5ba7f1d6650951502bb96a6e4715a948 upstream. This patch initialize stack variables which are used in frag_lowpan_compare_key to zero. In my case there are padding bytes in the structures ieee802154_addr as well in frag_lowpan_compare_key. Otherwise the key variable contains random bytes. The result is that a compare of two keys by memcmp works incorrect. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Alexander Aring Reported-by: Stefan Schmidt Signed-off-by: Stefan Schmidt Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/ieee802154/6lowpan/reassembly.c') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 65c0b7349f9d..510568c37476 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -74,14 +74,14 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - struct frag_lowpan_compare_key key = { - .tag = cb->d_tag, - .d_size = cb->d_size, - .src = *src, - .dst = *dst, - }; + struct frag_lowpan_compare_key key = {}; struct inet_frag_queue *q; + key.tag = cb->d_tag; + key.d_size = cb->d_size; + key.src = *src; + key.dst = *dst; + q = inet_frag_find(&ieee802154_lowpan->frags, &key); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); @@ -372,7 +372,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) struct lowpan_frag_queue *fq; struct net *net = dev_net(skb->dev); struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); - struct ieee802154_hdr hdr; + struct ieee802154_hdr hdr = {}; int err; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) -- cgit v1.2.3 From 50fc08963b0ccc01bc9a01a4e699aed9eb0137f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Oct 2018 12:29:59 -0700 Subject: inet: frags: remove inet_frag_maybe_warn_overflow() commit 2d44ed22e607f9a285b049de2263e3840673a260 upstream. This function is obsolete, after rhashtable addition to inet defrag. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/ieee802154/6lowpan/reassembly.c') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 510568c37476..c5ad89f5f028 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -83,10 +83,9 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, key.dst = *dst; q = inet_frag_find(&ieee802154_lowpan->frags, &key); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + if (!q) return NULL; - } + return container_of(q, struct lowpan_frag_queue, q); } -- cgit v1.2.3 From 567ef0554b91de121e9c1ad6b30d0077a5ea1fbf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Oct 2018 12:30:00 -0700 Subject: inet: frags: break the 2GB limit for frags storage commit 3e67f106f619dcfaf6f4e2039599bdb69848c714 upstream. Some users are willing to provision huge amounts of memory to be able to perform reassembly reasonnably well under pressure. Current memory tracking is using one atomic_t and integers. Switch to atomic_long_t so that 64bit arches can use more than 2GB, without any cost for 32bit arches. Note that this patch avoids an overflow error, if high_thresh was set to ~2GB, since this test in inet_frag_alloc() was never true : if (... || frag_mem_limit(nf) > nf->high_thresh) Tested: $ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh $ grep FRAG /proc/net/sockstat FRAG: inuse 14705885 memory 16000002880 $ nstat -n ; sleep 1 ; nstat | grep Reas IpReasmReqds 3317150 0.0 IpReasmFails 3317112 0.0 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ieee802154/6lowpan/reassembly.c') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index c5ad89f5f028..94ef920530a5 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -410,23 +410,23 @@ err: } #ifdef CONFIG_SYSCTL -static int zero; +static long zero; static struct ctl_table lowpan_frags_ns_ctl_table[] = { { .procname = "6lowpanfrag_high_thresh", .data = &init_net.ieee802154_lowpan.frags.high_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh }, { .procname = "6lowpanfrag_low_thresh", .data = &init_net.ieee802154_lowpan.frags.low_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &zero, .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh }, -- cgit v1.2.3 From bf40801a018bbe8f2076fab89f538cc05eb15c03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Oct 2018 12:30:06 -0700 Subject: inet: frags: fix ip6frag_low_thresh boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3d23401283e80ceb03f765842787e0e79ff598b7 upstream. Giving an integer to proc_doulongvec_minmax() is dangerous on 64bit arches, since linker might place next to it a non zero value preventing a change to ip6frag_low_thresh. ip6frag_low_thresh is not used anymore in the kernel, but we do not want to prematuraly break user scripts wanting to change it. Since specifying a minimal value of 0 for proc_doulongvec_minmax() is moot, let's remove these zero values in all defrag units. Fixes: 6e00f7dd5e4e ("ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh") Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ieee802154/6lowpan/reassembly.c') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 94ef920530a5..6183730d38db 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -410,7 +410,6 @@ err: } #ifdef CONFIG_SYSCTL -static long zero; static struct ctl_table lowpan_frags_ns_ctl_table[] = { { @@ -427,7 +426,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh }, { -- cgit v1.2.3