From c386578f1cdb4dac230395a951f88027f64346e3 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 29 Sep 2015 11:40:49 +0200 Subject: xfrm: Let the flowcache handle its size by default. The xfrm flowcache size is limited by the flowcache limit (4096 * number of online cpus) and the xfrm garbage collector threshold (2 * 32768), whatever is reached first. This means that we can hit the garbage collector limit only on systems with more than 16 cpus. On such systems we simply refuse new allocations if we reach the limit, so new flows are dropped. On syslems with 16 or less cpus, we hit the flowcache limit. In this case, we shrink the flow cache instead of refusing new flows. We increase the xfrm garbage collector threshold to INT_MAX to get the same behaviour, independent of the number of cpus. The xfrm garbage collector threshold can still be set below the flowcache limit to reduce the memory usage of the flowcache. Tested-by: Dan Streetman Signed-off-by: Steffen Klassert --- Documentation/networking/ip-sysctl.txt | 6 ++++-- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ebe94f2cab98..260f30b2def6 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1199,7 +1199,8 @@ tag - INTEGER xfrm4_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv4 destination cache entries. At twice this value the system will - refuse new allocations. + refuse new allocations. The value must be set below the flowcache + limit (4096 * number of online cpus) to take effect. igmp_link_local_mcast_reports - BOOLEAN Enable IGMP reports for link local multicast groups in the @@ -1645,7 +1646,8 @@ ratelimit - INTEGER xfrm6_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv6 destination cache entries. At twice this value the system will - refuse new allocations. + refuse new allocations. The value must be set below the flowcache + limit (4096 * number of online cpus) to take effect. IPv6 Update by: diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0304d1680ca2..75e8d48c03fb 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -246,7 +246,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 32768, + .gc_thresh = INT_MAX, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 30caa289c5db..2fad59320b6c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -287,7 +287,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 32768, + .gc_thresh = INT_MAX, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { -- cgit v1.2.3 From e33d4f13d21e9f604194ebc8730077ff39916c50 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 21 Oct 2015 11:48:25 -0400 Subject: xfrm: Fix unaligned access to stats in copy_to_user_state() On sparc, deleting established SAs (e.g., by restarting ipsec) results in unaligned access messages via xfrm_del_sa -> km_state_notify -> xfrm_send_state_notify(). Even though struct xfrm_usersa_info is aligned on 8-byte boundaries, netlink attributes are fundamentally only 4 byte aligned, and this cannot be changed for nla_data() that is passed up to userspace. As a result, the put_unaligned() macro needs to be used to set up potentially unaligned fields such as the xfrm_stats in copy_to_user_state() Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a8de9e300200..639e0d51b31a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif +#include static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { @@ -728,7 +729,9 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); - memcpy(&p->stats, &x->stats, sizeof(p->stats)); + put_unaligned(x->stats.replay_window, &p->stats.replay_window); + put_unaligned(x->stats.replay, &p->stats.replay); + put_unaligned(x->stats.integrity_failed, &p->stats.integrity_failed); memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr)); p->mode = x->props.mode; p->replay_window = x->props.replay_window; -- cgit v1.2.3 From 1a14f1e5550a341f76e5c8f596e9b5f8a886dfbc Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 23 Oct 2015 07:31:23 +0200 Subject: xfrm4: Fix header checks in _decode_session4. We skip the header informations if the data pointer points already behind the header in question for some protocols. This is because we call pskb_may_pull with a negative value converted to unsigened int from pskb_may_pull in this case. Skipping the header informations can lead to incorrect policy lookups, so fix it by a check of the data pointer position before we call pskb_may_pull. Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_policy.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 75e8d48c03fb..e4d533c6c796 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -137,7 +137,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ICMP: - if (pskb_may_pull(skb, xprth + 2 - skb->data)) { + if (xprth + 2 < skb->data || + pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp = xprth; fl4->fl4_icmp_type = icmp[0]; @@ -146,7 +147,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ESP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; @@ -154,7 +156,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_AH: - if (pskb_may_pull(skb, xprth + 8 - skb->data)) { + if (xprth + 8 < skb->data || + pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; @@ -162,7 +165,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_COMP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); @@ -170,7 +174,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_GRE: - if (pskb_may_pull(skb, xprth + 12 - skb->data)) { + if (xprth + 12 < skb->data || + pskb_may_pull(skb, xprth + 12 - skb->data)) { __be16 *greflags = (__be16 *)xprth; __be32 *gre_hdr = (__be32 *)xprth; -- cgit v1.2.3 From ea673a4d3a337184f3c314dcc6300bf02f39e077 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 23 Oct 2015 07:32:39 +0200 Subject: xfrm4: Reload skb header pointers after calling pskb_may_pull. A call to pskb_may_pull may change the pointers into the packet, so reload the pointers after the call. Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_policy.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e4d533c6c796..269b137c87ec 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -129,7 +129,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports = (__be16 *)xprth; + __be16 *ports; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; fl4->fl4_dport = ports[!reverse]; @@ -139,7 +142,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ICMP: if (xprth + 2 < skb->data || pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp = xprth; + u8 *icmp; + + xprth = skb_network_header(skb) + iph->ihl * 4; + icmp = xprth; fl4->fl4_icmp_type = icmp[0]; fl4->fl4_icmp_code = icmp[1]; @@ -149,7 +155,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ESP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be32 *ehdr = (__be32 *)xprth; + __be32 *ehdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; } @@ -158,7 +167,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_AH: if (xprth + 8 < skb->data || pskb_may_pull(skb, xprth + 8 - skb->data)) { - __be32 *ah_hdr = (__be32 *)xprth; + __be32 *ah_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; } @@ -167,7 +179,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_COMP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ipcomp_hdr = (__be16 *)xprth; + __be16 *ipcomp_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } @@ -176,8 +191,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_GRE: if (xprth + 12 < skb->data || pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags = (__be16 *)xprth; - __be32 *gre_hdr = (__be32 *)xprth; + __be16 *greflags; + __be32 *gre_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + greflags = (__be16 *)xprth; + gre_hdr = (__be32 *)xprth; if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) -- cgit v1.2.3 From cb866e3298cd7412503fc7e2c265753c853fab9d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 23 Oct 2015 07:52:58 +0200 Subject: xfrm: Increment statistic counter on inner mode error Increment the LINUX_MIB_XFRMINSTATEMODEERROR statistic counter to notify about dropped packets if we fail to fetch a inner mode. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 60ce7014e1b0..ad7f5b3f9b61 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -330,8 +330,10 @@ resume: if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) + if (inner_mode == NULL) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; + } } if (inner_mode->input(x, skb)) { -- cgit v1.2.3