summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c10
-rw-r--r--net/core/dev.c59
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/skbuff.c47
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/route.c146
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/af_inet6.c22
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netlink/af_netlink.c29
14 files changed, 177 insertions, 164 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index add69d0fd99d..fbbf1fa00940 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -5,7 +5,7 @@
#include <linux/export.h>
#include "vlan.h"
-bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
+bool vlan_do_receive(struct sk_buff **skbp)
{
struct sk_buff *skb = *skbp;
u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
@@ -13,14 +13,8 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
struct vlan_pcpu_stats *rx_stats;
vlan_dev = vlan_find_dev(skb->dev, vlan_id);
- if (!vlan_dev) {
- /* Only the last call to vlan_do_receive() should change
- * pkt_type to PACKET_OTHERHOST
- */
- if (vlan_id && last_handler)
- skb->pkt_type = PACKET_OTHERHOST;
+ if (!vlan_dev)
return false;
- }
skb = *skbp = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(!skb))
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e0a1847c3bb..09cb3f6dc40c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3300,18 +3300,18 @@ ncls:
&& !skb_pfmemalloc_protocol(skb))
goto drop;
- rx_handler = rcu_dereference(skb->dev->rx_handler);
if (vlan_tx_tag_present(skb)) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
}
- if (vlan_do_receive(&skb, !rx_handler))
+ if (vlan_do_receive(&skb))
goto another_round;
else if (unlikely(!skb))
goto unlock;
}
+ rx_handler = rcu_dereference(skb->dev->rx_handler);
if (rx_handler) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
@@ -3331,6 +3331,9 @@ ncls:
}
}
+ if (vlan_tx_nonzero_tag_present(skb))
+ skb->pkt_type = PACKET_OTHERHOST;
+
/* deliver only exact match when indicated */
null_or_dev = deliver_exact ? skb->dev : NULL;
@@ -3471,17 +3474,31 @@ out:
return netif_receive_skb(skb);
}
-inline void napi_gro_flush(struct napi_struct *napi)
+/* napi->gro_list contains packets ordered by age.
+ * youngest packets at the head of it.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
- struct sk_buff *skb, *next;
+ struct sk_buff *skb, *prev = NULL;
- for (skb = napi->gro_list; skb; skb = next) {
- next = skb->next;
+ /* scan list and build reverse chain */
+ for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
+ skb->prev = prev;
+ prev = skb;
+ }
+
+ for (skb = prev; skb; skb = prev) {
skb->next = NULL;
+
+ if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
+ return;
+
+ prev = skb->prev;
napi_gro_complete(skb);
+ napi->gro_count--;
}
- napi->gro_count = 0;
napi->gro_list = NULL;
}
EXPORT_SYMBOL(napi_gro_flush);
@@ -3542,6 +3559,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
napi->gro_count++;
NAPI_GRO_CB(skb)->count = 1;
+ NAPI_GRO_CB(skb)->age = jiffies;
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
@@ -3631,20 +3649,22 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_skb_finish);
-void skb_gro_reset_offset(struct sk_buff *skb)
+static void skb_gro_reset_offset(struct sk_buff *skb)
{
+ const struct skb_shared_info *pinfo = skb_shinfo(skb);
+ const skb_frag_t *frag0 = &pinfo->frags[0];
+
NAPI_GRO_CB(skb)->data_offset = 0;
NAPI_GRO_CB(skb)->frag0 = NULL;
NAPI_GRO_CB(skb)->frag0_len = 0;
if (skb->mac_header == skb->tail &&
- !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
- NAPI_GRO_CB(skb)->frag0 =
- skb_frag_address(&skb_shinfo(skb)->frags[0]);
- NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
+ pinfo->nr_frags &&
+ !PageHighMem(skb_frag_page(frag0))) {
+ NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+ NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
}
}
-EXPORT_SYMBOL(skb_gro_reset_offset);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
@@ -3876,7 +3896,7 @@ void napi_complete(struct napi_struct *n)
if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
return;
- napi_gro_flush(n);
+ napi_gro_flush(n, false);
local_irq_save(flags);
__napi_complete(n);
local_irq_restore(flags);
@@ -3981,8 +4001,17 @@ static void net_rx_action(struct softirq_action *h)
local_irq_enable();
napi_complete(n);
local_irq_disable();
- } else
+ } else {
+ if (n->gro_list) {
+ /* flush too old packets
+ * If HZ < 1000, flush all packets.
+ */
+ local_irq_enable();
+ napi_gro_flush(n, HZ >= 1000);
+ local_irq_disable();
+ }
list_move_tail(&n->poll_list, &sd->poll_list);
+ }
}
netpoll_poll_unlock(have);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index baca771caae2..22571488730a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1301,8 +1301,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
if (!dst)
goto discard;
- __skb_pull(skb, skb_network_offset(skb));
-
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
@@ -1312,6 +1310,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
neigh_hh_init(neigh, dst);
do {
+ __skb_pull(skb, skb_network_offset(skb));
seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
@@ -1342,9 +1341,8 @@ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
unsigned int seq;
int err;
- __skb_pull(skb, skb_network_offset(skb));
-
do {
+ __skb_pull(skb, skb_network_offset(skb));
seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cdc28598f4ef..6e04b1fa11f2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -655,53 +655,6 @@ void consume_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(consume_skb);
-/**
- * skb_recycle - clean up an skb for reuse
- * @skb: buffer
- *
- * Recycles the skb to be reused as a receive buffer. This
- * function does any necessary reference count dropping, and
- * cleans up the skbuff as if it just came from __alloc_skb().
- */
-void skb_recycle(struct sk_buff *skb)
-{
- struct skb_shared_info *shinfo;
-
- skb_release_head_state(skb);
-
- shinfo = skb_shinfo(skb);
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
- atomic_set(&shinfo->dataref, 1);
-
- memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->data = skb->head + NET_SKB_PAD;
- skb_reset_tail_pointer(skb);
-}
-EXPORT_SYMBOL(skb_recycle);
-
-/**
- * skb_recycle_check - check if skb can be reused for receive
- * @skb: buffer
- * @skb_size: minimum receive buffer size
- *
- * Checks that the skb passed in is not shared or cloned, and
- * that it is linear and its head portion at least as large as
- * skb_size so that it can be recycled as a receive buffer.
- * If these conditions are met, this function does any necessary
- * reference count dropping and cleans up the skbuff as if it
- * just came from __alloc_skb().
- */
-bool skb_recycle_check(struct sk_buff *skb, int skb_size)
-{
- if (!skb_is_recycleable(skb, skb_size))
- return false;
-
- skb_recycle(skb);
-
- return true;
-}
-EXPORT_SYMBOL(skb_recycle_check);
-
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
new->tstamp = old->tstamp;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 68c93d1bb03a..825c608826de 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -322,7 +322,8 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
- if (!r && !fib_num_tclassid_users(dev_net(dev))) {
+ if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
+ (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
*itag = 0;
return 0;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 267753060ffc..71b125cd5db1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -840,6 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
change_nexthops(fi) {
nexthop_nh->nh_parent = fi;
nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
+ if (!nexthop_nh->nh_pcpu_rth_output)
+ goto failure;
} endfor_nexthops(fi)
if (cfg->fc_mx) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f0c5b9c1a957..d34ce2972c8f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -406,7 +406,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
return &rt->dst;
@@ -442,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
rcu_read_unlock();
return &rt->dst;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index ab09b126423c..694de3b7aebf 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -85,7 +85,7 @@ int ip_forward(struct sk_buff *skb)
rt = skb_rtable(skb);
- if (opt->is_strictroute && opt->nexthop != rt->rt_gateway)
+ if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 24a29a39e9a8..6537a408a4fb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -193,7 +193,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr;
+ nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
@@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
- if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway)
+ if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ff622069fcef..1a0da8dc8180 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -802,7 +802,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
net = dev_net(rt->dst.dev);
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
if (!peer) {
- icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+ icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
+ rt_nexthop(rt, ip_hdr(skb)->daddr));
return;
}
@@ -827,7 +828,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
time_after(jiffies,
(peer->rate_last +
(ip_rt_redirect_load << peer->rate_tokens)))) {
- icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+ __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
+
+ icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
@@ -835,7 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
peer->rate_tokens == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
- &ip_hdr(skb)->daddr, &rt->rt_gateway);
+ &ip_hdr(skb)->daddr, &gw);
#endif
}
out_put_peer:
@@ -904,22 +907,32 @@ out: kfree_skb(skb);
return 0;
}
-static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
+ struct dst_entry *dst = &rt->dst;
struct fib_result res;
+ if (dst->dev->mtu < mtu)
+ return;
+
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
+ if (!rt->rt_pmtu) {
+ dst->obsolete = DST_OBSOLETE_KILL;
+ } else {
+ rt->rt_pmtu = mtu;
+ dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
+ }
+
rcu_read_lock();
- if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
+ if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
jiffies + ip_rt_mtu_expires);
}
rcu_read_unlock();
- return mtu;
}
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
@@ -929,14 +942,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
- mtu = __ip_rt_update_pmtu(rt, &fl4, mtu);
-
- if (!rt->rt_pmtu) {
- dst->obsolete = DST_OBSOLETE_KILL;
- } else {
- rt->rt_pmtu = mtu;
- rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
- }
+ __ip_rt_update_pmtu(rt, &fl4, mtu);
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
@@ -1120,7 +1126,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = dst->dev->mtu;
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
- if (rt->rt_gateway && mtu > 576)
+ if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
}
@@ -1171,7 +1177,9 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
if (fnhe->fnhe_gw) {
rt->rt_flags |= RTCF_REDIRECTED;
rt->rt_gateway = fnhe->fnhe_gw;
- }
+ rt->rt_uses_gateway = 1;
+ } else if (!rt->rt_gateway)
+ rt->rt_gateway = daddr;
orig = rcu_dereference(fnhe->fnhe_rth);
rcu_assign_pointer(fnhe->fnhe_rth, rt);
@@ -1180,13 +1188,6 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
fnhe->fnhe_stamp = jiffies;
ret = true;
- } else {
- /* Routes we intend to cache in nexthop exception have
- * the DST_NOCACHE bit clear. However, if we are
- * unsuccessful at storing this route into the cache
- * we really need to set it.
- */
- rt->dst.flags |= DST_NOCACHE;
}
spin_unlock_bh(&fnhe_lock);
@@ -1201,8 +1202,6 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
if (rt_is_input_route(rt)) {
p = (struct rtable **)&nh->nh_rth_input;
} else {
- if (!nh->nh_pcpu_rth_output)
- goto nocache;
p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
}
orig = *p;
@@ -1211,16 +1210,8 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
if (prev == orig) {
if (orig)
rt_free(orig);
- } else {
- /* Routes we intend to cache in the FIB nexthop have
- * the DST_NOCACHE bit clear. However, if we are
- * unsuccessful at storing this route into the cache
- * we really need to set it.
- */
-nocache:
- rt->dst.flags |= DST_NOCACHE;
+ } else
ret = false;
- }
return ret;
}
@@ -1281,8 +1272,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (fi) {
struct fib_nh *nh = &FIB_RES_NH(*res);
- if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
+ if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
rt->rt_gateway = nh->nh_gw;
+ rt->rt_uses_gateway = 1;
+ }
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
@@ -1291,8 +1284,18 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
cached = rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE))
cached = rt_cache_route(nh, rt);
- }
- if (unlikely(!cached))
+ if (unlikely(!cached)) {
+ /* Routes we intend to cache in nexthop exception or
+ * FIB nexthop have the DST_NOCACHE bit clear.
+ * However, if we are unsuccessful at storing this
+ * route into the cache we really need to set it.
+ */
+ rt->dst.flags |= DST_NOCACHE;
+ if (!rt->rt_gateway)
+ rt->rt_gateway = daddr;
+ rt_add_uncached_list(rt);
+ }
+ } else
rt_add_uncached_list(rt);
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -1360,6 +1363,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
if (our) {
rth->dst.input= ip_local_deliver;
@@ -1429,7 +1433,6 @@ static int __mkroute_input(struct sk_buff *skb,
return -EINVAL;
}
-
err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
in_dev->dev, in_dev, &itag);
if (err < 0) {
@@ -1439,10 +1442,13 @@ static int __mkroute_input(struct sk_buff *skb,
goto cleanup;
}
- if (out_dev == in_dev && err &&
+ do_cache = res->fi && !itag;
+ if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
(IN_DEV_SHARED_MEDIA(out_dev) ||
- inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
+ inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
flags |= RTCF_DOREDIRECT;
+ do_cache = false;
+ }
if (skb->protocol != htons(ETH_P_IP)) {
/* Not IP (i.e. ARP). Do not create route, if it is
@@ -1459,15 +1465,11 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- do_cache = false;
- if (res->fi) {
- if (!itag) {
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
- if (rt_cache_valid(rth)) {
- skb_dst_set_noref(skb, &rth->dst);
- goto out;
- }
- do_cache = true;
+ if (do_cache) {
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (rt_cache_valid(rth)) {
+ skb_dst_set_noref(skb, &rth->dst);
+ goto out;
}
}
@@ -1486,6 +1488,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->dst.input = ip_forward;
@@ -1656,6 +1659,7 @@ local_input:
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
@@ -1758,6 +1762,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
+ bool do_cache;
in_dev = __in_dev_get_rcu(dev_out);
if (!in_dev)
@@ -1794,24 +1799,36 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
}
fnhe = NULL;
+ do_cache = fi != NULL;
if (fi) {
struct rtable __rcu **prth;
+ struct fib_nh *nh = &FIB_RES_NH(*res);
- fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
+ fnhe = find_exception(nh, fl4->daddr);
if (fnhe)
prth = &fnhe->fnhe_rth;
- else
- prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
+ else {
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
+ }
+ prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
+ }
rth = rcu_dereference(*prth);
if (rt_cache_valid(rth)) {
dst_hold(&rth->dst);
return rth;
}
}
+
+add:
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
- fi);
+ do_cache);
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -1824,6 +1841,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2102,6 +2120,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
+ rt->rt_uses_gateway = ort->rt_uses_gateway;
INIT_LIST_HEAD(&rt->rt_uncached);
@@ -2180,12 +2199,22 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure;
}
- if (rt->rt_gateway &&
+ if (rt->rt_uses_gateway &&
nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
goto nla_put_failure;
+ expires = rt->dst.expires;
+ if (expires) {
+ unsigned long now = jiffies;
+
+ if (time_before(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+ }
+
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
- if (rt->rt_pmtu)
+ if (rt->rt_pmtu && expires)
metrics[RTAX_MTU - 1] = rt->rt_pmtu;
if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
@@ -2195,13 +2224,6 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
goto nla_put_failure;
error = rt->dst.error;
- expires = rt->dst.expires;
- if (expires) {
- if (time_before(jiffies, expires))
- expires -= jiffies;
- else
- expires = 0;
- }
if (rt_is_input_route(rt)) {
if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 681ea2f413e2..05c5ab8d983c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -91,6 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
xdst->u.rt.rt_gateway = rt->rt_gateway;
+ xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e22e6d88bac6..a974247a9ae4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -822,13 +822,6 @@ out:
return segs;
}
-struct ipv6_gro_cb {
- struct napi_gro_cb napi;
- int proto;
-};
-
-#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb)
-
static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -874,28 +867,31 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
iph = ipv6_hdr(skb);
}
- IPV6_GRO_CB(skb)->proto = proto;
+ NAPI_GRO_CB(skb)->proto = proto;
flush--;
nlen = skb_network_header_len(skb);
for (p = *head; p; p = p->next) {
- struct ipv6hdr *iph2;
+ const struct ipv6hdr *iph2;
+ __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
if (!NAPI_GRO_CB(p)->same_flow)
continue;
iph2 = ipv6_hdr(p);
+ first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
- /* All fields must match except length. */
+ /* All fields must match except length and Traffic Class. */
if (nlen != skb_network_header_len(p) ||
- memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) ||
+ (first_word & htonl(0xF00FFFFF)) ||
memcmp(&iph->nexthdr, &iph2->nexthdr,
nlen - offsetof(struct ipv6hdr, nexthdr))) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
-
+ /* flush if Traffic Class fields are different */
+ NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
NAPI_GRO_CB(p)->flush |= flush;
}
@@ -927,7 +923,7 @@ static int ipv6_gro_complete(struct sk_buff *skb)
sizeof(*iph));
rcu_read_lock();
- ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]);
+ ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]);
if (WARN_ON(!ops || !ops->gro_complete))
goto out_unlock;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 56f6d5d81a77..cc4c8095681a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -50,6 +50,7 @@ enum {
* local
*/
IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
+ IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
};
/*
@@ -113,6 +114,8 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
fl4.daddr = daddr;
fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
fl4.flowi4_tos = rtos;
+ fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
+ FLOWI_FLAG_KNOWN_NH : 0;
retry:
rt = ip_route_output_key(net, &fl4);
@@ -1061,7 +1064,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(iph->tos),
IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL, NULL)))
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_KNOWN_NH, NULL)))
goto tx_error_icmp;
if (rt->rt_flags & RTCF_LOCAL) {
ip_rt_put(rt);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0f2e3ad69c47..01e944a017a4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -169,6 +169,8 @@ static void netlink_sock_destruct(struct sock *sk)
if (nlk->cb) {
if (nlk->cb->done)
nlk->cb->done(nlk->cb);
+
+ module_put(nlk->cb->module);
netlink_destroy_callback(nlk->cb);
}
@@ -1758,6 +1760,7 @@ static int netlink_dump(struct sock *sk)
nlk->cb = NULL;
mutex_unlock(nlk->cb_mutex);
+ module_put(cb->module);
netlink_consume_callback(cb);
return 0;
@@ -1767,9 +1770,9 @@ errout_skb:
return err;
}
-int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- struct netlink_dump_control *control)
+int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *control)
{
struct netlink_callback *cb;
struct sock *sk;
@@ -1784,6 +1787,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->done = control->done;
cb->nlh = nlh;
cb->data = control->data;
+ cb->module = control->module;
cb->min_dump_alloc = control->min_dump_alloc;
atomic_inc(&skb->users);
cb->skb = skb;
@@ -1794,19 +1798,28 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
return -ECONNREFUSED;
}
nlk = nlk_sk(sk);
- /* A dump is in progress... */
+
mutex_lock(nlk->cb_mutex);
+ /* A dump is in progress... */
if (nlk->cb) {
mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
- sock_put(sk);
- return -EBUSY;
+ ret = -EBUSY;
+ goto out;
}
+ /* add reference of module which cb->dump belongs to */
+ if (!try_module_get(cb->module)) {
+ mutex_unlock(nlk->cb_mutex);
+ netlink_destroy_callback(cb);
+ ret = -EPROTONOSUPPORT;
+ goto out;
+ }
+
nlk->cb = cb;
mutex_unlock(nlk->cb_mutex);
ret = netlink_dump(sk);
-
+out:
sock_put(sk);
if (ret)
@@ -1817,7 +1830,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
*/
return -EINTR;
}
-EXPORT_SYMBOL(netlink_dump_start);
+EXPORT_SYMBOL(__netlink_dump_start);
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{