Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 2
-rw-r--r-- | net/ipv4/fib_trie.c | 16
-rw-r--r-- | net/ipv4/icmp.c | 9
-rw-r--r-- | net/ipv4/inet_fragment.c | 2
-rw-r--r-- | net/ipv4/inetpeer.c | 77
-rw-r--r-- | net/ipv4/ip_fragment.c | 6
-rw-r--r-- | net/ipv4/ip_output.c | 4
-rw-r--r-- | net/ipv4/netfilter/nf_nat_snmp_basic.c | 4
-rw-r--r-- | net/ipv4/route.c | 188
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 32
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 32
-rw-r--r-- | net/ipv4/tcp_output.c | 20
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 10
13 files changed, 197 insertions, 205 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c8f7aee587d1..e4e8e00a2c91 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -553,7 +553,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
-	return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
+	return sk->sk_prot->connect(sk, uaddr, addr_len);
 }
 EXPORT_SYMBOL(inet_dgram_connect);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 30b88d7b4bd6..9b0f25930fbc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1007,9 +1007,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
 	while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
-		tn = (struct tnode *) resize(t, (struct tnode *)tn);
+		tn = (struct tnode *)resize(t, tn);
 
-		tnode_put_child_reorg((struct tnode *)tp, cindex,
+		tnode_put_child_reorg(tp, cindex,
 				      (struct rt_trie_node *)tn, wasfull);
 
 		tp = node_parent((struct rt_trie_node *) tn);
@@ -1024,7 +1024,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
 	/* Handle last (top) tnode */
 	if (IS_TNODE(tn))
-		tn = (struct tnode *)resize(t, (struct tnode *)tn);
+		tn = (struct tnode *)resize(t, tn);
 
 	rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
 	tnode_free_flush();
@@ -1125,7 +1125,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 		node_set_parent((struct rt_trie_node *)l, tp);
 
 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
-		put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l);
+		put_child(t, tp, cindex, (struct rt_trie_node *)l);
 	} else {
 		/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
 		/*
@@ -1160,8 +1160,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 		if (tp) {
 			cindex = tkey_extract_bits(key, tp->pos, tp->bits);
-			put_child(t, (struct tnode *)tp, cindex,
-				  (struct rt_trie_node *)tn);
+			put_child(t, tp, cindex, (struct rt_trie_node *)tn);
 		} else {
 			rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
 			tp = tn;
@@ -1620,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
 	if (tp) {
 		t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
-		put_child(t, (struct tnode *)tp, cindex, NULL);
+		put_child(t, tp, cindex, NULL);
 		trie_rebalance(t, tp);
 	} else
 		RCU_INIT_POINTER(t->trie, NULL);
@@ -1844,6 +1843,8 @@ int fib_table_flush(struct fib_table *tb)
 	if (ll && hlist_empty(&ll->list))
 		trie_leaf_remove(t, ll);
 
+	inetpeer_invalidate_tree(&tb->tb_peers);
+
 	pr_debug("trie_flush found=%d\n", found);
 	return found;
 }
@@ -1992,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id)
 	tb->tb_id = id;
 	tb->tb_default = -1;
 	tb->tb_num_default = 0;
+	inet_peer_base_init(&tb->tb_peers);
 
 	t = (struct trie *) tb->tb_data;
 	memset(t, 0, sizeof(*t));
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c75efbdc71cb..e1caa1abe5d1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -253,9 +253,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 	/* Limit if icmp type is enabled in ratemask.
 	 */
 	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
-		if (!rt->peer)
-			rt_bind_peer(rt, fl4->daddr, 1);
-		rc = inet_peer_xrlim_allow(rt->peer,
+		struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
+		rc = inet_peer_xrlim_allow(peer,
 					   net->ipv4.sysctl_icmp_ratelimit);
 	}
 out:
@@ -674,9 +673,7 @@ static void icmp_unreach(struct sk_buff *skb)
 			LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
 				       &iph->daddr);
 		} else {
-			info = ip_rt_frag_needed(net, iph,
-						 ntohs(icmph->un.frag.mtu),
-						 skb->dev);
+			info = ntohs(icmph->un.frag.mtu);
 			if (!info)
 				goto out;
 		}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5ff2a51b6d0c..85190e69297b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	if (q == NULL)
 		return NULL;
 
+	q->net = nf;
 	f->constructor(q, arg);
 	atomic_add(f->qsize, &nf->mem);
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
-	q->net = nf;
 
 	return q;
 }
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d4d61b694fab..cac02ad1425d 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -82,23 +82,39 @@ static const struct inet_peer peer_fake_node = {
 	.avl_height	= 0
 };
 
-struct inet_peer_base {
-	struct inet_peer __rcu	*root;
-	seqlock_t		lock;
-	int			total;
-};
+void inet_peer_base_init(struct inet_peer_base *bp)
+{
+	bp->root = peer_avl_empty_rcu;
+	seqlock_init(&bp->lock);
+	bp->flush_seq = ~0U;
+	bp->total = 0;
+}
+EXPORT_SYMBOL_GPL(inet_peer_base_init);
 
-static struct inet_peer_base v4_peers = {
-	.root		= peer_avl_empty_rcu,
-	.lock		= __SEQLOCK_UNLOCKED(v4_peers.lock),
-	.total		= 0,
-};
+static atomic_t v4_seq = ATOMIC_INIT(0);
+static atomic_t v6_seq = ATOMIC_INIT(0);
 
-static struct inet_peer_base v6_peers = {
-	.root		= peer_avl_empty_rcu,
-	.lock		= __SEQLOCK_UNLOCKED(v6_peers.lock),
-	.total		= 0,
-};
+static atomic_t *inetpeer_seq_ptr(int family)
+{
+	return (family == AF_INET ? &v4_seq : &v6_seq);
+}
+
+static inline void flush_check(struct inet_peer_base *base, int family)
+{
+	atomic_t *fp = inetpeer_seq_ptr(family);
+
+	if (unlikely(base->flush_seq != atomic_read(fp))) {
+		inetpeer_invalidate_tree(base);
+		base->flush_seq = atomic_read(fp);
+	}
+}
+
+void inetpeer_invalidate_family(int family)
+{
+	atomic_t *fp = inetpeer_seq_ptr(family);
+
+	atomic_inc(fp);
+}
 
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
@@ -401,11 +417,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
 	call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 
-static struct inet_peer_base *family_to_base(int family)
-{
-	return family == AF_INET ? &v4_peers : &v6_peers;
-}
-
 /* perform garbage collect on all items stacked during a lookup */
 static int inet_peer_gc(struct inet_peer_base *base,
 			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
@@ -443,14 +454,17 @@ static int inet_peer_gc(struct inet_peer_base *base,
 	return cnt;
 }
 
-struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create)
+struct inet_peer *inet_getpeer(struct inet_peer_base *base,
+			       const struct inetpeer_addr *daddr,
+			       int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer_base *base = family_to_base(daddr->family);
 	struct inet_peer *p;
 	unsigned int sequence;
 	int invalidated, gccnt = 0;
 
+	flush_check(base, daddr->family);
+
 	/* Attempt a lockless lookup first.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
@@ -560,10 +574,20 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
 
-void inetpeer_invalidate_tree(int family)
+static void inetpeer_inval_rcu(struct rcu_head *head)
+{
+	struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
+
+	spin_lock_bh(&gc_lock);
+	list_add_tail(&p->gc_list, &gc_list);
+	spin_unlock_bh(&gc_lock);
+
+	schedule_delayed_work(&gc_work, gc_delay);
+}
+
+void inetpeer_invalidate_tree(struct inet_peer_base *base)
 {
 	struct inet_peer *old, *new, *prev;
-	struct inet_peer_base *base = family_to_base(family);
 
 	write_seqlock_bh(&base->lock);
@@ -576,10 +600,7 @@ void inetpeer_invalidate_tree(int family)
 	prev = cmpxchg(&base->root, old, new);
 	if (prev == old) {
 		base->total = 0;
-		spin_lock(&gc_lock);
-		list_add_tail(&prev->gc_list, &gc_list);
-		spin_unlock(&gc_lock);
-		schedule_delayed_work(&gc_work, gc_delay);
+		call_rcu(&prev->gc_rcu, inetpeer_inval_rcu);
 	}
 out:
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9dbd3dd6022d..8d07c973409c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
 static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
+	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
+					       frags);
+	struct net *net = container_of(ipv4, struct net, ipv4);
+
 	struct ip4_create_arg *arg = a;
 
 	qp->protocol = arg->iph->protocol;
@@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 	qp->daddr = arg->iph->daddr;
 	qp->user = arg->user;
 	qp->peer = sysctl_ipfrag_max_dist ?
-		inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
+		inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
 }
 
 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 451f97c42eb4..b99ca4e154b9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -200,7 +200,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 		}
 		if (skb->sk)
 			skb_set_owner_w(skb2, skb->sk);
-		kfree_skb(skb);
+		consume_skb(skb);
 		skb = skb2;
 	}
 
@@ -709,7 +709,7 @@ slow_path:
 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
 	}
-	kfree_skb(skb);
+	consume_skb(skb);
 	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
 	return err;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 746edec8b86e..bac712293fd6 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -405,7 +405,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
 	ptr = *octets;
 	while (ctx->pointer < eoc) {
-		if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
+		if (!asn1_octet_decode(ctx, ptr++)) {
 			kfree(*octets);
 			*octets = NULL;
 			return 0;
@@ -759,7 +759,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 		}
 		break;
 	case SNMP_OBJECTID:
-		if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
+		if (!asn1_oid_decode(ctx, end, &lp, &len)) {
 			kfree(id);
 			return 0;
 		}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98b30d08efe9..842510d50453 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -162,10 +162,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 	struct inet_peer *peer;
 	u32 *p = NULL;
 
-	if (!rt->peer)
-		rt_bind_peer(rt, rt->rt_dst, 1);
-
-	peer = rt->peer;
+	peer = rt_get_peer_create(rt, rt->rt_dst);
 	if (peer) {
 		u32 *old_p = __DST_METRICS_PTR(old);
 		unsigned long prev, new;
 
@@ -680,7 +677,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		(rth->peer && rth->peer->pmtu_expires);
+		(rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires);
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -938,7 +935,7 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-	inetpeer_invalidate_tree(AF_INET);
+	inetpeer_invalidate_family(AF_INET);
 }
 
 /*
@@ -1328,14 +1325,20 @@ static u32 rt_peer_genid(void)
 
 void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
 {
+	struct inet_peer_base *base;
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v4(daddr, create);
+	base = inetpeer_base_ptr(rt->_peer);
+	if (!base)
+		return;
 
-	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
-		inet_putpeer(peer);
-	else
-		rt->rt_peer_genid = rt_peer_genid();
+	peer = inet_getpeer_v4(base, daddr, create);
+	if (peer) {
+		if (!rt_set_peer(rt, peer))
+			inet_putpeer(peer);
+		else
+			rt->rt_peer_genid = rt_peer_genid();
+	}
 }
 
 /*
@@ -1363,14 +1366,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 	struct rtable *rt = (struct rtable *) dst;
 
 	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		if (rt->peer == NULL)
-			rt_bind_peer(rt, rt->rt_dst, 1);
+		struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst);
 
 		/* If peer is attached to destination, it is never detached,
 		   so that we need not to grab a lock to dereference it.
 		 */
-		if (rt->peer) {
-			iph->id = htons(inet_getid(rt->peer, more));
+		if (peer) {
+			iph->id = htons(inet_getid(peer, more));
 			return;
 		}
 	} else if (!rt)
@@ -1480,10 +1482,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rt->rt_gateway != old_gw)
 					continue;
 
-				if (!rt->peer)
-					rt_bind_peer(rt, rt->rt_dst, 1);
-
-				peer = rt->peer;
+				peer = rt_get_peer_create(rt, rt->rt_dst);
 				if (peer) {
 					if (peer->redirect_learned.a4 != new_gw) {
 						peer->redirect_learned.a4 = new_gw;
@@ -1539,8 +1538,10 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 						rt_genid(dev_net(dst->dev)));
 			rt_del(hash, rt);
 			ret = NULL;
-		} else if (rt->peer && peer_pmtu_expired(rt->peer)) {
-			dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
+		} else if (rt_has_peer(rt)) {
+			struct inet_peer *peer = rt_peer_ptr(rt);
+			if (peer_pmtu_expired(peer))
+				dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
 		}
 	}
 	return ret;
@@ -1578,9 +1579,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
-	if (!rt->peer)
-		rt_bind_peer(rt, rt->rt_dst, 1);
-	peer = rt->peer;
+	peer = rt_get_peer_create(rt, rt->rt_dst);
 	if (!peer) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
 		return;
@@ -1645,9 +1644,7 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
-	if (!rt->peer)
-		rt_bind_peer(rt, rt->rt_dst, 1);
-	peer = rt->peer;
+	peer = rt_get_peer_create(rt, rt->rt_dst);
 
 	send = true;
 	if (peer) {
@@ -1668,67 +1665,6 @@ out:
 	kfree_skb(skb);
 	return 0;
 }
 
-/*
- * The last two values are not from the RFC but
- * are needed for AMPRnet AX.25 paths.
- */
-
-static const unsigned short mtu_plateau[] =
-{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
-
-static inline unsigned short guess_mtu(unsigned short old_mtu)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
-		if (old_mtu > mtu_plateau[i])
-			return mtu_plateau[i];
-	return 68;
-}
-
-unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
-				 unsigned short new_mtu,
-				 struct net_device *dev)
-{
-	unsigned short old_mtu = ntohs(iph->tot_len);
-	unsigned short est_mtu = 0;
-	struct inet_peer *peer;
-
-	peer = inet_getpeer_v4(iph->daddr, 1);
-	if (peer) {
-		unsigned short mtu = new_mtu;
-
-		if (new_mtu < 68 || new_mtu >= old_mtu) {
-			/* BSD 4.2 derived systems incorrectly adjust
-			 * tot_len by the IP header length, and report
-			 * a zero MTU in the ICMP message.
-			 */
-			if (mtu == 0 &&
-			    old_mtu >= 68 + (iph->ihl << 2))
-				old_mtu -= iph->ihl << 2;
-			mtu = guess_mtu(old_mtu);
-		}
-
-		if (mtu < ip_rt_min_pmtu)
-			mtu = ip_rt_min_pmtu;
-		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
-			unsigned long pmtu_expires;
-
-			pmtu_expires = jiffies + ip_rt_mtu_expires;
-			if (!pmtu_expires)
-				pmtu_expires = 1UL;
-
-			est_mtu = mtu;
-			peer->pmtu_learned = mtu;
-			peer->pmtu_expires = pmtu_expires;
-			atomic_inc(&__rt_peer_genid);
-		}
-
-		inet_putpeer(peer);
-	}
-	return est_mtu ? : new_mtu;
-}
-
 static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
 {
 	unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
@@ -1753,9 +1689,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 
 	dst_confirm(dst);
 
-	if (!rt->peer)
-		rt_bind_peer(rt, rt->rt_dst, 1);
-	peer = rt->peer;
+	peer = rt_get_peer_create(rt, rt->rt_dst);
 	if (peer) {
 		unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
 
@@ -1781,12 +1715,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 static void ipv4_validate_peer(struct rtable *rt)
 {
 	if (rt->rt_peer_genid != rt_peer_genid()) {
-		struct inet_peer *peer;
-
-		if (!rt->peer)
-			rt_bind_peer(rt, rt->rt_dst, 0);
+		struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst);
 
-		peer = rt->peer;
 		if (peer) {
 			check_peer_pmtu(&rt->dst, peer);
 
@@ -1812,14 +1742,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 static void ipv4_dst_destroy(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *) dst;
-	struct inet_peer *peer = rt->peer;
 
 	if (rt->fi) {
 		fib_info_put(rt->fi);
 		rt->fi = NULL;
 	}
-	if (peer) {
-		rt->peer = NULL;
+	if (rt_has_peer(rt)) {
+		struct inet_peer *peer = rt_peer_ptr(rt);
 		inet_putpeer(peer);
 	}
 }
@@ -1832,8 +1761,11 @@ static void ipv4_link_failure(struct sk_buff *skb)
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 
 	rt = skb_rtable(skb);
-	if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
-		dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
+	if (rt && rt_has_peer(rt)) {
+		struct inet_peer *peer = rt_peer_ptr(rt);
+		if (peer_pmtu_cleaned(peer))
+			dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig);
+	}
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1935,6 +1867,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 			    struct fib_info *fi)
 {
+	struct inet_peer_base *base;
 	struct inet_peer *peer;
 	int create = 0;
 
@@ -1944,8 +1877,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
 		create = 1;
 
-	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
+	base = inetpeer_base_ptr(rt->_peer);
+	BUG_ON(!base);
+
+	peer = inet_getpeer_v4(base, rt->rt_dst, create);
 	if (peer) {
+		__rt_set_peer(rt, peer);
 		rt->rt_peer_genid = rt_peer_genid();
 		if (inet_metrics_new(peer))
 			memcpy(peer->metrics, fi->fib_metrics,
@@ -2061,7 +1998,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_peer_genid = 0;
-	rth->peer = NULL;
+	rt_init_peer(rth, dev_net(dev)->ipv4.peers);
 	rth->fi = NULL;
 	if (our) {
 		rth->dst.input= ip_local_deliver;
@@ -2189,7 +2126,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_peer_genid = 0;
-	rth->peer = NULL;
+	rt_init_peer(rth, &res->table->tb_peers);
 	rth->fi = NULL;
 
 	rth->dst.input = ip_forward;
@@ -2372,7 +2309,7 @@ local_input:
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_peer_genid = 0;
-	rth->peer = NULL;
+	rt_init_peer(rth, net->ipv4.peers);
 	rth->fi = NULL;
 	if (res.type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
@@ -2576,7 +2513,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	rth->rt_gateway = fl4->daddr;
 	rth->rt_spec_dst= fl4->saddr;
 	rth->rt_peer_genid = 0;
-	rth->peer = NULL;
+	rt_init_peer(rth, (res->table ?
+			   &res->table->tb_peers :
+			   dev_net(dev_out)->ipv4.peers));
 	rth->fi = NULL;
 
 	RT_CACHE_STAT_INC(out_slow_tot);
@@ -2625,6 +2564,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
 	int orig_oif;
 
 	res.fi		= NULL;
+	res.table	= NULL;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	res.r		= NULL;
 #endif
@@ -2730,6 +2670,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
 
 	if (fib_lookup(net, fl4, &res)) {
 		res.fi = NULL;
+		res.table = NULL;
 		if (fl4->flowi4_oif) {
 			/* Apparently, routing tables are wrong. Assume,
 			   that the destination is on link.
@@ -2913,9 +2854,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_src = ort->rt_src;
 		rt->rt_gateway = ort->rt_gateway;
 		rt->rt_spec_dst = ort->rt_spec_dst;
-		rt->peer = ort->peer;
-		if (rt->peer)
-			atomic_inc(&rt->peer->refcnt);
+		rt_transfer_peer(rt, ort);
 		rt->fi = ort->fi;
 		if (rt->fi)
 			atomic_inc(&rt->fi->fib_clntref);
@@ -2953,7 +2892,6 @@ static int rt_fill_info(struct net *net,
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
 	unsigned long expires = 0;
-	const struct inet_peer *peer = rt->peer;
 	u32 id = 0, ts = 0, tsage = 0, error;
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
@@ -3009,8 +2947,9 @@ static int rt_fill_info(struct net *net,
 		goto nla_put_failure;
 
 	error = rt->dst.error;
-	if (peer) {
-		inet_peer_refcheck(rt->peer);
+	if (rt_has_peer(rt)) {
+		const struct inet_peer *peer = rt_peer_ptr(rt);
+		inet_peer_refcheck(peer);
 		id = atomic_read(&peer->ip_id_count) & 0xffff;
 		if (peer->tcp_ts_stamp) {
 			ts = peer->tcp_ts;
@@ -3400,6 +3339,30 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
 	.init = rt_genid_init,
 };
 
+static int __net_init ipv4_inetpeer_init(struct net *net)
+{
+	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+	if (!bp)
+		return -ENOMEM;
+	inet_peer_base_init(bp);
+	net->ipv4.peers = bp;
+	return 0;
+}
+
+static void __net_exit ipv4_inetpeer_exit(struct net *net)
+{
+	struct inet_peer_base *bp = net->ipv4.peers;
+
+	net->ipv4.peers = NULL;
+	inetpeer_invalidate_tree(bp);
+	kfree(bp);
+}
+
+static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
+	.init	= ipv4_inetpeer_init,
+	.exit	= ipv4_inetpeer_exit,
+};
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
@@ -3480,6 +3443,7 @@ int __init ip_rt_init(void)
 	register_pernet_subsys(&sysctl_route_ops);
 #endif
 	register_pernet_subsys(&rt_genid_ops);
+	register_pernet_subsys(&ipv4_inetpeer_ops);
 
 	return rc;
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c8d28c433b2b..fda2ca17135e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 		err = net_xmit_eval(err);
 	}
 
-	dst_release(dst);
 	return err;
 }
 
@@ -1821,40 +1820,25 @@ do_time_wait:
 	goto discard_it;
 }
 
-struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
+struct inet_peer *tcp_v4_get_peer(struct sock *sk)
 {
 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
 	struct inet_sock *inet = inet_sk(sk);
-	struct inet_peer *peer;
 
-	if (!rt ||
-	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
-		peer = inet_getpeer_v4(inet->inet_daddr, 1);
-		*release_it = true;
-	} else {
-		if (!rt->peer)
-			rt_bind_peer(rt, inet->inet_daddr, 1);
-		peer = rt->peer;
-		*release_it = false;
-	}
-
-	return peer;
+	/* If we don't have a valid cached route, or we're doing IP
+	 * options which make the IPv4 header destination address
+	 * different from our peer's, do not bother with this.
+	 */
+	if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr)
+		return NULL;
+	return rt_get_peer_create(rt, inet->inet_daddr);
 }
 EXPORT_SYMBOL(tcp_v4_get_peer);
 
-void *tcp_v4_tw_get_peer(struct sock *sk)
-{
-	const struct inet_timewait_sock *tw = inet_twsk(sk);
-
-	return inet_getpeer_v4(tw->tw_daddr, 1);
-}
-EXPORT_SYMBOL(tcp_v4_tw_get_peer);
-
 static struct timewait_sock_ops tcp_timewait_sock_ops = {
 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
 	.twsk_unique	= tcp_twsk_unique,
 	.twsk_destructor= tcp_twsk_destructor,
-	.twsk_getpeer	= tcp_v4_tw_get_peer,
 };
 
 const struct inet_connection_sock_af_ops ipv4_specific = {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b85d9fe7d663..cb015317c9f7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -60,9 +60,8 @@ static bool tcp_remember_stamp(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_peer *peer;
-	bool release_it;
 
-	peer = icsk->icsk_af_ops->get_peer(sk, &release_it);
+	peer = icsk->icsk_af_ops->get_peer(sk);
 	if (peer) {
 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
 		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
@@ -70,8 +69,6 @@ static bool tcp_remember_stamp(struct sock *sk)
 			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
 			peer->tcp_ts = tp->rx_opt.ts_recent;
 		}
-		if (release_it)
-			inet_putpeer(peer);
 		return true;
 	}
@@ -80,20 +77,19 @@ static bool tcp_remember_stamp(struct sock *sk)
 
 static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
 {
+	const struct tcp_timewait_sock *tcptw;
 	struct sock *sk = (struct sock *) tw;
 	struct inet_peer *peer;
 
-	peer = twsk_getpeer(sk);
+	tcptw = tcp_twsk(sk);
+	peer = tcptw->tw_peer;
 	if (peer) {
-		const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
-
 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
 		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
 		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
 			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
 			peer->tcp_ts	   = tcptw->tw_ts_recent;
 		}
-		inet_putpeer(peer);
 		return true;
 	}
 	return false;
@@ -317,9 +313,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	bool recycle_ok = false;
+	bool recycle_on = false;
 
-	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
+	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) {
 		recycle_ok = tcp_remember_stamp(sk);
+		recycle_on = true;
+	}
 
 	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
 		tw = inet_twsk_alloc(sk, state);
@@ -327,8 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	if (tw != NULL) {
 		struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
+		struct inet_sock *inet = inet_sk(sk);
+		struct inet_peer *peer = NULL;
 
-		tw->tw_transparent	= inet_sk(sk)->transparent;
+		tw->tw_transparent	= inet->transparent;
 		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
 		tcptw->tw_rcv_nxt	= tp->rcv_nxt;
 		tcptw->tw_snd_nxt	= tp->snd_nxt;
@@ -350,6 +351,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		}
 #endif
 
+		if (recycle_on)
+			peer = icsk->icsk_af_ops->get_peer(sk);
+		tcptw->tw_peer = peer;
+		if (peer)
+			atomic_inc(&peer->refcnt);
+
 #ifdef CONFIG_TCP_MD5SIG
 		/*
 		 * The timewait bucket does not have the key DB from the
@@ -401,8 +408,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 
 void tcp_twsk_destructor(struct sock *sk)
 {
-#ifdef CONFIG_TCP_MD5SIG
 	struct tcp_timewait_sock *twsk = tcp_twsk(sk);
+
+	if (twsk->tw_peer)
+		inet_putpeer(twsk->tw_peer);
+#ifdef CONFIG_TCP_MD5SIG
 	if (twsk->tw_md5_key) {
 		tcp_free_md5sig_pool();
 		kfree_rcu(twsk->tw_md5_key, rcu);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 803cbfe82fbc..c465d3e51e28 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2442,7 +2442,16 @@ int tcp_send_synack(struct sock *sk)
 	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 }
 
-/* Prepare a SYN-ACK. */
+/**
+ * tcp_make_synack - Prepare a SYN-ACK.
+ * sk: listener socket
+ * dst: dst entry attached to the SYNACK
+ * req: request_sock pointer
+ * rvp: request_values pointer
+ *
+ * Allocate one skb and build a SYNACK packet.
+ * @dst is consumed : Caller should not use it again.
+ */
 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct request_values *rvp)
@@ -2461,14 +2470,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
 		s_data_desired = cvp->s_data_desired;
 
-	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC);
-	if (skb == NULL)
+	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC);
+	if (unlikely(!skb)) {
+		dst_release(dst);
 		return NULL;
-
+	}
 	/* Reserve space for headers. */
 	skb_reserve(skb, MAX_TCP_HEADER);
 
-	skb_dst_set(skb, dst_clone(dst));
+	skb_dst_set(skb, dst);
 
 	mss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0d3426cb5c4f..8855d8268552 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -90,9 +90,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.dst.dev = dev;
 	dev_hold(dev);
 
-	xdst->u.rt.peer = rt->peer;
-	if (rt->peer)
-		atomic_inc(&rt->peer->refcnt);
+	rt_transfer_peer(&xdst->u.rt, rt);
 
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
@@ -212,8 +210,10 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 
 	dst_destroy_metrics_generic(dst);
 
-	if (likely(xdst->u.rt.peer))
-		inet_putpeer(xdst->u.rt.peer);
+	if (rt_has_peer(&xdst->u.rt)) {
+		struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt);
+		inet_putpeer(peer);
+	}
 
 	xfrm_dst_destroy(xdst);
 }
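
The hunks above replace the global per-family inetpeer trees with explicit inet_peer_base instances, one per network namespace (net->ipv4.peers) and one per FIB table (tb->tb_peers), so every lookup now names the base it searches. As a rough, non-authoritative sketch of the resulting calling convention, assuming kernel context (the helper below and its name are invented for illustration; only inet_getpeer_v4() and net->ipv4.peers come from the patch):

/* Illustrative only: resolve (and, with create == 1, allocate) the inetpeer
 * entry for a destination address against the per-netns base, mirroring the
 * inet_getpeer_v4(net->ipv4.peers, ...) call sites introduced above.
 */
static struct inet_peer *example_lookup_peer(struct net *net, __be32 daddr)
{
	return inet_getpeer_v4(net->ipv4.peers, daddr, 1);
}

Code that already holds a struct rtable instead goes through rt_get_peer_create(rt, rt->rt_dst), which resolves the base from the route itself, as in the route.c changes above.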