From 5a2ef92023506d4e9cd13617b5a46b4d0f1b6747 Mon Sep 17 00:00:00 2001 From: Herbert Xu <herbert@gondor.apana.org.au> Date: Tue, 1 Mar 2011 02:36:47 +0000 Subject: inet: Remove unused sk_sndmsg_* from UFO UFO doesn't really use the sk_sndmsg_* parameters so touching them is pointless. It can't use them anyway since the whole point of UFO is to use the original pages without copying. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Acked-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04c7b3ba6b39..d3a4540cd308 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -767,7 +767,6 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - sk->sk_sndmsg_off = 0; /* specify the length of each IP datagram fragment */ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; -- cgit v1.2.3 From 1470ddf7f8cecf776921e5ccee72e3d2b3d60cbc Mon Sep 17 00:00:00 2001 From: Herbert Xu <herbert@gondor.apana.org.au> Date: Tue, 1 Mar 2011 02:36:47 +0000 Subject: inet: Remove explicit write references to sk/inet in ip_append_data In order to allow simultaneous calls to ip_append_data on the same socket, it must not modify any shared state in sk or inet (other than those that are designed to allow that such as atomic counters). This patch abstracts out write references to sk and inet_sk in ip_append_data and its friends so that we may use the underlying code in parallel. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Acked-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 238 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 140 insertions(+), 98 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d3a4540cd308..1dd5ecc9a27e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -733,6 +733,7 @@ csum_page(struct page *page, int offset, int copy) } static inline int ip_ufo_append_data(struct sock *sk, + struct sk_buff_head *queue, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, @@ -745,7 +746,7 @@ static inline int ip_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + if ((skb = skb_peek_tail(queue)) == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -771,35 +772,24 @@ static inline int ip_ufo_append_data(struct sock *sk, /* specify the length of each IP datagram fragment */ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); } return skb_append_datato_frags(sk, skb, getfrag, from, (length - transhdrlen)); } -/* - * ip_append_data() and ip_append_page() can make one large IP datagram - * from many pieces of data. Each pieces will be holded on the socket - * until ip_push_pending_frames() is called. Each piece can be a page - * or non-page data. - * - * Not only UDP, other transport protocols - e.g. raw sockets - can use - * this interface potentially. - * - * LATER: length must be adjusted by pad at tail, when it is required. - */ -int ip_append_data(struct sock *sk, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags) +static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, + struct inet_cork *cork, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - struct ip_options *opt = NULL; + struct ip_options *opt = inet->cork.opt; int hh_len; int exthdrlen; int mtu; @@ -808,58 +798,19 @@ int ip_append_data(struct sock *sk, int offset = 0; unsigned int maxfraglen, fragheaderlen; int csummode = CHECKSUM_NONE; - struct rtable *rt; - - if (flags&MSG_PROBE) - return 0; + struct rtable *rt = (struct rtable *)cork->dst; - if (skb_queue_empty(&sk->sk_write_queue)) { - /* - * setup for corking. - */ - opt = ipc->opt; - if (opt) { - if (inet->cork.opt == NULL) { - inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); - if (unlikely(inet->cork.opt == NULL)) - return -ENOBUFS; - } - memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); - inet->cork.flags |= IPCORK_OPT; - inet->cork.addr = ipc->addr; - } - rt = *rtp; - if (unlikely(!rt)) - return -EFAULT; - /* - * We steal reference to this route, caller should not release it - */ - *rtp = NULL; - inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path); - inet->cork.dst = &rt->dst; - inet->cork.length = 0; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; - exthdrlen = rt->dst.header_len; - length += exthdrlen; - transhdrlen += exthdrlen; - } else { - rt = (struct rtable *)inet->cork.dst; - if (inet->cork.flags & IPCORK_OPT) - opt = inet->cork.opt; + exthdrlen = transhdrlen ? rt->dst.header_len : 0; + length += exthdrlen; + transhdrlen += exthdrlen; + mtu = inet->cork.fragsize; - transhdrlen = 0; - exthdrlen = 0; - mtu = inet->cork.fragsize; - } hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - if (inet->cork.length + length > 0xFFFF - fragheaderlen) { + if (cork->length + length > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu-exthdrlen); return -EMSGSIZE; @@ -875,15 +826,15 @@ int ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; - skb = skb_peek_tail(&sk->sk_write_queue); + skb = skb_peek_tail(queue); - inet->cork.length += length; + cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, - fragheaderlen, transhdrlen, mtu, - flags); + err = ip_ufo_append_data(sk, queue, getfrag, from, length, + hh_len, fragheaderlen, transhdrlen, + mtu, flags); if (err) goto error; return 0; @@ -960,7 +911,7 @@ alloc_new_skb: else /* only the initial fragment is time stamped */ - ipc->tx_flags = 0; + cork->tx_flags = 0; } if (skb == NULL) goto error; @@ -971,7 +922,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); - skb_shinfo(skb)->tx_flags = ipc->tx_flags; + skb_shinfo(skb)->tx_flags = cork->tx_flags; /* * Find where to start putting bytes. @@ -1008,7 +959,7 @@ alloc_new_skb: /* * Put the packet on the pending queue. */ - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); continue; } @@ -1028,8 +979,8 @@ alloc_new_skb: } else { int i = skb_shinfo(skb)->nr_frags; skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = sk->sk_sndmsg_page; - int off = sk->sk_sndmsg_off; + struct page *page = cork->page; + int off = cork->off; unsigned int left; if (page && (left = PAGE_SIZE - off) > 0) { @@ -1041,7 +992,7 @@ alloc_new_skb: goto error; } get_page(page); - skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); + skb_fill_page_desc(skb, i, page, off, 0); frag = &skb_shinfo(skb)->frags[i]; } } else if (i < MAX_SKB_FRAGS) { @@ -1052,8 +1003,8 @@ alloc_new_skb: err = -ENOMEM; goto error; } - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; + cork->page = page; + cork->off = 0; skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; @@ -1065,7 +1016,7 @@ alloc_new_skb: err = -EFAULT; goto error; } - sk->sk_sndmsg_off += copy; + cork->off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; @@ -1079,11 +1030,87 @@ alloc_new_skb: return 0; error: - inet->cork.length -= length; + cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); return err; } +static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, + struct ipcm_cookie *ipc, struct rtable **rtp) +{ + struct inet_sock *inet = inet_sk(sk); + struct ip_options *opt; + struct rtable *rt; + + /* + * setup for corking. + */ + opt = ipc->opt; + if (opt) { + if (cork->opt == NULL) { + cork->opt = kmalloc(sizeof(struct ip_options) + 40, + sk->sk_allocation); + if (unlikely(cork->opt == NULL)) + return -ENOBUFS; + } + memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); + cork->flags |= IPCORK_OPT; + cork->addr = ipc->addr; + } + rt = *rtp; + if (unlikely(!rt)) + return -EFAULT; + /* + * We steal reference to this route, caller should not release it + */ + *rtp = NULL; + cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); + cork->dst = &rt->dst; + cork->length = 0; + cork->tx_flags = ipc->tx_flags; + cork->page = NULL; + cork->off = 0; + + return 0; +} + +/* + * ip_append_data() and ip_append_page() can make one large IP datagram + * from many pieces of data. Each pieces will be holded on the socket + * until ip_push_pending_frames() is called. Each piece can be a page + * or non-page data. + * + * Not only UDP, other transport protocols - e.g. raw sockets - can use + * this interface potentially. + * + * LATER: length must be adjusted by pad at tail, when it is required. + */ +int ip_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + struct ipcm_cookie *ipc, struct rtable **rtp, + unsigned int flags) +{ + struct inet_sock *inet = inet_sk(sk); + int err; + + if (flags&MSG_PROBE) + return 0; + + if (skb_queue_empty(&sk->sk_write_queue)) { + err = ip_setup_cork(sk, &inet->cork, ipc, rtp); + if (err) + return err; + } else { + transhdrlen = 0; + } + + return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, + from, length, transhdrlen, flags); +} + ssize_t ip_append_page(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1227,40 +1254,42 @@ error: return err; } -static void ip_cork_release(struct inet_sock *inet) +static void ip_cork_release(struct inet_cork *cork) { - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - dst_release(inet->cork.dst); - inet->cork.dst = NULL; + cork->flags &= ~IPCORK_OPT; + kfree(cork->opt); + cork->opt = NULL; + dst_release(cork->dst); + cork->dst = NULL; } /* * Combined all pending IP fragments on the socket as one IP datagram * and push them out. */ -int ip_push_pending_frames(struct sock *sk) +static int __ip_push_pending_frames(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork *cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ip_options *opt = NULL; - struct rtable *rt = (struct rtable *)inet->cork.dst; + struct rtable *rt = (struct rtable *)cork->dst; struct iphdr *iph; __be16 df = 0; __u8 ttl; int err = 0; - if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) + if ((skb = __skb_dequeue(queue)) == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1286,8 +1315,8 @@ int ip_push_pending_frames(struct sock *sk) ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); - if (inet->cork.flags & IPCORK_OPT) - opt = inet->cork.opt; + if (cork->flags & IPCORK_OPT) + opt = cork->opt; if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; @@ -1299,7 +1328,7 @@ int ip_push_pending_frames(struct sock *sk) iph->ihl = 5; if (opt) { iph->ihl += opt->optlen>>2; - ip_options_build(skb, opt, inet->cork.addr, rt, 0); + ip_options_build(skb, opt, cork->addr, rt, 0); } iph->tos = inet->tos; iph->frag_off = df; @@ -1315,7 +1344,7 @@ int ip_push_pending_frames(struct sock *sk) * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount */ - inet->cork.dst = NULL; + cork->dst = NULL; skb_dst_set(skb, &rt->dst); if (iph->protocol == IPPROTO_ICMP) @@ -1332,7 +1361,7 @@ int ip_push_pending_frames(struct sock *sk) } out: - ip_cork_release(inet); + ip_cork_release(cork); return err; error: @@ -1340,17 +1369,30 @@ error: goto out; } +int ip_push_pending_frames(struct sock *sk) +{ + return __ip_push_pending_frames(sk, &sk->sk_write_queue, + &inet_sk(sk)->cork); +} + /* * Throw away all pending data on the socket. */ -void ip_flush_pending_frames(struct sock *sk) +static void __ip_flush_pending_frames(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork *cork) { struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) + while ((skb = __skb_dequeue_tail(queue)) != NULL) kfree_skb(skb); - ip_cork_release(inet_sk(sk)); + ip_cork_release(cork); +} + +void ip_flush_pending_frames(struct sock *sk) +{ + __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); } -- cgit v1.2.3 From 1c32c5ad6fac8cee1a77449f5abf211e911ff830 Mon Sep 17 00:00:00 2001 From: Herbert Xu <herbert@gondor.apana.org.au> Date: Tue, 1 Mar 2011 02:36:47 +0000 Subject: inet: Add ip_make_skb and ip_finish_skb This patch adds the helper ip_make_skb which is like ip_append_data and ip_push_pending_frames all rolled into one, except that it does not send the skb produced. The sending part is carried out by ip_send_skb, which the transport protocol can call after it has tweaked the skb. It is meant to be called in cases where corking is not used should have a one-to-one correspondence to sendmsg. This patch also adds the helper ip_finish_skb which is meant to be replace ip_push_pending_frames when corking is required. Previously the protocol stack would peek at the socket write queue and add its header to the first packet. With ip_finish_skb, the protocol stack can directly operate on the final skb instead, just like the non-corking case with ip_make_skb. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Acked-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 65 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 14 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1dd5ecc9a27e..460308c35028 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1267,9 +1267,9 @@ static void ip_cork_release(struct inet_cork *cork) * Combined all pending IP fragments on the socket as one IP datagram * and push them out. */ -static int __ip_push_pending_frames(struct sock *sk, - struct sk_buff_head *queue, - struct inet_cork *cork) +struct sk_buff *__ip_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork *cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; @@ -1280,7 +1280,6 @@ static int __ip_push_pending_frames(struct sock *sk, struct iphdr *iph; __be16 df = 0; __u8 ttl; - int err = 0; if ((skb = __skb_dequeue(queue)) == NULL) goto out; @@ -1351,28 +1350,37 @@ static int __ip_push_pending_frames(struct sock *sk, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - /* Netfilter gets whole the not fragmented skb. */ + ip_cork_release(cork); +out: + return skb; +} + +int ip_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + int err; + err = ip_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); } -out: - ip_cork_release(cork); return err; - -error: - IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); - goto out; } int ip_push_pending_frames(struct sock *sk) { - return __ip_push_pending_frames(sk, &sk->sk_write_queue, - &inet_sk(sk)->cork); + struct sk_buff *skb; + + skb = ip_finish_skb(sk); + if (!skb) + return 0; + + /* Netfilter gets whole the not fragmented skb. */ + return ip_send_skb(skb); } /* @@ -1395,6 +1403,35 @@ void ip_flush_pending_frames(struct sock *sk) __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); } +struct sk_buff *ip_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + struct ipcm_cookie *ipc, struct rtable **rtp, + unsigned int flags) +{ + struct inet_cork cork = {}; + struct sk_buff_head queue; + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + err = ip_setup_cork(sk, &cork, ipc, rtp); + if (err) + return ERR_PTR(err); + + err = __ip_append_data(sk, &queue, &cork, getfrag, + from, length, transhdrlen, flags); + if (err) { + __ip_flush_pending_frames(sk, &queue, &cork); + return ERR_PTR(err); + } + + return __ip_make_skb(sk, &queue, &cork); +} /* * Fetch data from kernel space and fill in checksum if needed. -- cgit v1.2.3 From 420d44daa7aa1cc847e9e527f0a27a9ce61768ca Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Tue, 1 Mar 2011 14:19:23 -0800 Subject: ipv4: Make final arg to ip_route_output_flow to be boolean "can_sleep" Since that is what the current vague "flags" argument means. Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 460308c35028..e6905c562fb7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -355,7 +355,7 @@ int ip_queue_xmit(struct sk_buff *skb) * itself out. */ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, false)) goto no_route; } sk_setup_caps(sk, &rt->dst); -- cgit v1.2.3 From 273447b352e69c327efdecfd6e1d6fe3edbdcd14 Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Tue, 1 Mar 2011 14:27:04 -0800 Subject: ipv4: Kill can_sleep arg to ip_route_output_flow() This boolean state is now available in the flow flags. Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e6905c562fb7..68dbe2d93d9d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -355,7 +355,7 @@ int ip_queue_xmit(struct sk_buff *skb) * itself out. */ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, false)) + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk)) goto no_route; } sk_setup_caps(sk, &rt->dst); -- cgit v1.2.3 From 07df5294a753dfac2cc9f75e6159fc25fdc22149 Mon Sep 17 00:00:00 2001 From: Herbert Xu <herbert@gondor.apana.org.au> Date: Tue, 1 Mar 2011 23:00:58 -0800 Subject: inet: Replace left-over references to inet->cork The patch to replace inet->cork with cork left out two spots in __ip_append_data that can result in bogus packet construction. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 68dbe2d93d9d..33316b3534ca 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -789,7 +789,7 @@ static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - struct ip_options *opt = inet->cork.opt; + struct ip_options *opt = cork->opt; int hh_len; int exthdrlen; int mtu; @@ -803,7 +803,7 @@ static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, exthdrlen = transhdrlen ? rt->dst.header_len : 0; length += exthdrlen; transhdrlen += exthdrlen; - mtu = inet->cork.fragsize; + mtu = cork->fragsize; hh_len = LL_RESERVED_SPACE(rt->dst.dev); -- cgit v1.2.3 From b23dd4fe42b455af5c6e20966b7d6959fa8352ea Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Wed, 2 Mar 2011 14:31:35 -0800 Subject: ipv4: Make output route lookup return rtable directly. Instead of on the stack. Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 33316b3534ca..171f483b21d5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -355,7 +355,8 @@ int ip_queue_xmit(struct sk_buff *skb) * itself out. */ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk)) + rt = ip_route_output_flow(sock_net(sk), &fl, sk); + if (IS_ERR(rt)) goto no_route; } sk_setup_caps(sk, &rt->dst); @@ -1489,7 +1490,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .proto = sk->sk_protocol, .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_key(sock_net(sk), &rt, &fl)) + rt = ip_route_output_key(sock_net(sk), &fl); + if (IS_ERR(rt)) return; } -- cgit v1.2.3 From 78fbfd8a653ca972afe479517a40661bfff6d8c3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Sat, 12 Mar 2011 00:00:52 -0500 Subject: ipv4: Create and use route lookup helpers. The idea here is this minimizes the number of places one has to edit in order to make changes to how flows are defined and used. Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 171f483b21d5..916152dbdce4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -339,26 +339,19 @@ int ip_queue_xmit(struct sk_buff *skb) if(opt && opt->srr) daddr = opt->faddr; - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport }; - - /* If this fails, retransmit mechanism of transport layer will - * keep trying until route appears or the connection times - * itself out. - */ - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); - if (IS_ERR(rt)) - goto no_route; - } + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times + * itself out. + */ + rt = ip_route_output_ports(sock_net(sk), sk, + daddr, inet->inet_saddr, + inet->inet_dport, + inet->inet_sport, + sk->sk_protocol, + RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); + if (IS_ERR(rt)) + goto no_route; sk_setup_caps(sk, &rt->dst); } skb_dst_set_noref(skb, &rt->dst); -- cgit v1.2.3 From 1d28f42c1bd4bb2363d88df74d0128b4da135b4a Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Sat, 12 Mar 2011 00:29:39 -0500 Subject: net: Put flowi_* prefix on AF independent members of struct flowi I intend to turn struct flowi into a union of AF specific flowi structs. There will be a common structure that each variant includes first, much like struct sock_common. This is the first step to move in that direction. Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 916152dbdce4..e35ca40df03b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1474,14 +1474,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { .oif = arg->bound_dev_if, - .fl4_dst = daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl_ip_sport = tcp_hdr(skb)->dest, - .fl_ip_dport = tcp_hdr(skb)->source, - .proto = sk->sk_protocol, - .flags = ip_reply_arg_flowi_flags(arg) }; + struct flowi fl = { + .flowi_oif = arg->bound_dev_if, + .fl4_dst = daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), + .fl_ip_sport = tcp_hdr(skb)->dest, + .fl_ip_dport = tcp_hdr(skb)->source, + .flowi_proto = sk->sk_protocol, + .flowi_flags = ip_reply_arg_flowi_flags(arg), + }; security_skb_classify_flow(skb, &fl); rt = ip_route_output_key(sock_net(sk), &fl); if (IS_ERR(rt)) -- cgit v1.2.3 From 6281dcc94a96bd73017b2baa8fa83925405109ef Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Sat, 12 Mar 2011 00:43:55 -0500 Subject: net: Make flowi ports AF dependent. Create two sets of port member accessors, one set prefixed by fl4_* and the other prefixed by fl6_* This will let us to create AF optimal flow instances. It will work because every context in which we access the ports, we have to be fully aware of which AF the flowi is anyways. Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e35ca40df03b..67e5f7130322 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1479,8 +1479,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .fl4_dst = daddr, .fl4_src = rt->rt_spec_dst, .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl_ip_sport = tcp_hdr(skb)->dest, - .fl_ip_dport = tcp_hdr(skb)->source, + .fl4_sport = tcp_hdr(skb)->dest, + .fl4_dport = tcp_hdr(skb)->source, .flowi_proto = sk->sk_protocol, .flowi_flags = ip_reply_arg_flowi_flags(arg), }; -- cgit v1.2.3 From 9d6ec938019c6b16cb9ec96598ebe8f20de435fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Sat, 12 Mar 2011 01:12:47 -0500 Subject: ipv4: Use flowi4 in public route lookup interfaces. Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 67e5f7130322..2b9cc40397ee 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1474,18 +1474,18 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { - .flowi_oif = arg->bound_dev_if, - .fl4_dst = daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl4_sport = tcp_hdr(skb)->dest, - .fl4_dport = tcp_hdr(skb)->source, - .flowi_proto = sk->sk_protocol, - .flowi_flags = ip_reply_arg_flowi_flags(arg), + struct flowi4 fl4 = { + .flowi4_oif = arg->bound_dev_if, + .daddr = daddr, + .saddr = rt->rt_spec_dst, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .uli.ports.sport = tcp_hdr(skb)->dest, + .uli.ports.dport = tcp_hdr(skb)->source, + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = ip_reply_arg_flowi_flags(arg), }; - security_skb_classify_flow(skb, &fl); - rt = ip_route_output_key(sock_net(sk), &fl); + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) return; } -- cgit v1.2.3 From 9cce96df5b76691712dba22e83ff5efe900361e1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Sat, 12 Mar 2011 03:00:33 -0500 Subject: net: Put fl4_* macros to struct flowi4 and use them again. Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2b9cc40397ee..67f241b97649 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1479,8 +1479,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .daddr = daddr, .saddr = rt->rt_spec_dst, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), - .uli.ports.sport = tcp_hdr(skb)->dest, - .uli.ports.dport = tcp_hdr(skb)->source, + .fl4_sport = tcp_hdr(skb)->dest, + .fl4_dport = tcp_hdr(skb)->source, .flowi4_proto = sk->sk_protocol, .flowi4_flags = ip_reply_arg_flowi_flags(arg), }; -- cgit v1.2.3