summaryrefslogtreecommitdiff
path: root/net/ipv6/udp.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-06 09:38:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-06 09:38:14 -0700
commitae045e2455429c418a418a3376301a9e5753a0a8 (patch)
treeb445bdeecd3f38aa0d0a29c9585cee49e4ccb0f1 /net/ipv6/udp.c
parentf4f142ed4ef835709c7e6d12eaca10d190bcebed (diff)
parentd247b6ab3ce6dd43665780865ec5fa145d9ab6bd (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Steady transitioning of the BPF instructure to a generic spot so all kernel subsystems can make use of it, from Alexei Starovoitov. 2) SFC driver supports busy polling, from Alexandre Rames. 3) Take advantage of hash table in UDP multicast delivery, from David Held. 4) Lighten locking, in particular by getting rid of the LRU lists, in inet frag handling. From Florian Westphal. 5) Add support for various RFC6458 control messages in SCTP, from Geir Ola Vaagland. 6) Allow to filter bridge forwarding database dumps by device, from Jamal Hadi Salim. 7) virtio-net also now supports busy polling, from Jason Wang. 8) Some low level optimization tweaks in pktgen from Jesper Dangaard Brouer. 9) Add support for ipv6 address generation modes, so that userland can have some input into the process. From Jiri Pirko. 10) Consolidate common TCP connection request code in ipv4 and ipv6, from Octavian Purdila. 11) New ARP packet logger in netfilter, from Pablo Neira Ayuso. 12) Generic resizable RCU hash table, with intial users in netlink and nftables. From Thomas Graf. 13) Maintain a name assignment type so that userspace can see where a network device name came from (enumerated by kernel, assigned explicitly by userspace, etc.) From Tom Gundersen. 14) Automatic flow label generation on transmit in ipv6, from Tom Herbert. 15) New packet timestamping facilities from Willem de Bruijn, meant to assist in measuring latencies going into/out-of the packet scheduler, latency from TCP data transmission to ACK, etc" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1536 commits) cxgb4 : Disable recursive mailbox commands when enabling vi net: reduce USB network driver config options. tg3: Modify tg3_tso_bug() to handle multiple TX rings amd-xgbe: Perform phy connect/disconnect at dev open/stop amd-xgbe: Use dma_set_mask_and_coherent to set DMA mask net: sun4i-emac: fix memory leak on bad packet sctp: fix possible seqlock seadlock in sctp_packet_transmit() Revert "net: phy: Set the driver when registering an MDIO bus device" cxgb4vf: Turn off SGE RX/TX Callback Timers and interrupts in PCI shutdown routine team: Simplify return path of team_newlink bridge: Update outdated comment on promiscuous mode net-timestamp: ACK timestamp for bytestreams net-timestamp: TCP timestamping net-timestamp: SCHED timestamp on entering packet scheduler net-timestamp: add key to disambiguate concurrent datagrams net-timestamp: move timestamp flags out of sk_flags net-timestamp: extend SCM_TIMESTAMPING ancillary data struct cxgb4i : Move stray CPL definitions to cxgb4 driver tcp: reduce spurious retransmits due to transient SACK reneging qlcnic: Initialize dcbnl_ops before register_netdev ...
Diffstat (limited to 'net/ipv6/udp.c')
-rw-r--r--net/ipv6/udp.c131
1 files changed, 63 insertions, 68 deletions
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7092ff78fd84..4836af8f582d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -79,7 +79,6 @@ static unsigned int udp6_ehashfn(struct net *net,
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -95,7 +94,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 1;
if (addr_type == IPV6_ADDR_ANY &&
- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
return 1;
if (sk2_rcv_saddr6 &&
@@ -473,7 +472,7 @@ try_again:
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_scope_id =
ipv6_iface_scope_id(&sin6->sin6_addr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
}
*addr_len = sizeof(*sin6);
}
@@ -534,11 +533,15 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct udphdr *uh = (struct udphdr*)(skb->data+offset);
struct sock *sk;
int err;
+ struct net *net = dev_net(skb->dev);
- sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
+ sk = __udp6_lib_lookup(net, daddr, uh->dest,
saddr, uh->source, inet6_iif(skb), udptable);
- if (sk == NULL)
+ if (sk == NULL) {
+ ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
+ }
if (type == ICMPV6_PKT_TOOBIG) {
if (!ip6_sk_accept_pmtu(sk))
@@ -674,7 +677,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
}
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP6_INC_STATS_BH(sock_net(sk),
UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
@@ -703,43 +706,26 @@ drop:
return -1;
}
-static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
- __be16 loc_port, const struct in6_addr *loc_addr,
- __be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif)
+static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
+ int dif, unsigned short hnum)
{
- struct hlist_nulls_node *node;
- unsigned short num = ntohs(loc_port);
-
- sk_nulls_for_each_from(sk, node) {
- struct inet_sock *inet = inet_sk(sk);
-
- if (!net_eq(sock_net(sk), net))
- continue;
-
- if (udp_sk(sk)->udp_port_hash == num &&
- sk->sk_family == PF_INET6) {
- if (inet->inet_dport) {
- if (inet->inet_dport != rmt_port)
- continue;
- }
- if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
- !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
- continue;
-
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
- continue;
+ struct inet_sock *inet = inet_sk(sk);
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
- continue;
- }
- if (!inet6_mc_check(sk, loc_addr, rmt_addr))
- continue;
- return sk;
- }
- }
- return NULL;
+ if (!net_eq(sock_net(sk), net))
+ return false;
+
+ if (udp_sk(sk)->udp_port_hash != hnum ||
+ sk->sk_family != PF_INET6 ||
+ (inet->inet_dport && inet->inet_dport != rmt_port) ||
+ (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ return false;
+ if (!inet6_mc_check(sk, loc_addr, rmt_addr))
+ return false;
+ return true;
}
static void flush_stack(struct sock **stack, unsigned int count,
@@ -763,6 +749,7 @@ static void flush_stack(struct sock **stack, unsigned int count,
if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
skb1 = NULL;
+ sock_put(sk);
}
if (unlikely(skb1))
kfree_skb(skb1);
@@ -788,43 +775,51 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
{
struct sock *sk, *stack[256 / sizeof(struct sock *)];
const struct udphdr *uh = udp_hdr(skb);
- struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
- int dif;
- unsigned int i, count = 0;
+ struct hlist_nulls_node *node;
+ unsigned short hnum = ntohs(uh->dest);
+ struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
+ int dif = inet6_iif(skb);
+ unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
+
+ if (use_hash2) {
+ hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+ udp_table.mask;
+ hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+start_lookup:
+ hslot = &udp_table.hash2[hash2];
+ offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
+ }
spin_lock(&hslot->lock);
- sk = sk_nulls_head(&hslot->head);
- dif = inet6_iif(skb);
- sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
- while (sk) {
- /* If zero checksum and no_check is not on for
- * the socket then skip it.
- */
- if (uh->check || udp_sk(sk)->no_check6_rx)
+ sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
+ if (__udp_v6_is_mcast_sock(net, sk,
+ uh->dest, daddr,
+ uh->source, saddr,
+ dif, hnum) &&
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ (uh->check || udp_sk(sk)->no_check6_rx)) {
+ if (unlikely(count == ARRAY_SIZE(stack))) {
+ flush_stack(stack, count, skb, ~0);
+ count = 0;
+ }
stack[count++] = sk;
-
- sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
- if (unlikely(count == ARRAY_SIZE(stack))) {
- if (!sk)
- break;
- flush_stack(stack, count, skb, ~0);
- count = 0;
+ sock_hold(sk);
}
}
- /*
- * before releasing the lock, we must take reference on sockets
- */
- for (i = 0; i < count; i++)
- sock_hold(stack[i]);
spin_unlock(&hslot->lock);
+ /* Also lookup *:port if we are using hash2 and haven't done so yet. */
+ if (use_hash2 && hash2 != hash2_any) {
+ hash2 = hash2_any;
+ goto start_lookup;
+ }
+
if (count) {
flush_stack(stack, count, skb, count - 1);
-
- for (i = 0; i < count; i++)
- sock_put(stack[i]);
} else {
kfree_skb(skb);
}