From aa395145165cb06a0d0885221bbe0ce4a564391d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 13:03:51 +0000 Subject: net: sk_sleep() helper Define a new function to return the waitqueue of a "struct sock". static inline wait_queue_head_t *sk_sleep(struct sock *sk) { return sk->sk_sleep; } Change all read occurrences of sk_sleep by a call to this function. Needed for a future RCU conversion. sk_sleep wont be a field directly available. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 43265207d463..20a17938c62b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -868,8 +868,8 @@ static void tun_sock_write_space(struct sock *sk) if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); tun = tun_sk(sk)->tun; -- cgit v1.2.3 From 43815482370c510c569fd18edb57afcb0fa8cab6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Apr 2010 11:01:49 +0000 Subject: net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 20a17938c62b..e525a6cf5587 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -109,7 +109,7 @@ struct tun_struct { struct tap_filter txflt; struct socket socket; - + struct socket_wq wq; #ifdef TUN_DEBUG int debug; #endif @@ -323,7 +323,7 @@ static void tun_net_uninit(struct net_device *dev) /* Inform the methods they need to stop using the dev. */ if (tfile) { - wake_up_all(&tun->socket.wait); + wake_up_all(&tun->wq.wait); if (atomic_dec_and_test(&tfile->count)) __tun_detach(tun); } @@ -398,7 +398,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tun->socket.wait, POLLIN | + wake_up_interruptible_poll(&tun->wq.wait, POLLIN | POLLRDNORM | POLLRDBAND); return NETDEV_TX_OK; @@ -498,7 +498,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); - poll_wait(file, &tun->socket.wait, wait); + poll_wait(file, &tun->wq.wait, wait); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -773,7 +773,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); - add_wait_queue(&tun->socket.wait, &wait); + add_wait_queue(&tun->wq.wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; @@ -804,7 +804,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, } current->state = TASK_RUNNING; - remove_wait_queue(&tun->socket.wait, &wait); + remove_wait_queue(&tun->wq.wait, &wait); return ret; } @@ -861,6 +861,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = { static void tun_sock_write_space(struct sock *sk) { struct tun_struct *tun; + wait_queue_head_t *wqueue; if (!sock_writeable(sk)) return; @@ -868,8 +869,9 @@ static void tun_sock_write_space(struct sock *sk) if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | + wqueue = sk_sleep(sk); + if (wqueue && waitqueue_active(wqueue)) + wake_up_interruptible_sync_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); tun = tun_sk(sk)->tun; @@ -1039,7 +1041,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!sk) goto err_free_dev; - init_waitqueue_head(&tun->socket.wait); + tun->socket.wq = &tun->wq; + init_waitqueue_head(&tun->wq.wait); tun->socket.ops = &tun_socket_ops; sock_init_data(&tun->socket, sk); sk->sk_write_space = tun_sock_write_space; -- cgit v1.2.3 From d9d52b5178af586d679c1052fb161ee05ea2e83f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 17 Mar 2010 17:45:01 +0200 Subject: tun: add ioctl to modify vnet header size virtio added mergeable buffers mode where 2 bytes of extra info is put after vnet header but before actual data (tun does not need this data). In hindsight, it would have been better to add the new info *before* the packet: as it is, users need a lot of tricky code to skip the extra 2 bytes in the middle of the iovec, and in fact applications seem to get it wrong, and only work with specific iovec layout. The fact we might need to split iovec also means we might in theory overflow iovec max size. This patch adds a simpler way for applications to handle this, and future proofs the interface against further extensions, by making the size of the virtio net header configurable from userspace. As a result, tun driver will simply skip the extra 2 bytes on both input and output. Signed-off-by: Michael S. Tsirkin Acked-by: David S. Miller --- drivers/net/tun.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e525a6cf5587..6b150c072a41 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -110,6 +110,9 @@ struct tun_struct { struct tap_filter txflt; struct socket socket; struct socket_wq wq; + + int vnet_hdr_sz; + #ifdef TUN_DEBUG int debug; #endif @@ -563,7 +566,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, } if (tun->flags & TUN_VNET_HDR) { - if ((len -= sizeof(gso)) > count) + if ((len -= tun->vnet_hdr_sz) > count) return -EINVAL; if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) @@ -575,7 +578,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, if (gso.hdr_len > len) return -EINVAL; - offset += sizeof(gso); + offset += tun->vnet_hdr_sz; } if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) { @@ -718,7 +721,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, if (tun->flags & TUN_VNET_HDR) { struct virtio_net_hdr gso = { 0 }; /* no info leak */ - if ((len -= sizeof(gso)) < 0) + if ((len -= tun->vnet_hdr_sz) < 0) return -EINVAL; if (skb_is_gso(skb)) { @@ -749,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total, sizeof(gso)))) return -EFAULT; - total += sizeof(gso); + total += tun->vnet_hdr_sz; } len = min_t(int, skb->len, len); @@ -1035,6 +1038,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->dev = dev; tun->flags = flags; tun->txflt.count = 0; + tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); err = -ENOMEM; sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto); @@ -1177,6 +1181,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, struct sock_fprog fprog; struct ifreq ifr; int sndbuf; + int vnet_hdr_sz; int ret; if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) @@ -1322,6 +1327,25 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun->socket.sk->sk_sndbuf = sndbuf; break; + case TUNGETVNETHDRSZ: + vnet_hdr_sz = tun->vnet_hdr_sz; + if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) + ret = -EFAULT; + break; + + case TUNSETVNETHDRSZ: + if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { + ret = -EFAULT; + break; + } + if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { + ret = -EINVAL; + break; + } + + tun->vnet_hdr_sz = vnet_hdr_sz; + break; + case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; -- cgit v1.2.3 From 1ae5dc342ac78d7a42965fd1f323815f6f5ef2c1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 May 2010 05:01:31 -0700 Subject: net: trans_start cleanups Now that core network takes care of trans_start updates, dont do it in drivers themselves, if possible. Drivers can avoid one cache miss (on dev->trans_start) in their start_xmit() handler. Exceptions are NETIF_F_LLTX drivers Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 6b150c072a41..dbdfb1ff7ca0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -396,7 +396,6 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Enqueue packet */ skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb); - dev->trans_start = jiffies; /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) -- cgit v1.2.3 From a4b770972b8f819e408d7cc3ae9637e15bff62f6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 14 May 2010 00:19:28 -0700 Subject: drivers/net: Remove unnecessary returns from void function()s This patch removes from drivers/net/ all the unnecessary return; statements that precede the last closing brace of void functions. It does not remove the returns that are immediately preceded by a label as gcc doesn't like that. It also does not remove null void functions with return. Done via: $ grep -rP --include=*.[ch] -l "return;\n}" net/ | \ xargs perl -i -e 'local $/ ; while (<>) { s/\n[ \t\n]+return;\n}/\n}/g; print; }' with some cleanups by hand. Compile tested x86 allmodconfig only. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index dbdfb1ff7ca0..01b5cfcfa870 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -417,7 +417,6 @@ static void tun_net_mclist(struct net_device *dev) * _rx_ path and has nothing to do with the _tx_ path. * In rx path we always accept everything userspace gives us. */ - return; } #define MIN_MTU 68 -- cgit v1.2.3 From ee289b6440c3b0ccb9459495783e8c299bec6604 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 17 May 2010 22:47:34 -0700 Subject: drivers/net: remove useless semicolons switch and while statements don't need semicolons at end of statement [ Fixup minor conflicts with recent wimax merge... -DaveM ] Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 01b5cfcfa870..97b25533e5fb 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1367,7 +1367,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, default: ret = -EINVAL; break; - }; + } unlock: rtnl_unlock(); -- cgit v1.2.3