From 842509b8591fd9a40f5532a5f049bd29804af6d6 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 6 Apr 2010 05:39:52 +0000 Subject: socket: remove duplicate declaration of struct timespec struct timespec ts was alreay defined. Reuse the previously defined one and reduce the memory footprint on the stack by 16 bytes. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 769c386bd428..ae904b58d9f5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -619,10 +619,9 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); } else { - struct timespec ts; - skb_get_timestampns(skb, &ts); + skb_get_timestampns(skb, &ts[0]); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts), &ts); + sizeof(ts[0]), &ts[0]); } } -- cgit v1.2.3 From 989a2979205dd34269382b357e6d4b4b6956b889 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Apr 2010 09:55:35 +0000 Subject: fasync: RCU and fine grained locking kill_fasync() uses a central rwlock, candidate for RCU conversion, to avoid cache line ping pongs on SMP. fasync_remove_entry() and fasync_add_entry() can disable IRQS on a short section instead during whole list scan. Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync() doesnt need its own implementation and can use fasync_helper(), to reduce code size and complexity. We can remove __kill_fasync() direct use in net/socket.c, and rename it to kill_fasync_rcu(). Signed-off-by: Eric Dumazet Cc: Paul E. McKenney Cc: Lai Jiangshan Signed-off-by: David S. Miller --- net/socket.c | 73 +++++++++--------------------------------------------------- 1 file changed, 11 insertions(+), 62 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 35bc198bbf68..9822081eab38 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp) * 1. fasync_list is modified only under process context socket lock * i.e. under semaphore. * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) - * or under socket lock. - * 3. fasync_list can be used from softirq context, so that - * modification under socket lock have to be enhanced with - * write_lock_bh(&sk->sk_callback_lock). - * --ANK (990710) + * or under socket lock */ static int sock_fasync(int fd, struct file *filp, int on) { - struct fasync_struct *fa, *fna = NULL, **prev; - struct socket *sock; - struct sock *sk; - - if (on) { - fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); - if (fna == NULL) - return -ENOMEM; - } - - sock = filp->private_data; + struct socket *sock = filp->private_data; + struct sock *sk = sock->sk; - sk = sock->sk; - if (sk == NULL) { - kfree(fna); + if (sk == NULL) return -EINVAL; - } lock_sock(sk); - spin_lock(&filp->f_lock); - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - spin_unlock(&filp->f_lock); - - prev = &(sock->fasync_list); + fasync_helper(fd, filp, on, &sock->fasync_list); - for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) - if (fa->fa_file == filp) - break; - - if (on) { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - fa->fa_fd = fd; - write_unlock_bh(&sk->sk_callback_lock); - - kfree(fna); - goto out; - } - fna->fa_file = filp; - fna->fa_fd = fd; - fna->magic = FASYNC_MAGIC; - fna->fa_next = sock->fasync_list; - write_lock_bh(&sk->sk_callback_lock); - sock->fasync_list = fna; + if (!sock->fasync_list) + sock_reset_flag(sk, SOCK_FASYNC); + else sock_set_flag(sk, SOCK_FASYNC); - write_unlock_bh(&sk->sk_callback_lock); - } else { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - *prev = fa->fa_next; - if (!sock->fasync_list) - sock_reset_flag(sk, SOCK_FASYNC); - write_unlock_bh(&sk->sk_callback_lock); - kfree(fa); - } - } -out: - release_sock(sock->sk); + release_sock(sk); return 0; } @@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case SOCK_WAKE_IO: call_kill: - __kill_fasync(sock->fasync_list, SIGIO, band); + kill_fasync(&sock->fasync_list, SIGIO, band); break; case SOCK_WAKE_URG: - __kill_fasync(sock->fasync_list, SIGURG, band); + kill_fasync(&sock->fasync_list, SIGURG, band); } return 0; } -- cgit v1.2.3 From 767dd03369ac18af58efdef0383d6eb986eab426 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Apr 2010 19:14:43 +0000 Subject: net: speedup sock_recv_ts_and_drops() sock_recv_ts_and_drops() is fat and slow (~ 4% of cpu time on some profiles) We can test all socket flags at once to make fast path fast again. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 9822081eab38..cb7c1f6c0d6e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -655,13 +655,13 @@ inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff sizeof(__u32), &skb->dropcount); } -void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, +void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); } -EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); +EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) -- cgit v1.2.3 From 43815482370c510c569fd18edb57afcb0fa8cab6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Apr 2010 11:01:49 +0000 Subject: net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/socket.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index cb7c1f6c0d6e..dae8c6b84a09 100644 --- a/net/socket.c +++ b/net/socket.c @@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - init_waitqueue_head(&ei->socket.wait); + ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); + if (!ei->socket.wq) { + kmem_cache_free(sock_inode_cachep, ei); + return NULL; + } + init_waitqueue_head(&ei->socket.wq->wait); + ei->socket.wq->fasync_list = NULL; - ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; ei->socket.ops = NULL; @@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } + +static void wq_free_rcu(struct rcu_head *head) +{ + struct socket_wq *wq = container_of(head, struct socket_wq, rcu); + + kfree(wq); +} + static void sock_destroy_inode(struct inode *inode) { - kmem_cache_free(sock_inode_cachep, - container_of(inode, struct socket_alloc, vfs_inode)); + struct socket_alloc *ei; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) @@ -513,7 +529,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->fasync_list) + if (sock->wq->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on) lock_sock(sk); - fasync_helper(fd, filp, on, &sock->fasync_list); + fasync_helper(fd, filp, on, &sock->wq->fasync_list); - if (!sock->fasync_list) + if (!sock->wq->fasync_list) sock_reset_flag(sk, SOCK_FASYNC); else sock_set_flag(sk, SOCK_FASYNC); @@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on) return 0; } -/* This function may be called only under socket lock or callback_lock */ +/* This function may be called only under socket lock or callback_lock or rcu_lock */ int sock_wake_async(struct socket *sock, int how, int band) { - if (!sock || !sock->fasync_list) + struct socket_wq *wq; + + if (!sock) return -1; + rcu_read_lock(); + wq = rcu_dereference(sock->wq); + if (!wq || !wq->fasync_list) { + rcu_read_unlock(); + return -1; + } switch (how) { case SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) @@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case SOCK_WAKE_IO: call_kill: - kill_fasync(&sock->fasync_list, SIGIO, band); + kill_fasync(&wq->fasync_list, SIGIO, band); break; case SOCK_WAKE_URG: - kill_fasync(&sock->fasync_list, SIGURG, band); + kill_fasync(&wq->fasync_list, SIGURG, band); } + rcu_read_unlock(); return 0; } -- cgit v1.2.3 From ccbd6a5a4f76e821ed36f69fdaf59817c3a7f18e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 14 May 2010 10:58:26 +0000 Subject: net: Remove unnecessary semicolons after switch statements Also added an explicit break; to avoid a fallthrough in net/ipv4/tcp_input.c Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index dae8c6b84a09..f9f7d0872cac 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2615,7 +2615,7 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, uifr); default: return -EINVAL; - }; + } } static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, -- cgit v1.2.3