diff options
| author | Eric Dumazet <edumazet@google.com> | 2012-08-22 17:19:46 +0000 | 
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2012-08-22 21:50:36 -0700 | 
| commit | 0115e8e30d6fcdd4b8faa30d3ffd90859a591f51 (patch) | |
| tree | 725026e2192e57b98366947ab53c73689ca040a8 | |
| parent | bf277b0ccea7d2422b85e232017ce3fddbe9c49c (diff) | |
net: remove delay at device dismantle
I noticed extra one second delay in device dismantle, tracked down to
a call to dst_dev_event() while some call_rcu() are still in RCU queues.
These call_rcu() were posted by rt_free(struct rtable *rt) calls.
We then wait a little (but one second) in netdev_wait_allrefs() before
kicking again NETDEV_UNREGISTER.
As the call_rcu() are now completed, dst_dev_event() can do the needed
device swap on busy dst.
To solve this problem, add a new NETDEV_UNREGISTER_FINAL, called
after a rcu_barrier(), but outside of RTNL lock.
Use NETDEV_UNREGISTER_FINAL with care !
Change dst_dev_event() handler to react to NETDEV_UNREGISTER_FINAL
Also remove NETDEV_UNREGISTER_BATCH, as its not used anymore after
IP cache removal.
With help from Gao feng
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | include/linux/netdevice.h | 2 | ||||
| -rw-r--r-- | net/core/dev.c | 22 | ||||
| -rw-r--r-- | net/core/dst.c | 2 | ||||
| -rw-r--r-- | net/core/fib_rules.c | 3 | ||||
| -rw-r--r-- | net/core/rtnetlink.c | 2 | ||||
| -rw-r--r-- | net/ipv4/devinet.c | 6 | ||||
| -rw-r--r-- | net/ipv4/fib_frontend.c | 8 | ||||
| -rw-r--r-- | net/ipv6/addrconf.c | 6 | 
8 files changed, 27 insertions, 24 deletions
| diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4936f09a9333..9ad7fa8c10e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1553,7 +1553,7 @@ struct packet_type {  #define NETDEV_PRE_TYPE_CHANGE	0x000E  #define NETDEV_POST_TYPE_CHANGE	0x000F  #define NETDEV_POST_INIT	0x0010 -#define NETDEV_UNREGISTER_BATCH 0x0011 +#define NETDEV_UNREGISTER_FINAL 0x0011  #define NETDEV_RELEASE		0x0012  #define NETDEV_NOTIFY_PEERS	0x0013  #define NETDEV_JOIN		0x0014 diff --git a/net/core/dev.c b/net/core/dev.c index 088923fe4066..0640d2a859c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1406,7 +1406,6 @@ rollback:  				nb->notifier_call(nb, NETDEV_DOWN, dev);  			}  			nb->notifier_call(nb, NETDEV_UNREGISTER, dev); -			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);  		}  	} @@ -1448,7 +1447,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)  				nb->notifier_call(nb, NETDEV_DOWN, dev);  			}  			nb->notifier_call(nb, NETDEV_UNREGISTER, dev); -			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);  		}  	}  unlock: @@ -1468,7 +1466,8 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);  int call_netdevice_notifiers(unsigned long val, struct net_device *dev)  { -	ASSERT_RTNL(); +	if (val != NETDEV_UNREGISTER_FINAL) +		ASSERT_RTNL();  	return raw_notifier_call_chain(&netdev_chain, val, dev);  }  EXPORT_SYMBOL(call_netdevice_notifiers); @@ -5331,10 +5330,6 @@ static void rollback_registered_many(struct list_head *head)  		netdev_unregister_kobject(dev);  	} -	/* Process any work delayed until the end of the batch */ -	dev = list_first_entry(head, struct net_device, unreg_list); -	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); -  	synchronize_net();  	list_for_each_entry(dev, head, unreg_list) @@ -5787,9 +5782,8 @@ static void netdev_wait_allrefs(struct net_device *dev)  			/* Rebroadcast unregister notification */  			call_netdevice_notifiers(NETDEV_UNREGISTER, dev); -			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users -			 * should have already handle it the first time */ - +			rcu_barrier(); +			call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);  			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,  				     &dev->state)) {  				/* We must not have linkwatch events @@ -5851,9 +5845,8 @@ void netdev_run_todo(void)  	__rtnl_unlock(); -	/* Wait for rcu callbacks to finish before attempting to drain -	 * the device list.  This usually avoids a 250ms wait. -	 */ + +	/* Wait for rcu callbacks to finish before next phase */  	if (!list_empty(&list))  		rcu_barrier(); @@ -5862,6 +5855,8 @@ void netdev_run_todo(void)  			= list_first_entry(&list, struct net_device, todo_list);  		list_del(&dev->todo_list); +		call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); +  		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {  			pr_err("network todo '%s' but state %d\n",  			       dev->name, dev->reg_state); @@ -6256,7 +6251,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char  	   the device is just moving and can keep their slaves up.  	*/  	call_netdevice_notifiers(NETDEV_UNREGISTER, dev); -	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);  	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);  	/* diff --git a/net/core/dst.c b/net/core/dst.c index 56d63612e1e4..f6593d238e9a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,  	struct dst_entry *dst, *last = NULL;  	switch (event) { -	case NETDEV_UNREGISTER: +	case NETDEV_UNREGISTER_FINAL:  	case NETDEV_DOWN:  		mutex_lock(&dst_gc_mutex);  		for (dst = dst_busy_list; dst; dst = dst->next) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..585093755c23 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -711,15 +711,16 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,  	struct net *net = dev_net(dev);  	struct fib_rules_ops *ops; -	ASSERT_RTNL();  	switch (event) {  	case NETDEV_REGISTER: +		ASSERT_RTNL();  		list_for_each_entry(ops, &net->rules_ops, list)  			attach_rules(&ops->rules_list, dev);  		break;  	case NETDEV_UNREGISTER: +		ASSERT_RTNL();  		list_for_each_entry(ops, &net->rules_ops, list)  			detach_rules(&ops->rules_list, dev);  		break; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34d975b0f277..c64efcff8078 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2358,7 +2358,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi  	case NETDEV_PRE_TYPE_CHANGE:  	case NETDEV_GOING_DOWN:  	case NETDEV_UNREGISTER: -	case NETDEV_UNREGISTER_BATCH: +	case NETDEV_UNREGISTER_FINAL:  	case NETDEV_RELEASE:  	case NETDEV_JOIN:  		break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index adf273f8ad2e..6a5e6e4b142c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1147,8 +1147,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,  			 void *ptr)  {  	struct net_device *dev = ptr; -	struct in_device *in_dev = __in_dev_get_rtnl(dev); +	struct in_device *in_dev; +	if (event == NETDEV_UNREGISTER_FINAL) +		goto out; + +	in_dev = __in_dev_get_rtnl(dev);  	ASSERT_RTNL();  	if (!in_dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7f073a38c87d..fd7d9ae64f16 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,  static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)  {  	struct net_device *dev = ptr; -	struct in_device *in_dev = __in_dev_get_rtnl(dev); +	struct in_device *in_dev;  	struct net *net = dev_net(dev);  	if (event == NETDEV_UNREGISTER) { @@ -1050,9 +1050,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo  		return NOTIFY_DONE;  	} -	if (!in_dev) +	if (event == NETDEV_UNREGISTER_FINAL)  		return NOTIFY_DONE; +	in_dev = __in_dev_get_rtnl(dev); +  	switch (event) {  	case NETDEV_UP:  		for_ifa(in_dev) { @@ -1071,8 +1073,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo  	case NETDEV_CHANGE:  		rt_cache_flush(dev_net(dev), 0);  		break; -	case NETDEV_UNREGISTER_BATCH: -		break;  	}  	return NOTIFY_DONE;  } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6bc85f7c31e3..e581009cb09e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2566,10 +2566,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,  			   void *data)  {  	struct net_device *dev = (struct net_device *) data; -	struct inet6_dev *idev = __in6_dev_get(dev); +	struct inet6_dev *idev;  	int run_pending = 0;  	int err; +	if (event == NETDEV_UNREGISTER_FINAL) +		return NOTIFY_DONE; + +	idev = __in6_dev_get(dev);  	switch (event) {  	case NETDEV_REGISTER:  		if (!idev && dev->mtu >= IPV6_MIN_MTU) { | 
