diff options
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r-- | drivers/net/bonding/bond_main.c | 330 |
1 files changed, 201 insertions, 129 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6f85cc31f8a2..6909becb10f6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1590,15 +1590,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) case BOND_MODE_TLB: case BOND_MODE_ALB: new_slave->state = BOND_STATE_ACTIVE; - if ((!bond->curr_active_slave) && - (new_slave->link != BOND_LINK_DOWN)) { - /* first slave or no active slave yet, and this link - * is OK, so make this interface the active one - */ - bond_change_active_slave(bond, new_slave); - } else { - bond_set_slave_inactive_flags(new_slave); - } + bond_set_slave_inactive_flags(new_slave); break; default: dprintk("This slave is always active in trunk mode\n"); @@ -1754,9 +1746,23 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) bond_alb_deinit_slave(bond, slave); } - if (oldcurrent == slave) + if (oldcurrent == slave) { + /* + * Note that we hold RTNL over this sequence, so there + * is no concern that another slave add/remove event + * will interfere. + */ + write_unlock_bh(&bond->lock); + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + bond_select_active_slave(bond); + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + write_lock_bh(&bond->lock); + } + if (bond->slave_cnt == 0) { bond_set_carrier(bond); @@ -1840,9 +1846,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) */ void bond_destroy(struct bonding *bond) { + unregister_netdevice(bond->dev); bond_deinit(bond->dev); bond_destroy_sysfs_entry(bond); - unregister_netdevice(bond->dev); } /* @@ -2012,16 +2018,19 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi return -EINVAL; } - write_lock_bh(&bond->lock); + read_lock(&bond->lock); + read_lock(&bond->curr_slave_lock); old_active = bond->curr_active_slave; + read_unlock(&bond->curr_slave_lock); + new_active = bond_get_slave_by_dev(bond, slave_dev); /* * Changing to the current active: do nothing; return success. */ if (new_active && (new_active == old_active)) { - write_unlock_bh(&bond->lock); + read_unlock(&bond->lock); return 0; } @@ -2029,12 +2038,14 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi (old_active) && (new_active->link == BOND_LINK_UP) && IS_UP(new_active->dev)) { + write_lock_bh(&bond->curr_slave_lock); bond_change_active_slave(bond, new_active); + write_unlock_bh(&bond->curr_slave_lock); } else { res = -EINVAL; } - write_unlock_bh(&bond->lock); + read_unlock(&bond->lock); return res; } @@ -2046,9 +2057,9 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) info->bond_mode = bond->params.mode; info->miimon = bond->params.miimon; - read_lock_bh(&bond->lock); + read_lock(&bond->lock); info->num_slaves = bond->slave_cnt; - read_unlock_bh(&bond->lock); + read_unlock(&bond->lock); return 0; } @@ -2063,7 +2074,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in return -ENODEV; } - read_lock_bh(&bond->lock); + read_lock(&bond->lock); bond_for_each_slave(bond, slave, i) { if (i == (int)info->slave_id) { @@ -2072,7 +2083,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in } } - read_unlock_bh(&bond->lock); + read_unlock(&bond->lock); if (found) { strcpy(info->slave_name, slave->dev->name); @@ -2088,26 +2099,25 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in /*-------------------------------- Monitoring -------------------------------*/ -/* this function is called regularly to monitor each slave's link. */ -void bond_mii_monitor(struct net_device *bond_dev) +/* + * if !have_locks, return nonzero if a failover is necessary. if + * have_locks, do whatever failover activities are needed. + * + * This is to separate the inspection and failover steps for locking + * purposes; failover requires rtnl, but acquiring it for every + * inspection is undesirable, so a wrapper first does inspection, and + * the acquires the necessary locks and calls again to perform + * failover if needed. Since all locks are dropped, a complete + * restart is needed between calls. + */ +static int __bond_mii_monitor(struct bonding *bond, int have_locks) { - struct bonding *bond = bond_dev->priv; struct slave *slave, *oldcurrent; int do_failover = 0; - int delta_in_ticks; int i; - read_lock(&bond->lock); - - delta_in_ticks = (bond->params.miimon * HZ) / 1000; - - if (bond->kill_timers) { + if (bond->slave_cnt == 0) goto out; - } - - if (bond->slave_cnt == 0) { - goto re_arm; - } /* we will try to read the link status of each of our slaves, and * set their IFF_RUNNING flag appropriately. For each slave not @@ -2141,7 +2151,11 @@ void bond_mii_monitor(struct net_device *bond_dev) switch (slave->link) { case BOND_LINK_UP: /* the link was up */ if (link_state == BMSR_LSTATUS) { - /* link stays up, nothing more to do */ + if (!oldcurrent) { + if (!have_locks) + return 1; + do_failover = 1; + } break; } else { /* link going down */ slave->link = BOND_LINK_FAIL; @@ -2156,7 +2170,7 @@ void bond_mii_monitor(struct net_device *bond_dev) ": %s: link status down for %s " "interface %s, disabling it in " "%d ms.\n", - bond_dev->name, + bond->dev->name, IS_UP(slave_dev) ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) ? ((slave == oldcurrent) @@ -2174,6 +2188,9 @@ void bond_mii_monitor(struct net_device *bond_dev) if (link_state != BMSR_LSTATUS) { /* link stays down */ if (slave->delay <= 0) { + if (!have_locks) + return 1; + /* link down for too long time */ slave->link = BOND_LINK_DOWN; @@ -2189,7 +2206,7 @@ void bond_mii_monitor(struct net_device *bond_dev) ": %s: link status definitely " "down for interface %s, " "disabling it\n", - bond_dev->name, + bond->dev->name, slave_dev->name); /* notify ad that the link status has changed */ @@ -2215,7 +2232,7 @@ void bond_mii_monitor(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: link status up again after %d " "ms for interface %s.\n", - bond_dev->name, + bond->dev->name, (bond->params.downdelay - slave->delay) * bond->params.miimon, slave_dev->name); } @@ -2235,7 +2252,7 @@ void bond_mii_monitor(struct net_device *bond_dev) ": %s: link status up for " "interface %s, enabling it " "in %d ms.\n", - bond_dev->name, + bond->dev->name, slave_dev->name, bond->params.updelay * bond->params.miimon); } @@ -2251,12 +2268,15 @@ void bond_mii_monitor(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: link status down again after %d " "ms for interface %s.\n", - bond_dev->name, + bond->dev->name, (bond->params.updelay - slave->delay) * bond->params.miimon, slave_dev->name); } else { /* link stays up */ if (slave->delay == 0) { + if (!have_locks) + return 1; + /* now the link has been up for long time enough */ slave->link = BOND_LINK_UP; slave->jiffies = jiffies; @@ -2275,7 +2295,7 @@ void bond_mii_monitor(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: link status definitely " "up for interface %s.\n", - bond_dev->name, + bond->dev->name, slave_dev->name); /* notify ad that the link status has changed */ @@ -2301,7 +2321,7 @@ void bond_mii_monitor(struct net_device *bond_dev) /* Should not happen */ printk(KERN_ERR DRV_NAME ": %s: Error: %s Illegal value (link=%d)\n", - bond_dev->name, + bond->dev->name, slave->dev->name, slave->link); goto out; @@ -2322,22 +2342,52 @@ void bond_mii_monitor(struct net_device *bond_dev) } /* end of for */ if (do_failover) { - write_lock(&bond->curr_slave_lock); + ASSERT_RTNL(); + + write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); - write_unlock(&bond->curr_slave_lock); + write_unlock_bh(&bond->curr_slave_lock); + } else bond_set_carrier(bond); -re_arm: - if (bond->params.miimon) { - mod_timer(&bond->mii_timer, jiffies + delta_in_ticks); - } out: - read_unlock(&bond->lock); + return 0; } +/* + * bond_mii_monitor + * + * Really a wrapper that splits the mii monitor into two phases: an + * inspection, then (if inspection indicates something needs to be + * done) an acquisition of appropriate locks followed by another pass + * to implement whatever link state changes are indicated. + */ +void bond_mii_monitor(struct work_struct *work) +{ + struct bonding *bond = container_of(work, struct bonding, + mii_work.work); + unsigned long delay; + + read_lock(&bond->lock); + if (bond->kill_timers) { + read_unlock(&bond->lock); + return; + } + if (__bond_mii_monitor(bond, 0)) { + read_unlock(&bond->lock); + rtnl_lock(); + read_lock(&bond->lock); + __bond_mii_monitor(bond, 1); + rtnl_unlock(); + } + + delay = ((bond->params.miimon * HZ) / 1000) ? : 1; + read_unlock(&bond->lock); + queue_delayed_work(bond->wq, &bond->mii_work, delay); +} static __be32 bond_glean_dev_ip(struct net_device *dev) { @@ -2636,9 +2686,10 @@ out: * arp is transmitted to generate traffic. see activebackup_arp_monitor for * arp monitoring in active backup mode. */ -void bond_loadbalance_arp_mon(struct net_device *bond_dev) +void bond_loadbalance_arp_mon(struct work_struct *work) { - struct bonding *bond = bond_dev->priv; + struct bonding *bond = container_of(work, struct bonding, + arp_work.work); struct slave *slave, *oldcurrent; int do_failover = 0; int delta_in_ticks; @@ -2685,13 +2736,13 @@ void bond_loadbalance_arp_mon(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: link status definitely " "up for interface %s, ", - bond_dev->name, + bond->dev->name, slave->dev->name); do_failover = 1; } else { printk(KERN_INFO DRV_NAME ": %s: interface %s is now up\n", - bond_dev->name, + bond->dev->name, slave->dev->name); } } @@ -2715,7 +2766,7 @@ void bond_loadbalance_arp_mon(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: interface %s is now down.\n", - bond_dev->name, + bond->dev->name, slave->dev->name); if (slave == oldcurrent) { @@ -2737,17 +2788,19 @@ void bond_loadbalance_arp_mon(struct net_device *bond_dev) } if (do_failover) { - write_lock(&bond->curr_slave_lock); + rtnl_lock(); + write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); - write_unlock(&bond->curr_slave_lock); + write_unlock_bh(&bond->curr_slave_lock); + rtnl_unlock(); + } re_arm: - if (bond->params.arp_interval) { - mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); - } + if (bond->params.arp_interval) + queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); out: read_unlock(&bond->lock); } @@ -2767,9 +2820,10 @@ out: * may have received. * see loadbalance_arp_monitor for arp monitoring in load balancing mode */ -void bond_activebackup_arp_mon(struct net_device *bond_dev) +void bond_activebackup_arp_mon(struct work_struct *work) { - struct bonding *bond = bond_dev->priv; + struct bonding *bond = container_of(work, struct bonding, + arp_work.work); struct slave *slave; int delta_in_ticks; int i; @@ -2798,7 +2852,9 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) slave->link = BOND_LINK_UP; - write_lock(&bond->curr_slave_lock); + rtnl_lock(); + + write_lock_bh(&bond->curr_slave_lock); if ((!bond->curr_active_slave) && ((jiffies - slave->dev->trans_start) <= delta_in_ticks)) { @@ -2821,18 +2877,19 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: %s is up and now the " "active interface\n", - bond_dev->name, + bond->dev->name, slave->dev->name); netif_carrier_on(bond->dev); } else { printk(KERN_INFO DRV_NAME ": %s: backup interface %s is " "now up\n", - bond_dev->name, + bond->dev->name, slave->dev->name); } - write_unlock(&bond->curr_slave_lock); + write_unlock_bh(&bond->curr_slave_lock); + rtnl_unlock(); } } else { read_lock(&bond->curr_slave_lock); @@ -2864,7 +2921,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: backup interface %s is now down\n", - bond_dev->name, + bond->dev->name, slave->dev->name); } else { read_unlock(&bond->curr_slave_lock); @@ -2899,15 +2956,18 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: link status down for active interface " "%s, disabling it\n", - bond_dev->name, + bond->dev->name, slave->dev->name); - write_lock(&bond->curr_slave_lock); + rtnl_lock(); + write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); slave = bond->curr_active_slave; - write_unlock(&bond->curr_slave_lock); + write_unlock_bh(&bond->curr_slave_lock); + + rtnl_unlock(); bond->current_arp_slave = slave; @@ -2921,14 +2981,17 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: changing from interface %s to primary " "interface %s\n", - bond_dev->name, + bond->dev->name, slave->dev->name, bond->primary_slave->dev->name); /* primary is up so switch to it */ - write_lock(&bond->curr_slave_lock); + rtnl_lock(); + write_lock_bh(&bond->curr_slave_lock); bond_change_active_slave(bond, bond->primary_slave); - write_unlock(&bond->curr_slave_lock); + write_unlock_bh(&bond->curr_slave_lock); + + rtnl_unlock(); slave = bond->primary_slave; slave->jiffies = jiffies; @@ -2985,7 +3048,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) printk(KERN_INFO DRV_NAME ": %s: backup interface %s is " "now down.\n", - bond_dev->name, + bond->dev->name, slave->dev->name); } } @@ -2994,7 +3057,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) re_arm: if (bond->params.arp_interval) { - mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); + queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); } out: read_unlock(&bond->lock); @@ -3015,7 +3078,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) /* make sure the bond won't be taken away */ read_lock(&dev_base_lock); - read_lock_bh(&bond->lock); + read_lock(&bond->lock); if (*pos == 0) { return SEQ_START_TOKEN; @@ -3049,7 +3112,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v) { struct bonding *bond = seq->private; - read_unlock_bh(&bond->lock); + read_unlock(&bond->lock); read_unlock(&dev_base_lock); } @@ -3582,15 +3645,11 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, static int bond_open(struct net_device *bond_dev) { struct bonding *bond = bond_dev->priv; - struct timer_list *mii_timer = &bond->mii_timer; - struct timer_list *arp_timer = &bond->arp_timer; bond->kill_timers = 0; if ((bond->params.mode == BOND_MODE_TLB) || (bond->params.mode == BOND_MODE_ALB)) { - struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); - /* bond_alb_initialize must be called before the timer * is started. */ @@ -3599,44 +3658,31 @@ static int bond_open(struct net_device *bond_dev) return -1; } - init_timer(alb_timer); - alb_timer->expires = jiffies + 1; - alb_timer->data = (unsigned long)bond; - alb_timer->function = (void *)&bond_alb_monitor; - add_timer(alb_timer); + INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); + queue_delayed_work(bond->wq, &bond->alb_work, 0); } if (bond->params.miimon) { /* link check interval, in milliseconds. */ - init_timer(mii_timer); - mii_timer->expires = jiffies + 1; - mii_timer->data = (unsigned long)bond_dev; - mii_timer->function = (void *)&bond_mii_monitor; - add_timer(mii_timer); + INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); + queue_delayed_work(bond->wq, &bond->mii_work, 0); } if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ - init_timer(arp_timer); - arp_timer->expires = jiffies + 1; - arp_timer->data = (unsigned long)bond_dev; - if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { - arp_timer->function = (void *)&bond_activebackup_arp_mon; - } else { - arp_timer->function = (void *)&bond_loadbalance_arp_mon; - } + if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) + INIT_DELAYED_WORK(&bond->arp_work, + bond_activebackup_arp_mon); + else + INIT_DELAYED_WORK(&bond->arp_work, + bond_loadbalance_arp_mon); + + queue_delayed_work(bond->wq, &bond->arp_work, 0); if (bond->params.arp_validate) bond_register_arp(bond); - - add_timer(arp_timer); } if (bond->params.mode == BOND_MODE_8023AD) { - struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); - init_timer(ad_timer); - ad_timer->expires = jiffies + 1; - ad_timer->data = (unsigned long)bond; - ad_timer->function = (void *)&bond_3ad_state_machine_handler; - add_timer(ad_timer); - + INIT_DELAYED_WORK(&bond->ad_work, bond_alb_monitor); + queue_delayed_work(bond->wq, &bond->ad_work, 0); /* register to receive LACPDUs */ bond_register_lacpdu(bond); } @@ -3664,25 +3710,21 @@ static int bond_close(struct net_device *bond_dev) write_unlock_bh(&bond->lock); - /* del_timer_sync must run without holding the bond->lock - * because a running timer might be trying to hold it too - */ - if (bond->params.miimon) { /* link check interval, in milliseconds. */ - del_timer_sync(&bond->mii_timer); + cancel_delayed_work(&bond->mii_work); } if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ - del_timer_sync(&bond->arp_timer); + cancel_delayed_work(&bond->arp_work); } switch (bond->params.mode) { case BOND_MODE_8023AD: - del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); + cancel_delayed_work(&bond->ad_work); break; case BOND_MODE_TLB: case BOND_MODE_ALB: - del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); + cancel_delayed_work(&bond->alb_work); break; default: break; @@ -3779,13 +3821,13 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd if (mii->reg_num == 1) { struct bonding *bond = bond_dev->priv; mii->val_out = 0; - read_lock_bh(&bond->lock); + read_lock(&bond->lock); read_lock(&bond->curr_slave_lock); if (netif_carrier_ok(bond->dev)) { mii->val_out = BMSR_LSTATUS; } read_unlock(&bond->curr_slave_lock); - read_unlock_bh(&bond->lock); + read_unlock(&bond->lock); } return 0; @@ -4077,8 +4119,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev { struct bonding *bond = bond_dev->priv; struct slave *slave, *start_at; - int i; - int res = 1; + int i, slave_no, res = 1; read_lock(&bond->lock); @@ -4086,29 +4127,29 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev goto out; } - read_lock(&bond->curr_slave_lock); - slave = start_at = bond->curr_active_slave; - read_unlock(&bond->curr_slave_lock); + /* + * Concurrent TX may collide on rr_tx_counter; we accept that + * as being rare enough not to justify using an atomic op here + */ + slave_no = bond->rr_tx_counter++ % bond->slave_cnt; - if (!slave) { - goto out; + bond_for_each_slave(bond, slave, i) { + slave_no--; + if (slave_no < 0) { + break; + } } + start_at = slave; bond_for_each_slave_from(bond, slave, i, start_at) { if (IS_UP(slave->dev) && (slave->link == BOND_LINK_UP) && (slave->state == BOND_STATE_ACTIVE)) { res = bond_dev_queue_xmit(bond, skb, slave->dev); - - write_lock(&bond->curr_slave_lock); - bond->curr_active_slave = slave->next; - write_unlock(&bond->curr_slave_lock); - break; } } - out: if (res) { /* no suitable interface, frame not sent */ @@ -4340,6 +4381,10 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params) bond->params = *params; /* copy params struct */ + bond->wq = create_singlethread_workqueue(bond_dev->name); + if (!bond->wq) + return -ENOMEM; + /* Initialize pointers */ bond->first_slave = NULL; bond->curr_active_slave = NULL; @@ -4428,8 +4473,8 @@ static void bond_free_all(void) bond_mc_list_destroy(bond); /* Release the bonded slaves */ bond_release_all(bond_dev); - bond_deinit(bond_dev); unregister_netdevice(bond_dev); + bond_deinit(bond_dev); } #ifdef CONFIG_PROC_FS @@ -4826,10 +4871,32 @@ out_rtnl: return res; } +static void bond_work_cancel_all(struct bonding *bond) +{ + write_lock_bh(&bond->lock); + bond->kill_timers = 1; + write_unlock_bh(&bond->lock); + + if (bond->params.miimon && delayed_work_pending(&bond->mii_work)) + cancel_delayed_work(&bond->mii_work); + + if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work)) + cancel_delayed_work(&bond->arp_work); + + if (bond->params.mode == BOND_MODE_ALB && + delayed_work_pending(&bond->alb_work)) + cancel_delayed_work(&bond->alb_work); + + if (bond->params.mode == BOND_MODE_8023AD && + delayed_work_pending(&bond->ad_work)) + cancel_delayed_work(&bond->ad_work); +} + static int __init bonding_init(void) { int i; int res; + struct bonding *bond, *nxt; printk(KERN_INFO "%s", version); @@ -4856,6 +4923,11 @@ static int __init bonding_init(void) goto out; err: + list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { + bond_work_cancel_all(bond); + destroy_workqueue(bond->wq); + } + rtnl_lock(); bond_free_all(); bond_destroy_sysfs(); |