diff options
author | David S. Miller <davem@davemloft.net> | 2017-02-13 22:23:23 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-02-13 22:23:23 -0500 |
commit | 417d18d38bc63201f6442810476048d2984054c0 (patch) | |
tree | 75ab69d616de79a67fea6874dc6ecb11c54e7c13 | |
parent | 8f9000a565d01cb1f1688dc5dc32ac8026a7e993 (diff) | |
parent | b7eaf8f16e87c3896e9b8c0b2e54d71210d43b48 (diff) |
Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Jeff Kirsher says:
====================
40GbE Intel Wired LAN Driver Updates 2017-02-11
This series contains updates to i40e and i40evf only.
Jake makes a minor change to avoid a small amount of unnecessary work.
In the case where we do not have a client, there is no need to check
the client params, so the check is moved until after we have ensured
we have a client. Corrects a code comment which incorrectly implied
that raw_packet buffers were freed in i40e_clean_tx_ring(), so that it
better explains where the memory is freed. Reduces the
severity and frequency of the message notifying we cleared the receive
timestamp register, since the logic has a much better detection scheme
that could detect a stalled receive timestamp register. The improved
logic was actually causing the notification message to occur more
frequently and was giving the user a false perception that a timestamp
event was missed for a valid packet, so reduce the severity from
dev_warn to dev_dbg and only fire off the message when 3 or 4 of the
RXTIME registers are stalled and get cleared within the same
watchdog event. Fixes a bug where we were modifying the mac_filter
outside a lock when handling the addition of broadcast filters. This is
fixed by updating the i40e_update_filter_state logic so that it knows to
avoid broadcast filters, which ensures that we do not have to remove
the filter separately and can put it back using the normal flow.
Refactored how we add new filters to firmware to avoid a race condition
that can occur due to removing filters from the hash temporarily.
Mitch adds a sleep (without timeout) so that we wait for a reply from
the PF before we continue, since the iWarp client cannot continue until
the operation is completed. Changes a function which could never
return an error to return void, and removes the now-unnecessary
checks of its return value.
Scott limits the DMA sync for the CPU to the actual length of the
incoming packet, rather than syncing the entire buffer. Also reduces
the receive buffer struct (by a single pointer) and aligns the driver
to be more consistent with other Intel drivers with respect to packets
that span buffers.
Sudheer adds a field to track the bus number info and modifies the log
statements to print bus, device and function information.
Henry adds the ability to store the FEC status bits from the link up
event. Also adds the ethtool support for FEC capabilities and 25G
link types.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e.h | 16 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_client.c | 33 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_common.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_main.c | 192 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_osdep.h | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_ptp.c | 21 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 223 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.h | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_type.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 215 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40evf/i40e_type.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40evf/i40evf.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40evf/i40evf_main.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 4 |
17 files changed, 463 insertions, 285 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index fdd9069b6cec..7a23d3e47c6f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -467,6 +467,22 @@ struct i40e_mac_filter { enum i40e_filter_state state; }; +/* Wrapper structure to keep track of filters while we are preparing to send + * firmware commands. We cannot send firmware commands while holding a + * spinlock, since it might sleep. To avoid this, we wrap the added filters in + * a separate structure, which will track the state change and update the real + * filter while under lock. We can't simply hold the filters in a separate + * list, as this opens a window for a race condition when adding new MAC + * addresses to all VLANs, or when adding new VLANs to all MAC addresses. + */ +struct i40e_new_mac_filter { + struct hlist_node hlist; + struct i40e_mac_filter *f; + + /* Track future changes to state separately */ + enum i40e_filter_state state; +}; + struct i40e_veb { struct i40e_pf *pf; u16 idx; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 7ca048f0b159..d570219efd9f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -174,8 +174,6 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) if (!vsi) return; - memset(&params, 0, sizeof(params)); - i40e_client_get_params(vsi, &params); mutex_lock(&i40e_client_instance_mutex); list_for_each_entry(cdev, &i40e_client_instances, list) { if (cdev->lan_info.pf == vsi->back) { @@ -186,6 +184,8 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) "Cannot locate client instance l2_param_change routine\n"); continue; } + memset(&params, 0, sizeof(params)); + i40e_client_get_params(vsi, &params); if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n"); @@ 
-510,9 +510,10 @@ void i40e_client_subtask(struct i40e_pf *pf) continue; if (!existing) { - dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", + dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", client->name, pf->hw.pf_id, - pf->hw.bus.device, pf->hw.bus.func); + pf->hw.bus.bus_id, pf->hw.bus.device, + pf->hw.bus.func); } mutex_lock(&i40e_client_instance_mutex); @@ -561,8 +562,9 @@ int i40e_lan_add_device(struct i40e_pf *pf) ldev->pf = pf; INIT_LIST_HEAD(&ldev->list); list_add(&ldev->list, &i40e_devices); - dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x func=0x%02x\n", - pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func); + dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", + pf->hw.pf_id, pf->hw.bus.bus_id, + pf->hw.bus.device, pf->hw.bus.func); /* Since in some cases register may have happened before a device gets * added, we can schedule a subtask to go initiate the clients if @@ -590,9 +592,9 @@ int i40e_lan_del_device(struct i40e_pf *pf) mutex_lock(&i40e_device_mutex); list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) { if (ldev->pf == pf) { - dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x func=0x%02x\n", - pf->hw.pf_id, pf->hw.bus.device, - pf->hw.bus.func); + dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", + pf->hw.pf_id, pf->hw.bus.bus_id, + pf->hw.bus.device, pf->hw.bus.func); list_del(&ldev->list); kfree(ldev); ret = 0; @@ -653,13 +655,11 @@ static int i40e_client_release(struct i40e_client *client) * i40e_client_prepare - prepare client specific resources * @client: pointer to the registered client * - * Return 0 on success or < 0 on error **/ -static int i40e_client_prepare(struct i40e_client *client) +static void i40e_client_prepare(struct i40e_client *client) { struct i40e_device *ldev; struct i40e_pf *pf; - int ret = 0; mutex_lock(&i40e_device_mutex); 
list_for_each_entry(ldev, &i40e_devices, list) { @@ -669,7 +669,6 @@ static int i40e_client_prepare(struct i40e_client *client) i40e_service_event_schedule(pf); } mutex_unlock(&i40e_device_mutex); - return ret; } /** @@ -926,13 +925,9 @@ int i40e_register_client(struct i40e_client *client) set_bit(__I40E_CLIENT_REGISTERED, &client->state); mutex_unlock(&i40e_client_mutex); - if (i40e_client_prepare(client)) { - ret = -EIO; - goto out; - } + i40e_client_prepare(client); - pr_info("i40e: Registered client %s with return code %d\n", - client->name, ret); + pr_info("i40e: Registered client %s\n", client->name); out: return ret; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 128735975caa..fc73e4ef27ac 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1838,6 +1838,8 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed; hw_link_info->link_info = resp->link_info; hw_link_info->an_info = resp->an_info; + hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA | + I40E_AQ_CONFIG_FEC_RS_ENA); hw_link_info->ext_info = resp->ext_info; hw_link_info->loopback = resp->loopback; hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c4ab3c1ae02a..a22e26200bcc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -803,9 +803,12 @@ static int i40e_set_settings(struct net_device *netdev, if (change || (abilities.link_speed != config.link_speed)) { /* copy over the rest of the abilities */ config.phy_type = abilities.phy_type; + config.phy_type_ext = abilities.phy_type_ext; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = 
abilities.d3_lpan; + config.fec_config = abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_PHY_FEC_CONFIG_MASK; /* save the requested speeds */ hw->phy.link_info.requested_speeds = config.link_speed; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9f785c015a2f..e83a8ca5dd65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 25 +#define DRV_VERSION_BUILD 27 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -1255,6 +1255,7 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, int vlan_filters) { struct i40e_mac_filter *f, *add_head; + struct i40e_new_mac_filter *new; struct hlist_node *h; int bkt, new_vlan; @@ -1273,13 +1274,13 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, */ /* Update the filters about to be added in place */ - hlist_for_each_entry(f, tmp_add_list, hlist) { - if (vsi->info.pvid && f->vlan != vsi->info.pvid) - f->vlan = vsi->info.pvid; - else if (vlan_filters && f->vlan == I40E_VLAN_ANY) - f->vlan = 0; - else if (!vlan_filters && f->vlan == 0) - f->vlan = I40E_VLAN_ANY; + hlist_for_each_entry(new, tmp_add_list, hlist) { + if (vsi->info.pvid && new->f->vlan != vsi->info.pvid) + new->f->vlan = vsi->info.pvid; + else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY) + new->f->vlan = 0; + else if (!vlan_filters && new->f->vlan == 0) + new->f->vlan = I40E_VLAN_ANY; } /* Update the remaining active filters */ @@ -1305,9 +1306,16 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, if (!add_head) return -ENOMEM; - /* Put the replacement filter into the add list */ - hash_del(&add_head->hlist); - hlist_add_head(&add_head->hlist, tmp_add_list); + /* Create a temporary 
i40e_new_mac_filter */ + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + return -ENOMEM; + + new->f = add_head; + new->state = add_head->state; + + /* Add the new filter to the tmp list */ + hlist_add_head(&new->hlist, tmp_add_list); /* Put the original filter into the delete list */ f->state = I40E_FILTER_REMOVE; @@ -1819,16 +1827,15 @@ static void i40e_set_rx_mode(struct net_device *netdev) } /** - * i40e_undo_filter_entries - Undo the changes made to MAC filter entries + * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries * @vsi: Pointer to VSI struct * @from: Pointer to list which contains MAC filter entries - changes to * those entries needs to be undone. * - * MAC filter entries from list were slated to be sent to firmware, either for - * addition or deletion. + * MAC filter entries from this list were slated for deletion. **/ -static void i40e_undo_filter_entries(struct i40e_vsi *vsi, - struct hlist_head *from) +static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) { struct i40e_mac_filter *f; struct hlist_node *h; @@ -1843,6 +1850,53 @@ static void i40e_undo_filter_entries(struct i40e_vsi *vsi, } /** + * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries + * @vsi: Pointer to vsi struct + * @from: Pointer to list which contains MAC filter entries - changes to + * those entries needs to be undone. + * + * MAC filter entries from this list were slated for addition. + **/ +static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) +{ + struct i40e_new_mac_filter *new; + struct hlist_node *h; + + hlist_for_each_entry_safe(new, h, from, hlist) { + /* We can simply free the wrapper structure */ + hlist_del(&new->hlist); + kfree(new); + } +} + +/** + * i40e_next_entry - Get the next non-broadcast filter from a list + * @next: pointer to filter in list + * + * Returns the next non-broadcast filter in the list. 
Required so that we + * ignore broadcast filters within the list, since these are not handled via + * the normal firmware update path. + */ +static +struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next) +{ + while (next) { + next = hlist_entry(next->hlist.next, + typeof(struct i40e_new_mac_filter), + hlist); + + /* keep going if we found a broadcast filter */ + if (next && is_broadcast_ether_addr(next->f->macaddr)) + continue; + + break; + } + + return next; +} + +/** * i40e_update_filter_state - Update filter state based on return data * from firmware * @count: Number of filters added @@ -1855,7 +1909,7 @@ static void i40e_undo_filter_entries(struct i40e_vsi *vsi, static int i40e_update_filter_state(int count, struct i40e_aqc_add_macvlan_element_data *add_list, - struct i40e_mac_filter *add_head) + struct i40e_new_mac_filter *add_head) { int retval = 0; int i; @@ -1874,9 +1928,9 @@ i40e_update_filter_state(int count, retval++; } - add_head = hlist_entry(add_head->hlist.next, - typeof(struct i40e_mac_filter), - hlist); + add_head = i40e_next_filter(add_head); + if (!add_head) + break; } return retval; @@ -1933,7 +1987,7 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, - struct i40e_mac_filter *add_head, + struct i40e_new_mac_filter *add_head, int num_add, bool *promisc_changed) { struct i40e_hw *hw = &vsi->back->hw; @@ -1961,10 +2015,12 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, * This function sets or clears the promiscuous broadcast flags for VLAN * filters in order to properly receive broadcast frames. Assumes that only * broadcast filters are passed. 
+ * + * Returns status indicating success or failure; **/ -static -void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, - struct i40e_mac_filter *f) +static i40e_status +i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, + struct i40e_mac_filter *f) { bool enable = f->state == I40E_FILTER_NEW; struct i40e_hw *hw = &vsi->back->hw; @@ -1983,15 +2039,13 @@ void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, NULL); } - if (aq_ret) { + if (aq_ret) dev_warn(&vsi->back->pdev->dev, "Error %s setting broadcast promiscuous mode on %s\n", i40e_aq_str(hw, hw->aq.asq_last_status), vsi_name); - f->state = I40E_FILTER_FAILED; - } else if (enable) { - f->state = I40E_FILTER_ACTIVE; - } + + return aq_ret; } /** @@ -2005,7 +2059,8 @@ void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { struct hlist_head tmp_add_list, tmp_del_list; - struct i40e_mac_filter *f, *add_head = NULL; + struct i40e_mac_filter *f; + struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; @@ -2059,8 +2114,17 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) continue; } if (f->state == I40E_FILTER_NEW) { - hash_del(&f->hlist); - hlist_add_head(&f->hlist, &tmp_add_list); + /* Create a temporary i40e_new_mac_filter */ + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + goto err_no_memory_locked; + + /* Store pointer to the real filter */ + new->f = f; + new->state = f->state; + + /* Add it to the hash list */ + hlist_add_head(&new->hlist, &tmp_add_list); } /* Count the number of active (current and new) VLAN @@ -2095,7 +2159,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) cmd_flags = 0; /* handle broadcast filters by updating the broadcast - * promiscuous flag instead of deleting a MAC filter. + * promiscuous flag and release filter list. 
*/ if (is_broadcast_ether_addr(f->macaddr)) { i40e_aqc_broadcast_filter(vsi, vsi_name, f); @@ -2153,36 +2217,37 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) goto err_no_memory; num_add = 0; - hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { + hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)) { - f->state = I40E_FILTER_FAILED; + new->state = I40E_FILTER_FAILED; continue; } /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ - if (is_broadcast_ether_addr(f->macaddr)) { - u64 key = i40e_addr_to_hkey(f->macaddr); - i40e_aqc_broadcast_filter(vsi, vsi_name, f); - - hlist_del(&f->hlist); - hash_add(vsi->mac_filter_hash, &f->hlist, key); + if (is_broadcast_ether_addr(new->f->macaddr)) { + if (i40e_aqc_broadcast_filter(vsi, vsi_name, + new->f)) + new->state = I40E_FILTER_FAILED; + else + new->state = I40E_FILTER_ACTIVE; continue; } /* add to add array */ if (num_add == 0) - add_head = f; + add_head = new; cmd_flags = 0; - ether_addr_copy(add_list[num_add].mac_addr, f->macaddr); - if (f->vlan == I40E_VLAN_ANY) { + ether_addr_copy(add_list[num_add].mac_addr, + new->f->macaddr); + if (new->f->vlan == I40E_VLAN_ANY) { add_list[num_add].vlan_tag = 0; cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { add_list[num_add].vlan_tag = - cpu_to_le16((u16)(f->vlan)); + cpu_to_le16((u16)(new->f->vlan)); } add_list[num_add].queue_number = 0; /* set invalid match method for later detection */ @@ -2208,11 +2273,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) * the VSI's list. 
*/ spin_lock_bh(&vsi->mac_filter_hash_lock); - hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { - u64 key = i40e_addr_to_hkey(f->macaddr); - - hlist_del(&f->hlist); - hash_add(vsi->mac_filter_hash, &f->hlist, key); + hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { + /* Only update the state if we're still NEW */ + if (new->f->state == I40E_FILTER_NEW) + new->f->state = new->state; + hlist_del(&new->hlist); + kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); kfree(add_list); @@ -2373,8 +2439,8 @@ err_no_memory: /* Restore elements on the temporary add and delete lists */ spin_lock_bh(&vsi->mac_filter_hash_lock); err_no_memory_locked: - i40e_undo_filter_entries(vsi, &tmp_del_list); - i40e_undo_filter_entries(vsi, &tmp_add_list); + i40e_undo_del_filter_entries(vsi, &tmp_del_list); + i40e_undo_add_filter_entries(vsi, &tmp_add_list); spin_unlock_bh(&vsi->mac_filter_hash_lock); vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; @@ -5272,6 +5338,8 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) enum i40e_aq_link_speed new_speed; char *speed = "Unknown"; char *fc = "Unknown"; + char *fec = ""; + char *an = ""; new_speed = vsi->back->hw.phy.link_info.link_speed; @@ -5331,8 +5399,23 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - netdev_info(vsi->netdev, "NIC Link is Up %sbps Full Duplex, Flow Control: %s\n", - speed, fc); + if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { + fec = ", FEC: None"; + an = ", Autoneg: False"; + + if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + an = ", Autoneg: True"; + + if (vsi->back->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_KR_ENA) + fec = ", FEC: CL74 FC-FEC/BASE-R"; + else if (vsi->back->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_RS_ENA) + fec = ", FEC: CL108 RS-FEC"; + } + + netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s, Flow Control: %s\n", + speed, fec, an, fc); } /** @@ -10990,6 +11073,7 @@ static int 
i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->subsystem_device_id = pdev->subsystem_device; hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); + hw->bus.bus_id = pdev->bus->number; pf->instance = pfs_found; /* set up the locks for the AQ, do this only once in probe diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h index be74bcf9c961..fea81ed065db 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h +++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h @@ -69,12 +69,12 @@ struct i40e_virt_mem { #define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s) #define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m) -#define i40e_debug(h, m, s, ...) \ -do { \ - if (((m) & (h)->debug_mask)) \ - pr_info("i40e %02x.%x " s, \ - (h)->bus.device, (h)->bus.func, \ - ##__VA_ARGS__); \ +#define i40e_debug(h, m, s, ...) \ +do { \ + if (((m) & (h)->debug_mask)) \ + pr_info("i40e %02x:%02x.%x " s, \ + (h)->bus.bus_id, (h)->bus.device, \ + (h)->bus.func, ##__VA_ARGS__); \ } while (0) typedef enum i40e_status_code i40e_status; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 9e49ffafce28..2caee35528fa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -280,7 +280,7 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - int i; + unsigned int i, cleared = 0; /* Since we cannot turn off the Rx timestamp logic if the device is * configured for Tx timestamping, we check if Rx timestamping is @@ -306,14 +306,25 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi) time_is_before_jiffies(pf->latch_events[i] + HZ)) { rd32(hw, I40E_PRTTSYN_RXTIME_H(i)); pf->latch_event_flags &= ~BIT(i); - pf->rx_hwtstamp_cleared++; - dev_warn(&pf->pdev->dev, - "Clearing a missed Rx timestamp event for RXTIME[%d]\n", - i); + 
cleared++; } } spin_unlock_bh(&pf->ptp_rx_lock); + + /* Log a warning if more than 2 timestamps got dropped in the same + * check. We don't want to warn about all drops because it can occur + * in normal scenarios such as PTP frames on multicast addresses we + * aren't listening to. However, administrator should know if this is + * the reason packets aren't receiving timestamps. + */ + if (cleared > 2) + dev_dbg(&pf->pdev->dev, + "Dropped %d missed RXTIME timestamp events\n", + cleared); + + /* Finally, update the rx_hwtstamp_cleared counter */ + pf->rx_hwtstamp_cleared += cleared; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f5baeb154d39..09f09ea7a5e5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -432,7 +432,12 @@ unsupported_flow: ret = -EINVAL; } - /* The buffer allocated here is freed by the i40e_clean_tx_ring() */ + /* The buffer allocated here will be normally be freed by + * i40e_clean_fdir_tx_irq() as it reclaims resources after transmit + * completion. In the event of an error adding the buffer to the FDIR + * ring, it will immediately be freed. It may also be freed by + * i40e_clean_tx_ring() when closing the VSI. 
+ */ return ret; } @@ -1013,14 +1018,15 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - if (rx_bi->skb) { - dev_kfree_skb(rx_bi->skb); - rx_bi->skb = NULL; - } if (!rx_bi->page) continue; @@ -1425,45 +1431,6 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, } /** - * i40e_pull_tail - i40e specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being adjusted - * - * This function is an i40e specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. - */ -static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. 
- */ - pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed @@ -1478,10 +1445,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) **/ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) { - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - i40e_pull_tail(rx_ring, skb); - /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -1513,19 +1476,85 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, } /** - * i40e_page_is_reserved - check if reuse is possible + * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check + * + * A page is not reusable if it was allocated under low memory + * conditions, or it's not in the same NUMA node as this CPU. */ -static inline bool i40e_page_is_reserved(struct page *page) +static inline bool i40e_page_is_reusable(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + return (page_to_nid(page) == numa_mem_id()) && + !page_is_pfmemalloc(page); +} + +/** + * i40e_can_reuse_rx_page - Determine if this page can be reused by + * the adapter for another receive + * + * @rx_buffer: buffer containing the page + * @page: page address from rx_buffer + * @truesize: actual size of the buffer in this page + * + * If page is reusable, rx_buffer->page_offset is adjusted to point to + * an unused region in the page. 
+ * + * For small pages, @truesize will be a constant value, half the size + * of the memory at page. We'll attempt to alternate between high and + * low halves of the page, with one half ready for use by the hardware + * and the other half being consumed by the stack. We use the page + * ref count to determine whether the stack has finished consuming the + * portion of this page that was passed up with a previous packet. If + * the page ref count is >1, we'll assume the "other" half page is + * still busy, and this page cannot be reused. + * + * For larger pages, @truesize will be the actual space used by the + * received packet (adjusted upward to an even multiple of the cache + * line size). This will advance through the page by the amount + * actually consumed by the received packets while there is still + * space for a buffer. Each region of larger pages will be used at + * most once, after which the page will not be reused. + * + * In either case, if the page is reusable its refcount is increased. + **/ +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct page *page, + const unsigned int truesize) +{ +#if (PAGE_SIZE >= 8192) + unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; +#endif + + /* Is any reuse possible? 
*/ + if (unlikely(!i40e_page_is_reusable(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; +#endif + + /* Inc ref count on page before passing it up to the stack */ + get_page(page); + + return true; } /** * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware + * @size: packet length from rx_desc * @skb: sk_buff to place the data into * * This function will add the data contained in rx_buffer->page to the skb. @@ -1538,30 +1567,29 @@ static inline bool i40e_page_is_reserved(struct page *page) **/ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - union i40e_rx_desc *rx_desc, + unsigned int size, struct sk_buff *skb) { struct page *page = rx_buffer->page; - u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = I40E_RXBUFFER_2048; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; #endif + unsigned int pull_len; + + if (unlikely(skb_is_nonlinear(skb))) + goto add_tail_frag; /* will the data fit in the skb we allocated? 
if so, just * copy it as it is pretty small anyway */ - if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - + if (size <= I40E_RX_HDR_SIZE) { memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!i40e_page_is_reserved(page))) + /* page is reusable, we can reuse buffer as-is */ + if (likely(i40e_page_is_reusable(page))) return true; /* this page cannot be reused so discard it */ @@ -1569,34 +1597,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, return false; } - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(i40e_page_is_reserved(page))) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + /* we need the header to contain the greater of either + * ETH_HLEN or 60 bytes if the skb->len is less than + * 60 for skb_pad. + */ + pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + /* align pull length to size of long to optimize + * memcpy performance + */ + memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - if (rx_buffer->page_offset > last_offset) - return false; -#endif + /* update all of the pointers */ + va += pull_len; + size -= pull_len; - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. 
- */ - get_page(rx_buffer->page); +add_tail_frag: + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + (unsigned long)va & ~PAGE_MASK, size, truesize); - return true; + return i40e_can_reuse_rx_page(rx_buffer, page, truesize); } /** @@ -1611,18 +1631,21 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, */ static inline struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc) + union i40e_rx_desc *rx_desc, + struct sk_buff *skb) { + u64 local_status_error_len = + le64_to_cpu(rx_desc->wb.qword1.status_error_len); + unsigned int size = + (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; struct i40e_rx_buffer *rx_buffer; - struct sk_buff *skb; struct page *page; rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; page = rx_buffer->page; prefetchw(page); - skb = rx_buffer->skb; - if (likely(!skb)) { void *page_addr = page_address(page) + rx_buffer->page_offset; @@ -1646,19 +1669,17 @@ struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring, * it now to avoid a possible cache miss */ prefetchw(skb->data); - } else { - rx_buffer->skb = NULL; } /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - I40E_RXBUFFER_2048, + size, DMA_FROM_DEVICE); /* pull page into skb */ - if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; @@ -1700,7 +1721,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, #define staterrlen rx_desc->wb.qword1.status_error_len if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) { i40e_clean_programming_status(rx_ring, rx_desc); - rx_ring->rx_bi[ntc].skb = skb; return true; } /* if we are the last buffer then there is nothing else to do */ @@ -1708,8 +1728,6 @@ 
static bool i40e_is_non_eop(struct i40e_ring *rx_ring, if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_bi[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -1730,12 +1748,12 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false; while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; - struct sk_buff *skb; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1764,7 +1782,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ dma_rmb(); - skb = i40e_fetch_rx_buffer(rx_ring, rx_desc); + skb = i40e_fetch_rx_buffer(rx_ring, rx_desc, skb); if (!skb) break; @@ -1783,8 +1801,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) continue; } - if (i40e_cleanup_headers(rx_ring, skb)) + if (i40e_cleanup_headers(rx_ring, skb)) { + skb = NULL; continue; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1809,11 +1829,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; i40e_receive_skb(rx_ring, skb, vlan_tag); + skb = NULL; /* update budget accounting */ total_rx_packets++; } + rx_ring->skb = skb; + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 1ea820e9debe..f80979025c01 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -253,7 +253,6 @@ struct i40e_tx_buffer { }; struct i40e_rx_buffer { - struct sk_buff *skb; dma_addr_t dma; struct page *page; unsigned 
int page_offset; @@ -354,6 +353,14 @@ struct i40e_ring { struct rcu_head rcu; /* to avoid race on free */ u16 next_to_alloc; + struct sk_buff *skb; /* When i40e_clean_rx_ring_irq() must + * return before it sees the EOP for + * the current packet, we save that skb + * here and resume receiving this + * packet the next time + * i40e_clean_rx_ring_irq() is called + * for this ring. + */ } ____cacheline_internodealigned_in_smp; enum i40e_latency_range { diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index b6cf8d2670a4..939f9fdc8f85 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -184,6 +184,7 @@ struct i40e_link_status { enum i40e_aq_link_speed link_speed; u8 link_info; u8 an_info; + u8 fec_info; u8 ext_info; u8 loopback; /* is Link Status Event notification to SW enabled */ @@ -469,6 +470,7 @@ struct i40e_bus_info { u16 func; u16 device; u16 lan_id; + u16 bus_id; }; /* Flow control (FC) parameters */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index d4e488267988..b758846d4dc5 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -501,14 +501,15 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - if (rx_bi->skb) { - dev_kfree_skb(rx_bi->skb); - rx_bi->skb = NULL; - } if (!rx_bi->page) continue; @@ -903,45 +904,6 @@ void i40evf_process_skb_fields(struct i40e_ring *rx_ring, } /** - * i40e_pull_tail - i40e specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being adjusted - * - * This function is an i40e specific 
version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. - */ -static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed @@ -956,10 +918,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) **/ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) { - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - i40e_pull_tail(rx_ring, skb); - /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -991,19 +949,85 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, } /** - * i40e_page_is_reserved - check if reuse is possible + * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to 
check + * + * A page is not reusable if it was allocated under low memory + * conditions, or it's not in the same NUMA node as this CPU. */ -static inline bool i40e_page_is_reserved(struct page *page) +static inline bool i40e_page_is_reusable(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + return (page_to_nid(page) == numa_mem_id()) && + !page_is_pfmemalloc(page); +} + +/** + * i40e_can_reuse_rx_page - Determine if this page can be reused by + * the adapter for another receive + * + * @rx_buffer: buffer containing the page + * @page: page address from rx_buffer + * @truesize: actual size of the buffer in this page + * + * If page is reusable, rx_buffer->page_offset is adjusted to point to + * an unused region in the page. + * + * For small pages, @truesize will be a constant value, half the size + * of the memory at page. We'll attempt to alternate between high and + * low halves of the page, with one half ready for use by the hardware + * and the other half being consumed by the stack. We use the page + * ref count to determine whether the stack has finished consuming the + * portion of this page that was passed up with a previous packet. If + * the page ref count is >1, we'll assume the "other" half page is + * still busy, and this page cannot be reused. + * + * For larger pages, @truesize will be the actual space used by the + * received packet (adjusted upward to an even multiple of the cache + * line size). This will advance through the page by the amount + * actually consumed by the received packets while there is still + * space for a buffer. Each region of larger pages will be used at + * most once, after which the page will not be reused. + * + * In either case, if the page is reusable its refcount is increased. 
+ **/ +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct page *page, + const unsigned int truesize) +{ +#if (PAGE_SIZE >= 8192) + unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; +#endif + + /* Is any reuse possible? */ + if (unlikely(!i40e_page_is_reusable(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; +#endif + + /* Inc ref count on page before passing it up to the stack */ + get_page(page); + + return true; } /** * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware + * @size: packet length from rx_desc * @skb: sk_buff to place the data into * * This function will add the data contained in rx_buffer->page to the skb. 
@@ -1016,30 +1040,29 @@ static inline bool i40e_page_is_reserved(struct page *page) **/ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - union i40e_rx_desc *rx_desc, + unsigned int size, struct sk_buff *skb) { struct page *page = rx_buffer->page; - u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = I40E_RXBUFFER_2048; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; #endif + unsigned int pull_len; + + if (unlikely(skb_is_nonlinear(skb))) + goto add_tail_frag; /* will the data fit in the skb we allocated? if so, just * copy it as it is pretty small anyway */ - if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - + if (size <= I40E_RX_HDR_SIZE) { memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!i40e_page_is_reserved(page))) + /* page is reusable, we can reuse buffer as-is */ + if (likely(i40e_page_is_reusable(page))) return true; /* this page cannot be reused so discard it */ @@ -1047,34 +1070,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, return false; } - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(i40e_page_is_reserved(page))) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + /* we need the header to contain the greater of either + * ETH_HLEN or 60 bytes if the skb->len is less than + * 60 for skb_pad. 
+ */ + pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + /* align pull length to size of long to optimize + * memcpy performance + */ + memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - if (rx_buffer->page_offset > last_offset) - return false; -#endif + /* update all of the pointers */ + va += pull_len; + size -= pull_len; - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - get_page(rx_buffer->page); +add_tail_frag: + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + (unsigned long)va & ~PAGE_MASK, size, truesize); - return true; + return i40e_can_reuse_rx_page(rx_buffer, page, truesize); } /** @@ -1089,18 +1104,21 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, */ static inline struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc) + union i40e_rx_desc *rx_desc, + struct sk_buff *skb) { + u64 local_status_error_len = + le64_to_cpu(rx_desc->wb.qword1.status_error_len); + unsigned int size = + (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; struct i40e_rx_buffer *rx_buffer; - struct sk_buff *skb; struct page *page; rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; page = rx_buffer->page; prefetchw(page); - skb = rx_buffer->skb; - if (likely(!skb)) { void *page_addr = page_address(page) + rx_buffer->page_offset; @@ -1124,19 +1142,17 @@ struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring, * it now to avoid a possible cache miss */ prefetchw(skb->data); - } else { - rx_buffer->skb = NULL; } /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - I40E_RXBUFFER_2048, + size, DMA_FROM_DEVICE); /* pull page 
into skb */ - if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; @@ -1180,8 +1196,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_bi[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -1202,12 +1216,12 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false; while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; - struct sk_buff *skb; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1236,7 +1250,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ dma_rmb(); - skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc); + skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc, skb); if (!skb) break; @@ -1255,8 +1269,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) continue; } - if (i40e_cleanup_headers(rx_ring, skb)) + if (i40e_cleanup_headers(rx_ring, skb)) { + skb = NULL; continue; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1273,11 +1289,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; i40e_receive_skb(rx_ring, skb, vlan_tag); + skb = NULL; /* update budget accounting */ total_rx_packets++; } + rx_ring->skb = skb; + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h 
b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index a5fc789f78eb..8274ba68bd32 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -239,7 +239,6 @@ struct i40e_tx_buffer { }; struct i40e_rx_buffer { - struct sk_buff *skb; dma_addr_t dma; struct page *page; unsigned int page_offset; @@ -340,6 +339,14 @@ struct i40e_ring { struct rcu_head rcu; /* to avoid race on free */ u16 next_to_alloc; + struct sk_buff *skb; /* When i40evf_clean_rx_ring_irq() must + * return before it sees the EOP for + * the current packet, we save that skb + * here and resume receiving this + * packet the next time + * i40evf_clean_rx_ring_irq() is called + * for this ring. + */ } ____cacheline_internodealigned_in_smp; enum i40e_latency_range { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 92ac60da5201..16bb88084bb9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -158,6 +158,7 @@ struct i40e_link_status { enum i40e_aq_link_speed link_speed; u8 link_info; u8 an_info; + u8 fec_info; u8 ext_info; u8 loopback; /* is Link Status Event notification to SW enabled */ @@ -442,6 +443,7 @@ struct i40e_bus_info { u16 func; u16 device; u16 lan_id; + u16 bus_id; }; /* Flow control (FC) parameters */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index fc374f833aa9..d38a2b2aea2b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -81,6 +81,7 @@ enum i40e_virtchnl_ops { I40E_VIRTCHNL_OP_GET_STATS = 15, I40E_VIRTCHNL_OP_FCOE = 16, I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, diff --git 
a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index fffe4cf2c20b..00c42d803276 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -195,6 +195,7 @@ struct i40evf_adapter { u64 hw_csum_rx_error; u32 rx_desc_count; int num_msix_vectors; + u32 client_pending; struct msix_entry *msix_entries; u32 flags; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 3fe87e021148..920c1cb06a92 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 25 +#define DRV_VERSION_BUILD 27 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -2726,6 +2726,7 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->subsystem_device_id = pdev->subsystem_device; hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); + hw->bus.bus_id = pdev->bus->number; /* set up the locks for the AQ, do this only once in probe * and destroy them only once in remove diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 2059a8e88908..bee58af390e1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -999,6 +999,10 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, if (v_opcode != adapter->current_op) return; break; + case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: + adapter->client_pending &= + ~(BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP)); + break; case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS: { struct i40e_virtchnl_rss_hena *vrh = (struct i40e_virtchnl_rss_hena *)msg; |