From 50698d80f8bb1db989b7b9fa433f588fade5e382 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 27 Feb 2017 10:26:47 -0800 Subject: netvsc: don't overload variable in same function There are two variables named packet in the same function. One is the metadata descriptor from host (vmpacket_descriptor) and the other is the control block in the skb used to hold metadata from send. Change name to avoid possible confusion and bugs. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d35ebd993b38..5dedbc36c326 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -600,9 +600,9 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device, static void netvsc_send_tx_complete(struct netvsc_device *net_device, struct vmbus_channel *incoming_channel, struct hv_device *device, - struct vmpacket_descriptor *packet) + const struct vmpacket_descriptor *desc) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id; + struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); struct vmbus_channel *channel = device->channel; -- cgit v1.2.3 From f3dd3f4797652c311df9c074436d420f1ad3566e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 27 Feb 2017 10:26:48 -0800 Subject: vmbus: introduce in-place packet iterator This is mostly just a refactoring of previous functions (get_pkt_next_raw, put_pkt_raw and commit_rd_index) to make it easier to use for other drivers and NAPI. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5dedbc36c326..3681fb59bdbe 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -647,14 +647,11 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, static void netvsc_send_completion(struct netvsc_device *net_device, struct vmbus_channel *incoming_channel, struct hv_device *device, - struct vmpacket_descriptor *packet) + const struct vmpacket_descriptor *desc) { - struct nvsp_message *nvsp_packet; + struct nvsp_message *nvsp_packet = hv_pkt_data(desc); struct net_device *ndev = hv_get_drvdata(device); - nvsp_packet = (struct nvsp_message *)((unsigned long)packet + - (packet->offset8 << 3)); - switch (nvsp_packet->hdr.msg_type) { case NVSP_MSG_TYPE_INIT_COMPLETE: case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: @@ -668,7 +665,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device, case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: netvsc_send_tx_complete(net_device, incoming_channel, - device, packet); + device, desc); break; default: @@ -1071,9 +1068,11 @@ static void netvsc_receive(struct net_device *ndev, struct net_device_context *net_device_ctx, struct hv_device *device, struct vmbus_channel *channel, - struct vmtransfer_page_packet_header *vmxferpage_packet, + const struct vmpacket_descriptor *desc, struct nvsp_message *nvsp) { + const struct vmtransfer_page_packet_header *vmxferpage_packet + = container_of(desc, const struct vmtransfer_page_packet_header, d); char *recv_buf = net_device->recv_buf; u32 status = NVSP_STAT_SUCCESS; int i; @@ -1185,12 +1184,10 @@ static void netvsc_process_raw_pkt(struct hv_device *device, struct netvsc_device *net_device, struct net_device *ndev, u64 request_id, - struct vmpacket_descriptor *desc) + const struct vmpacket_descriptor *desc) { struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct nvsp_message *nvmsg - = (struct nvsp_message *)((unsigned long)desc - + (desc->offset8 << 3)); + struct nvsp_message *nvmsg = hv_pkt_data(desc); switch (desc->type) { case VM_PKT_COMP: @@ -1199,9 +1196,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device, case VM_PKT_DATA_USING_XFER_PAGES: netvsc_receive(ndev, net_device, net_device_ctx, - device, channel, - (struct vmtransfer_page_packet_header *)desc, - nvmsg); + device, channel, desc, nvmsg); break; case VM_PKT_DATA_INBAND: @@ -1223,7 +1218,6 @@ void netvsc_channel_cb(void *context) struct netvsc_device *net_device; struct vmpacket_descriptor *desc; struct net_device *ndev; - bool need_to_commit = false; if (channel->primary_channel != NULL) device = channel->primary_channel->device_obj; @@ -1239,20 +1233,12 @@ void netvsc_channel_cb(void *context) netvsc_channel_idle(net_device, q_idx)) return; - /* commit_rd_index() -> hv_signal_on_read() needs this. */ - init_cached_read_index(channel); - - while ((desc = get_next_pkt_raw(channel)) != NULL) { + foreach_vmbus_pkt(desc, channel) { netvsc_process_raw_pkt(device, channel, net_device, ndev, desc->trans_id, desc); - put_pkt_raw(channel, desc); - need_to_commit = true; } - if (need_to_commit) - commit_rd_index(channel); - netvsc_chk_recv_comp(net_device, channel, q_idx); } -- cgit v1.2.3 From 15a863bf7436124e799ba175a801e25f7b57191e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 27 Feb 2017 10:26:49 -0800 Subject: netvsc: implement NAPI Use NAPI (softirq), to handle receive packets and send completions. Previously this was handled by tasklet. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 140 +++++++++++++++++++++++++++++++------------- 1 file changed, 98 insertions(+), 42 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 3681fb59bdbe..b1328cef9d5a 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -556,6 +556,7 @@ void netvsc_device_remove(struct hv_device *device) struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); struct netvsc_device *net_device = net_device_ctx->nvdev; + int i; netvsc_disconnect_vsp(device); @@ -570,6 +571,9 @@ void netvsc_device_remove(struct hv_device *device) /* Now, we can close the channel safely */ vmbus_close(device->channel); + for (i = 0; i < VRSS_CHANNEL_MAX; i++) + napi_disable(&net_device->chan_table[0].napi); + /* Release all resources */ free_netvsc_device(net_device); } @@ -1063,7 +1067,7 @@ static inline struct recv_comp_data *get_recv_comp_slot( return rcd; } -static void netvsc_receive(struct net_device *ndev, +static int netvsc_receive(struct net_device *ndev, struct netvsc_device *net_device, struct net_device_context *net_device_ctx, struct hv_device *device, @@ -1073,20 +1077,19 @@ static void netvsc_receive(struct net_device *ndev, { const struct vmtransfer_page_packet_header *vmxferpage_packet = container_of(desc, const struct vmtransfer_page_packet_header, d); + u16 q_idx = channel->offermsg.offer.sub_channel_index; char *recv_buf = net_device->recv_buf; u32 status = NVSP_STAT_SUCCESS; int i; int count = 0; int ret; - struct recv_comp_data *rcd; - u16 q_idx = channel->offermsg.offer.sub_channel_index; /* Make sure this is a valid nvsp packet */ if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) { netif_err(net_device_ctx, rx_err, ndev, "Unknown nvsp packet type received %u\n", nvsp->hdr.msg_type); - return; + return 0; } if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) { @@ -1094,7 +1097,7 @@ static void netvsc_receive(struct net_device *ndev, "Invalid xfer page set id - expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID, vmxferpage_packet->xfer_pageset_id); - return; + return 0; } count = vmxferpage_packet->range_cnt; @@ -1110,26 +1113,26 @@ static void netvsc_receive(struct net_device *ndev, channel, data, buflen); } - if (!net_device->chan_table[q_idx].mrc.buf) { + if (net_device->chan_table[q_idx].mrc.buf) { + struct recv_comp_data *rcd; + + rcd = get_recv_comp_slot(net_device, channel, q_idx); + if (rcd) { + rcd->tid = vmxferpage_packet->d.trans_id; + rcd->status = status; + } else { + netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", + q_idx, vmxferpage_packet->d.trans_id); + } + } else { ret = netvsc_send_recv_completion(channel, vmxferpage_packet->d.trans_id, status); if (ret) netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n", q_idx, vmxferpage_packet->d.trans_id, ret); - return; } - - rcd = get_recv_comp_slot(net_device, channel, q_idx); - - if (!rcd) { - netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", - q_idx, vmxferpage_packet->d.trans_id); - return; - } - - rcd->tid = vmxferpage_packet->d.trans_id; - rcd->status = status; + return count; } static void netvsc_send_table(struct hv_device *hdev, @@ -1179,12 +1182,12 @@ static inline void netvsc_receive_inband(struct hv_device *hdev, } } -static void netvsc_process_raw_pkt(struct hv_device *device, - struct vmbus_channel *channel, - struct netvsc_device *net_device, - struct net_device *ndev, - u64 request_id, - const struct vmpacket_descriptor *desc) +static int netvsc_process_raw_pkt(struct hv_device *device, + struct vmbus_channel *channel, + struct netvsc_device *net_device, + struct net_device *ndev, + u64 request_id, + const struct vmpacket_descriptor *desc) { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct nvsp_message *nvmsg = hv_pkt_data(desc); @@ -1195,8 +1198,8 @@ static void netvsc_process_raw_pkt(struct hv_device *device, break; case VM_PKT_DATA_USING_XFER_PAGES: - netvsc_receive(ndev, net_device, net_device_ctx, - device, channel, desc, nvmsg); + return netvsc_receive(ndev, net_device, net_device_ctx, + device, channel, desc, nvmsg); break; case VM_PKT_DATA_INBAND: @@ -1208,22 +1211,64 @@ static void netvsc_process_raw_pkt(struct hv_device *device, desc->type, request_id); break; } + + return 0; +} + +static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel) +{ + struct vmbus_channel *primary = channel->primary_channel; + + return primary ? primary->device_obj : channel->device_obj; +} + +int netvsc_poll(struct napi_struct *napi, int budget) +{ + struct netvsc_channel *nvchan + = container_of(napi, struct netvsc_channel, napi); + struct vmbus_channel *channel = nvchan->channel; + struct hv_device *device = netvsc_channel_to_device(channel); + u16 q_idx = channel->offermsg.offer.sub_channel_index; + struct net_device *ndev = hv_get_drvdata(device); + struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); + const struct vmpacket_descriptor *desc; + int work_done = 0; + + desc = hv_pkt_iter_first(channel); + while (desc) { + int count; + + count = netvsc_process_raw_pkt(device, channel, net_device, + ndev, desc->trans_id, desc); + work_done += count; + desc = __hv_pkt_iter_next(channel, desc); + + /* If receive packet budget is exhausted, reschedule */ + if (work_done >= budget) { + work_done = budget; + break; + } + } + hv_pkt_iter_close(channel); + + /* If ring is empty and NAPI is not doing polling */ + if (work_done < budget && + napi_complete_done(napi, work_done) && + hv_end_read(&channel->inbound) != 0) + napi_reschedule(napi); + + netvsc_chk_recv_comp(net_device, channel, q_idx); + return work_done; } void netvsc_channel_cb(void *context) { struct vmbus_channel *channel = context; + struct hv_device *device = netvsc_channel_to_device(channel); u16 q_idx = channel->offermsg.offer.sub_channel_index; - struct hv_device *device; struct netvsc_device *net_device; - struct vmpacket_descriptor *desc; struct net_device *ndev; - if (channel->primary_channel != NULL) - device = channel->primary_channel->device_obj; - else - device = channel->device_obj; - ndev = hv_get_drvdata(device); if (unlikely(!ndev)) return; @@ -1233,13 +1278,9 @@ void netvsc_channel_cb(void *context) netvsc_channel_idle(net_device, q_idx)) return; - foreach_vmbus_pkt(desc, channel) { - netvsc_process_raw_pkt(device, channel, net_device, - ndev, desc->trans_id, desc); - - } - - netvsc_chk_recv_comp(net_device, channel, q_idx); + /* disable interupts from host */ + hv_begin_read(&channel->inbound); + napi_schedule(&net_device->chan_table[q_idx].napi); } /* @@ -1261,6 +1302,11 @@ int netvsc_device_add(struct hv_device *device, net_device->ring_size = ring_size; + /* Because the device uses NAPI, all the interrupt batching and + * control is done via Net softirq, not the channel handling + */ + set_channel_read_mode(device->channel, HV_CALL_ISR); + /* Open the channel */ ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, ring_size * PAGE_SIZE, NULL, 0, @@ -1278,8 +1324,16 @@ int netvsc_device_add(struct hv_device *device, * chn_table with the default channel to use it before subchannels are * opened. */ - for (i = 0; i < VRSS_CHANNEL_MAX; i++) - net_device->chan_table[i].channel = device->channel; + for (i = 0; i < VRSS_CHANNEL_MAX; i++) { + struct netvsc_channel *nvchan = &net_device->chan_table[i]; + + nvchan->channel = device->channel; + netif_napi_add(ndev, &nvchan->napi, + netvsc_poll, NAPI_POLL_WEIGHT); + } + + /* Enable NAPI handler for init callbacks */ + napi_enable(&net_device->chan_table[0].napi); /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is * populated. @@ -1299,6 +1353,8 @@ int netvsc_device_add(struct hv_device *device, return ret; close: + napi_disable(&net_device->chan_table[0].napi); + /* Now, we can close the channel safely */ vmbus_close(device->channel); -- cgit v1.2.3 From 0d6dd35784e76bc3e052ffa13c6839d5c9bcb449 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 9 Mar 2017 15:04:14 -0800 Subject: netvsc: need napi scheduled during removal Since rndis_halt_device waits until all outstanding sends and receives are completed. Netvsc device needs to still schedule NAPI to see those completions. Fixes: 2506b1dc4bbe ("netvsc: implement NAPI") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index b1328cef9d5a..0e0c757c1681 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -91,15 +91,6 @@ static void free_netvsc_device(struct netvsc_device *nvdev) } -static inline bool netvsc_channel_idle(const struct netvsc_device *net_device, - u16 q_idx) -{ - const struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; - - return atomic_read(&net_device->num_outstanding_recvs) == 0 && - atomic_read(&nvchan->queue_sends) == 0; -} - static struct netvsc_device *get_outbound_net_device(struct hv_device *device) { struct netvsc_device *net_device = hv_device_to_netvsc_device(device); @@ -1273,13 +1264,10 @@ void netvsc_channel_cb(void *context) if (unlikely(!ndev)) return; - net_device = net_device_to_netvsc_device(ndev); - if (unlikely(net_device->destroy) && - netvsc_channel_idle(net_device, q_idx)) - return; - /* disable interupts from host */ hv_begin_read(&channel->inbound); + + net_device = net_device_to_netvsc_device(ndev); napi_schedule(&net_device->chan_table[q_idx].napi); } -- cgit v1.2.3 From 79cd874c96b38694fb2b4ee3850f6fbbc3015439 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 9 Mar 2017 15:04:15 -0800 Subject: netvsc: fix hang on netvsc module removal The code in netvsc_device_remove was incorrectly calling napi_disable repeatedly on the same element. This would cause attempts to remove netvsc module to hang. Fixes: 2506b1dc4bbe ("netvsc: implement NAPI") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0e0c757c1681..8f9aeec2ce0f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -562,8 +562,8 @@ void netvsc_device_remove(struct hv_device *device) /* Now, we can close the channel safely */ vmbus_close(device->channel); - for (i = 0; i < VRSS_CHANNEL_MAX; i++) - napi_disable(&net_device->chan_table[0].napi); + for (i = 0; i < net_device->num_chn; i++) + napi_disable(&net_device->chan_table[i].napi); /* Release all resources */ free_netvsc_device(net_device); -- cgit v1.2.3 From 6de38af611ca81a970965c06231cd2d5f30b2566 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 16 Mar 2017 16:12:37 -0700 Subject: netvsc: avoid race with callback Change the argument to channel callback from the channel pointer to the internal data structure containing per-channel info. This avoids any possible races when callback happens during initialization and makes IRQ code simpler. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0e71164849dd..0a2e9bd98d2c 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1250,21 +1250,12 @@ int netvsc_poll(struct napi_struct *napi, int budget) void netvsc_channel_cb(void *context) { - struct vmbus_channel *channel = context; - struct hv_device *device = netvsc_channel_to_device(channel); - u16 q_idx = channel->offermsg.offer.sub_channel_index; - struct netvsc_device *net_device; - struct net_device *ndev; - - ndev = hv_get_drvdata(device); - if (unlikely(!ndev)) - return; + struct netvsc_channel *nvchan = context; /* disable interupts from host */ - hv_begin_read(&channel->inbound); + hv_begin_read(&nvchan->channel->inbound); - net_device = net_device_to_netvsc_device(ndev); - napi_schedule(&net_device->chan_table[q_idx].napi); + napi_schedule(&nvchan->napi); } /* @@ -1294,7 +1285,8 @@ int netvsc_device_add(struct hv_device *device, /* Open the channel */ ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, ring_size * PAGE_SIZE, NULL, 0, - netvsc_channel_cb, device->channel); + netvsc_channel_cb, + net_device->chan_table); if (ret != 0) { netdev_err(ndev, "unable to open channel: %d\n", ret); -- cgit v1.2.3 From 262b7f142a50badde4032e925902a0d0ad89eb7f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 16 Mar 2017 16:12:38 -0700 Subject: netvsc: add comments about callback's and NAPI Add some short description of how callback's and NAPI interoperate. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0a2e9bd98d2c..989b7cd99380 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1209,6 +1209,10 @@ static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel) return primary ? primary->device_obj : channel->device_obj; } +/* Network processing softirq + * Process data in incoming ring buffer from host + * Stops when ring is empty or budget is met or exceeded. + */ int netvsc_poll(struct napi_struct *napi, int budget) { struct netvsc_channel *nvchan @@ -1238,7 +1242,11 @@ int netvsc_poll(struct napi_struct *napi, int budget) } hv_pkt_iter_close(channel); - /* If ring is empty and NAPI is not doing polling */ + /* If budget was not exhausted and + * not doing busy poll + * then re-enable host interrupts + * and reschedule if ring is not empty. + */ if (work_done < budget && napi_complete_done(napi, work_done) && hv_end_read(&channel->inbound) != 0) @@ -1248,6 +1256,9 @@ int netvsc_poll(struct napi_struct *napi, int budget) return work_done; } +/* Call back when data is available in host ring buffer. + * Processing is deferred until network softirq (NAPI) + */ void netvsc_channel_cb(void *context) { struct netvsc_channel *nvchan = context; -- cgit v1.2.3 From f4f1c23d6e41f5ee4973a6da65cba1e1c536ec29 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:50:57 -0700 Subject: netvsc: fix NAPI performance regression When using NAPI, the single stream performance declined signifcantly because the poll routine was updating host after every burst of packets. This excess signalling caused host throttling. This fix restores the old behavior. Host is only signalled after the ring has been emptied. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 989b7cd99380..727762d0f13b 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1173,7 +1173,6 @@ static int netvsc_process_raw_pkt(struct hv_device *device, struct vmbus_channel *channel, struct netvsc_device *net_device, struct net_device *ndev, - u64 request_id, const struct vmpacket_descriptor *desc) { struct net_device_context *net_device_ctx = netdev_priv(ndev); @@ -1195,7 +1194,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device, default: netdev_err(ndev, "unhandled packet type %d, tid %llx\n", - desc->type, request_id); + desc->type, desc->trans_id); break; } @@ -1222,28 +1221,20 @@ int netvsc_poll(struct napi_struct *napi, int budget) u16 q_idx = channel->offermsg.offer.sub_channel_index; struct net_device *ndev = hv_get_drvdata(device); struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); - const struct vmpacket_descriptor *desc; int work_done = 0; - desc = hv_pkt_iter_first(channel); - while (desc) { - int count; + /* If starting a new interval */ + if (!nvchan->desc) + nvchan->desc = hv_pkt_iter_first(channel); - count = netvsc_process_raw_pkt(device, channel, net_device, - ndev, desc->trans_id, desc); - work_done += count; - desc = __hv_pkt_iter_next(channel, desc); - - /* If receive packet budget is exhausted, reschedule */ - if (work_done >= budget) { - work_done = budget; - break; - } + while (nvchan->desc && work_done < budget) { + work_done += netvsc_process_raw_pkt(device, channel, net_device, + ndev, nvchan->desc); + nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); } - hv_pkt_iter_close(channel); - /* If budget was not exhausted and - * not doing busy poll + /* If receive ring was exhausted + * and not doing busy poll * then re-enable host interrupts * and reschedule if ring is not empty. */ @@ -1253,7 +1244,9 @@ int netvsc_poll(struct napi_struct *napi, int budget) napi_reschedule(napi); netvsc_chk_recv_comp(net_device, channel, q_idx); - return work_done; + + /* Driver may overshoot since multiple packets per descriptor */ + return min(work_done, budget); } /* Call back when data is available in host ring buffer. @@ -1263,10 +1256,12 @@ void netvsc_channel_cb(void *context) { struct netvsc_channel *nvchan = context; - /* disable interupts from host */ - hv_begin_read(&nvchan->channel->inbound); + if (napi_schedule_prep(&nvchan->napi)) { + /* disable interupts from host */ + hv_begin_read(&nvchan->channel->inbound); - napi_schedule(&nvchan->napi); + __napi_schedule(&nvchan->napi); + } } /* -- cgit v1.2.3 From 545a8e79bd1cc8774877a26275171a2ec8881c9e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:00 -0700 Subject: netvsc: use RCU to protect inner device structure The netvsc driver has an internal structure (netvsc_device) which is created when device is opened and released when device is closed. And also opened/released when MTU or number of channels change. Since this is referenced in the receive and transmit path, it is safer to use RCU to protect/prevent use after free problems. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 727762d0f13b..ab9118d620ab 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -80,8 +80,10 @@ static struct netvsc_device *alloc_net_device(void) return net_device; } -static void free_netvsc_device(struct netvsc_device *nvdev) +static void free_netvsc_device(struct rcu_head *head) { + struct netvsc_device *nvdev + = container_of(head, struct netvsc_device, rcu); int i; for (i = 0; i < VRSS_CHANNEL_MAX; i++) @@ -90,6 +92,10 @@ static void free_netvsc_device(struct netvsc_device *nvdev) kfree(nvdev); } +static void free_netvsc_device_rcu(struct netvsc_device *nvdev) +{ + call_rcu(&nvdev->rcu, free_netvsc_device); +} static struct netvsc_device *get_outbound_net_device(struct hv_device *device) { @@ -551,7 +557,7 @@ void netvsc_device_remove(struct hv_device *device) netvsc_disconnect_vsp(device); - net_device_ctx->nvdev = NULL; + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); /* * At this point, no one should be accessing net_device @@ -566,7 +572,7 @@ void netvsc_device_remove(struct hv_device *device) napi_disable(&net_device->chan_table[i].napi); /* Release all resources */ - free_netvsc_device(net_device); + free_netvsc_device_rcu(net_device); } #define RING_AVAIL_PERCENT_HIWATER 20 @@ -1322,7 +1328,7 @@ int netvsc_device_add(struct hv_device *device, */ wmb(); - net_device_ctx->nvdev = net_device; + rcu_assign_pointer(net_device_ctx->nvdev, net_device); /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device); @@ -1341,7 +1347,7 @@ close: vmbus_close(device->channel); cleanup: - free_netvsc_device(net_device); + free_netvsc_device(&net_device->rcu); return ret; } -- cgit v1.2.3 From a0be450e19d397e9ff215e32ed31bc51339b460a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:01 -0700 Subject: netvsc: uses RCU instead of removal flag It is cleaner to use RCU protected pointer (nvdev_ctx->nvdev) to indicate device is in removed state, rather than having a separate boolean flag. By using the pointer the context can be checked by static checkers and dynamic lockdep. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ab9118d620ab..1f17d948f9b0 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -605,7 +605,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, { struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device *ndev = hv_get_drvdata(device); - struct net_device_context *net_device_ctx = netdev_priv(ndev); struct vmbus_channel *channel = device->channel; u16 q_idx = 0; int queue_sends; @@ -639,7 +638,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, wake_up(&net_device->wait_drain); if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && - !net_device_ctx->start_remove && (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); @@ -1326,8 +1324,6 @@ int netvsc_device_add(struct hv_device *device, /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is * populated. */ - wmb(); - rcu_assign_pointer(net_device_ctx->nvdev, net_device); /* Connect with the NetVsp */ -- cgit v1.2.3 From ebc1dcf6008e562a8f88a6b1f4a4705f4d4c4fdd Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:04 -0700 Subject: netvsc: eliminate unnecessary skb == NULL checks Since there already is a special case goto for control messages (skb == NULL) in netvsc_send, there is no need for later checks in same code path. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 1f17d948f9b0..e998e2f7a619 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -706,8 +706,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, packet->page_buf_cnt; /* Add padding */ - if (skb && skb->xmit_more && remain && - !packet->cp_partial) { + if (skb->xmit_more && remain && !packet->cp_partial) { padding = net_device->pkt_align - remain; rndis_msg->msg_len += padding; packet->total_data_buflen += padding; @@ -865,9 +864,7 @@ int netvsc_send(struct hv_device *device, if (msdp->pkt) msd_len = msdp->pkt->total_data_buflen; - try_batch = (skb != NULL) && msd_len > 0 && msdp->count < - net_device->max_pkt; - + try_batch = msd_len > 0 && msdp->count < net_device->max_pkt; if (try_batch && msd_len + pktlen + net_device->pkt_align < net_device->send_section_size) { section_index = msdp->pkt->send_buf_index; @@ -877,7 +874,7 @@ int netvsc_send(struct hv_device *device, section_index = msdp->pkt->send_buf_index; packet->cp_partial = true; - } else if ((skb != NULL) && pktlen + net_device->pkt_align < + } else if (pktlen + net_device->pkt_align < net_device->send_section_size) { section_index = netvsc_get_next_send_section(net_device); if (section_index != NETVSC_INVALID_INDEX) { -- cgit v1.2.3 From bffb184247bcc783a40a0e123a9a2de3c5b28157 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 6 Apr 2017 14:59:21 -0700 Subject: netvsc: Initialize all channel related state prior to opening the channel Prior to opening the channel we should have all the state setup to handle interrupts. The current code does not do that; fix the bug. This bug can result in faults in the interrupt path. Signed-off-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e998e2f7a619..7ab06b338a14 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1289,6 +1289,21 @@ int netvsc_device_add(struct hv_device *device, */ set_channel_read_mode(device->channel, HV_CALL_ISR); + /* If we're reopening the device we may have multiple queues, fill the + * chn_table with the default channel to use it before subchannels are + * opened. + * Initialize the channel state before we open; + * we can be interrupted as soon as we open the channel. + */ + + for (i = 0; i < VRSS_CHANNEL_MAX; i++) { + struct netvsc_channel *nvchan = &net_device->chan_table[i]; + + nvchan->channel = device->channel; + netif_napi_add(ndev, &nvchan->napi, + netvsc_poll, NAPI_POLL_WEIGHT); + } + /* Open the channel */ ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, ring_size * PAGE_SIZE, NULL, 0, @@ -1303,18 +1318,6 @@ int netvsc_device_add(struct hv_device *device, /* Channel is opened */ netdev_dbg(ndev, "hv_netvsc channel opened successfully\n"); - /* If we're reopening the device we may have multiple queues, fill the - * chn_table with the default channel to use it before subchannels are - * opened. - */ - for (i = 0; i < VRSS_CHANNEL_MAX; i++) { - struct netvsc_channel *nvchan = &net_device->chan_table[i]; - - nvchan->channel = device->channel; - netif_napi_add(ndev, &nvchan->napi, - netvsc_poll, NAPI_POLL_WEIGHT); - } - /* Enable NAPI handler for init callbacks */ napi_enable(&net_device->chan_table[0].napi); -- cgit v1.2.3 From f9645430ef5f53ddf0ddd481e9f70f6fce7ccff2 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 7 Apr 2017 14:41:19 -0400 Subject: netvsc: use napi_consume_skb This allows using deferred skb freeing and with NAPI. And get buffer recycling. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 7ab06b338a14..fd21d5aab580 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -601,7 +601,8 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device, static void netvsc_send_tx_complete(struct netvsc_device *net_device, struct vmbus_channel *incoming_channel, struct hv_device *device, - const struct vmpacket_descriptor *desc) + const struct vmpacket_descriptor *desc, + int budget) { struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device *ndev = hv_get_drvdata(device); @@ -628,7 +629,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, tx_stats->bytes += packet->total_bytes; u64_stats_update_end(&tx_stats->syncp); - dev_consume_skb_any(skb); + napi_consume_skb(skb, budget); } queue_sends = @@ -646,7 +647,8 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, static void netvsc_send_completion(struct netvsc_device *net_device, struct vmbus_channel *incoming_channel, struct hv_device *device, - const struct vmpacket_descriptor *desc) + const struct vmpacket_descriptor *desc, + int budget) { struct nvsp_message *nvsp_packet = hv_pkt_data(desc); struct net_device *ndev = hv_get_drvdata(device); @@ -664,7 +666,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device, case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: netvsc_send_tx_complete(net_device, incoming_channel, - device, desc); + device, desc, budget); break; default: @@ -1174,14 +1176,16 @@ static int netvsc_process_raw_pkt(struct hv_device *device, struct vmbus_channel *channel, struct netvsc_device *net_device, struct net_device *ndev, - const struct vmpacket_descriptor *desc) + const struct vmpacket_descriptor *desc, + int budget) { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct nvsp_message *nvmsg = hv_pkt_data(desc); switch (desc->type) { case VM_PKT_COMP: - netvsc_send_completion(net_device, channel, device, desc); + netvsc_send_completion(net_device, channel, device, + desc, budget); break; case VM_PKT_DATA_USING_XFER_PAGES: @@ -1230,7 +1234,7 @@ int netvsc_poll(struct napi_struct *napi, int budget) while (nvchan->desc && work_done < budget) { work_done += netvsc_process_raw_pkt(device, channel, net_device, - ndev, nvchan->desc); + ndev, nvchan->desc, budget); nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); } -- cgit v1.2.3 From 73e64fa4f417b22d8d5521999a631ced8e2d442e Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 19 Apr 2017 13:53:49 -0700 Subject: netvsc: Deal with rescinded channels correctly We will not be able to send packets over a channel that has been rescinded. Make necessary adjustments so we can properly cleanup even when the channel is rescinded. This issue can be trigerred in the NIC hot-remove path. Signed-off-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index fd21d5aab580..967843ba03fa 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -135,6 +135,13 @@ static void netvsc_destroy_buf(struct hv_device *device) sizeof(struct nvsp_message), (unsigned long)revoke_packet, VM_PKT_DATA_INBAND, 0); + /* If the failure is because the channel is rescinded; + * ignore the failure since we cannot send on a rescinded + * channel. This would allow us to properly cleanup + * even when the channel is rescinded. + */ + if (device->channel->rescind) + ret = 0; /* * If we failed here, we might as well return and * have a leak rather than continue and a bugchk @@ -195,6 +202,15 @@ static void netvsc_destroy_buf(struct hv_device *device) sizeof(struct nvsp_message), (unsigned long)revoke_packet, VM_PKT_DATA_INBAND, 0); + + /* If the failure is because the channel is rescinded; + * ignore the failure since we cannot send on a rescinded + * channel. This would allow us to properly cleanup + * even when the channel is rescinded. + */ + if (device->channel->rescind) + ret = 0; + /* If we failed here, we might as well return and * have a leak rather than continue and a bugchk */ -- cgit v1.2.3 From 76bb5db5c749dfe19d779aac076133e821b859dd Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 19 Apr 2017 15:22:02 -0700 Subject: netvsc: fix use after free on module removal The NAPI data structure is embedded in the netvsc_device structure and is freed when device is closed. There is still a reference (in NAPI list) to this which causes a crash in netif_napi_del when device is removed. Fix by managing NAPI instances correctly. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net/hyperv/netvsc.c') diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 967843ba03fa..f99651c03e0a 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -584,8 +584,9 @@ void netvsc_device_remove(struct hv_device *device) /* Now, we can close the channel safely */ vmbus_close(device->channel); + /* And dissassociate NAPI context from device */ for (i = 0; i < net_device->num_chn; i++) - napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); /* Release all resources */ free_netvsc_device_rcu(net_device); @@ -1320,8 +1321,6 @@ int netvsc_device_add(struct hv_device *device, struct netvsc_channel *nvchan = &net_device->chan_table[i]; nvchan->channel = device->channel; - netif_napi_add(ndev, &nvchan->napi, - netvsc_poll, NAPI_POLL_WEIGHT); } /* Open the channel */ @@ -1339,6 +1338,8 @@ int netvsc_device_add(struct hv_device *device, netdev_dbg(ndev, "hv_netvsc channel opened successfully\n"); /* Enable NAPI handler for init callbacks */ + netif_napi_add(ndev, &net_device->chan_table[0].napi, + netvsc_poll, NAPI_POLL_WEIGHT); napi_enable(&net_device->chan_table[0].napi); /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is @@ -1357,7 +1358,7 @@ int netvsc_device_add(struct hv_device *device, return ret; close: - napi_disable(&net_device->chan_table[0].napi); + netif_napi_del(&net_device->chan_table[0].napi); /* Now, we can close the channel safely */ vmbus_close(device->channel); -- cgit v1.2.3