Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 172
1 file changed, 136 insertions, 36 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 61a57d134544..0b4747e81464 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -47,7 +47,8 @@ module_param(napi_tx, bool, 0644);
 #define VIRTIO_XDP_TX		BIT(0)
 #define VIRTIO_XDP_REDIR	BIT(1)
 
-#define VIRTIO_XDP_FLAG	BIT(0)
+#define VIRTIO_XDP_FLAG		BIT(0)
+#define VIRTIO_ORPHAN_FLAG	BIT(1)
 
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
@@ -85,6 +86,8 @@ struct virtnet_stat_desc {
 struct virtnet_sq_free_stats {
 	u64 packets;
 	u64 bytes;
+	u64 napi_packets;
+	u64 napi_bytes;
 };
 
 struct virtnet_sq_stats {
@@ -506,29 +509,50 @@ static struct xdp_frame *ptr_to_xdp(void *ptr)
 	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
 }
 
-static void __free_old_xmit(struct send_queue *sq, bool in_napi,
-			    struct virtnet_sq_free_stats *stats)
+static bool is_orphan_skb(void *ptr)
+{
+	return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
+}
+
+static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
+{
+	return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
+}
+
+static struct sk_buff *ptr_to_skb(void *ptr)
+{
+	return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
+}
+
+static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
+			    bool in_napi, struct virtnet_sq_free_stats *stats)
 {
 	unsigned int len;
 	void *ptr;
 
 	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		++stats->packets;
-
 		if (!is_xdp_frame(ptr)) {
-			struct sk_buff *skb = ptr;
+			struct sk_buff *skb = ptr_to_skb(ptr);
 
 			pr_debug("Sent skb %p\n", skb);
 
-			stats->bytes += skb->len;
+			if (is_orphan_skb(ptr)) {
+				stats->packets++;
+				stats->bytes += skb->len;
+			} else {
+				stats->napi_packets++;
+				stats->napi_bytes += skb->len;
+			}
 			napi_consume_skb(skb, in_napi);
 		} else {
 			struct xdp_frame *frame = ptr_to_xdp(ptr);
 
+			stats->packets++;
 			stats->bytes += xdp_get_frame_len(frame);
 			xdp_return_frame(frame);
 		}
 	}
+	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
 }
 
 /* Converting between virtqueue no. and kernel tx/rx queue no.
@@ -955,21 +979,22 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
 	virtnet_rq_free_buf(vi, rq, buf);
 }
 
-static void free_old_xmit(struct send_queue *sq, bool in_napi)
+static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
+			  bool in_napi)
 {
 	struct virtnet_sq_free_stats stats = {0};
 
-	__free_old_xmit(sq, in_napi, &stats);
+	__free_old_xmit(sq, txq, in_napi, &stats);
 
 	/* Avoid overhead when no packets have been processed
 	 * happens when called speculatively from start_xmit.
 	 */
-	if (!stats.packets)
+	if (!stats.packets && !stats.napi_packets)
 		return;
 
 	u64_stats_update_begin(&sq->stats.syncp);
-	u64_stats_add(&sq->stats.bytes, stats.bytes);
-	u64_stats_add(&sq->stats.packets, stats.packets);
+	u64_stats_add(&sq->stats.bytes, stats.bytes + stats.napi_bytes);
+	u64_stats_add(&sq->stats.packets, stats.packets + stats.napi_packets);
 	u64_stats_update_end(&sq->stats.syncp);
 }
 
@@ -1003,7 +1028,9 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
 	 * early means 16 slots are typically wasted.
 	 */
 	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
-		netif_stop_subqueue(dev, qnum);
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
+
+		netif_tx_stop_queue(txq);
 		u64_stats_update_begin(&sq->stats.syncp);
 		u64_stats_inc(&sq->stats.stop);
 		u64_stats_update_end(&sq->stats.syncp);
@@ -1012,7 +1039,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
 			virtqueue_napi_schedule(&sq->napi, sq->vq);
 		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 			/* More just got used, free them then recheck. */
-			free_old_xmit(sq, false);
+			free_old_xmit(sq, txq, false);
 			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
 				netif_start_subqueue(dev, qnum);
 				u64_stats_update_begin(&sq->stats.syncp);
@@ -1138,7 +1165,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	}
 
 	/* Free up any pending old buffers before queueing new ones. */
-	__free_old_xmit(sq, false, &stats);
+	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
+			false, &stats);
 
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
@@ -1360,6 +1388,10 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
 	if (unlikely(hdr->hdr.gso_type))
 		goto err_xdp;
 
+	/* Partially checksummed packets must be dropped. */
+	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
+		goto err_xdp;
+
 	buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
 		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
@@ -1677,6 +1709,10 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
 	if (unlikely(hdr->hdr.gso_type))
 		return NULL;
 
+	/* Partially checksummed packets must be dropped. */
+	if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
+		return NULL;
+
 	/* Now XDP core assumes frag size is PAGE_SIZE, but buffers
 	 * with headroom may add hole in truesize, which
 	 * make their length exceed PAGE_SIZE. So we disabled the
@@ -1943,6 +1979,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 	struct net_device *dev = vi->dev;
 	struct sk_buff *skb;
 	struct virtio_net_common_hdr *hdr;
+	u8 flags;
 
 	if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
@@ -1951,6 +1988,15 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 		return;
 	}
 
+	/* 1. Save the flags early, as the XDP program might overwrite them.
+	 *    These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID
+	 *    stay valid after XDP processing.
+	 * 2. XDP doesn't work with partially checksummed packets (refer to
+	 *    virtnet_xdp_set()), so packets marked as
+	 *    VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing.
+	 */
+	flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags;
+
 	if (vi->mergeable_rx_bufs)
 		skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
 					stats);
@@ -1966,7 +2012,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 	if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
 		virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);
 
-	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
+	if (flags & VIRTIO_NET_HDR_F_DATA_VALID)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
@@ -2313,7 +2359,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
 
 		do {
 			virtqueue_disable_cb(sq->vq);
-			free_old_xmit(sq, true);
+			free_old_xmit(sq, txq, true);
 		} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
 
 		if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
@@ -2336,12 +2382,13 @@ static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue
 	if (!rq->packets_in_napi)
 		return;
 
-	u64_stats_update_begin(&rq->stats.syncp);
+	/* Don't need protection when fetching stats, since fetcher and
+	 * updater of the stats are in same context
+	 */
 	dim_update_sample(rq->calls,
 			  u64_stats_read(&rq->stats.packets),
 			  u64_stats_read(&rq->stats.bytes),
 			  &cur_sample);
-	u64_stats_update_end(&rq->stats.syncp);
 
 	net_dim(&rq->dim, cur_sample);
 	rq->packets_in_napi = 0;
@@ -2412,6 +2459,7 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
 		goto err_xdp_reg_mem_model;
 
 	virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
+	netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index));
 	virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
 
 	return 0;
@@ -2421,6 +2469,13 @@ err_xdp_reg_mem_model:
 	return err;
 }
 
+static void virtnet_cancel_dim(struct virtnet_info *vi, struct dim *dim)
+{
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
+		return;
+	net_dim_work_cancel(dim);
+}
+
 static int virtnet_open(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
@@ -2447,7 +2502,7 @@ err_enable_qp:
 
 	for (i--; i >= 0; i--) {
 		virtnet_disable_queue_pair(vi, i);
-		cancel_work_sync(&vi->rq[i].dim.work);
+		virtnet_cancel_dim(vi, &vi->rq[i].dim);
 	}
 
 	return err;
@@ -2471,7 +2526,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
 	txq = netdev_get_tx_queue(vi->dev, index);
 	__netif_tx_lock(txq, raw_smp_processor_id());
 	virtqueue_disable_cb(sq->vq);
-	free_old_xmit(sq, true);
+	free_old_xmit(sq, txq, true);
 
 	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
 		if (netif_tx_queue_stopped(txq)) {
@@ -2505,7 +2560,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
 	return 0;
 }
 
-static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
+static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
 {
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
@@ -2549,7 +2604,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 			return num_sg;
 		num_sg++;
 	}
-	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
+	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
+				    skb_to_ptr(skb, orphan), GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -2559,24 +2615,25 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct send_queue *sq = &vi->sq[qnum];
 	int err;
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
-	bool kick = !netdev_xmit_more();
+	bool xmit_more = netdev_xmit_more();
 	bool use_napi = sq->napi.weight;
+	bool kick;
 
 	/* Free up any pending old buffers before queueing new ones. */
 	do {
 		if (use_napi)
 			virtqueue_disable_cb(sq->vq);
 
-		free_old_xmit(sq, false);
+		free_old_xmit(sq, txq, false);
 
-	} while (use_napi && kick &&
+	} while (use_napi && !xmit_more &&
 	       unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
 
 	/* timestamp packet in software */
 	skb_tx_timestamp(skb);
 
 	/* Try to transmit */
-	err = xmit_skb(sq, skb);
+	err = xmit_skb(sq, skb, !use_napi);
 
 	/* This should not happen! */
 	if (unlikely(err)) {
@@ -2598,7 +2655,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	check_sq_full_and_disable(vi, dev, sq);
 
-	if (kick || netif_xmit_stopped(txq)) {
+	kick = use_napi ? __netdev_tx_sent_queue(txq, skb->len, xmit_more) :
+			  !xmit_more || netif_xmit_stopped(txq);
+	if (kick) {
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
 			u64_stats_update_begin(&sq->stats.syncp);
 			u64_stats_inc(&sq->stats.kicks);
@@ -2619,7 +2678,7 @@ static int virtnet_rx_resize(struct virtnet_info *vi,
 
 	if (running) {
 		napi_disable(&rq->napi);
-		cancel_work_sync(&rq->dim.work);
+		virtnet_cancel_dim(vi, &rq->dim);
 	}
 
 	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf);
@@ -2880,7 +2939,7 @@ static int virtnet_close(struct net_device *dev)
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		virtnet_disable_queue_pair(vi, i);
-		cancel_work_sync(&vi->rq[i].dim.work);
+		virtnet_cancel_dim(vi, &vi->rq[i].dim);
 	}
 
 	return 0;
@@ -4406,7 +4465,7 @@ static void virtnet_rx_dim_work(struct work_struct *work)
 	if (!rq->dim_enabled)
 		goto out;
 
-	update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+	update_moder = net_dim_get_rx_irq_moder(dev, dim);
 	if (update_moder.usec != rq->intr_coal.max_usecs ||
 	    update_moder.pkts != rq->intr_coal.max_packets) {
 		err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
@@ -5106,6 +5165,36 @@ static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
 			   jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
 }
 
+static int virtnet_init_irq_moder(struct virtnet_info *vi)
+{
+	u8 profile_flags = 0, coal_flags = 0;
+	int ret, i;
+
+	profile_flags |= DIM_PROFILE_RX;
+	coal_flags |= DIM_COALESCE_USEC | DIM_COALESCE_PKTS;
+	ret = net_dim_init_irq_moder(vi->dev, profile_flags, coal_flags,
+				     DIM_CQ_PERIOD_MODE_START_FROM_EQE,
+				     0, virtnet_rx_dim_work, NULL);
+
+	if (ret)
+		return ret;
+
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		net_dim_setting(vi->dev, &vi->rq[i].dim, false);
+
+	return 0;
+}
+
+static void virtnet_free_irq_moder(struct virtnet_info *vi)
+{
+	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
+		return;
+
+	rtnl_lock();
+	net_dim_free_irq_moder(vi->dev);
+	rtnl_unlock();
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop            = virtnet_close,
@@ -5385,9 +5474,6 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 			       virtnet_poll_tx,
 			       napi_tx ? napi_weight : 0);
 
-		INIT_WORK(&vi->rq[i].dim.work, virtnet_rx_dim_work);
-		vi->rq[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
-
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
 		ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
 		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
@@ -5666,8 +5752,16 @@ static int virtnet_probe(struct virtio_device *vdev)
 		dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
 		/* (!csum && gso) case will be fixed by register_netdev() */
 	}
-	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
-		dev->features |= NETIF_F_RXCSUM;
+
+	/* 1. With VIRTIO_NET_F_GUEST_CSUM negotiation, the driver doesn't
+	 *    need to calculate checksums for partially checksummed packets,
+	 *    as they're considered valid by the upper layer.
+	 * 2. Without VIRTIO_NET_F_GUEST_CSUM negotiation, the driver only
+	 *    receives fully checksummed packets. The device may assist in
+	 *    validating these packets' checksums, so the driver won't have to.
	 */
+	dev->features |= NETIF_F_RXCSUM;
+
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
 		dev->features |= NETIF_F_GRO_HW;
@@ -5808,6 +5902,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 		for (i = 0; i < vi->max_queue_pairs; i++)
 			if (vi->sq[i].napi.weight)
 				vi->sq[i].intr_coal.max_packets = 1;
+
+		err = virtnet_init_irq_moder(vi);
+		if (err)
+			goto free;
 	}
 
 #ifdef CONFIG_SYSFS
@@ -5959,6 +6057,8 @@ static void virtnet_remove(struct virtio_device *vdev)
 	disable_rx_mode_work(vi);
 	flush_work(&vi->rx_mode_work);
 
+	virtnet_free_irq_moder(vi);
+
 	unregister_netdev(vi->dev);
 
 	net_failover_destroy(vi->failover);
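Note on the VIRTIO_XDP_FLAG/VIRTIO_ORPHAN_FLAG hunks: the driver queues either skb or xdp_frame pointers into the same virtqueue, and because both objects are at least 4-byte aligned, bits 0-1 of every such address are guaranteed zero and can carry metadata (which kind of object it is, and whether the skb was orphaned). A minimal, self-contained sketch of the pointer-tagging technique; the flag names and the int payload here are illustrative, not taken from the driver:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define XDP_FLAG	0x1UL	/* bit 0: pointer is an xdp_frame, not an skb */
#define ORPHAN_FLAG	0x2UL	/* bit 1: skb was orphaned before transmit */

/* Tagging is only valid while the object is at least 4-byte aligned,
 * so bits 0-1 of its address are guaranteed to be zero. */
static void *tag_ptr(void *p, unsigned long flags)
{
	assert(((uintptr_t)p & 0x3UL) == 0);
	return (void *)((uintptr_t)p | flags);
}

static void *untag_ptr(void *p)
{
	return (void *)((uintptr_t)p & ~0x3UL);
}

static int has_flag(void *p, unsigned long flag)
{
	return ((uintptr_t)p & flag) != 0;
}

int main(void)
{
	int *obj = malloc(sizeof(*obj));	/* malloc returns suitably aligned memory */
	void *tagged = tag_ptr(obj, ORPHAN_FLAG);

	*obj = 42;
	printf("orphan=%d xdp=%d value=%d\n",
	       has_flag(tagged, ORPHAN_FLAG),
	       has_flag(tagged, XDP_FLAG),
	       *(int *)untag_ptr(tagged));
	free(obj);
	return 0;
}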
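Threading txq into __free_old_xmit()/free_old_xmit(), calling netdev_tx_reset_queue() in virtnet_enable_queue_pair(), and deriving the kick decision from __netdev_tx_sent_queue() in start_xmit() are the three legs of Byte Queue Limits (BQL): bytes are reported when queued, reported again when reclaimed, and the counters are reset whenever the ring is reinitialized. Only skbs completed in napi context (the napi_packets/napi_bytes counters) reach netdev_tx_completed_queue(), because orphaned skbs are never accounted at send time. The following is a schematic of the general pattern, not buildable standalone; my_xmit/my_tx_clean/my_ring_init and the drv_* helpers are placeholders for driver specifics:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Placeholders for the driver's real descriptor/doorbell handling. */
extern void drv_post_descriptor(struct sk_buff *skb);
extern void drv_ring_doorbell(void);

/* Transmit path: account the skb with BQL. __netdev_tx_sent_queue()
 * returns true when the doorbell must ring now - either because the
 * stack set xmit_more to false or because BQL just stopped the queue -
 * which is exactly the "kick" decision made in start_xmit() above. */
static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *txq =
		netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
	bool kick;

	drv_post_descriptor(skb);
	kick = __netdev_tx_sent_queue(txq, skb->len, netdev_xmit_more());
	if (kick)
		drv_ring_doorbell();
	return NETDEV_TX_OK;
}

/* Completion path: tell BQL how much was reclaimed so it can size the
 * in-flight byte budget and restart the queue when appropriate. */
static void my_tx_clean(struct netdev_queue *txq,
			unsigned int pkts, unsigned int bytes)
{
	netdev_tx_completed_queue(txq, pkts, bytes);
}

/* Ring bring-up: sent and completed counters must stay paired, so BQL
 * state is reset whenever descriptors are dropped without completion. */
static void my_ring_init(struct netdev_queue *txq)
{
	netdev_tx_reset_queue(txq);
}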
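The receive_buf() hunk snapshots hdr.flags before anything else runs because the virtio-net header lives in buffer headroom that an attached XDP program is free to rewrite. A runnable toy model of that save-before-callback pattern; struct vnet_hdr, run_xdp_prog() and the flag value are made up for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct vnet_hdr {	/* stand-in for the real virtio-net header */
	uint8_t flags;
	uint8_t gso_type;
};

/* Stand-in XDP program: entitled to rewrite the whole buffer,
 * including the headroom where the virtio-net header lives. */
static void run_xdp_prog(uint8_t *buf, size_t len)
{
	memset(buf, 0, len);
}

int main(void)
{
	uint8_t buf[64] = { 0x02 /* pretend DATA_VALID-style flag */, 0 };
	struct vnet_hdr hdr;

	memcpy(&hdr, buf, sizeof(hdr));	/* snapshot before XDP runs */
	run_xdp_prog(buf, sizeof(buf));

	/* buf[0] is now 0, but the snapshot kept the original flags. */
	printf("saved flags=0x%02x, in-buffer flags=0x%02x\n",
	       hdr.flags, buf[0]);
	return 0;
}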
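The virtnet_rx_dim_update() hunk drops the u64_stats_update_begin()/end() pair around what is really a read: that bracket marks writers so that readers in another context (on 32-bit SMP, where 64-bit loads can tear) can detect an in-progress update and retry, and here fetcher and updater run in the same napi context. For contrast, a hedged kernel-style fragment showing all three cases; struct rx_stats and the function names are illustrative, not from the driver:

#include <linux/u64_stats_sync.h>

struct rx_stats {
	u64_stats_t packets;
	u64_stats_t bytes;
	struct u64_stats_sync syncp;
};

/* Writer side (e.g. NAPI poll): bracket updates so 32-bit readers
 * can detect a torn 64-bit value and retry. */
static void rx_stats_add(struct rx_stats *s, unsigned int bytes)
{
	u64_stats_update_begin(&s->syncp);
	u64_stats_inc(&s->packets);
	u64_stats_add(&s->bytes, bytes);
	u64_stats_update_end(&s->syncp);
}

/* Reader in a different context: loop until a consistent snapshot. */
static void rx_stats_snapshot(struct rx_stats *s, u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&s->syncp);
		*packets = u64_stats_read(&s->packets);
		*bytes = u64_stats_read(&s->bytes);
	} while (u64_stats_fetch_retry(&s->syncp, start));
}

/* Reader in the SAME context as the writer (the dim_update_sample()
 * call above): no concurrent update is possible, so a plain
 * u64_stats_read() without the seqcount dance is sufficient. */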