diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2018-08-29 12:26:39 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2018-08-29 12:27:21 -0700 |
| commit | 29b5e0f34359f0e1c95aa644bdd92e459c1231ee (patch) | |
| tree | 57ce4ca973b0149808ac312a2642086424489a12 /include | |
| parent | 7d2c6cfc5411207f1094e7ca5e63e711dc76d1ff (diff) | |
| parent | 58c50ae4a0b638ebbcdddf03cfa4fd36f0edeb02 (diff) | |
Merge branch 'AF_XDP-zerocopy-for-i40e'
Björn Töpel says:
====================
This patch set introduces zero-copy AF_XDP support for Intel's i40e
driver. In the first preparatory patch we also add support for
XDP_REDIRECT for zero-copy allocated frames so that XDP programs can
redirect them. This was a ToDo from the first AF_XDP zero-copy patch
set from early June. Special thanks to Alex Duyck and Jesper Dangaard
Brouer for reviewing earlier versions of this patch set.
The i40e zero-copy code is located in its own file i40e_xsk.[ch]. Note
that in the interest of time, to get an AF_XDP zero-copy implementation
out there for people to try, some code paths have been copied from the
XDP path to the zero-copy path. It is out goal to merge the two paths
in later patch sets.
In contrast to the implementation from beginning of June, this patch
set does not require any extra HW queues for AF_XDP zero-copy
TX. Instead, the XDP TX HW queue is used for both XDP_REDIRECT and
AF_XDP zero-copy TX.
Jeff, given that most of changes are in i40e, it is up to you how you
would like to route these patches. The set is tagged bpf-next, but
if taking it via the Intel driver tree is easier, let us know.
We have run some benchmarks on a dual socket system with two Broadwell
E5 2660 @ 2.0 GHz with hyperthreading turned off. Each socket has 14
cores which gives a total of 28, but only two cores are used in these
experiments. One for TR/RX and one for the user space application. The
memory is DDR4 @ 2133 MT/s (1067 MHz) and the size of each DIMM is
8192MB and with 8 of those DIMMs in the system we have 64 GB of total
memory. The compiler used is gcc (Ubuntu 7.3.0-16ubuntu3) 7.3.0. The
NIC is Intel I40E 40Gbit/s using the i40e driver.
Below are the results in Mpps of the I40E NIC benchmark runs for 64
and 1500 byte packets, generated by a commercial packet generator HW
outputing packets at full 40 Gbit/s line rate. The results are with
retpoline and all other spectre and meltdown fixes, so these results
are not comparable to the ones from the zero-copy patch set in June.
AF_XDP performance 64 byte packets.
Benchmark XDP_SKB XDP_DRV XDP_DRV with zerocopy
rxdrop 2.6 8.2 15.0
txpush 2.2 - 21.9
l2fwd 1.7 2.3 11.3
AF_XDP performance 1500 byte packets:
Benchmark XDP_SKB XDP_DRV XDP_DRV with zerocopy
rxdrop 2.0 3.3 3.3
l2fwd 1.3 1.7 3.1
XDP performance on our system as a base line:
64 byte packets:
XDP stats CPU pps issue-pps
XDP-RX CPU 16 18.4M 0
1500 byte packets:
XDP stats CPU pps issue-pps
XDP-RX CPU 16 3.3M 0
The structure of the patch set is as follows:
Patch 1: Add support for XDP_REDIRECT of zero-copy allocated frames
Patches 2-4: Preparatory patches to common xsk and net code
Patches 5-7: Preparatory patches to i40e driver code for RX
Patch 8: i40e zero-copy support for RX
Patch 9: Preparatory patch to i40e driver code for TX
Patch 10: i40e zero-copy support for TX
Patch 11: Add flags to sample application to force zero-copy/copy mode
====================
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/netdevice.h | 26 | ||||
| -rw-r--r-- | include/net/xdp.h | 6 | ||||
| -rw-r--r-- | include/net/xdp_sock.h | 43 |
3 files changed, 73 insertions, 2 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..4271f6b4e419 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -535,6 +535,32 @@ static inline void napi_synchronize(const struct napi_struct *n) barrier(); } +/** + * napi_if_scheduled_mark_missed - if napi is running, set the + * NAPIF_STATE_MISSED + * @n: NAPI context + * + * If napi is running, set the NAPIF_STATE_MISSED, and return true if + * NAPI is scheduled. + **/ +static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (val & NAPIF_STATE_DISABLE) + return true; + + if (!(val & NAPIF_STATE_SCHED)) + return false; + + new = val | NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return true; +} + enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, diff --git a/include/net/xdp.h b/include/net/xdp.h index 76b95256c266..0f25b3675c5c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -91,6 +91,8 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); + /* Convert xdp_buff to xdp_frame */ static inline struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) @@ -99,9 +101,8 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) int metasize; int headroom; - /* TODO: implement clone, copy, use "native" MEM_TYPE */ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) - return NULL; + return xdp_convert_zc_to_xdp_frame(xdp); /* Assure headroom is available for storing info */ headroom = xdp->data - xdp->data_hard_start; @@ -135,6 +136,7 @@ void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); +void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 7161856bcf9c..56994ad1ab40 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -79,6 +79,16 @@ void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); void xsk_umem_consume_tx_done(struct xdp_umem *umem); + +static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) +{ + return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1)); +} + +static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) +{ + return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1)); +} #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { @@ -98,6 +108,39 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) { return false; } + +static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) +{ + return NULL; +} + +static inline void xsk_umem_discard_addr(struct xdp_umem *umem) +{ +} + +static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) +{ +} + +static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, + u32 *len) +{ + return false; +} + +static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem) +{ +} + +static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) +{ + return NULL; +} + +static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) +{ + return 0; +} #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ |
