From 1c69fc2a9012e160c8d459f63df74a6b01db8322 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 6 Feb 2008 21:07:54 -0800 Subject: IB/mlx4: Consolidate code to get an entry from a struct mlx4_buf We use struct mlx4_buf for kernel QP, CQ and SRQ buffers, and the code to look up an entry is duplicated in get_cqe_from_buf() and the QP and SRQ versions of get_wqe(). Factor this out into mlx4_buf_offset(). This will also make it easier to switch over to using vmap() for buffers. Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 222815d91c40..a0afa7511a30 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -308,6 +308,14 @@ struct mlx4_init_port_param { int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); +static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) +{ + if (buf->nbufs == 1) + return buf->u.direct.buf + offset; + else + return buf->u.page_list[offset >> PAGE_SHIFT].buf + + (offset & (PAGE_SIZE - 1)); +} int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); -- cgit v1.2.3 From 313abe55a87bc10e55d00f337d609e17ad5f8c9a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 28 Jan 2008 10:40:51 +0200 Subject: mlx4_core: For 64-bit systems, vmap() kernel queue buffers Since kernel virtual memory is not a problem on 64-bit systems, there is no reason to use our own 2-layer page mapping scheme for large kernel queue buffers on such systems. Instead, map the page list to a single virtually contiguous buffer with vmap(), so that can we access buffer memory via direct indexing. Signed-off-by: Michael S. Tsirkin Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a0afa7511a30..631607788f83 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -189,7 +189,7 @@ struct mlx4_buf_list { }; struct mlx4_buf { - union { + struct { struct mlx4_buf_list direct; struct mlx4_buf_list *page_list; } u; @@ -310,7 +310,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { - if (buf->nbufs == 1) + if (BITS_PER_LONG == 64 || buf->nbufs == 1) return buf->u.direct.buf + offset; else return buf->u.page_list[offset >> PAGE_SHIFT].buf + -- cgit v1.2.3 From b57aacfa7a95328f469d0360e49289b023c47e9e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 6 Feb 2008 21:17:59 -0800 Subject: mlx4_core: Clean up struct mlx4_buf Now that struct mlx4_buf.u is a struct instead of a union because of the vmap() changes, there's no point in having a struct at all. So move .direct and .page_list directly into struct mlx4_buf and get rid of a bunch of unnecessary ".u"s. Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 631607788f83..4210ac4a8bcd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -189,10 +189,8 @@ struct mlx4_buf_list { }; struct mlx4_buf { - struct { - struct mlx4_buf_list direct; - struct mlx4_buf_list *page_list; - } u; + struct mlx4_buf_list direct; + struct mlx4_buf_list *page_list; int nbufs; int npages; int page_shift; @@ -311,9 +309,9 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { if (BITS_PER_LONG == 64 || buf->nbufs == 1) - return buf->u.direct.buf + offset; + return buf->direct.buf + offset; else - return buf->u.page_list[offset >> PAGE_SHIFT].buf + + return buf->page_list[offset >> PAGE_SHIFT].buf + (offset & (PAGE_SIZE - 1)); } -- cgit v1.2.3 From ea54b10c7773007e173da31fe7adcc049da33331 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 28 Jan 2008 10:40:59 +0200 Subject: IB/mlx4: Use multiple WQ blocks to post smaller send WQEs ConnectX HCA supports shrinking WQEs, so that a single work request can be made of multiple units of wqe_shift. This way, WRs can differ in size, and do not have to be a power of 2 in size, saving memory and speeding up send WR posting. Unfortunately, if we do this then the wqe_index field in CQEs can't be used to look up the WR ID anymore, so our implementation does this only if selective signaling is off. Further, on 32-bit platforms, we can't use vmap() to make the QP buffer virtually contigious. Thus we have to use constant-sized WRs to make sure a WR is always fully within a single page-sized chunk. Finally, we use WRs with the NOP opcode to avoid wrapping around the queue buffer in the middle of posting a WR, and we set the NoErrorCompletion bit to avoid getting completions with error for NOP WRs. However, NEC is only supported starting with firmware 2.2.232, so we use constant-sized WRs for older firmware. And, since MLX QPs only support SEND, we use constant-sized WRs in this case. When stamping during NOP posting, do stamping following setting of the NOP WQE valid bit. Signed-off-by: Michael S. Tsirkin Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 5 +++++ include/linux/mlx4/qp.h | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4210ac4a8bcd..6cdf813cd478 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -133,6 +133,11 @@ enum { MLX4_STAT_RATE_OFFSET = 5 }; +static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) +{ + return (major << 32) | (minor << 16) | subminor; +} + struct mlx4_caps { u64 fw_ver; int num_ports; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 3968b943259a..09a2230923f2 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -154,7 +154,11 @@ struct mlx4_qp_context { u32 reserved5[10]; }; +/* Which firmware version adds support for NEC (NoErrorCompletion) bit */ +#define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232) + enum { + MLX4_WQE_CTRL_NEC = 1 << 29, MLX4_WQE_CTRL_FENCE = 1 << 6, MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, MLX4_WQE_CTRL_SOLICITED = 1 << 1, -- cgit v1.2.3