From 02d92f7903647119e125b24f5470f96cee0d4b4b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 19 Jan 2018 16:13:01 -0800 Subject: net/mlx5: CQ Database per EQ Before this patch the driver had one CQ database protected via one spinlock, this spinlock is meant to synchronize between CQ adding/removing and CQ IRQ interrupt handling. On a system with large number of CPUs and on a work load that requires lots of interrupts, this global spinlock becomes a very nasty hotspot and introduces a contention between the active cores, which will significantly hurt performance and becomes a bottleneck that prevents seamless cpu scaling. To solve this we simply move the CQ database and its spinlock to be per EQ (IRQ), thus per core. Tested with: system: 2 sockets, 14 cores per socket, hyperthreading, 2x14x2=56 cores netperf command: ./super_netperf 200 -P 0 -t TCP_RR -H -l 30 -- -r 300,300 -o -s 1M,1M -S 1M,1M WITHOUT THIS PATCH: Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: all 4.32 0.00 36.15 0.09 0.00 34.02 0.00 0.00 0.00 25.41 Samples: 2M of event 'cycles:pp', Event count (approx.): 1554616897271 Overhead Command Shared Object Symbol + 14.28% swapper [kernel.vmlinux] [k] intel_idle + 12.25% swapper [kernel.vmlinux] [k] queued_spin_lock_slowpath + 10.29% netserver [kernel.vmlinux] [k] queued_spin_lock_slowpath + 1.32% netserver [kernel.vmlinux] [k] mlx5e_xmit WITH THIS PATCH: Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: all 4.27 0.00 34.31 0.01 0.00 18.71 0.00 0.00 0.00 42.69 Samples: 2M of event 'cycles:pp', Event count (approx.): 1498132937483 Overhead Command Shared Object Symbol + 23.33% swapper [kernel.vmlinux] [k] intel_idle + 1.69% netserver [kernel.vmlinux] [k] mlx5e_xmit Tested-by: Song Liu Signed-off-by: Saeed Mahameed Reviewed-by: Gal Pressman --- include/linux/mlx5/cq.h | 3 +-- include/linux/mlx5/driver.h | 22 +++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 48c181a2acc9..06ba425a6ad7 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -60,6 +60,7 @@ struct mlx5_core_cq { } tasklet_ctx; int reset_notify_added; struct list_head reset_notify; + struct mlx5_eq *eq; }; @@ -171,8 +172,6 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL); } -int mlx5_init_cq_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6ed79a8a8318..96e003db2bcd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -375,8 +375,15 @@ struct mlx5_eq_pagefault { mempool_t *pool; }; +struct mlx5_cq_table { + /* protect radix tree */ + spinlock_t lock; + struct radix_tree_root tree; +}; + struct mlx5_eq { struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; __be32 __iomem *doorbell; u32 cons_index; struct mlx5_buf buf; @@ -526,13 +533,6 @@ struct mlx5_core_health { struct delayed_work recover_work; }; -struct mlx5_cq_table { - /* protect radix tree - */ - spinlock_t lock; - struct radix_tree_root tree; -}; - struct mlx5_qp_table { /* protect radix tree */ @@ -654,10 +654,6 @@ struct mlx5_priv { struct dentry *cmdif_debugfs; /* end: qp staff */ - /* start: cq staff */ - struct mlx5_cq_table cq_table; - /* end: cq staff */ - /* start: mkey staff */ struct mlx5_mkey_table mkey_table; /* end: mkey staff */ @@ -1053,12 +1049,12 @@ int mlx5_eq_init(struct mlx5_core_dev *dev); void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); +void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); +void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, enum mlx5_eq_type type); -- cgit v1.2.3 From f105b45bf77ced96e516e1cd771c41bb7e8c830b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 1 Feb 2018 03:32:00 -0800 Subject: net/mlx5: CQ hold/put API Now as the CQ table is per EQ, add an API to hold/put CQ to be used from eq.c in downstream patch. Signed-off-by: Saeed Mahameed Reviewed-by: Gal Pressman --- include/linux/mlx5/cq.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 06ba425a6ad7..445ad194e0fe 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -172,6 +172,17 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL); } +static inline void mlx5_cq_hold(struct mlx5_core_cq *cq) +{ + refcount_inc(&cq->refcount); +} + +static inline void mlx5_cq_put(struct mlx5_core_cq *cq) +{ + if (refcount_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); -- cgit v1.2.3 From 3ac7afdbcf243d6c79c1569d9e29aef0096e4743 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 1 Feb 2018 04:37:07 -0800 Subject: net/mlx5: Move CQ completion and event forwarding logic to eq.c Since CQ tree is now per EQ, CQ completion and event forwarding became specific implementation of EQ logic, this patch moves that logic to eq.c and makes those functions static. Signed-off-by: Saeed Mahameed Reviewed-by: Gal Pressman --- include/linux/mlx5/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96e003db2bcd..09e2f3e8753c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1049,12 +1049,10 @@ int mlx5_eq_init(struct mlx5_core_dev *dev); void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, enum mlx5_eq_type type); -- cgit v1.2.3 From 3ec5693b17314b58977ba3c8d720d1f9cfef39f8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 1 Feb 2018 05:42:06 -0800 Subject: net/mlx5: Remove redundant EQ API exports EQ structure and API is private to mlx5_core driver only, external drivers should not have access or the means to manipulate EQ objects. Remove redundant exports and move API functions out of the linux/mlx5 include directory into the driver's mlx5_core.h private include file. Signed-off-by: Saeed Mahameed Reviewed-by: Gal Pressman --- include/linux/mlx5/driver.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 09e2f3e8753c..2860a253275b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1045,20 +1045,11 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -int mlx5_eq_init(struct mlx5_core_dev *dev); -void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type); -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_start_eqs(struct mlx5_core_dev *dev); -void mlx5_stop_eqs(struct mlx5_core_dev *dev); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); @@ -1070,14 +1061,6 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); -int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); -int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node); -- cgit v1.2.3 From 388ca8be00370db132464e27f745b8a0add19fcb Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Tue, 2 Jan 2018 16:08:06 +0200 Subject: IB/mlx5: Implement fragmented completion queue (CQ) The current implementation of create CQ requires contiguous memory, such requirement is problematic once the memory is fragmented or the system is low in memory, it causes for failures in dma_zalloc_coherent(). This patch implements new scheme of fragmented CQ to overcome this issue by introducing new type: 'struct mlx5_frag_buf_ctrl' to allocate fragmented buffers, rather than contiguous ones. Base the Completion Queues (CQs) on this new fragmented buffer. It fixes following crashes: kworker/29:0: page allocation failure: order:6, mode:0x80d0 CPU: 29 PID: 8374 Comm: kworker/29:0 Tainted: G OE 3.10.0 Workqueue: ib_cm cm_work_handler [ib_cm] Call Trace: [<>] dump_stack+0x19/0x1b [<>] warn_alloc_failed+0x110/0x180 [<>] __alloc_pages_slowpath+0x6b7/0x725 [<>] __alloc_pages_nodemask+0x405/0x420 [<>] dma_generic_alloc_coherent+0x8f/0x140 [<>] x86_swiotlb_alloc_coherent+0x21/0x50 [<>] mlx5_dma_zalloc_coherent_node+0xad/0x110 [mlx5_core] [<>] ? mlx5_db_alloc_node+0x69/0x1b0 [mlx5_core] [<>] mlx5_buf_alloc_node+0x3e/0xa0 [mlx5_core] [<>] mlx5_buf_alloc+0x14/0x20 [mlx5_core] [<>] create_cq_kernel+0x90/0x1f0 [mlx5_ib] [<>] mlx5_ib_create_cq+0x3b0/0x4e0 [mlx5_ib] Signed-off-by: Yonatan Cohen Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 51 ++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2860a253275b..bfea26af6de5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -345,13 +345,6 @@ struct mlx5_buf_list { dma_addr_t map; }; -struct mlx5_buf { - struct mlx5_buf_list direct; - int npages; - int size; - u8 page_shift; -}; - struct mlx5_frag_buf { struct mlx5_buf_list *frags; int npages; @@ -359,6 +352,15 @@ struct mlx5_frag_buf { u8 page_shift; }; +struct mlx5_frag_buf_ctrl { + struct mlx5_frag_buf frag_buf; + u32 sz_m1; + u32 frag_sz_m1; + u8 log_sz; + u8 log_stride; + u8 log_frag_strides; +}; + struct mlx5_eq_tasklet { struct list_head list; struct list_head process_list; @@ -386,7 +388,7 @@ struct mlx5_eq { struct mlx5_cq_table cq_table; __be32 __iomem *doorbell; u32 cons_index; - struct mlx5_buf buf; + struct mlx5_frag_buf buf; int size; unsigned int irqn; u8 eqn; @@ -932,9 +934,9 @@ struct mlx5_hca_vport_context { bool grh_required; }; -static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) +static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset) { - return buf->direct.buf + offset; + return buf->frags->buf + offset; } #define STRUCT_FIELD(header, field) \ @@ -973,6 +975,25 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } +static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc, + void *cqc) +{ + fbc->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz); + fbc->log_sz = MLX5_GET(cqc, cqc, log_cq_size); + fbc->sz_m1 = (1 << fbc->log_sz) - 1; + fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride; + fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1; +} + +static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, + u32 ix) +{ + unsigned int frag = (ix >> fbc->log_frag_strides); + + return fbc->frag_buf.frags[frag].buf + + ((fbc->frag_sz_m1 & ix) << fbc->log_stride); +} + int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); @@ -998,9 +1019,10 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_buf *buf, int node); -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); -void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); + struct mlx5_frag_buf *buf, int node); +int mlx5_buf_alloc(struct mlx5_core_dev *dev, + int size, struct mlx5_frag_buf *buf); +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); @@ -1045,7 +1067,8 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); + +void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); -- cgit v1.2.3