summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-01-02 15:51:21 -0500
committerDavid S. Miller <davem@davemloft.net>2017-01-02 15:51:21 -0500
commit525dfa2cdce4f5ab76251b5e57ebabf4f2dfc40c (patch)
tree2e7e1a80aa9aaaf328e1b1c64821bc76e27a487d /include/linux
parent85eb018fec29eae60d20f6d04af854308ffb3a05 (diff)
parentaa8e08d2f523501c40b0e70f1c4ecacb97195931 (diff)
Merge branch 'mlx5-odp'
Saeed Mahameed says: ==================== Mellanox mlx5 core and ODP updates 2017-01-01 The following eleven patches mainly come from Artemy Kovalyov who expanded mlx5 on-demand-paging (ODP) support. In addition there are three cleanup patches which don't change any functionality, but are needed to align codebase prior accepting other patches. Memory region (MR) in IB can be huge and ODP (on-demand paging) technique allows to use unpinned memory, which can be consumed and released on demand. This allows to applications do not pin down the underlying physical pages of the address space, and save from them need to track the validity of the mappings. Rather, the HCA requests the latest translations from the OS when pages are not present, and the OS invalidates translations which are no longer valid due to either non-present pages or mapping changes. In existing ODP implementation applications is needed to register memory buffers for communication, though registered memory regions need not have valid mappings at registration time. This patch set performs the following steps to expand current ODP implementation: 1. It refactors UMR to support large regions, by introducing generic function to perform HCA translation table modifications. This function supports both atomic and process contexts and is not limited by number of modified entries. This function allows to enable reallocated memory regions of arbitrary size, so adding MR cache buckets to support up to 16GB MRs. 2. It changes page fault event format and refactor page faults logic together with addition of atomic support. 3. It prepares mlx5 core code to support implicit registration with simplified and relaxed semantics. Implicit ODP semantics allows to applications provide special memory key that represents their complete address space. Thus all IO accesses referencing to this key (with proper access rights associated with the key) wouldn't need not register any virtual address range. Thanks, Artemy, Ilya and Leon v1->v2: - Don't use 'inline' in .c files ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/mlx5/device.h6
-rw-r--r--include/linux/mlx5/driver.h105
-rw-r--r--include/linux/mlx5/mlx5_ifc.h31
-rw-r--r--include/linux/mlx5/qp.h76
4 files changed, 150 insertions, 68 deletions
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 9f489365b3d3..3ccaeff15a80 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -534,7 +534,9 @@ struct mlx5_eqe_page_fault {
__be16 wqe_index;
u16 reserved2;
__be16 packet_length;
- u8 reserved3[12];
+ __be32 token;
+ u8 reserved4[8];
+ __be32 pftype_wq;
} __packed wqe;
struct {
__be32 r_key;
@@ -542,9 +544,9 @@ struct mlx5_eqe_page_fault {
__be16 packet_length;
__be32 rdma_op_len;
__be64 rdma_va;
+ __be32 pftype_token;
} __packed rdma;
} __packed;
- __be32 flags_qpn;
} __packed;
struct mlx5_eqe_vport_change {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0ae55361e674..cfa49bca009c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -42,6 +42,7 @@
#include <linux/vmalloc.h>
#include <linux/radix-tree.h>
#include <linux/workqueue.h>
+#include <linux/mempool.h>
#include <linux/interrupt.h>
#include <linux/mlx5/device.h>
@@ -83,6 +84,7 @@ enum {
MLX5_EQ_VEC_PAGES = 0,
MLX5_EQ_VEC_CMD = 1,
MLX5_EQ_VEC_ASYNC = 2,
+ MLX5_EQ_VEC_PFAULT = 3,
MLX5_EQ_VEC_COMP_BASE,
};
@@ -178,6 +180,14 @@ enum mlx5_port_status {
MLX5_PORT_DOWN = 2,
};
+enum mlx5_eq_type {
+ MLX5_EQ_TYPE_COMP,
+ MLX5_EQ_TYPE_ASYNC,
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ MLX5_EQ_TYPE_PF,
+#endif
+};
+
struct mlx5_uuar_info {
struct mlx5_uar *uars;
int num_uars;
@@ -333,6 +343,14 @@ struct mlx5_eq_tasklet {
spinlock_t lock;
};
+struct mlx5_eq_pagefault {
+ struct work_struct work;
+ /* Pagefaults lock */
+ spinlock_t lock;
+ struct workqueue_struct *wq;
+ mempool_t *pool;
+};
+
struct mlx5_eq {
struct mlx5_core_dev *dev;
__be32 __iomem *doorbell;
@@ -346,7 +364,13 @@ struct mlx5_eq {
struct list_head list;
int index;
struct mlx5_rsc_debug *dbg;
- struct mlx5_eq_tasklet tasklet_ctx;
+ enum mlx5_eq_type type;
+ union {
+ struct mlx5_eq_tasklet tasklet_ctx;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ struct mlx5_eq_pagefault pf_ctx;
+#endif
+ };
};
struct mlx5_core_psv {
@@ -370,13 +394,21 @@ struct mlx5_core_sig_ctx {
u32 sigerr_count;
};
+enum {
+ MLX5_MKEY_MR = 1,
+ MLX5_MKEY_MW,
+};
+
struct mlx5_core_mkey {
u64 iova;
u64 size;
u32 key;
u32 pd;
+ u32 type;
};
+#define MLX5_24BIT_MASK ((1 << 24) - 1)
+
enum mlx5_res_type {
MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
@@ -411,6 +443,9 @@ struct mlx5_eq_table {
struct mlx5_eq pages_eq;
struct mlx5_eq async_eq;
struct mlx5_eq cmd_eq;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ struct mlx5_eq pfault_eq;
+#endif
int num_comp_vectors;
/* protect EQs list
*/
@@ -497,6 +532,7 @@ struct mlx5_fc_stats {
struct mlx5_eswitch;
struct mlx5_lag;
+struct mlx5_pagefault;
struct mlx5_rl_entry {
u32 rate;
@@ -601,6 +637,14 @@ struct mlx5_priv {
struct mlx5_rl_table rl_table;
struct mlx5_port_module_event_stats pme_stats;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ void (*pfault)(struct mlx5_core_dev *dev,
+ void *context,
+ struct mlx5_pagefault *pfault);
+ void *pfault_ctx;
+ struct srcu_struct pfault_srcu;
+#endif
};
enum mlx5_device_state {
@@ -619,6 +663,50 @@ enum mlx5_pci_status {
MLX5_PCI_STATUS_ENABLED,
};
+enum mlx5_pagefault_type_flags {
+ MLX5_PFAULT_REQUESTOR = 1 << 0,
+ MLX5_PFAULT_WRITE = 1 << 1,
+ MLX5_PFAULT_RDMA = 1 << 2,
+};
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+ u32 bytes_committed;
+ u32 token;
+ u8 event_subtype;
+ u8 type;
+ union {
+ /* Initiator or send message responder pagefault details. */
+ struct {
+ /* Received packet size, only valid for responders. */
+ u32 packet_size;
+ /*
+ * Number of resource holding WQE, depends on type.
+ */
+ u32 wq_num;
+ /*
+ * WQE index. Refers to either the send queue or
+ * receive queue, according to event_subtype.
+ */
+ u16 wqe_index;
+ } wqe;
+ /* RDMA responder pagefault details */
+ struct {
+ u32 r_key;
+ /*
+ * Received packet size, minimal size page fault
+ * resolution required for forward progress.
+ */
+ u32 packet_size;
+ u32 rdma_op_len;
+ u64 rdma_va;
+ } rdma;
+ };
+
+ struct mlx5_eq *eq;
+ struct work_struct work;
+};
+
struct mlx5_td {
struct list_head tirs_list;
u32 tdn;
@@ -879,15 +967,13 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
-#endif
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name, struct mlx5_uar *uar);
+ int nent, u64 mask, const char *name,
+ struct mlx5_uar *uar, enum mlx5_eq_type type);
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
int mlx5_start_eqs(struct mlx5_core_dev *dev);
int mlx5_stop_eqs(struct mlx5_core_dev *dev);
@@ -926,6 +1012,10 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
struct mlx5_odp_caps *odp_caps);
int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
u8 port_num, void *out, size_t sz);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
+ u32 wq_num, u8 type, int error);
+#endif
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
@@ -959,7 +1049,7 @@ enum {
};
enum {
- MAX_MR_CACHE_ENTRIES = 16,
+ MAX_MR_CACHE_ENTRIES = 21,
};
enum {
@@ -974,6 +1064,9 @@ struct mlx5_interface {
void (*detach)(struct mlx5_core_dev *dev, void *context);
void (*event)(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param);
+ void (*pfault)(struct mlx5_core_dev *dev,
+ void *context,
+ struct mlx5_pagefault *pfault);
void * (*get_dev)(void *context);
int protocol;
struct list_head list;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 57bec544e20a..15f896781966 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -328,7 +328,7 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
u8 receive[0x1];
u8 write[0x1];
u8 read[0x1];
- u8 reserved_at_4[0x1];
+ u8 atomic[0x1];
u8 srq_receive[0x1];
u8 reserved_at_6[0x1a];
};
@@ -782,11 +782,12 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_eq[0x4];
u8 max_indirection[0x8];
- u8 reserved_at_108[0x1];
+ u8 fixed_buffer_size[0x1];
u8 log_max_mrw_sz[0x7];
u8 reserved_at_110[0x2];
u8 log_max_bsf_list_size[0x6];
- u8 reserved_at_118[0x2];
+ u8 umr_extended_translation_offset[0x1];
+ u8 null_mkey[0x1];
u8 log_max_klm_list_size[0x6];
u8 reserved_at_120[0xa];
@@ -826,9 +827,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_1a9[0x2];
u8 local_ca_ack_delay[0x5];
u8 port_module_event[0x1];
- u8 reserved_at_1b0[0x1];
+ u8 reserved_at_1b1[0x1];
u8 ports_check[0x1];
- u8 reserved_at_1b2[0x1];
+ u8 reserved_at_1b3[0x1];
u8 disable_link_up[0x1];
u8 beacon_led[0x1];
u8 port_type[0x2];
@@ -858,7 +859,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 compact_address_vector[0x1];
u8 striding_rq[0x1];
- u8 reserved_at_201[0x2];
+ u8 reserved_at_202[0x2];
u8 ipoib_basic_offloads[0x1];
u8 reserved_at_205[0xa];
u8 drain_sigerr[0x1];
@@ -1009,10 +1010,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 rndv_offload_rc[0x1];
u8 rndv_offload_dc[0x1];
u8 log_tag_matching_list_sz[0x5];
- u8 reserved_at_5e8[0x3];
+ u8 reserved_at_5f8[0x3];
u8 log_max_xrq[0x5];
- u8 reserved_at_5f0[0x200];
+ u8 reserved_at_600[0x200];
};
enum mlx5_flow_destination_type {
@@ -2569,6 +2570,7 @@ enum {
MLX5_MKC_ACCESS_MODE_PA = 0x0,
MLX5_MKC_ACCESS_MODE_MTT = 0x1,
MLX5_MKC_ACCESS_MODE_KLMS = 0x2,
+ MLX5_MKC_ACCESS_MODE_KSM = 0x3,
};
struct mlx5_ifc_mkc_bits {
@@ -3677,6 +3679,10 @@ struct mlx5_ifc_query_special_contexts_out_bits {
u8 dump_fill_mkey[0x20];
u8 resd_lkey[0x20];
+
+ u8 null_mkey[0x20];
+
+ u8 reserved_at_a0[0x60];
};
struct mlx5_ifc_query_special_contexts_in_bits {
@@ -4769,12 +4775,11 @@ struct mlx5_ifc_page_fault_resume_in_bits {
u8 error[0x1];
u8 reserved_at_41[0x4];
- u8 rdma[0x1];
- u8 read_write[0x1];
- u8 req_res[0x1];
- u8 qpn[0x18];
+ u8 page_fault_type[0x3];
+ u8 wq_number[0x18];
- u8 reserved_at_60[0x20];
+ u8 reserved_at_60[0x8];
+ u8 token[0x18];
};
struct mlx5_ifc_nop_out_bits {
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 0aacb2a7480d..219c699c17b7 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -50,9 +50,6 @@
#define MLX5_BSF_APPTAG_ESCAPE 0x1
#define MLX5_BSF_APPREF_ESCAPE 0x2
-#define MLX5_QPN_BITS 24
-#define MLX5_QPN_MASK ((1 << MLX5_QPN_BITS) - 1)
-
enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX5_QP_OPTPAR_RRE = 1 << 1,
@@ -215,6 +212,7 @@ struct mlx5_wqe_ctrl_seg {
#define MLX5_WQE_CTRL_OPCODE_MASK 0xff
#define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
+#define MLX5_WQE_AV_EXT 0x80000000
enum {
MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4,
@@ -245,6 +243,23 @@ struct mlx5_wqe_masked_atomic_seg {
__be64 compare_mask;
};
+struct mlx5_base_av {
+ union {
+ struct {
+ __be32 qkey;
+ __be32 reserved;
+ } qkey;
+ __be64 dc_key;
+ } key;
+ __be32 dqp_dct;
+ u8 stat_rate_sl;
+ u8 fl_mlid;
+ union {
+ __be16 rlid;
+ __be16 udp_sport;
+ };
+};
+
struct mlx5_av {
union {
struct {
@@ -292,10 +307,14 @@ struct mlx5_wqe_data_seg {
struct mlx5_wqe_umr_ctrl_seg {
u8 flags;
u8 rsvd0[3];
- __be16 klm_octowords;
- __be16 bsf_octowords;
+ __be16 xlt_octowords;
+ union {
+ __be16 xlt_offset;
+ __be16 bsf_octowords;
+ };
__be64 mkey_mask;
- u8 rsvd1[32];
+ __be32 xlt_offset_47_16;
+ u8 rsvd1[28];
};
struct mlx5_seg_set_psv {
@@ -389,6 +408,10 @@ struct mlx5_bsf {
struct mlx5_bsf_inl m_inl;
};
+struct mlx5_mtt {
+ __be64 ptag;
+};
+
struct mlx5_klm {
__be32 bcount;
__be32 key;
@@ -410,46 +433,9 @@ struct mlx5_stride_block_ctrl_seg {
__be16 num_entries;
};
-enum mlx5_pagefault_flags {
- MLX5_PFAULT_REQUESTOR = 1 << 0,
- MLX5_PFAULT_WRITE = 1 << 1,
- MLX5_PFAULT_RDMA = 1 << 2,
-};
-
-/* Contains the details of a pagefault. */
-struct mlx5_pagefault {
- u32 bytes_committed;
- u8 event_subtype;
- enum mlx5_pagefault_flags flags;
- union {
- /* Initiator or send message responder pagefault details. */
- struct {
- /* Received packet size, only valid for responders. */
- u32 packet_size;
- /*
- * WQE index. Refers to either the send queue or
- * receive queue, according to event_subtype.
- */
- u16 wqe_index;
- } wqe;
- /* RDMA responder pagefault details */
- struct {
- u32 r_key;
- /*
- * Received packet size, minimal size page fault
- * resolution required for forward progress.
- */
- u32 packet_size;
- u32 rdma_op_len;
- u64 rdma_va;
- } rdma;
- };
-};
-
struct mlx5_core_qp {
struct mlx5_core_rsc_common common; /* must be first */
void (*event) (struct mlx5_core_qp *, int);
- void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
int qpn;
struct mlx5_rsc_debug *dbg;
int pid;
@@ -549,10 +535,6 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
- u8 context, int error);
-#endif
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *rq);
void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,