From 0953fffec9ba022f63bfe01e86427530d8320d5c Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:50 +0300 Subject: RDMA/uverbs: Add UVERBS_ATTR_CONST_IN to the specs language This makes it clear and safe to access constants passed in from user space. We define a consistent ABI of u64 for all constants, and verify that the data passed in can be represented by the type the user supplies. The expectation is this will always be used with an enum declaring the constant values, and the user will use the enum type as input to the accessor. To retrieve the attribute value we introduce two helper calls - one standard which may fail if attribute is not valid and one where caller can provide a default value which will be used in case the attribute is not valid (useful when attribute is optional). Signed-off-by: Jason Gunthorpe Signed-off-by: Ariel Levkovich Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky --- include/rdma/uverbs_ioctl.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 9e997c3c2f04..fc2e52234a2a 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -365,6 +365,15 @@ struct uverbs_object_tree_def { __VA_ARGS__ }, \ }) +/* An input value that is a member in the enum _enum_type. */ +#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \ + UVERBS_ATTR_PTR_IN( \ + _attr_id, \ + UVERBS_ATTR_SIZE( \ + sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \ + sizeof(u64)), \ + __VA_ARGS__) + /* * An input value that is a bitwise combination of values of _enum_type. * This permits the flag value to be passed as either a u32 or u64, it must @@ -603,6 +612,9 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } +int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val); #else static inline int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, @@ -631,6 +643,34 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, { return ERR_PTR(-EINVAL); } +static inline int +_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val) +{ + return -EINVAL; +} #endif +#define uverbs_get_const(_to, _attrs_bundle, _idx) \ + ({ \ + s64 _val; \ + int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \ + type_min(typeof(*_to)), \ + type_max(typeof(*_to)), NULL); \ + (*_to) = _val; \ + _ret; \ + }) + +#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ + ({ \ + s64 _val; \ + s64 _def_val = _default; \ + int _ret = \ + _uverbs_get_const(&_val, _attrs_bundle, _idx, \ + type_min(typeof(*_to)), \ + type_max(typeof(*_to)), &_def_val); \ + (*_to) = _val; \ + _ret; \ + }) #endif -- cgit v1.2.3 From b4749bf25652689d8e33827460266b78bb2ec42c Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:51 +0300 Subject: RDMA/mlx5: Add a new flow action verb - modify header Expose the ability to create a flow action which changes packet headers. The data passed from userspace should be modify header actions as defined by HW specification. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 10 ++++++++++ include/uapi/rdma/mlx5_user_ioctl_verbs.h | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 9c51801b9e64..9c83e13c0e89 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -166,4 +166,14 @@ enum mlx5_ib_flow_methods { MLX5_IB_METHOD_DESTROY_FLOW, }; +enum mlx5_ib_flow_action_methods { + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_create_flow_action_create_modify_header_attrs { + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, +}; + #endif diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 8a2fb33f3ed4..ceb6d0d8529a 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -39,5 +39,10 @@ enum mlx5_ib_uapi_flow_action_flags { MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0, }; +enum mlx5_ib_uapi_flow_table_type { + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0, + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, +}; + #endif -- cgit v1.2.3 From 841eefc5cb57030ad05a0c4bc285f93ffa668ad9 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:52 +0300 Subject: RDMA/uverbs: Add generic function to fill in flow action object Refactor the initialization of a flow action object to a common function. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_std_types.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 3b00231cc084..526d918fcd5a 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -140,5 +140,17 @@ __uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, #define uobj_alloc(_type, _ufile, _ib_dev) \ __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev) +static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action, + struct ib_uobject *uobj, + struct ib_device *ib_dev, + enum ib_flow_action_type type) +{ + atomic_set(&action->usecnt, 0); + action->device = ib_dev; + action->type = type; + action->uobject = uobj; + uobj->object = action; +} + #endif -- cgit v1.2.3 From 08aeb97cb82483192bd8ad8e60d1b73ce1b75923 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:53 +0300 Subject: RDMA/mlx5: Add new flow action verb - packet reformat For now, only add L2_TUNNEL_TO_L2 option. This will allow to perform generic decap operation if the encapsulating protocol is L2 based, and the inner packet is also L2 based. For example this can be used to decap VXLAN packets. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 7 +++++++ include/uapi/rdma/mlx5_user_ioctl_verbs.h | 4 ++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 9c83e13c0e89..40db7fca3d0b 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -168,6 +168,7 @@ enum mlx5_ib_flow_methods { enum mlx5_ib_flow_action_methods { MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, }; enum mlx5_ib_create_flow_action_create_modify_header_attrs { @@ -176,4 +177,10 @@ enum mlx5_ib_create_flow_action_create_modify_header_attrs { MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, }; +enum mlx5_ib_create_flow_action_create_packet_reformat_attrs { + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, +}; + #endif diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index ceb6d0d8529a..b5fda0fcd484 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -44,5 +44,9 @@ enum mlx5_ib_uapi_flow_table_type { MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, }; +enum mlx5_ib_uapi_flow_action_packet_reformat_type { + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0, +}; + #endif -- cgit v1.2.3 From a090d0d859ff88dd4c34614d01cee9b0603f4313 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:54 +0300 Subject: RDMA/mlx5: Extend packet reformat verbs We expose new actions: L2_TO_L2_TUNNEL - A generic encap from L2 to L2, the data passed should be the encapsulating headers. L3_TUNNEL_TO_L2 - Will do decap where the inner packet starts from L3, the data should be mac or mac + vlan (14 or 18 bytes). L2_TO_L3_TUNNEL - Will do encap where is L2 of the original packet will not be included, the data should be the encapsulating header. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + include/uapi/rdma/mlx5_user_ioctl_verbs.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 40db7fca3d0b..75c7093fd95b 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -181,6 +181,7 @@ enum mlx5_ib_create_flow_action_create_packet_reformat_attrs { MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, }; #endif diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index b5fda0fcd484..4ef62c0e8452 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -46,6 +46,9 @@ enum mlx5_ib_uapi_flow_table_type { enum mlx5_ib_uapi_flow_action_packet_reformat_type { MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0, + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1, + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2, + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, }; #endif -- cgit v1.2.3 From f794809a7259dfaa3d47d90ef5a86007cf48b1ce Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 27 Aug 2018 08:35:55 +0300 Subject: IB/core: Add an unbound WQ type to the new CQ API The upstream kernel commit cited below modified the workqueue in the new CQ API to be bound to a specific CPU (instead of being unbound). This caused ALL users of the new CQ API to use the same bound WQ. Specifically, MAD handling was severely delayed when the CPU bound to the WQ was busy handling (higher priority) interrupts. This caused a delay in the MAD "heartbeat" response handling, which resulted in ports being incorrectly classified as "down". To fix this, add a new "unbound" WQ type to the new CQ API, so that users have the option to choose either a bound WQ or an unbound WQ. For MADs, choose the new "unbound" WQ. Fixes: b7363e67b23e ("IB/device: Convert ib-comp-wq to be CPU-bound") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e950c2a68f06..df8d234a2b56 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -71,6 +71,7 @@ extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; +extern struct workqueue_struct *ib_comp_unbound_wq; union ib_gid { u8 raw[16]; @@ -1570,9 +1571,10 @@ struct ib_ah { typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); enum ib_poll_context { - IB_POLL_DIRECT, /* caller context, no hw completions */ - IB_POLL_SOFTIRQ, /* poll from softirq context */ - IB_POLL_WORKQUEUE, /* poll from workqueue */ + IB_POLL_DIRECT, /* caller context, no hw completions */ + IB_POLL_SOFTIRQ, /* poll from softirq context */ + IB_POLL_WORKQUEUE, /* poll from workqueue */ + IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */ }; struct ib_cq { @@ -1589,6 +1591,7 @@ struct ib_cq { struct irq_poll iop; struct work_struct work; }; + struct workqueue_struct *comp_wq; /* * Implementation details of the RDMA core, don't use in drivers: */ -- cgit v1.2.3 From 6ceb6331b3291694fb6ceba625219f51447c3fa2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 3 Sep 2018 20:18:03 +0300 Subject: RDMA/uverbs: Declare closing variable as boolean The "closing" variable is used as boolean and set to "true" in one place, update the declaration of that variable and their other assignment to proper type. Fixes: e951747a087a ("IB/uverbs: Rework the locking for cleaning up the ucontext") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index df8d234a2b56..a4c3a09a91bc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1486,7 +1486,7 @@ struct ib_ucontext { * it is set when we are closing the file descriptor and indicates * that mm_sem may be locked. */ - int closing; + bool closing; bool cleanup_retryable; -- cgit v1.2.3 From adee9f3f3bbb317c5469f84deba01eef4b86515b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 5 Sep 2018 09:47:58 +0300 Subject: RDMA/core: Depend on device_add() to add device attributes Instead of adding/removing device attribute files, depend on device_add() which considers adding these device files based on NULL terminated attributes group array. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e950c2a68f06..cd0f935f0bc1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2536,6 +2536,9 @@ struct ib_device { struct module *owner; struct device dev; + /* First group for device attributes, NULL terminated array */ + const struct attribute_group *groups[2]; + struct kobject *ports_parent; struct list_head port_list; -- cgit v1.2.3 From e1f540c3ed0e9634d0f8c4600f3c85df8aff4ae2 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 28 Aug 2018 15:08:45 +0300 Subject: RDMA/core: Define client_data_lock as rwlock instead of spinlock Even though device registration/unregistration and client registration/unregistration is not a performance path, define the client_data_lock as rwlock for code clarity. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ddc7c317e136..995f176d4782 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2256,10 +2256,11 @@ struct ib_device { struct list_head event_handler_list; spinlock_t event_handler_lock; - spinlock_t client_data_lock; + rwlock_t client_data_lock; struct list_head core_list; /* Access to the client_data_list is protected by the client_data_lock - * spinlock and the lists_rwsem read-write semaphore */ + * rwlock and the lists_rwsem read-write semaphore + */ struct list_head client_data_list; struct ib_cache cache; -- cgit v1.2.3 From 4269024639f6ff9a1967c4bfa5a2ba7d9853384a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 4 Sep 2018 11:45:14 -0400 Subject: RDMA/core: Document CM @event_handler function Code audit suggests that the RDMA CM event handler callback function is _always_ invoked in a context that is safe to block. That's important for consumer implementers to know, so document that in the comment before rdma_create_id (where the handler function is set up by the consumer). Signed-off-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_cm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 5d71a7f51a9f..53d93c7d8e01 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -152,7 +152,11 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, * @ps: RDMA port space. * @qp_type: type of queue pair associated with the id. * - * The id holds a reference on the network namespace until it is destroyed. + * Returns a new rdma_cm_id. The id holds a reference on the network + * namespace until it is destroyed. + * + * The event handler callback serializes on the id's mutex and is + * allowed to sleep. */ #define rdma_create_id(net, event_handler, context, ps, qp_type) \ __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \ -- cgit v1.2.3 From eb93c82ed8c77f00955f2891483170194c3be92c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 4 Sep 2018 11:45:20 -0400 Subject: RDMA/core: Document QP @event_handler function Add helpful warning for RDMA consumer implementers. Signed-off-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 995f176d4782..f687faadf33b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1138,7 +1138,9 @@ enum ib_qp_create_flags { */ struct ib_qp_init_attr { + /* Consumer's event_handler callback must not block */ void (*event_handler)(struct ib_event *, void *); + void *qp_context; struct ib_cq *send_cq; struct ib_cq *recv_cq; -- cgit v1.2.3 From 70cd20aed00f719f3536154df02596106e431e45 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 6 Sep 2018 17:27:01 +0300 Subject: IB/uverbs: Add IDRs array attribute type to ioctl() interface Methods sometimes need to get a flexible set of IDRs and not a strict set as can be achieved today by the conventional IDR attribute. Add a new IDRS_ARRAY attribute to the generic uverbs ioctl layer. IDRS_ARRAY points to array of idrs of the same object type and same access rights, only write and read are supported. Signed-off-by: Guy Levi Signed-off-by: Jason Gunthorpe `` Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_ioctl.h | 71 +++++++++++++++++++++++++++++++- include/uapi/rdma/rdma_user_ioctl_cmds.h | 7 +++- 2 files changed, 75 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index fc2e52234a2a..84d3d15f1f38 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -52,6 +52,7 @@ enum uverbs_attr_type { UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, UVERBS_ATTR_TYPE_ENUM_IN, + UVERBS_ATTR_TYPE_IDRS_ARRAY, }; enum uverbs_obj_access { @@ -101,7 +102,7 @@ struct uverbs_attr_spec { } enum_def; } u; - /* This weird split of the enum lets us remove some padding */ + /* This weird split lets us remove some padding */ union { struct { /* @@ -111,6 +112,17 @@ struct uverbs_attr_spec { */ const struct uverbs_attr_spec *ids; } enum_def; + + struct { + /* + * higher bits mean the namespace and lower bits mean + * the type id within the namespace. + */ + u16 obj_type; + u16 min_len; + u16 max_len; + u8 access; + } objs_arr; } u2; }; @@ -251,6 +263,11 @@ static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key) return attr_key - 1; } +static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey) +{ + return attr_bkey + 1; +} + /* * ======================================= * Verbs definitions @@ -323,6 +340,27 @@ struct uverbs_object_tree_def { #define UA_MANDATORY .mandatory = 1 #define UA_OPTIONAL .mandatory = 0 +/* + * min_len must be bigger than 0 and _max_len must be smaller than 4095. Only + * READ\WRITE accesses are supported. + */ +#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \ + ...) \ + (&(const struct uverbs_attr_def){ \ + .id = (_attr_id) + \ + BUILD_BUG_ON_ZERO((_min_len) == 0 || \ + (_max_len) > \ + PAGE_SIZE / sizeof(void *) || \ + (_min_len) > (_max_len) || \ + (_access) == UVERBS_ACCESS_NEW || \ + (_access) == UVERBS_ACCESS_DESTROY), \ + .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \ + .u2.objs_arr.obj_type = _idr_type, \ + .u2.objs_arr.access = _access, \ + .u2.objs_arr.min_len = _min_len, \ + .u2.objs_arr.max_len = _max_len, \ + __VA_ARGS__ } }) + #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ @@ -440,10 +478,16 @@ struct uverbs_obj_attr { const struct uverbs_api_attr *attr_elm; }; +struct uverbs_objs_arr_attr { + struct ib_uobject **uobjects; + u16 len; +}; + struct uverbs_attr { union { struct uverbs_ptr_attr ptr_attr; struct uverbs_obj_attr obj_attr; + struct uverbs_objs_arr_attr objs_arr_attr; }; }; @@ -516,6 +560,31 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) return attr->ptr_attr.len; } +/** + * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for + * UVERBS_ATTR_TYPE_IDRS_ARRAY. + * @arr: Returned pointer to array of pointers for uobjects or NULL if + * the attribute isn't provided. + * + * Return: The array length or 0 if no attribute was provided. + */ +static inline int uverbs_attr_get_uobjs_arr( + const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx, + struct ib_uobject ***arr) +{ + const struct uverbs_attr *attr = + uverbs_attr_get(attrs_bundle, attr_idx); + + if (IS_ERR(attr)) { + *arr = NULL; + return 0; + } + + *arr = attr->objs_arr_attr.uobjects; + + return attr->objs_arr_attr.len; +} + static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) { return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 24800c6c1f32..06c34d99be85 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -53,7 +53,7 @@ enum { struct ib_uverbs_attr { __u16 attr_id; /* command specific type attribute */ - __u16 len; /* only for pointers */ + __u16 len; /* only for pointers and IDRs array */ __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ union { struct { @@ -63,7 +63,10 @@ struct ib_uverbs_attr { __u16 reserved; } attr_data; union { - /* Used by PTR_IN/OUT, ENUM_IN and IDR */ + /* + * ptr to command, inline data, idr/fd or + * ptr to __u32 array of IDRs + */ __aligned_u64 data; /* Used by FD_IN and FD_OUT */ __s64 data_s64; -- cgit v1.2.3 From 86e1d464a8ccd627b6ea3e9a98a0389b0d27fd1f Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 6 Sep 2018 17:27:02 +0300 Subject: RDMA/uverbs: Move flow resources initialization Use ib_set_flow() when initializing flow related resources. Signed-off-by: Mark Bloch Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 14 -------------- include/rdma/uverbs_std_types.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f687faadf33b..6076c9b72ab9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4162,20 +4162,6 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) } -static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, - struct ib_qp *qp, struct ib_device *device) -{ - uobj->object = ibflow; - ibflow->uobject = uobj; - - if (qp) { - atomic_inc(&qp->usecnt); - ibflow->qp = qp; - } - - ibflow->device = device; -} - /** * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 526d918fcd5a..dfd6d35f1783 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -152,5 +152,38 @@ static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action, uobj->object = action; } +struct ib_uflow_resources { + size_t max; + size_t num; + size_t collection_num; + size_t counters_num; + struct ib_counters **counters; + struct ib_flow_action **collection; +}; + +struct ib_uflow_object { + struct ib_uobject uobject; + struct ib_uflow_resources *resources; +}; + +static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, + struct ib_qp *qp, struct ib_device *device, + struct ib_uflow_resources *uflow_res) +{ + struct ib_uflow_object *uflow; + + uobj->object = ibflow; + ibflow->uobject = uobj; + + if (qp) { + atomic_inc(&qp->usecnt); + ibflow->qp = qp; + } + + ibflow->device = device; + uflow = container_of(uobj, typeof(*uflow), uobject); + uflow->resources = uflow_res; +} + #endif -- cgit v1.2.3 From fa76d24ee0aa24fff3fa9ba71fc2179fb88fef6a Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 6 Sep 2018 17:27:06 +0300 Subject: RDMA/mlx5: Add flow actions support to raw create flow Support attaching flow actions to a flow rule via raw create flow. For now only NIC RX path is supported. This change requires to export flow resources management functions so we can maintain proper bookkeeping of flow actions. Signed-off-by: Mark Bloch Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_std_types.h | 6 ++++++ include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index dfd6d35f1783..3db2802fbc68 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -166,6 +166,12 @@ struct ib_uflow_object { struct ib_uflow_resources *resources; }; +struct ib_uflow_resources *flow_resources_alloc(size_t num_specs); +void flow_resources_add(struct ib_uflow_resources *uflow_res, + enum ib_flow_spec_type type, + void *ibobj); +void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); + static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, struct ib_qp *qp, struct ib_device *device, struct ib_uflow_resources *uflow_res) diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 75c7093fd95b..91c3d42ebd0f 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -155,6 +155,7 @@ enum mlx5_ib_create_flow_attrs { MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, MLX5_IB_ATTR_CREATE_FLOW_MATCHER, + MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, }; enum mlx5_ib_destoy_flow_attrs { -- cgit v1.2.3 From a7ee18bdee837e4703f01588993504b72074ffc6 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 6 Sep 2018 17:27:08 +0300 Subject: RDMA/mlx5: Allow creating a matcher for a NIC TX flow table Currently a matcher can only be created and attached to a NIC RX flow table. Extend it to allow it on NIC TX flow tables as well. In order to achieve that, we: 1) Expose a new attribute: MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS. enum ib_flow_flags is used as valid flags. Only IB_FLOW_ATTR_FLAGS_EGRESS is supported. 2) Remove the requirement to have a DEVX or QP destination when creating a flow. A flow added to NIC TX flow table will forward the packet outside of the vport (Wire or E-Switch in the SR-iOV case). Signed-off-by: Mark Bloch Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 91c3d42ebd0f..fb4a8b17cca8 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -125,6 +125,7 @@ enum mlx5_ib_flow_matcher_create_attrs { MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, }; enum mlx5_ib_flow_matcher_destroy_attrs { -- cgit v1.2.3 From 0b79b27748cbec221e1ceabf63578198602bf01d Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 10 Sep 2018 09:49:27 -0700 Subject: IB/{hfi1, qib, rdmavt}: Schedule multi RC/UC packets instead of posting The post_send() path determines if it should post directly or, schedule the post for later. The current logic is: if the swqe ring is empty or (for hfi1) wqe->length <= piothreshold post the send else schedule This can allow large requests to call the send engine directly. Large requests can potentially produce a large number of packets prior to returning to the caller, blocking the caller from posting more requests, and allowing better parallel processing. Allow the driver(s) more say in this logic (pass call_send to the driver, rather than examining a return value). Update hfi1/qib logic to schedule the send engine if an RC or UC message is larger than the QP MTU size. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_vt.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e79229a0cf01..e32facdd9fd3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -214,8 +214,14 @@ struct rvt_driver_provided { void (*schedule_send)(struct rvt_qp *qp); void (*schedule_send_no_lock)(struct rvt_qp *qp); - /* Driver specific work request checking */ - int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); + /* + * Validate the wqe. This needs to be done prior to inserting the + * wqe into the ring, but after the wqe has been set up. Allow for + * driver specific work request checking by providing a callback. + * call_send indicates if the wqe should be posted or scheduled. + */ + int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe, + bool *call_send); /* * Sometimes rdmavt needs to kick the driver's send progress. That is -- cgit v1.2.3 From 77addc524473ee9a85d2ef5747a32173c85768d4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 5 Sep 2018 12:54:20 +0300 Subject: RDMA/core: Rename rdma_copy_addr to rdma_copy_src_l2_addr Now that rdma_copy_addr() only copies the source addresses and all callers are interested in copying only source addresses, simplify it to drop the destination address argument. Given that it only copies source layer2 addresses, rename it to rdma_copy_src_l2_addr for better code readability. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_addr.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 77c7908b7d73..676514a930ab 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -105,10 +105,6 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, void rdma_addr_cancel(struct rdma_dev_addr *addr); -void rdma_copy_addr(struct rdma_dev_addr *dev_addr, - const struct net_device *dev, - const unsigned char *dst_dev_addr); - int rdma_addr_size(const struct sockaddr *addr); int rdma_addr_size_in6(struct sockaddr_in6 *addr); int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); -- cgit v1.2.3 From 0e9d2c19bff1d351005afb2f990a913e395ba6d4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 5 Sep 2018 12:54:26 +0300 Subject: RDMA/core: Consider net ns of gid attribute for RoCE When resolving destination address or route, when net namespace is unavailable, refer to the net namespace of the netdevice of the SGID attribute. This is typically the case for requests arriving from the network for RoCE ports. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_addr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 676514a930ab..2e33b1529015 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -95,12 +95,15 @@ int rdma_translate_ip(const struct sockaddr *addr, * @timeout_ms: Amount of time to wait for the address resolution to complete. * @callback: Call invoked once address resolution has completed, timed out, * or been canceled. A status of 0 indicates success. + * @resolve_by_gid_attr: Resolve the ip based on the GID attribute from + * rdma_dev_addr. * @context: User-specified context associated with the call. */ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), + bool resolve_by_gid_attr, void *context); void rdma_addr_cancel(struct rdma_dev_addr *addr); -- cgit v1.2.3 From cb816cd22618b1822667a4c2c80023ffd0261777 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 13 Sep 2018 21:47:52 +0800 Subject: RDMA: Remove duplicated include from ib_addr.h Remove duplicated include. Signed-off-by: YueHaibing Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 2e33b1529015..e09eca91eb18 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -46,7 +46,6 @@ #include #include #include -#include #include /** -- cgit v1.2.3 From 9a59739bd01f77db6fbe2955a4fce165f0f43568 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 14 Aug 2018 15:33:02 -0700 Subject: IB/rxe: Revise the ib_wr_opcode enum This enum has become part of the uABI, as both RXE and the ib_uverbs_post_send() command expect userspace to supply values from this enum. So it should be properly placed in include/uapi/rdma. In userspace this enum is called 'enum ibv_wr_opcode' as part of libibverbs.h. That enum defines different values for IB_WR_LOCAL_INV, IB_WR_SEND_WITH_INV, and IB_WR_LSO. These were introduced (incorrectly, it turns out) into libiberbs in 2015. The kernel has changed its mind on the numbering for several of the IB_WC values over the years, but has remained stable on IB_WR_LOCAL_INV and below. Based on this we can conclude that there is no real user space user of the values beyond IB_WR_ATOMIC_FETCH_AND_ADD, as they have never worked via rdma-core. This is confirmed by inspection, only rxe uses the kernel enum and implements the latter operations. rxe has clearly never worked with these attributes from userspace. Other drivers that support these opcodes implement the functionality without calling out to the kernel. To make IB_WR_SEND_WITH_INV and related work for RXE in userspace we choose to renumber the IB_WR enum in the kernel to match the uABI that userspace has bee using since before Soft RoCE was merged. This is an overall simpler configuration for the whole software stack, and obviously can't break anything existing. Reported-by: Seth Howell Tested-by: Seth Howell Fixes: 8700e3e7c485 ("Soft RoCE driver") Cc: Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 34 ++++++++++++++++++++-------------- include/uapi/rdma/ib_user_verbs.h | 20 +++++++++++++++++++- 2 files changed, 39 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6076c9b72ab9..e463d3007a35 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1281,21 +1281,27 @@ struct ib_qp_attr { }; enum ib_wr_opcode { - IB_WR_RDMA_WRITE, - IB_WR_RDMA_WRITE_WITH_IMM, - IB_WR_SEND, - IB_WR_SEND_WITH_IMM, - IB_WR_RDMA_READ, - IB_WR_ATOMIC_CMP_AND_SWP, - IB_WR_ATOMIC_FETCH_AND_ADD, - IB_WR_LSO, - IB_WR_SEND_WITH_INV, - IB_WR_RDMA_READ_WITH_INV, - IB_WR_LOCAL_INV, - IB_WR_REG_MR, - IB_WR_MASKED_ATOMIC_CMP_AND_SWP, - IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, + /* These are shared with userspace */ + IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE, + IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM, + IB_WR_SEND = IB_UVERBS_WR_SEND, + IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM, + IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ, + IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP, + IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD, + IB_WR_LSO = IB_UVERBS_WR_TSO, + IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV, + IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV, + IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV, + IB_WR_MASKED_ATOMIC_CMP_AND_SWP = + IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP, + IB_WR_MASKED_ATOMIC_FETCH_AND_ADD = + IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD, + + /* These are kernel only and can not be issued by userspace */ + IB_WR_REG_MR = 0x20, IB_WR_REG_SIG_MR, + /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. */ diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 25a16760de2a..1254b51a551a 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -763,10 +763,28 @@ struct ib_uverbs_sge { __u32 lkey; }; +enum ib_uverbs_wr_opcode { + IB_UVERBS_WR_RDMA_WRITE = 0, + IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1, + IB_UVERBS_WR_SEND = 2, + IB_UVERBS_WR_SEND_WITH_IMM = 3, + IB_UVERBS_WR_RDMA_READ = 4, + IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5, + IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6, + IB_UVERBS_WR_LOCAL_INV = 7, + IB_UVERBS_WR_BIND_MW = 8, + IB_UVERBS_WR_SEND_WITH_INV = 9, + IB_UVERBS_WR_TSO = 10, + IB_UVERBS_WR_RDMA_READ_WITH_INV = 11, + IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12, + IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13, + /* Review enum ib_wr_opcode before modifying this */ +}; + struct ib_uverbs_send_wr { __aligned_u64 wr_id; __u32 num_sge; - __u32 opcode; + __u32 opcode; /* see enum ib_uverbs_wr_opcode */ __u32 send_flags; union { __be32 imm_data; -- cgit v1.2.3 From 5f9794dc94f59ad1eb821724a8ae1f8e803ea188 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:43:08 +0300 Subject: RDMA/ucontext: Add a core API for mmaping driver IO memory To support disassociation and PCI hot unplug, we have to track all the VMAs that refer to the device IO memory. When disassociation occurs the VMAs have to be revised to point to the zero page, not the IO memory, to allow the physical HW to be unplugged. The three drivers supporting this implemented three different versions of this algorithm, all leaving something to be desired. This new common implementation has a few differences from the driver versions: - Track all VMAs, including splitting/truncating/etc. Tie the lifetime of the private data allocation to the lifetime of the vma. This avoids any tricks with setting vm_ops which Linus didn't like. (see link) - Support multiple mms, and support properly tracking mmaps triggered by processes other than the one first opening the uverbs fd. This makes fork behavior of disassociation enabled drivers the same as fork support in normal drivers. - Don't use crazy get_task stuff. - Simplify the approach for to racing between vm_ops close and disassociation, fixing the related bugs most of the driver implementations had. Since we are in core code the tracking list can be placed in struct ib_uverbs_ufile, which has a lifetime strictly longer than any VMAs created by mmap on the uverbs FD. Link: https://www.spinics.net/lists/stable/msg248747.html Link: https://lkml.kernel.org/r/CA+55aFxJTV_g46AQPoPXen-UPiqR1HGMZictt7VpC-SMFbm3Cw@mail.gmail.com Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e463d3007a35..a66238d8a2a3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2646,6 +2646,28 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client); void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot); +int rdma_user_mmap_page(struct ib_ucontext *ucontext, + struct vm_area_struct *vma, struct page *page, + unsigned long size); +#else +static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, + struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, + pgprot_t prot) +{ + return -EINVAL; +} +static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext, + struct vm_area_struct *vma, struct page *page, + unsigned long size) +{ + return -EINVAL; +} +#endif + static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; -- cgit v1.2.3 From d4b4dd1b9706e48c370f88d3adfe713e43423cc9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:44:45 +0300 Subject: RDMA/umem: Do not use current->tgid to track the mm_struct This is just wrong, the process that calls into the reg_mr is the process associated with the umem, and that does not have to be the same process that created the context. When this code was first written mmgrab() didn't exist, however these days we can just directly hold the mm_struct pointer in the umem and have no ambiguity when it comes to releasing the umem as to which mm it was associated with. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index a1fd63871d17..e1c00b2ead19 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -42,14 +42,13 @@ struct ib_umem_odp; struct ib_umem { struct ib_ucontext *context; + struct mm_struct *owning_mm; size_t length; unsigned long address; int page_shift; int writable; int hugetlb; struct work_struct work; - struct mm_struct *mm; - unsigned long diff; struct ib_umem_odp *odp_data; struct sg_table sg_head; int nmap; -- cgit v1.2.3 From b5231b019d76521dd8c59a54c174770ec92c767c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:04 +0300 Subject: RDMA/umem: Use ib_umem_odp in all function signatures connected to ODP All of these functions already require the ODP version of the umem struct, make this very clear by having the signature require it. This paves the way to using the container_of() pattern to link umem_odp and umem together. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem_odp.h | 39 +++++++++++++++++++++------------------ include/rdma/ib_verbs.h | 4 +++- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 381cdf5a9bd1..3ef2975b5fb2 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -82,15 +82,18 @@ struct ib_umem_odp { struct work_struct work; }; +static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) +{ + return umem->odp_data; +} + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, int access); -struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, - unsigned long addr, - size_t size); - -void ib_umem_odp_release(struct ib_umem *umem); +struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext *context, + unsigned long addr, size_t size); +void ib_umem_odp_release(struct ib_umem_odp *umem_odp); /* * The lower 2 bits of the DMA address signal the R/W permissions for @@ -105,13 +108,14 @@ void ib_umem_odp_release(struct ib_umem *umem); #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) -int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, - u64 access_mask, unsigned long current_seq); +int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, + u64 bcnt, u64 access_mask, + unsigned long current_seq); -void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, +void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bound); -typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, +typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end, void *cookie); /* * Call the callback on each ib_umem in the range. Returns the logical or of @@ -129,25 +133,25 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length); -static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, +static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, unsigned long mmu_seq) { /* * This code is strongly based on the KVM code from * mmu_notifier_retry. Should be called with - * the relevant locks taken (item->odp_data->umem_mutex + * the relevant locks taken (umem_odp->umem_mutex * and the ucontext umem_mutex semaphore locked for read). */ /* Do not allow page faults while the new ib_umem hasn't seen a state * with zero notifiers yet, and doesn't have its own valid set of * private counters. */ - if (!item->odp_data->mn_counters_active) + if (!umem_odp->mn_counters_active) return 1; - if (unlikely(item->odp_data->notifiers_count)) + if (unlikely(umem_odp->notifiers_count)) return 1; - if (item->odp_data->notifiers_seq != mmu_seq) + if (umem_odp->notifiers_seq != mmu_seq) return 1; return 0; } @@ -161,14 +165,13 @@ static inline int ib_umem_odp_get(struct ib_ucontext *context, return -EINVAL; } -static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, - unsigned long addr, - size_t size) +static inline struct ib_umem_odp * +ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size) { return ERR_PTR(-EINVAL); } -static inline void ib_umem_odp_release(struct ib_umem *umem) {} +static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a66238d8a2a3..d611ce9df7fb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -69,6 +69,8 @@ #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN +struct ib_umem_odp; + extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; @@ -1506,7 +1508,7 @@ struct ib_ucontext { * mmu notifiers registration. */ struct rw_semaphore umem_rwsem; - void (*invalidate_range)(struct ib_umem *umem, + void (*invalidate_range)(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end); struct mmu_notifier mn; -- cgit v1.2.3 From 41b4deeaa123e62e1037af7a0be547af2e0e05f1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:05 +0300 Subject: RDMA/umem: Make ib_umem_odp into a sub structure of ib_umem These two structures are linked together, use the container_of pattern instead of a double allocation to make the code simpler and easier to follow. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem_odp.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 3ef2975b5fb2..4519ea663df5 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -43,6 +43,7 @@ struct umem_odp_node { }; struct ib_umem_odp { + struct ib_umem umem; /* * An array of the pages included in the on-demand paging umem. * Indices of pages that are currently not mapped into the device will @@ -72,7 +73,6 @@ struct ib_umem_odp { /* A linked list of umems that don't have private mmu notifier * counters yet. */ struct list_head no_private_counters; - struct ib_umem *umem; /* Tree tracking */ struct umem_odp_node interval_tree; @@ -84,13 +84,12 @@ struct ib_umem_odp { static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) { - return umem->odp_data; + return container_of(umem, struct ib_umem_odp, umem); } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, - int access); +int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); @@ -158,9 +157,7 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -static inline int ib_umem_odp_get(struct ib_ucontext *context, - struct ib_umem *umem, - int access) +static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) { return -EINVAL; } -- cgit v1.2.3 From 597ecc5a095406a668e53ab330495ddb65327f77 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:06 +0300 Subject: RDMA/umem: Get rid of struct ib_umem.odp_data This no longer has any use, we can use container_of to get to the umem_odp, and a simple flag to indicate if this is an odp MR. Remove the few remaining references to it. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index e1c00b2ead19..5d3755ec5afa 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -46,10 +46,10 @@ struct ib_umem { size_t length; unsigned long address; int page_shift; - int writable; - int hugetlb; + u32 writable : 1; + u32 hugetlb : 1; + u32 is_odp : 1; struct work_struct work; - struct ib_umem_odp *odp_data; struct sg_table sg_head; int nmap; int npages; -- cgit v1.2.3 From c9990ab39b6e911003bab10a6da96e98ab1503a3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:07 +0300 Subject: RDMA/umem: Move all the ODP related stuff out of ucontext and into per_mm This is the first step to make ODP use the owning_mm that is now part of struct ib_umem. Each ODP umem is linked to a single per_mm structure, which in turn, is linked to a single mm, via the embedded mmu_notifier. This first patch introduces the structure and reworks eveything to use it. This also needs to introduce tgid into the ib_ucontext_per_mm, as get_user_pages_remote() requires the originating task for statistics tracking. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem_odp.h | 2 ++ include/rdma/ib_verbs.h | 32 ++++++++++++++++++++------------ 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 4519ea663df5..394ea6b68db7 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -44,6 +44,8 @@ struct umem_odp_node { struct ib_umem_odp { struct ib_umem umem; + struct ib_ucontext_per_mm *per_mm; + /* * An array of the pages included in the on-demand paging umem. * Indices of pages that are currently not mapped into the device will diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d611ce9df7fb..2cf2cee5a753 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1488,6 +1488,25 @@ struct ib_rdmacg_object { #endif }; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +struct ib_ucontext_per_mm { + struct ib_ucontext *context; + + struct rb_root_cached umem_tree; + /* + * Protects .umem_rbroot and tree, as well as odp_mrs_count and + * mmu notifiers registration. + */ + struct rw_semaphore umem_rwsem; + + struct mmu_notifier mn; + atomic_t notifier_count; + /* A list of umems that don't have private mmu notifier counters yet. */ + struct list_head no_private_counters; + unsigned int odp_mrs_count; +}; +#endif + struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; @@ -1502,20 +1521,9 @@ struct ib_ucontext { struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct rb_root_cached umem_tree; - /* - * Protects .umem_rbroot and tree, as well as odp_mrs_count and - * mmu notifiers registration. - */ - struct rw_semaphore umem_rwsem; void (*invalidate_range)(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end); - - struct mmu_notifier mn; - atomic_t notifier_count; - /* A list of umems that don't have private mmu notifier counters yet. */ - struct list_head no_private_counters; - int odp_mrs_count; + struct ib_ucontext_per_mm per_mm; #endif struct ib_rdmacg_object cg_obj; -- cgit v1.2.3 From f27a0d50a4bc2861b472c2e3740d63a29d1ac460 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:08 +0300 Subject: RDMA/umem: Use umem->owning_mm inside ODP Since ODP had a single struct mmu_notifier located in the ucontext it could only handle a single MM at a time, and this prevented it from using the new owning_mm system. With the prior rework it is now simple to let ODP track multiple MMs per ucontext, finish the job so that the per_mm is allocated on a mm by mm basis, and freed when the last umem is dropped from the ucontext. As a side effect the new saner locking removes the lockdep splat about nesting the umem_rwsem between mmu_notifier_unregister and ib_umem_odp_release. It also makes ODP work with multiple processes, across, fork, etc. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem_odp.h | 20 +++++++++++++++++++- include/rdma/ib_verbs.h | 22 ++-------------------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 394ea6b68db7..259eb08dfc9e 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -91,8 +91,26 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +struct ib_ucontext_per_mm { + struct ib_ucontext *context; + struct mm_struct *mm; + struct pid *tgid; + + struct rb_root_cached umem_tree; + /* Protects umem_tree */ + struct rw_semaphore umem_rwsem; + atomic_t notifier_count; + + struct mmu_notifier mn; + /* A list of umems that don't have private mmu notifier counters yet. */ + struct list_head no_private_counters; + unsigned int odp_mrs_count; + + struct list_head ucontext_list; +}; + int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext *context, +struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, unsigned long addr, size_t size); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2cf2cee5a753..6437e6af758d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1488,25 +1488,6 @@ struct ib_rdmacg_object { #endif }; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -struct ib_ucontext_per_mm { - struct ib_ucontext *context; - - struct rb_root_cached umem_tree; - /* - * Protects .umem_rbroot and tree, as well as odp_mrs_count and - * mmu notifiers registration. - */ - struct rw_semaphore umem_rwsem; - - struct mmu_notifier mn; - atomic_t notifier_count; - /* A list of umems that don't have private mmu notifier counters yet. */ - struct list_head no_private_counters; - unsigned int odp_mrs_count; -}; -#endif - struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; @@ -1523,7 +1504,8 @@ struct ib_ucontext { #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void (*invalidate_range)(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end); - struct ib_ucontext_per_mm per_mm; + struct mutex per_mm_list_lock; + struct list_head per_mm_list; #endif struct ib_rdmacg_object cg_obj; -- cgit v1.2.3 From ca748c39ea3f3c755295d64d69ba0b4375e34b5d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:09 +0300 Subject: RDMA/umem: Get rid of per_mm->notifier_count This is intrinsically racy and the scheme is simply unnecessary. New MR registration can wait for any on going invalidation to fully complete. CPU0 CPU1 if (atomic_read()) if (atomic_dec_and_test() && !list_empty()) { /* not taken */ } list_add() Putting the new UMEM into some kind of purgatory until another invalidate rolls through.. Instead hold the read side of the umem_rwsem across the pair'd start/end and get rid of the racy 'deferred add' approach. Since all umem's in the rbt are always ready to go, also get rid of the mn_counters_active stuff. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem_odp.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 259eb08dfc9e..ce9502545903 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -67,15 +67,9 @@ struct ib_umem_odp { struct mutex umem_mutex; void *private; /* for the HW driver to use. */ - /* When false, use the notifier counter in the ucontext struct. */ - bool mn_counters_active; int notifiers_seq; int notifiers_count; - /* A linked list of umems that don't have private mmu notifier - * counters yet. */ - struct list_head no_private_counters; - /* Tree tracking */ struct umem_odp_node interval_tree; @@ -99,11 +93,8 @@ struct ib_ucontext_per_mm { struct rb_root_cached umem_tree; /* Protects umem_tree */ struct rw_semaphore umem_rwsem; - atomic_t notifier_count; struct mmu_notifier mn; - /* A list of umems that don't have private mmu notifier counters yet. */ - struct list_head no_private_counters; unsigned int odp_mrs_count; struct list_head ucontext_list; @@ -162,12 +153,6 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, * and the ucontext umem_mutex semaphore locked for read). */ - /* Do not allow page faults while the new ib_umem hasn't seen a state - * with zero notifiers yet, and doesn't have its own valid set of - * private counters. */ - if (!umem_odp->mn_counters_active) - return 1; - if (unlikely(umem_odp->notifiers_count)) return 1; if (umem_odp->notifiers_seq != mmu_seq) -- cgit v1.2.3 From be7a57b41ad824dbc59d1ffa91160ee73f2999ee Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:10 +0300 Subject: RDMA/umem: Handle a half-complete start/end sequence mmu_notifier_unregister() can race between a invalidate_start/end and cause the invalidate_end to be skipped. This causes an imbalance in the locking, which lockdep complains about. This is not actually a bug, as we immediately kfree the memory holding the lock, but it simple enough to fix. Mark when the notifier is being destroyed and abort the start callback. This can be done under the lock we already obtained, and can re-purpose the invalidate_range test we already have. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem_odp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index ce9502545903..ec05c82ead7a 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -89,6 +89,7 @@ struct ib_ucontext_per_mm { struct ib_ucontext *context; struct mm_struct *mm; struct pid *tgid; + bool active; struct rb_root_cached umem_tree; /* Protects umem_tree */ -- cgit v1.2.3 From 56ac9dd9177ce451ac8176311915b29e8b5f0ac2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:11 +0300 Subject: RDMA/umem: Avoid synchronize_srcu in the ODP MR destruction path synchronize_rcu is slow enough that it should be avoided on the syscall path when user space is destroying MRs. After all the rework we can now trivially do this by having call_srcu kfree the per_mm. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_umem_odp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index ec05c82ead7a..0b1446fe2fab 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -99,6 +99,7 @@ struct ib_ucontext_per_mm { unsigned int odp_mrs_count; struct list_head ucontext_list; + struct rcu_head rcu; }; int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); -- cgit v1.2.3 From 2a3ccfdbeb6a5f832d7203e230799f1ffa46e0fc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 16 Sep 2018 20:48:12 +0300 Subject: RDMA/uverbs: Get rid of ucontext->tgid Nothing uses this now, just delete it. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6437e6af758d..0d822a9db300 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1500,7 +1500,6 @@ struct ib_ucontext { bool cleanup_retryable; - struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void (*invalidate_range)(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end); -- cgit v1.2.3 From 175edba85634a8be0ddab5ee96d0b23d9f17627e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 17 Sep 2018 13:30:48 +0300 Subject: RDMA/mlx5: Allow creating RAW ethernet QP with loopback support Expose two new flags: MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC Those flags can be used at creation time in order to allow a QP to be able to receive loopback traffic (unicast and multicast). We store the state in the QP to be used on the destroy path to indicate with which flags the QP was created with. Signed-off-by: Mark Bloch Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index addbb9c4529e..e584ba40208e 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -45,6 +45,8 @@ enum { MLX5_QP_FLAG_BFREG_INDEX = 1 << 3, MLX5_QP_FLAG_TYPE_DCT = 1 << 4, MLX5_QP_FLAG_TYPE_DCI = 1 << 5, + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6, + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7, }; enum { -- cgit v1.2.3 From e349f858d29f300ad9ad327fd57735a1d15e147f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Sep 2018 16:58:09 -0600 Subject: RDMA: Fully setup the device name in ib_register_device The current code has two copies of the device name, ibdev->dev and dev_name(&ibdev->dev), and they are setup at different times, which is very confusing. Set them both up at the same time and make dev_name() the lead name, which is the proper use of the driver core APIs. To make it very clear that the name is not valid until registration pass it in to the ib_register_device() call rather than messing with ibdev->name directly. Also the reorganization now checks that dev_name is unique even if it does not contain a %. Signed-off-by: Jason Gunthorpe Acked-by: Adit Ranadive Reviewed-by: Steve Wise Acked-by: Devesh Sharma Reviewed-by: Shiraz Saleem Reviewed-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl --- include/rdma/ib_verbs.h | 6 +++--- include/rdma/rdma_vt.h | 9 ++++++++- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0d822a9db300..9897d2329f2c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2625,9 +2625,9 @@ void ib_dealloc_device(struct ib_device *device); void ib_get_device_fw_str(struct ib_device *device, char *str); -int ib_register_device(struct ib_device *device, - int (*port_callback)(struct ib_device *, - u8, struct kobject *)); +int ib_register_device(struct ib_device *device, const char *name, + int (*port_callback)(struct ib_device *, u8, + struct kobject *)); void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e32facdd9fd3..065c9fbe6589 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -429,7 +429,14 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi, const char *fmt, const char *name, const int unit) { - snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit); + /* + * FIXME: rvt and its users want to touch the ibdev before + * registration and have things like the name work. We don't have the + * infrastructure in the core to support this directly today, hack it + * to work by setting the name manually here. + */ + dev_set_name(&rdi->ibdev.dev, fmt, name, unit); + strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); } /** -- cgit v1.2.3 From 6c8541118bd53bc90b6c2473e289e5541de80376 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 20 Sep 2018 16:42:27 -0600 Subject: RDMA/ulp: Use dev_name instead of ibdev->name These return the same thing but dev_name is a more conventional use of the kernel API. Signed-off-by: Jason Gunthorpe Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Dennis Dalessandro --- include/rdma/rdma_vt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 065c9fbe6589..cc08de3570b4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -447,7 +447,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi, */ static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi) { - return rdi->ibdev.name; + return dev_name(&rdi->ibdev.dev); } static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) -- cgit v1.2.3 From 7f72052cb48efb5637ed99d2f45cb33a0bf60719 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:45:18 +0300 Subject: IB/mlx5: Expose RAW QP device handles to user space Expose RAW QP device handles to user space by extending the UHW part of mlx5_ib_create_qp_resp. This data is returned only when DEVX context is used where it may be applicable. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5-abi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index e584ba40208e..6056625237cf 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -351,9 +351,22 @@ struct mlx5_ib_create_qp_rss { __u32 flags; }; +enum mlx5_ib_create_qp_resp_mask { + MLX5_IB_CREATE_QP_RESP_MASK_TIRN = 1UL << 0, + MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1, + MLX5_IB_CREATE_QP_RESP_MASK_RQN = 1UL << 2, + MLX5_IB_CREATE_QP_RESP_MASK_SQN = 1UL << 3, +}; + struct mlx5_ib_create_qp_resp { __u32 bfreg_index; __u32 reserved; + __u32 comp_mask; + __u32 tirn; + __u32 tisn; + __u32 rqn; + __u32 sqn; + __u32 reserved1; }; struct mlx5_ib_alloc_mw { -- cgit v1.2.3 From aef716fa5e6da3919cca22ac2097a90d73d8177f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 27 Sep 2018 13:55:58 -0700 Subject: RDMA/qedr: Remove enumerated type qed_roce_ll2_tx_dest Clang warns when one enumerated type is explicitly converted to another. drivers/infiniband/hw/qedr/qedr_roce_cm.c:198:28: warning: implicit conversion from enumeration type 'enum qed_roce_ll2_tx_dest' to different enumeration type 'enum qed_ll2_tx_dest' [-Wenum-conversion] ll2_tx_pkt.tx_dest = pkt->tx_dest; ~ ~~~~~^~~~~~~ 1 warning generated. Turns out that QED_ROCE_LL2_TX_DEST_NW and QED_ROCE_LL2_TX_DEST_LB are only used once in the whole tree and QED_ROCE_LL2_TX_DEST_MAX is used nowhere. Remove them and use the equivalent values from qed_ll2_tx_dest in their place. Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Acked-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- include/linux/qed/qed_rdma_if.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index df4d13f7e191..d15f8e4815e3 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -39,15 +39,6 @@ #include #include -enum qed_roce_ll2_tx_dest { - /* Light L2 TX Destination to the Network */ - QED_ROCE_LL2_TX_DEST_NW, - - /* Light L2 TX Destination to the Loopback */ - QED_ROCE_LL2_TX_DEST_LB, - QED_ROCE_LL2_TX_DEST_MAX -}; - #define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) /* rdma interface */ @@ -581,7 +572,7 @@ struct qed_roce_ll2_packet { int n_seg; struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; int roce_mode; - enum qed_roce_ll2_tx_dest tx_dest; + enum qed_ll2_tx_dest tx_dest; }; enum qed_rdma_type { -- cgit v1.2.3 From d205a06a14796a24b3447bc5d27b7dedff4479d5 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 26 Sep 2018 10:26:44 -0700 Subject: IB/rdmavt: Rename check_send_wqe as setup_wqe The driver-provided function check_send_wqe allows the hardware driver to check and set up the incoming send wqe before it is inserted into the swqe ring. This patch will rename it as setup_wqe to better reflect its usage. In addition, this function is only called when all setup is complete in rdmavt. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_vt.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index cc08de3570b4..52907204afcd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -215,13 +215,14 @@ struct rvt_driver_provided { void (*schedule_send_no_lock)(struct rvt_qp *qp); /* - * Validate the wqe. This needs to be done prior to inserting the - * wqe into the ring, but after the wqe has been set up. Allow for - * driver specific work request checking by providing a callback. - * call_send indicates if the wqe should be posted or scheduled. + * Driver specific work request setup and checking. + * This function is allowed to perform any setup, checks, or + * adjustments required to the SWQE in order to be usable by + * underlying protocols. This includes private data structure + * allocations. */ - int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe, - bool *call_send); + int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe, + bool *call_send); /* * Sometimes rdmavt needs to kick the driver's send progress. That is -- cgit v1.2.3 From 5da0fc9dbf891a9c9e01a634f2126b5952afb3a6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 28 Sep 2018 07:17:09 -0700 Subject: IB/hfi1: Prepare resource waits for dual leg Current implementation allows each qp to have only one send engine. As such, each qp has only one list to queue prebuilt packets when send engine resources are not available. To improve performance, it is desired to support multiple send engines for each qp. This patch creates the framework to support two send engines (two legs) for each qp for the TID RDMA protocol, which can be easily extended to support more send engines. It achieves the goal by creating a leg specific struct, iowait_work in the iowait struct, to hold the work_struct and the tx_list as well as a pointer to the parent iowait struct. The hfi1_pkt_state now has an additional field to record the current legs work structure and that is now passed to all egress waiters to determine the leg that needs to wait via a new iowait helper. The APIs are adjusted to use the new leg specific struct as required. Many new and modified helpers are added to support this change. Reviewed-by: Mitko Haralanov Signed-off-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_vt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 52907204afcd..0a888a9ecc96 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -211,8 +211,8 @@ struct rvt_driver_provided { * version requires the s_lock not to be held. The other assumes the * s_lock is held. */ - void (*schedule_send)(struct rvt_qp *qp); - void (*schedule_send_no_lock)(struct rvt_qp *qp); + bool (*schedule_send)(struct rvt_qp *qp); + bool (*schedule_send_no_lock)(struct rvt_qp *qp); /* * Driver specific work request setup and checking. -- cgit v1.2.3 From d31131bba5a1630304c55ea775c48cc84912ab59 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 2 Oct 2018 16:11:21 +0300 Subject: RDMA: Remove unused parameter from ib_modify_qp_is_ok() The ll parameter is not used in ib_modify_qp_is_ok(), so remove it. Signed-off-by: Kamal Heib Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9897d2329f2c..f88c1071413a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2742,7 +2742,6 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt, * @next_state: Next QP state * @type: QP type * @mask: Mask of supplied QP attributes - * @ll : link layer of port * * This function is a helper function that a low-level driver's * modify_qp method can use to validate the consumer's input. It @@ -2751,8 +2750,7 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt, * and that the attribute mask supplied is allowed for the transition. */ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, - enum ib_qp_type type, enum ib_qp_attr_mask mask, - enum rdma_link_layer ll); + enum ib_qp_type type, enum ib_qp_attr_mask mask); void ib_register_event_handler(struct ib_event_handler *event_handler); void ib_unregister_event_handler(struct ib_event_handler *event_handler); -- cgit v1.2.3 From 38716732f161c3d107c4cc406a287f1201bed752 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Oct 2018 11:49:24 +0300 Subject: RDMA/netlink: Simplify netlink listener existence check All users of rdma_nl_chk_listeners() are interested to get boolean answer if netlink socket has listeners, so update all places to boolean function. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_netlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index c369703fcd69..70218e6b5187 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -96,7 +96,7 @@ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags); /** * Check if there are any listeners to the netlink group * @group: the netlink group ID - * Returns 0 on success or a negative for no listeners. + * Returns true on success or false if no listeners. */ -int rdma_nl_chk_listeners(unsigned int group); +bool rdma_nl_chk_listeners(unsigned int group); #endif /* _RDMA_NETLINK_H */ -- cgit v1.2.3 From b56511c15713ba6c7572e77a41f7ddba9c1053ec Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 12:57:16 -0700 Subject: IB/mlx4: Avoid implicit enumerated type conversion Clang warns when one enumerated type is implicitly converted to another. drivers/infiniband/hw/mlx4/mad.c:1811:41: warning: implicit conversion from enumeration type 'enum mlx4_ib_qp_flags' to different enumeration type 'enum ib_qp_create_flags' [-Wenum-conversion] qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP; ~ ^~~~~~~~~~~~~~~~~~~~~~~ drivers/infiniband/hw/mlx4/mad.c:1819:41: warning: implicit conversion from enumeration type 'enum mlx4_ib_qp_flags' to different enumeration type 'enum ib_qp_create_flags' [-Wenum-conversion] qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP; ~ ^~~~~~~~~~~~~~~~~ The type mlx4_ib_qp_flags explicitly provides supplemental values to the type ib_qp_create_flags. Make that clear to Clang by changing the create_flags type to u32. Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f88c1071413a..7ce617d77f8f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1151,7 +1151,7 @@ struct ib_qp_init_attr { struct ib_qp_cap cap; enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; - enum ib_qp_create_flags create_flags; + u32 create_flags; /* * Only needed for special QP types, or when using the RW API. -- cgit v1.2.3 From 019f118b94c895294debfaa394b267638fe2f648 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Wed, 26 Sep 2018 10:44:33 -0700 Subject: IB/{hfi1, qib, rdmavt}: Move copy SGE logic into rdmavt This patch moves hfi1_copy_sge() into rdmavt for sharing with qib. This patch also moves all the wss_*() functions into rdmavt as several wss_*() functions are called from hfi1_copy_sge() When SGE copy mode is adaptive, cacheless copy may be done in some cases for performance reasons. In those cases, X86 cacheless copy function is called since the drivers that use rdmavt and may set SGE copy mode to adaptive are X86 only. For this reason, this patch adds "depends on X86_64" to rdmavt/Kconfig. Reviewed-by: Ashutosh Dixit Reviewed-by: Michael J. Ruhl Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_vt.h | 22 ++++++++++++++++++++++ include/rdma/rdmavt_qp.h | 4 ++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 0a888a9ecc96..7fa2f2d46a3c 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -149,6 +149,10 @@ struct rvt_ibport { #define RVT_CQN_MAX 16 /* maximum length of cq name */ +#define RVT_SGE_COPY_MEMCPY 0 +#define RVT_SGE_COPY_CACHELESS 1 +#define RVT_SGE_COPY_ADAPTIVE 2 + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -161,6 +165,9 @@ struct rvt_driver_params { */ unsigned int lkey_table_size; unsigned int qp_table_size; + unsigned int sge_copy_mode; + unsigned int wss_threshold; + unsigned int wss_clean_period; int qpn_start; int qpn_inc; int qpn_res_start; @@ -193,6 +200,19 @@ struct rvt_ah { u8 log_pmtu; }; +/* memory working set size */ +struct rvt_wss { + unsigned long *entries; + atomic_t total_count; + atomic_t clean_counter; + atomic_t clean_entry; + + int threshold; + int num_entries; + long pages_mask; + unsigned int clean_period; +}; + struct rvt_dev_info; struct rvt_swqe; struct rvt_driver_provided { @@ -418,6 +438,8 @@ struct rvt_dev_info { u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ spinlock_t n_mcast_grps_lock; + /* Memory Working Set Size */ + struct rvt_wss *wss; }; /** diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 927f6d5b6d0f..eaf2593ca822 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -678,6 +678,10 @@ void rvt_del_timers_sync(struct rvt_qp *qp); void rvt_stop_rc_timers(struct rvt_qp *qp); void rvt_add_retry_timer(struct rvt_qp *qp); +void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, + void *data, u32 length, + bool release, bool copy_last); + /** * struct rvt_qp_iter - the iterator for QPs * @qp - the current QP -- cgit v1.2.3 From 116aa0330ec71b9872dfb3a3cc5202a72b5a1666 Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Wed, 26 Sep 2018 10:44:42 -0700 Subject: IB/{hfi1, qib, rdmavt}: Move send completion logic to rdmavt Moving send completion code into rdmavt in order to have shared logic between qib and hfi1 drivers. Reviewed-by: Mike Marciniszyn Reviewed-by: Brian Welty Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_vt.h | 3 +++ include/rdma/rdmavt_qp.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 7fa2f2d46a3c..3584d0816fcd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -398,6 +398,9 @@ struct rvt_dev_info { /* post send table */ const struct rvt_operation_params *post_parms; + /* opcode translation table */ + const enum ib_wc_opcode *wc_opcode; + /* Driver specific helper functions */ struct rvt_driver_provided driver_f; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index eaf2593ca822..6fd6f2ad9c0f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -681,6 +681,8 @@ void rvt_add_retry_timer(struct rvt_qp *qp); void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, void *data, u32 length, bool release, bool copy_last); +void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, + enum ib_wc_status status); /** * struct rvt_qp_iter - the iterator for QPs -- cgit v1.2.3 From 15703461533a5ffd775722390431625daaae7618 Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Wed, 26 Sep 2018 10:44:52 -0700 Subject: IB/{hfi1, qib, rdmavt}: Move ruc_loopback to rdmavt This patch moves ruc_loopback() from hfi1 into rdmavt for code sharing with the qib driver. Reviewed-by: Brian Welty Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/rdmavt_qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 6fd6f2ad9c0f..cbafb1878669 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -683,6 +683,7 @@ void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, bool release, bool copy_last); void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); +void rvt_ruc_loopback(struct rvt_qp *qp); /** * struct rvt_qp_iter - the iterator for QPs -- cgit v1.2.3 From 363ad35577de3a73cf97006ec5f00fccaee73172 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Oct 2018 11:48:01 +0300 Subject: RDMA/restrack: Un-inline set task implementation Prepare rdma_restrack_set_task() call to accommodate more code by moving its implementation from *.h to *.c. Reviewed-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- include/rdma/restrack.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 9654d33edd98..0bddbbdbaf7c 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -175,14 +175,8 @@ int rdma_restrack_put(struct rdma_restrack_entry *res); * @res: resource entry * @task: task struct */ -static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res, - struct task_struct *task) -{ - if (res->task) - put_task_struct(res->task); - get_task_struct(task); - res->task = task; -} +void rdma_restrack_set_task(struct rdma_restrack_entry *res, + struct task_struct *task); /* * Helper functions for rdma drivers when filling out -- cgit v1.2.3 From 2165fc264079ecb7fbfa5e8b330a92eb3f0fcbe1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Oct 2018 11:48:02 +0300 Subject: RDMA/restrack: Consolidate task name updates in one place Unify task update and kernel name set in one place. Reviewed-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- include/rdma/restrack.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 0bddbbdbaf7c..2638fa7cd702 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -173,10 +173,10 @@ int rdma_restrack_put(struct rdma_restrack_entry *res); /** * rdma_restrack_set_task() - set the task for this resource * @res: resource entry - * @task: task struct + * @caller: kernel name, the current task will be used if the caller is NULL. */ void rdma_restrack_set_task(struct rdma_restrack_entry *res, - struct task_struct *task); + const char *caller); /* * Helper functions for rdma drivers when filling out -- cgit v1.2.3 From ba4a41198324be2e6fbb06c270fdc8500c0e38de Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 10 Oct 2018 09:55:10 +0300 Subject: RDMA/mlx5: Add support for flow tag to raw create flow A user can provide a hint which will be attached to the packet and written to the CQE on receive. This can be used as a way to offload operations into the HW, for example parsing a packet which is a tunneled packet, and if so, pass 0x1 as the hint. The software can use that hint to decapsulate the packet and parse only the inner headers thus saving CPU cycles. Signed-off-by: Mark Bloch Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index fb4a8b17cca8..408e220034de 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -157,6 +157,7 @@ enum mlx5_ib_create_flow_attrs { MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, MLX5_IB_ATTR_CREATE_FLOW_MATCHER, MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, + MLX5_IB_ATTR_CREATE_FLOW_TAG, }; enum mlx5_ib_destoy_flow_attrs { -- cgit v1.2.3 From 9549c2bd094f0f54b8827d64886f5b1de370dff3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 11 Oct 2018 17:30:04 +0300 Subject: RDMA/core: Align multiple functions to kernel coding style This patch changes the small number of functions to be aligned to kernel coding style. It is needed to minimize the diffstat of the following patch. It doesn't change any functionality. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 3 +-- include/rdma/ib_sa.h | 36 +++++++++++++++--------------------- 2 files changed, 16 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index e09eca91eb18..eebbe63b530c 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -102,8 +102,7 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), - bool resolve_by_gid_attr, - void *context); + bool resolve_by_gid_attr, void *context); void rdma_addr_cancel(struct rdma_dev_addr *addr); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index b6ddf2a1b9d8..95ce625a49e3 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -449,28 +449,23 @@ struct ib_sa_query; void ib_sa_cancel_query(int id, struct ib_sa_query *query); -int ib_sa_path_rec_get(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct sa_path_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct sa_path_rec *resp, +int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, + u8 port_num, struct sa_path_rec *rec, + ib_sa_comp_mask comp_mask, int timeout_ms, + gfp_t gfp_mask, + void (*callback)(int status, struct sa_path_rec *resp, void *context), - void *context, - struct ib_sa_query **query); + void *context, struct ib_sa_query **query); int ib_sa_service_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - u8 method, - struct ib_sa_service_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_service_rec *resp, - void *context), - void *context, - struct ib_sa_query **sa_query); + struct ib_device *device, u8 port_num, u8 method, + struct ib_sa_service_rec *rec, + ib_sa_comp_mask comp_mask, int timeout_ms, + gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_service_rec *resp, + void *context), + void *context, struct ib_sa_query **sa_query); struct ib_sa_multicast { struct ib_sa_mcmember_rec rec; @@ -577,8 +572,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void (*callback)(int status, struct ib_sa_guidinfo_rec *resp, void *context), - void *context, - struct ib_sa_query **sa_query); + void *context, struct ib_sa_query **sa_query); bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, struct ib_device *device, -- cgit v1.2.3 From dbace111e5b320682eee63d7173959a2b2bd9ccb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 11 Oct 2018 17:30:05 +0300 Subject: RDMA/core: Annotate timeout as unsigned long The ucma users supply timeout in u32 format, it means that any number with most significant bit set will be converted to negative value by various rdma_*, cma_* and sa_query functions, which treat timeout as int. In the lowest level, the timeout is converted back to be unsigned long. Remove this ambiguous conversion by updating all function signatures to receive unsigned long. Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 2 +- include/rdma/ib_cm.h | 2 +- include/rdma/ib_sa.h | 6 +++--- include/rdma/rdma_cm.h | 5 +++-- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index eebbe63b530c..2734c895c1bf 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -99,7 +99,7 @@ int rdma_translate_ip(const struct sockaddr *addr, * @context: User-specified context associated with the call. */ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, - struct rdma_dev_addr *addr, int timeout_ms, + struct rdma_dev_addr *addr, unsigned long timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), bool resolve_by_gid_attr, void *context); diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index c10f4b5ea8ab..49f4f75499b3 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -583,7 +583,7 @@ struct ib_cm_sidr_req_param { struct sa_path_rec *path; const struct ib_gid_attr *sgid_attr; __be64 service_id; - int timeout_ms; + unsigned long timeout_ms; const void *private_data; u8 private_data_len; u8 max_cm_retries; diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 95ce625a49e3..19520979b84c 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -451,7 +451,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query); int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct sa_path_rec *rec, - ib_sa_comp_mask comp_mask, int timeout_ms, + ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, void *context), @@ -460,7 +460,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, int ib_sa_service_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, - ib_sa_comp_mask comp_mask, int timeout_ms, + ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, @@ -568,7 +568,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, - int timeout_ms, gfp_t gfp_mask, + unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_guidinfo_rec *resp, void *context), diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 53d93c7d8e01..60987a5903b7 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -196,7 +196,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr); * @timeout_ms: Time to wait for resolution to complete. */ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, int timeout_ms); + const struct sockaddr *dst_addr, + unsigned long timeout_ms); /** * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier @@ -206,7 +207,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, * Users must have first called rdma_resolve_addr to resolve a dst_addr * into an RDMA address before calling this routine. */ -int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms); +int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms); /** * rdma_create_qp - Allocate a QP and associate it with the specified RDMA -- cgit v1.2.3 From 05d940d3a3ec4e6d5d6a726aae4d73c5c64603c6 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 10 Oct 2018 09:19:12 +0300 Subject: RDMA/nldev: Allow IB device rename through RDMA netlink Provide an option to rename IB device name through RDMA netlink and limit it to users with ADMIN capability only. Signed-off-by: Leon Romanovsky Reviewed-by: Parav Pandit Signed-off-by: Doug Ledford --- include/uapi/rdma/rdma_netlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index edba6351ac13..f9c41bf59efc 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -227,8 +227,9 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_UNSPEC, RDMA_NLDEV_CMD_GET, /* can dump */ + RDMA_NLDEV_CMD_SET, - /* 2 - 4 are free to use */ + /* 3 - 4 are free to use */ RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ -- cgit v1.2.3 From 1ae4cfa03902c83d1d77123e5ac8f0812c61b90e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 7 Oct 2018 12:12:41 +0300 Subject: RDMA/core: Rename ports_parent to ports_kobj Normally kobj objects have kobj suffix to reflect it. Rename ports_parent to ports_kobj. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7ce617d77f8f..7d732cf87886 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2542,7 +2542,7 @@ struct ib_device { /* First group for device attributes, NULL terminated array */ const struct attribute_group *groups[2]; - struct kobject *ports_parent; + struct kobject *ports_kobj; struct list_head port_list; enum { -- cgit v1.2.3 From d4122f5abef844112799d2056fdc7bbedbc913f3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 11 Oct 2018 22:31:53 +0300 Subject: RDMA/core: Allow existing drivers to set one sysfs group per device Currently many rdma drivers are creating device attribute files using device_create_file() with device specific attributes. Device specific attributes should be exposed via well defined netlink device attributes in future. Introduce an API rdma_set_device_sysfs_group() for existing drivers to set a group for sysfs attributes for legacy. This API is only for exposing legacy attributes which existed for sometime now. New drivers should not be using this API and rather follow netlink path. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7d732cf87886..b17eea0373cb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2539,8 +2539,11 @@ struct ib_device { struct module *owner; struct device dev; - /* First group for device attributes, NULL terminated array */ - const struct attribute_group *groups[2]; + /* First group for device attributes, + * Second group for driver provided attributes (optional). + * It is NULL terminated array. + */ + const struct attribute_group *groups[3]; struct kobject *ports_kobj; struct list_head port_list; @@ -4191,4 +4194,27 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile); int uverbs_destroy_def_handler(struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs); + +/** + * rdma_set_device_sysfs_group - Set device attributes group to have + * driver specific sysfs entries at + * for infiniband class. + * + * @device: device pointer for which attributes to be created + * @group: Pointer to group which should be added when device + * is registered with sysfs. + * rdma_set_device_sysfs_group() allows existing drivers to expose one + * group per device to have sysfs attributes. + * + * NOTE: New drivers should not make use of this API; instead new device + * parameter should be exposed via netlink command. This API and mechanism + * exist only for existing drivers. + */ +static inline void +rdma_set_device_sysfs_group(struct ib_device *dev, + const struct attribute_group *group) +{ + dev->groups[1] = group; +} + #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 6f4bc0ea682b59d7013cbc5ced2d4dd73067a33f Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Tue, 9 Oct 2018 12:05:15 +0300 Subject: IB/mlx5: Allow scatter to CQE without global signaled WRs Requester scatter to CQE is restricted to QPs configured to signal all WRs. This patch adds ability to enable scatter to cqe (force enable) in the requester without sig_all, for users who do not want all WRs signaled but rather just the ones whose data found in the CQE. Signed-off-by: Yonatan Cohen Reviewed-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 6056625237cf..8fa9f90e2bb1 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -47,6 +47,7 @@ enum { MLX5_QP_FLAG_TYPE_DCI = 1 << 5, MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6, MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7, + MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8, }; enum { -- cgit v1.2.3 From a60109dc9a954ef9eddba6577e2d2e9e7952e487 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 10 Oct 2018 09:25:16 +0300 Subject: IB/mlx5: Add support for extended atomic operations Extended atomic operations cmp&swp and fetch&add is a Mellanox feature extending the standard atomic operation to use, varied operand sizes, as apposed to normal atomic operation that use an 8 byte operand only. Extended atomics allows masking the results and arguments. This patch configures QP to support extended atomic operation with the maximum size possible, as exposed by HCA capabilities. Signed-off-by: Yonatan Cohen Reviewed-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8fb072aa8671..a73c701edd16 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -97,14 +97,15 @@ enum { }; enum { - MLX5_ATOMIC_MODE_IB_COMP = 1 << 16, - MLX5_ATOMIC_MODE_CX = 2 << 16, - MLX5_ATOMIC_MODE_8B = 3 << 16, - MLX5_ATOMIC_MODE_16B = 4 << 16, - MLX5_ATOMIC_MODE_32B = 5 << 16, - MLX5_ATOMIC_MODE_64B = 6 << 16, - MLX5_ATOMIC_MODE_128B = 7 << 16, - MLX5_ATOMIC_MODE_256B = 8 << 16, + MLX5_ATOMIC_MODE_OFFSET = 16, + MLX5_ATOMIC_MODE_IB_COMP = 1, + MLX5_ATOMIC_MODE_CX = 2, + MLX5_ATOMIC_MODE_8B = 3, + MLX5_ATOMIC_MODE_16B = 4, + MLX5_ATOMIC_MODE_32B = 5, + MLX5_ATOMIC_MODE_64B = 6, + MLX5_ATOMIC_MODE_128B = 7, + MLX5_ATOMIC_MODE_256B = 8, }; enum { @@ -162,13 +163,11 @@ enum mlx5_dcbx_oper_mode { MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, }; -enum mlx5_dct_atomic_mode { - MLX5_ATOMIC_MODE_DCT_CX = 2, -}; - enum { MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, + MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP = 1 << 2, + MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD = 1 << 3, }; enum mlx5_page_fault_resume_flags { -- cgit v1.2.3