From 511cbce2ff8b9d322077909ee90c5d4b67b29b75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Nov 2015 14:56:14 +0100 Subject: irq_poll: make blk-iopoll available outside the block layer The new name is irq_poll as iopoll is already taken. Better suggestions welcome. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- include/linux/blk-iopoll.h | 46 ---------------------------------------------- include/linux/interrupt.h | 2 +- include/linux/irq_poll.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/trace/events/irq.h | 2 +- 4 files changed, 48 insertions(+), 48 deletions(-) delete mode 100644 include/linux/blk-iopoll.h create mode 100644 include/linux/irq_poll.h (limited to 'include') diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h deleted file mode 100644 index 77ae77c0b704..000000000000 --- a/include/linux/blk-iopoll.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef BLK_IOPOLL_H -#define BLK_IOPOLL_H - -struct blk_iopoll; -typedef int (blk_iopoll_fn)(struct blk_iopoll *, int); - -struct blk_iopoll { - struct list_head list; - unsigned long state; - unsigned long data; - int weight; - int max; - blk_iopoll_fn *poll; -}; - -enum { - IOPOLL_F_SCHED = 0, - IOPOLL_F_DISABLE = 1, -}; - -/* - * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". - */ -static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) -{ - if (!test_bit(IOPOLL_F_DISABLE, &iop->state)) - return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); - - return 1; -} - -static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) -{ - return test_bit(IOPOLL_F_DISABLE, &iop->state); -} - -extern void blk_iopoll_sched(struct blk_iopoll *); -extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *); -extern void blk_iopoll_complete(struct blk_iopoll *); -extern void __blk_iopoll_complete(struct blk_iopoll *); -extern void blk_iopoll_enable(struct blk_iopoll *); -extern void blk_iopoll_disable(struct blk_iopoll *); - -#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ad16809c8596..7ff98c23199a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -412,7 +412,7 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, - BLOCK_IOPOLL_SOFTIRQ, + IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h new file mode 100644 index 000000000000..50c39dcd2cba --- /dev/null +++ b/include/linux/irq_poll.h @@ -0,0 +1,46 @@ +#ifndef IRQ_POLL_H +#define IRQ_POLL_H + +struct irq_poll; +typedef int (irq_poll_fn)(struct irq_poll *, int); + +struct irq_poll { + struct list_head list; + unsigned long state; + unsigned long data; + int weight; + int max; + irq_poll_fn *poll; +}; + +enum { + IRQ_POLL_F_SCHED = 0, + IRQ_POLL_F_DISABLE = 1, +}; + +/* + * Returns 0 if we successfully set the IRQ_POLL_F_SCHED bit, indicating + * that we were the first to acquire this iop for scheduling. If this iop + * is currently disabled, return "failure". + */ +static inline int irq_poll_sched_prep(struct irq_poll *iop) +{ + if (!test_bit(IRQ_POLL_F_DISABLE, &iop->state)) + return test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state); + + return 1; +} + +static inline int irq_poll_disable_pending(struct irq_poll *iop) +{ + return test_bit(IRQ_POLL_F_DISABLE, &iop->state); +} + +extern void irq_poll_sched(struct irq_poll *); +extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); +extern void irq_poll_complete(struct irq_poll *); +extern void __irq_poll_complete(struct irq_poll *); +extern void irq_poll_enable(struct irq_poll *); +extern void irq_poll_disable(struct irq_poll *); + +#endif diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ff8f6c091a15..f95f25e786ef 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -15,7 +15,7 @@ struct softirq_action; softirq_name(NET_TX) \ softirq_name(NET_RX) \ softirq_name(BLOCK) \ - softirq_name(BLOCK_IOPOLL) \ + softirq_name(IRQ_POLL) \ softirq_name(TASKLET) \ softirq_name(SCHED) \ softirq_name(HRTIMER) \ -- cgit v1.2.3 From ea51190c03150fce4d9e428bfb608abbe0991db8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:41:11 -0800 Subject: irq_poll: fold irq_poll_sched_prep into irq_poll_sched There is no good reason to keep them apart, and this makes using the API a bit simpler. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- include/linux/irq_poll.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h index 50c39dcd2cba..57efae661400 100644 --- a/include/linux/irq_poll.h +++ b/include/linux/irq_poll.h @@ -18,19 +18,6 @@ enum { IRQ_POLL_F_DISABLE = 1, }; -/* - * Returns 0 if we successfully set the IRQ_POLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". - */ -static inline int irq_poll_sched_prep(struct irq_poll *iop) -{ - if (!test_bit(IRQ_POLL_F_DISABLE, &iop->state)) - return test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state); - - return 1; -} - static inline int irq_poll_disable_pending(struct irq_poll *iop) { return test_bit(IRQ_POLL_F_DISABLE, &iop->state); -- cgit v1.2.3 From 0bc92ace52ef3ed1c8eb9bcf36cd3d7ca72d5d14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:56:36 -0800 Subject: irq_poll: fold irq_poll_disable_pending into irq_poll_softirq Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- include/linux/irq_poll.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h index 57efae661400..b4ad03cee9d4 100644 --- a/include/linux/irq_poll.h +++ b/include/linux/irq_poll.h @@ -18,11 +18,6 @@ enum { IRQ_POLL_F_DISABLE = 1, }; -static inline int irq_poll_disable_pending(struct irq_poll *iop) -{ - return test_bit(IRQ_POLL_F_DISABLE, &iop->state); -} - extern void irq_poll_sched(struct irq_poll *); extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); extern void irq_poll_complete(struct irq_poll *); -- cgit v1.2.3 From 83af187d1b776753d58b53d155318d94f9428e92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:57:25 -0800 Subject: irq_poll: mark __irq_poll_complete static Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- include/linux/irq_poll.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h index b4ad03cee9d4..8c4b4087f1f2 100644 --- a/include/linux/irq_poll.h +++ b/include/linux/irq_poll.h @@ -21,7 +21,6 @@ enum { extern void irq_poll_sched(struct irq_poll *); extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); extern void irq_poll_complete(struct irq_poll *); -extern void __irq_poll_complete(struct irq_poll *); extern void irq_poll_enable(struct irq_poll *); extern void irq_poll_disable(struct irq_poll *); -- cgit v1.2.3 From 839a301dc2c007ec942b73a0025695056648f59b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:57:52 -0800 Subject: irq_poll: remove unused data and max fields Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- include/linux/irq_poll.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h index 8c4b4087f1f2..3e8c1b8fb9be 100644 --- a/include/linux/irq_poll.h +++ b/include/linux/irq_poll.h @@ -7,9 +7,7 @@ typedef int (irq_poll_fn)(struct irq_poll *, int); struct irq_poll { struct list_head list; unsigned long state; - unsigned long data; int weight; - int max; irq_poll_fn *poll; }; -- cgit v1.2.3 From 14d3a3b2498edadec344cb11e60e66091f5daf63 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Dec 2015 11:53:03 -0800 Subject: IB: add a proper completion queue abstraction This adds an abstraction that allows ULPs to simply pass a completion object and completion callback with each submitted WR and let the RDMA core handle the nitty gritty details of how to handle completion interrupts and poll the CQ. In detail there is a new ib_cqe structure which just contains the completion callback, and which can be used to get at the containing object using container_of. It is pointed to by the WR and WC as an alternative to the wr_id field, similar to how many ULPs already use the field to store a pointer using casts. A driver using the new completion callbacks allocates it's CQs using the new ib_create_cq API, which in addition to the number of CQEs and the completion vectors also takes a mode on how we poll for CQEs. Three modes are available: direct for drivers that never take CQ interrupts and just poll for them, softirq to poll from softirq context using the to be renamed blk-iopoll infrastructure which takes care of rearming and budgeting, or a workqueue for consumer who want to be called from user context. Thanks a lot to Sagi Grimberg who helped reviewing the API, wrote the current version of the workqueue code because my two previous attempts sucked too much and converted the iSER initiator to the new API. Signed-off-by: Christoph Hellwig --- include/rdma/ib_verbs.h | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9a68a19532ba..131dd4b5176c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ #include extern struct workqueue_struct *ib_wq; +extern struct workqueue_struct *ib_comp_wq; union ib_gid { u8 raw[16]; @@ -758,7 +760,10 @@ enum ib_wc_flags { }; struct ib_wc { - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; enum ib_wc_status status; enum ib_wc_opcode opcode; u32 vendor_err; @@ -1079,9 +1084,16 @@ struct ib_mw_bind_info { int mw_access_flags; }; +struct ib_cqe { + void (*done)(struct ib_cq *cq, struct ib_wc *wc); +}; + struct ib_send_wr { struct ib_send_wr *next; - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; struct ib_sge *sg_list; int num_sge; enum ib_wr_opcode opcode; @@ -1175,7 +1187,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr) struct ib_recv_wr { struct ib_recv_wr *next; - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; struct ib_sge *sg_list; int num_sge; }; @@ -1306,6 +1321,12 @@ struct ib_ah { typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); +enum ib_poll_context { + IB_POLL_DIRECT, /* caller context, no hw completions */ + IB_POLL_SOFTIRQ, /* poll from softirq context */ + IB_POLL_WORKQUEUE, /* poll from workqueue */ +}; + struct ib_cq { struct ib_device *device; struct ib_uobject *uobject; @@ -1314,6 +1335,12 @@ struct ib_cq { void *cq_context; int cqe; atomic_t usecnt; /* count number of work queues */ + enum ib_poll_context poll_ctx; + struct ib_wc *wc; + union { + struct irq_poll iop; + struct work_struct work; + }; }; struct ib_srq { @@ -2453,6 +2480,11 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->post_recv(qp, recv_wr, bad_recv_wr); } +struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx); +void ib_free_cq(struct ib_cq *cq); +int ib_process_cq_direct(struct ib_cq *cq, int budget); + /** * ib_create_cq - Creates a CQ on the specified device. * @device: The device on which to create the CQ. -- cgit v1.2.3 From 3e153a93a1c12e3354dd38cca414fb51a15136a2 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 18 Dec 2015 10:59:44 +0200 Subject: IB/core: Save the device attributes on the device structure This way both the IB core and upper level drivers can access these cached device attributes rather than querying or caching them on their own. Signed-off-by: Ira Weiny Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 120da1d7f57e..730dcfb209b9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1823,6 +1823,7 @@ struct ib_device { u16 is_switch:1; u8 node_type; u8 phys_port_cnt; + struct ib_device_attr attrs; /** * The following mandatory functions are used only at device -- cgit v1.2.3 From 182a2da0c768a9ec64abb0d6009667057f1c06af Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 18 Dec 2015 10:59:50 +0200 Subject: IB/core: Remove ib_query_device The copy of the attributes present on the device is now used by all consumers except for uverbs in case of serving user-space query, where dev->query_device is called. Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 730dcfb209b9..0567866a05e2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1913,9 +1913,6 @@ int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); -int ib_query_device(struct ib_device *device, - struct ib_device_attr *device_attr); - int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); -- cgit v1.2.3 From b39ffa1df505378336a85064ad9ec403765bbb0b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 23 Dec 2015 14:56:47 +0200 Subject: IB/core: Add gid_type to gid attribute In order to support multiple GID types, we need to store the gid_type with each GID. This is also aligned with the RoCE v2 annex "RoCEv2 PORT GID table entries shall have a "GID type" attribute that denotes the L3 Address type". The currently supported GID is IB_GID_TYPE_IB which is also RoCE v1 GID type. This implies that gid_type should be added to roce_gid_table meta-data. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/rdma/ib_cache.h | 4 ++++ include/rdma/ib_sa.h | 1 + include/rdma/ib_verbs.h | 11 ++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 269a27cf0a46..e30f19bd4a41 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -60,6 +60,7 @@ int ib_get_cached_gid(struct ib_device *device, * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. + * @gid_type: The GID type to search for. * @ndev: In RoCE, the net device of the device. NULL means ignore. * @port_num: The port number of the device where the GID value was found. * @index: The index into the cached GID table where the GID was found. This @@ -70,6 +71,7 @@ int ib_get_cached_gid(struct ib_device *device, */ int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, + enum ib_gid_type gid_type, struct net_device *ndev, u8 *port_num, u16 *index); @@ -79,6 +81,7 @@ int ib_find_cached_gid(struct ib_device *device, * GID value occurs * @device: The device to query. * @gid: The GID value to search for. + * @gid_type: The GID type to search for. * @port_num: The port number of the device where the GID value sould be * searched. * @ndev: In RoCE, the net device of the device. Null means ignore. @@ -90,6 +93,7 @@ int ib_find_cached_gid(struct ib_device *device, */ int ib_find_cached_gid_by_port(struct ib_device *device, const union ib_gid *gid, + enum ib_gid_type gid_type, u8 port_num, struct net_device *ndev, u16 *index); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 301969552d0a..0a40ed241156 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -160,6 +160,7 @@ struct ib_sa_path_rec { int ifindex; /* ignored in IB */ struct net *net; + enum ib_gid_type gid_type; }; static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b14feaba39f1..00d2006ad816 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -69,7 +69,15 @@ union ib_gid { extern union ib_gid zgid; +enum ib_gid_type { + /* If link layer is Ethernet, this is RoCE V1 */ + IB_GID_TYPE_IB = 0, + IB_GID_TYPE_ROCE = 0, + IB_GID_TYPE_SIZE +}; + struct ib_gid_attr { + enum ib_gid_type gid_type; struct net_device *ndev; }; @@ -2245,7 +2253,8 @@ int ib_modify_port(struct ib_device *device, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, - struct net_device *ndev, u8 *port_num, u16 *index); + enum ib_gid_type gid_type, struct net_device *ndev, + u8 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); -- cgit v1.2.3 From 7766a99fdcd30c78fc8299db9102e3624232007c Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 23 Dec 2015 14:56:50 +0200 Subject: IB/core: Add ROCE_UDP_ENCAP (RoCE V2) type Adding RoCE v2 GID type and port type. Vendors which support this type will get their GID table populated with RoCE v2 GIDs automatically. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 00d2006ad816..ab05ef695d63 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -73,6 +73,7 @@ enum ib_gid_type { /* If link layer is Ethernet, this is RoCE V1 */ IB_GID_TYPE_IB = 0, IB_GID_TYPE_ROCE = 0, + IB_GID_TYPE_ROCE_UDP_ENCAP = 1, IB_GID_TYPE_SIZE }; @@ -403,6 +404,7 @@ union rdma_protocol_stats { #define RDMA_CORE_CAP_PROT_IB 0x00100000 #define RDMA_CORE_CAP_PROT_ROCE 0x00200000 #define RDMA_CORE_CAP_PROT_IWARP 0x00400000 +#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000 #define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ | RDMA_CORE_CAP_IB_MAD \ @@ -415,6 +417,12 @@ union rdma_protocol_stats { | RDMA_CORE_CAP_IB_CM \ | RDMA_CORE_CAP_AF_IB \ | RDMA_CORE_CAP_ETH_AH) +#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \ + (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \ + | RDMA_CORE_CAP_IB_MAD \ + | RDMA_CORE_CAP_IB_CM \ + | RDMA_CORE_CAP_AF_IB \ + | RDMA_CORE_CAP_ETH_AH) #define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \ | RDMA_CORE_CAP_IW_CM) #define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \ @@ -2000,6 +2008,17 @@ static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) } static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & + (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); +} + +static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; +} + +static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; } @@ -2011,8 +2030,8 @@ static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_n static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & - (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE); + return rdma_protocol_ib(device, port_num) || + rdma_protocol_roce(device, port_num); } /** -- cgit v1.2.3 From c865f24628b9310e1815d59f723a34ea3df4890f Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 23 Dec 2015 14:56:51 +0200 Subject: IB/core: Add rdma_network_type to wc Providers should tell IB core the wc's network type. This is used in order to search for the proper GID in the GID table. When using HCAs that can't provide this info, IB core tries to deep examine the packet and extract the GID type by itself. We choose sgid_index and type from all the matching entries in RDMA-CM based on hint from the IP stack and we set hop_limit for the IP packet based on above hint from IP stack. Signed-off-by: Matan Barak Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 1 + include/rdma/ib_verbs.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 11528591d0d7..c799caacf353 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -83,6 +83,7 @@ struct rdma_dev_addr { int bound_dev_if; enum rdma_transport_type transport; struct net *net; + enum rdma_network_type network; }; /** diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ab05ef695d63..368fc22f30f1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -51,6 +51,8 @@ #include #include #include +#include +#include #include #include @@ -109,6 +111,35 @@ enum rdma_protocol_type { __attribute_const__ enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type); +enum rdma_network_type { + RDMA_NETWORK_IB, + RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB, + RDMA_NETWORK_IPV4, + RDMA_NETWORK_IPV6 +}; + +static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type) +{ + if (network_type == RDMA_NETWORK_IPV4 || + network_type == RDMA_NETWORK_IPV6) + return IB_GID_TYPE_ROCE_UDP_ENCAP; + + /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */ + return IB_GID_TYPE_IB; +} + +static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type, + union ib_gid *gid) +{ + if (gid_type == IB_GID_TYPE_IB) + return RDMA_NETWORK_IB; + + if (ipv6_addr_v4mapped((struct in6_addr *)gid)) + return RDMA_NETWORK_IPV4; + else + return RDMA_NETWORK_IPV6; +} + enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, IB_LINK_LAYER_INFINIBAND, @@ -537,6 +568,17 @@ struct ib_grh { union ib_gid dgid; }; +union rdma_network_hdr { + struct ib_grh ibgrh; + struct { + /* The IB spec states that if it's IPv4, the header + * is located in the last 20 bytes of the header. + */ + u8 reserved[20]; + struct iphdr roce4grh; + }; +}; + enum { IB_MULTICAST_QPN = 0xffffff }; @@ -773,6 +815,7 @@ enum ib_wc_flags { IB_WC_IP_CSUM_OK = (1<<3), IB_WC_WITH_SMAC = (1<<4), IB_WC_WITH_VLAN = (1<<5), + IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6), }; struct ib_wc { @@ -798,6 +841,7 @@ struct ib_wc { u8 port_num; /* valid only for DR SMPs on switches */ u8 smac[ETH_ALEN]; u16 vlan_id; + u8 network_hdr_type; }; enum ib_cq_notify_flags { -- cgit v1.2.3 From 200298326b276d8dbeff204f7d407432100d9963 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 23 Dec 2015 14:56:53 +0200 Subject: IB/core: Validate route when we init ah In order to make sure API users don't try to use SGIDs which don't conform to the routing table, validate the route before searching the RoCE GID table. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index c799caacf353..87156dcb73bb 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -92,8 +92,8 @@ struct rdma_dev_addr { * * The dev_addr->net field must be initialized. */ -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, - u16 *vlan_id); +int rdma_translate_ip(const struct sockaddr *addr, + struct rdma_dev_addr *dev_addr, u16 *vlan_id); /** * rdma_resolve_ip - Resolve source and destination IP addresses to @@ -118,6 +118,10 @@ int rdma_resolve_ip(struct rdma_addr_client *client, struct rdma_dev_addr *addr, void *context), void *context); +int rdma_resolve_ip_route(struct sockaddr *src_addr, + const struct sockaddr *dst_addr, + struct rdma_dev_addr *addr); + void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, @@ -127,7 +131,7 @@ int rdma_addr_size(struct sockaddr *addr); int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *smac, u16 *vlan_id, int if_index); + u8 *smac, u16 *vlan_id, int *if_index); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { -- cgit v1.2.3 From 25f40220e56b507fce186985c0ed2967b7f04596 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Dec 2015 14:56:56 +0200 Subject: IB/core: Initialize UD header structure with IP and UDP headers ib_ud_header_init() is used to format InfiniBand headers in a buffer up to (but not with) BTH. For RoCE UDP ENCAP it is required that this function would be able to build also IP and UDP headers. Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/rdma/ib_pack.h | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index e99d8f9a4551..a1930819b97e 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -41,6 +41,8 @@ enum { IB_ETH_BYTES = 14, IB_VLAN_BYTES = 4, IB_GRH_BYTES = 40, + IB_IP4_BYTES = 20, + IB_UDP_BYTES = 8, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 }; @@ -223,6 +225,27 @@ struct ib_unpacked_eth { __be16 type; }; +struct ib_unpacked_ip4 { + u8 ver; + u8 hdr_len; + u8 tos; + __be16 tot_len; + __be16 id; + __be16 frag_off; + u8 ttl; + u8 protocol; + __be16 check; + __be32 saddr; + __be32 daddr; +}; + +struct ib_unpacked_udp { + __be16 sport; + __be16 dport; + __be16 length; + __be16 csum; +}; + struct ib_unpacked_vlan { __be16 tag; __be16 type; @@ -237,6 +260,10 @@ struct ib_ud_header { struct ib_unpacked_vlan vlan; int grh_present; struct ib_unpacked_grh grh; + int ipv4_present; + struct ib_unpacked_ip4 ip4; + int udp_present; + struct ib_unpacked_udp udp; struct ib_unpacked_bth bth; struct ib_unpacked_deth deth; int immediate_present; @@ -253,13 +280,17 @@ void ib_unpack(const struct ib_field *desc, void *buf, void *structure); -void ib_ud_header_init(int payload_bytes, - int lrh_present, - int eth_present, - int vlan_present, - int grh_present, - int immediate_present, - struct ib_ud_header *header); +__be16 ib_ud_ip4_csum(struct ib_ud_header *header); + +int ib_ud_header_init(int payload_bytes, + int lrh_present, + int eth_present, + int vlan_present, + int grh_present, + int ip_version, + int udp_present, + int immediate_present, + struct ib_ud_header *header); int ib_ud_header_pack(struct ib_ud_header *header, void *buf); -- cgit v1.2.3 From bee3c3c91865d520cb692689500df051e4ca3dd6 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Dec 2015 14:56:57 +0200 Subject: IB/cma: Join and leave multicast groups with IGMP Since RoCEv2 is a protocol over IP header it is required to send IGMP join and leave requests to the network when joining and leaving multicast groups. Signed-off-by: Moni Shoua Signed-off-by: Doug Ledford --- include/rdma/ib_sa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 0a40ed241156..cdc1c81aa275 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -403,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, */ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, + struct net_device *ndev, + enum ib_gid_type gid_type, struct ib_ah_attr *ah_attr); /** -- cgit v1.2.3 From 0de60af649533ad8d9aaeab1df710e6a728d45ea Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:19 +0200 Subject: net/mlx5_core: Introduce access functions to enable/disable RoCE A mlx5 Ethernet port must be explicitly enabled for RoCE. When RoCE is not enabled on the port, the NIC will refuse to create QPs attached to it and incoming RoCE packets will be considered by the NIC as plain Ethernet packets. Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- include/linux/mlx5/vport.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 967e0fd06e89..4c9ac604cccd 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -52,4 +52,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); +int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); +int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); + #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 9efa75254593d6ca3ae54bac8153f47e1a7cbcda Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:20 +0200 Subject: net/mlx5_core: Introduce access functions to query vport RoCE fields Introduce access functions to query NIC vport system_image_guid, node_guid and qkey_viol_cntr. Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++++- include/linux/mlx5/vport.h | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1565324eb620..49b34c6466ac 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2141,7 +2141,15 @@ struct mlx5_ifc_nic_vport_context_bits { u8 reserved_0[0x1f]; u8 roce_en[0x1]; - u8 reserved_1[0x760]; + u8 reserved_1[0x120]; + + u8 system_image_guid[0x40]; + u8 port_guid[0x40]; + u8 node_guid[0x40]; + + u8 reserved_5[0x140]; + u8 qkey_violation_counter[0x10]; + u8 reserved_6[0x430]; u8 reserved_2[0x5]; u8 allowed_list_type[0x3]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 4c9ac604cccd..dfb2d9497d2d 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -37,6 +37,11 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); +int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, + u64 *system_image_guid); +int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); +int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, + u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid); -- cgit v1.2.3 From 3f89a643eb29543af0838d37604bbc29a4e1eb60 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:21 +0200 Subject: IB/mlx5: Extend query_device/port to support RoCE Using the vport access functions to retrieve the Ethernet specific information and return this information in ib_query_device and ib_query_port. Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5c857f2a20d7..7b9c976b42d9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -632,13 +632,6 @@ extern struct workqueue_struct *mlx5_core_wq; .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field -struct ib_field { - size_t struct_offset_bytes; - size_t struct_size_bytes; - int offset_bits; - int size_bits; -}; - static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) { return pci_get_drvdata(pdev); -- cgit v1.2.3 From cb34be6da25f45034ef4ff6103d401b451165e39 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:22 +0200 Subject: IB/mlx5: Set network_hdr_type upon RoCE responder completion When handling a responder completion, if the link layer is Ethernet, set the work completion network_hdr_type field according to CQE's info and the IB_WC_WITH_NETWORK_HDR_TYPE flag. Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b473cbfa7ef..84aa7e0e1dfa 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -628,6 +628,12 @@ enum { CQE_RSS_HTYPE_L4 = 0x3 << 2, }; +enum { + MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0, + MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1, + MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2, +}; + enum { CQE_L2_OK = 1 << 0, CQE_L3_OK = 1 << 1, -- cgit v1.2.3 From 3cca26069a4b7f6d8fd3dc0ed707e795c22712e2 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:23 +0200 Subject: IB/mlx5: Support IB device's callbacks for adding/deleting GIDs These callbacks write into the mlx5 RoCE address table. Upon del_gid we write a zero'd GID. Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 84aa7e0e1dfa..ea4281b00c8d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -278,6 +278,26 @@ enum { MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, }; +enum { + MLX5_ROCE_VERSION_1 = 0, + MLX5_ROCE_VERSION_2 = 2, +}; + +enum { + MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1, + MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2, +}; + +enum { + MLX5_ROCE_L3_TYPE_IPV4 = 0, + MLX5_ROCE_L3_TYPE_IPV6 = 1, +}; + +enum { + MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1, + MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2, +}; + enum { MLX5_OPCODE_NOP = 0x00, MLX5_OPCODE_SEND_INVAL = 0x01, -- cgit v1.2.3 From 2811ba51b04958cd001b6409c9f70e8563376346 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:24 +0200 Subject: IB/mlx5: Add RoCE fields to Address Vector Set the address handle and QP address path fields according to the link layer type (IB/Eth). Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- include/linux/mlx5/qp.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f079fb1a31f7..a9ad40169191 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -248,8 +248,12 @@ struct mlx5_av { __be32 dqp_dct; u8 stat_rate_sl; u8 fl_mlid; - __be16 rlid; - u8 reserved0[10]; + union { + __be16 rlid; + __be16 udp_sport; + }; + u8 reserved0[4]; + u8 rmac[6]; u8 tclass; u8 hop_limit; __be32 grh_gid_fl; @@ -456,11 +460,16 @@ struct mlx5_qp_path { u8 static_rate; u8 hop_limit; __be32 tclass_flowlabel; - u8 rgid[16]; - u8 rsvd1[4]; - u8 sl; + union { + u8 rgid[16]; + u8 rip[16]; + }; + u8 f_dscp_ecn_prio; + u8 ecn_dscp; + __be16 udp_sport; + u8 dci_cfi_prio_sl; u8 port; - u8 rsvd2[6]; + u8 rmac[6]; }; struct mlx5_qp_context { -- cgit v1.2.3 From b1adc7146af54487bb2ce1cb3012e66a1bbd8e39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Dec 2015 19:12:45 +0100 Subject: IB: start documenting device capabilities Just IB_DEVICE_LOCAL_DMA_LKEY and IB_DEVICE_MEM_MGT_EXTENSIONS for now as I'm most familar with those. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-By: Jason Gunthorpe Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 368fc22f30f1..f06ce80f12e3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -162,6 +162,14 @@ enum ib_device_cap_flags { IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), IB_DEVICE_SRQ_RESIZE = (1<<13), IB_DEVICE_N_NOTIFY_CQ = (1<<14), + + /* + * This device supports a per-device lkey or stag that can be + * used without performing a memory registration for the local + * memory. Note that ULPs should never check this flag, but + * instead of use the local_dma_lkey flag in the ib_pd structure, + * which will always contain a usable lkey. + */ IB_DEVICE_LOCAL_DMA_LKEY = (1<<15), IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ IB_DEVICE_MEM_WINDOW = (1<<17), @@ -175,6 +183,16 @@ enum ib_device_cap_flags { IB_DEVICE_UD_IP_CSUM = (1<<18), IB_DEVICE_UD_TSO = (1<<19), IB_DEVICE_XRC = (1<<20), + + /* + * This device supports the IB "base memory management extension", + * which includes support for fast registrations (IB_WR_REG_MR, + * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs). This flag should + * also be set by any iWarp device which must support FRs to comply + * to the iWarp verbs spec. iWarp devices also support the + * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the + * stag. + */ IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), -- cgit v1.2.3 From a4d825a01e51b9c74d5a64237dd8b901822cf035 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Dec 2015 19:12:46 +0100 Subject: IB: remove ib_query_mr This functionality has no users and was only supported by the staged out EHCA driver. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe [core] Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f06ce80f12e3..368d9559472f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1288,15 +1288,6 @@ struct ib_phys_buf { u64 size; }; -struct ib_mr_attr { - struct ib_pd *pd; - u64 device_virt_addr; - u64 size; - int mr_access_flags; - u32 lkey; - u32 rkey; -}; - enum ib_mr_rereg_flags { IB_MR_REREG_TRANS = 1, IB_MR_REREG_PD = (1<<1), @@ -1846,8 +1837,6 @@ struct ib_device { int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); - int (*query_mr)(struct ib_mr *mr, - struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); struct ib_mr * (*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -2958,13 +2947,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } -/** - * ib_query_mr - Retrieves information about a specific memory region. - * @mr: The memory region to retrieve information about. - * @mr_attr: The attributes of the specified memory region. - */ -int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); - /** * ib_dereg_mr - Deregisters a memory region and removes it from the * HCA translation table. -- cgit v1.2.3 From b7d3e0a94fe128912bbebf0ae68551c85fd2d429 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Dec 2015 19:12:47 +0100 Subject: IB: remove support for phys MRs We have stopped using phys MRs in the kernel a while ago, so let's remove all the cruft used to implement them. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe [core] Reviewed-By: Devesh Sharma [ocrdma] Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 368d9559472f..86970f3e90b4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1288,6 +1288,10 @@ struct ib_phys_buf { u64 size; }; +/* + * XXX: these are apparently used for ->rereg_user_mr, no idea why they + * are hidden here instead of a uapi header! + */ enum ib_mr_rereg_flags { IB_MR_REREG_TRANS = 1, IB_MR_REREG_PD = (1<<1), @@ -1820,11 +1824,6 @@ struct ib_device { int wc_cnt); struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); - struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, @@ -1844,13 +1843,6 @@ struct ib_device { int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents); - int (*rereg_phys_mr)(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type); int (*bind_mw)(struct ib_qp *qp, -- cgit v1.2.3 From feb7c1e38bccfd18cc06677cb648ed2340788fe8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Dec 2015 19:12:48 +0100 Subject: IB: remove in-kernel support for memory windows Remove the unused ib_allow_mw and ib_bind_mw functions, remove the unused IB_WR_BIND_MW and IB_WC_BIND_MW opcodes and move ib_dealloc_mw into the uverbs module. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe [core] Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 83 ------------------------------------------------- 1 file changed, 83 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 86970f3e90b4..177844265c98 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -812,7 +812,6 @@ enum ib_wc_opcode { IB_WC_RDMA_READ, IB_WC_COMP_SWAP, IB_WC_FETCH_ADD, - IB_WC_BIND_MW, IB_WC_LSO, IB_WC_LOCAL_INV, IB_WC_REG_MR, @@ -1110,7 +1109,6 @@ enum ib_wr_opcode { IB_WR_REG_MR, IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, - IB_WR_BIND_MW, IB_WR_REG_SIG_MR, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. @@ -1145,23 +1143,6 @@ struct ib_sge { u32 lkey; }; -/** - * struct ib_mw_bind_info - Parameters for a memory window bind operation. - * @mr: A memory region to bind the memory window to. - * @addr: The address where the memory window should begin. - * @length: The length of the memory window, in bytes. - * @mw_access_flags: Access flags from enum ib_access_flags for the window. - * - * This struct contains the shared parameters for type 1 and type 2 - * memory window bind operations. - */ -struct ib_mw_bind_info { - struct ib_mr *mr; - u64 addr; - u64 length; - int mw_access_flags; -}; - struct ib_cqe { void (*done)(struct ib_cq *cq, struct ib_wc *wc); }; @@ -1237,19 +1218,6 @@ static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr) return container_of(wr, struct ib_reg_wr, wr); } -struct ib_bind_mw_wr { - struct ib_send_wr wr; - struct ib_mw *mw; - /* The new rkey for the memory window. */ - u32 rkey; - struct ib_mw_bind_info bind_info; -}; - -static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr) -{ - return container_of(wr, struct ib_bind_mw_wr, wr); -} - struct ib_sig_handover_wr { struct ib_send_wr wr; struct ib_sig_attrs *sig_attrs; @@ -1299,18 +1267,6 @@ enum ib_mr_rereg_flags { IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1) }; -/** - * struct ib_mw_bind - Parameters for a type 1 memory window bind operation. - * @wr_id: Work request id. - * @send_flags: Flags from ib_send_flags enum. - * @bind_info: More parameters of the bind operation. - */ -struct ib_mw_bind { - u64 wr_id; - int send_flags; - struct ib_mw_bind_info bind_info; -}; - struct ib_fmr_attr { int max_pages; int max_maps; @@ -1845,9 +1801,6 @@ struct ib_device { int sg_nents); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type); - int (*bind_mw)(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind); int (*dealloc_mw)(struct ib_mw *mw); struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, @@ -2975,42 +2928,6 @@ static inline u32 ib_inc_rkey(u32 rkey) return ((rkey + 1) & mask) | (rkey & ~mask); } -/** - * ib_alloc_mw - Allocates a memory window. - * @pd: The protection domain associated with the memory window. - * @type: The type of the memory window (1 or 2). - */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); - -/** - * ib_bind_mw - Posts a work request to the send queue of the specified - * QP, which binds the memory window to the given address range and - * remote access attributes. - * @qp: QP to post the bind work request on. - * @mw: The memory window to bind. - * @mw_bind: Specifies information about the memory window, including - * its address range, remote access rights, and associated memory region. - * - * If there is no immediate error, the function will update the rkey member - * of the mw parameter to its new value. The bind operation can still fail - * asynchronously. - */ -static inline int ib_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - /* XXX reference counting in corresponding MR? */ - return mw->device->bind_mw ? - mw->device->bind_mw(qp, mw, mw_bind) : - -ENOSYS; -} - -/** - * ib_dealloc_mw - Deallocates a memory window. - * @mw: The memory window to deallocate. - */ -int ib_dealloc_mw(struct ib_mw *mw); - /** * ib_alloc_fmr - Allocates a unmapped fast memory region. * @pd: The protection domain associated with the unmapped region. -- cgit v1.2.3 From 7cf9ff643b7f709173ca6ff6376fdff5b8d16124 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Dec 2015 19:12:53 +0100 Subject: IB: remove the struct ib_phys_buf definition Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe [core] Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 177844265c98..197b620eec9f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1251,11 +1251,6 @@ enum ib_access_flags { IB_ACCESS_ON_DEMAND = (1<<6), }; -struct ib_phys_buf { - u64 addr; - u64 size; -}; - /* * XXX: these are apparently used for ->rereg_user_mr, no idea why they * are hidden here instead of a uapi header! -- cgit v1.2.3 From ab67ed8de0250e9ad7956ff4d98c3c98858b6c3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Dec 2015 19:12:54 +0100 Subject: IB: remove the write-only usecnt field from struct ib_mr Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 197b620eec9f..36acb30eac85 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1414,7 +1414,6 @@ struct ib_mr { u64 iova; u32 length; unsigned int page_size; - atomic_t usecnt; /* count number of MWs */ }; struct ib_mw { -- cgit v1.2.3 From 145d9c5410324a6f64d8847f115e5f4fdfddce39 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 21 Dec 2015 08:20:29 -0600 Subject: IB/core: Display extended counter set if available Check if the extended counters are available and if so create the proper extended and additional counters. Signed-off-by: Christoph Lameter Reviewed-by: Ira Weiny Reviewed-by: Hal Rosenstock Signed-off-by: Doug Ledford --- include/rdma/ib_pma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_pma.h b/include/rdma/ib_pma.h index a5889f18807b..2f8a65c1fca7 100644 --- a/include/rdma/ib_pma.h +++ b/include/rdma/ib_pma.h @@ -42,6 +42,7 @@ */ #define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8) #define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9) +#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10) #define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12) #define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) -- cgit v1.2.3 From 301a721e1fcb890afc29997f46de9561686ed391 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 15 Dec 2015 20:30:10 +0200 Subject: IB/core: Add ib_is_udata_cleared Extending core and vendor verb commands require us to check that the unknown part of the user's given command is all zeros. Adding ib_is_udata_cleared in order to do so. Signed-off-by: Matan Barak Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 36acb30eac85..bbb1c349cb3d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -53,6 +53,8 @@ #include #include #include +#include +#include #include #include @@ -1922,6 +1924,31 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; } +static inline bool ib_is_udata_cleared(struct ib_udata *udata, + size_t offset, + size_t len) +{ + const void __user *p = udata->inbuf + offset; + bool ret = false; + u8 *buf; + + if (len > USHRT_MAX) + return false; + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return false; + + if (copy_from_user(buf, p, len)) + goto free; + + ret = !memchr_inv(buf, 0, len); + +free: + kfree(buf); + return ret; +} + /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for -- cgit v1.2.3 From 7c60bcbb68122b39fe3e92143abce01be75f3fa6 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 15 Dec 2015 20:30:11 +0200 Subject: IB/mlx5: Add support for hca_core_clock and timestamp_mask Reporting the hca_core_clock (in kHZ) and the timestamp_mask in query_device extended verb. timestamp_mask is used by users in order to know what is the valid range of the raw timestamps, while hca_core_clock reports the clock frequency that is used for timestamps. Signed-off-by: Matan Barak Reviewed-by: Moshe Lazer Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 49b34c6466ac..091d8343d594 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -794,15 +794,18 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_63[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_64[0x100]; + u8 reserved_64[0x20]; + u8 device_frequency_mhz[0x20]; + u8 device_frequency_khz[0x20]; + u8 reserved_65[0xa0]; - u8 reserved_65[0x1f]; + u8 reserved_66[0x1f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_66[0x220]; + u8 reserved_67[0x220]; }; enum { -- cgit v1.2.3 From b368d7cb8ceb77f481b066bd8be5fada82da7301 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 15 Dec 2015 20:30:12 +0200 Subject: IB/mlx5: Add hca_core_clock_offset to udata in init_ucontext Pass hca_core_clock_offset to user-space is mandatory in order to let the user-space read the free-running clock register from the right offset in the memory mapped page. Passing this value is done by changing the vendor's command and response of init_ucontext to be in extensible form. Signed-off-by: Matan Barak Reviewed-by: Moshe Lazer Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ea4281b00c8d..48c4623ad651 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -462,9 +462,12 @@ struct mlx5_init_seg { __be32 rsvd1[120]; __be32 initializing; struct health_buffer health; - __be32 rsvd2[884]; + __be32 rsvd2[880]; + __be32 internal_timer_h; + __be32 internal_timer_l; + __be32 rsvd3[2]; __be32 health_counter; - __be32 rsvd3[1019]; + __be32 rsvd4[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; -- cgit v1.2.3 From 7ca0bc53652382529b1cc1c39987cf93084d5828 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Dec 2015 12:16:09 +0200 Subject: IB/core: Align coding style of ib_device_cap_flags structure Modify enum ib_device_cap_flags such that other patches which add new enum values pass strict checkpatch.pl checks. Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 60 ++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index bbb1c349cb3d..862b8a07c028 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -149,21 +149,21 @@ enum rdma_link_layer { }; enum ib_device_cap_flags { - IB_DEVICE_RESIZE_MAX_WR = 1, - IB_DEVICE_BAD_PKEY_CNTR = (1<<1), - IB_DEVICE_BAD_QKEY_CNTR = (1<<2), - IB_DEVICE_RAW_MULTI = (1<<3), - IB_DEVICE_AUTO_PATH_MIG = (1<<4), - IB_DEVICE_CHANGE_PHY_PORT = (1<<5), - IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), - IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), - IB_DEVICE_SHUTDOWN_PORT = (1<<8), - IB_DEVICE_INIT_TYPE = (1<<9), - IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), - IB_DEVICE_SYS_IMAGE_GUID = (1<<11), - IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), - IB_DEVICE_SRQ_RESIZE = (1<<13), - IB_DEVICE_N_NOTIFY_CQ = (1<<14), + IB_DEVICE_RESIZE_MAX_WR = (1 << 0), + IB_DEVICE_BAD_PKEY_CNTR = (1 << 1), + IB_DEVICE_BAD_QKEY_CNTR = (1 << 2), + IB_DEVICE_RAW_MULTI = (1 << 3), + IB_DEVICE_AUTO_PATH_MIG = (1 << 4), + IB_DEVICE_CHANGE_PHY_PORT = (1 << 5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), + IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), + IB_DEVICE_SHUTDOWN_PORT = (1 << 8), + IB_DEVICE_INIT_TYPE = (1 << 9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), + IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), + IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), + IB_DEVICE_SRQ_RESIZE = (1 << 13), + IB_DEVICE_N_NOTIFY_CQ = (1 << 14), /* * This device supports a per-device lkey or stag that can be @@ -172,9 +172,9 @@ enum ib_device_cap_flags { * instead of use the local_dma_lkey flag in the ib_pd structure, * which will always contain a usable lkey. */ - IB_DEVICE_LOCAL_DMA_LKEY = (1<<15), - IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ - IB_DEVICE_MEM_WINDOW = (1<<17), + IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), + IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16), + IB_DEVICE_MEM_WINDOW = (1 << 17), /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB @@ -182,9 +182,9 @@ enum ib_device_cap_flags { * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. */ - IB_DEVICE_UD_IP_CSUM = (1<<18), - IB_DEVICE_UD_TSO = (1<<19), - IB_DEVICE_XRC = (1<<20), + IB_DEVICE_UD_IP_CSUM = (1 << 18), + IB_DEVICE_UD_TSO = (1 << 19), + IB_DEVICE_XRC = (1 << 20), /* * This device supports the IB "base memory management extension", @@ -195,15 +195,15 @@ enum ib_device_cap_flags { * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the * stag. */ - IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), - IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), - IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), - IB_DEVICE_RC_IP_CSUM = (1<<25), - IB_DEVICE_RAW_IP_CSUM = (1<<26), - IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), - IB_DEVICE_SIGNATURE_HANDOVER = (1<<30), - IB_DEVICE_ON_DEMAND_PAGING = (1<<31), + IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21), + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22), + IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23), + IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), + IB_DEVICE_RC_IP_CSUM = (1 << 25), + IB_DEVICE_RAW_IP_CSUM = (1 << 26), + IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), + IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), + IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), }; enum ib_signature_prot_cap { -- cgit v1.2.3 From 8a06ce59a4cd034c52c59c44ff6b0785a3969409 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Dec 2015 12:16:10 +0200 Subject: IB/core: Add cross-channel support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cross-channel feature allows to execute WQEs that involve synchronization of I/O operations’ on different QPs. This capability enables to program complex flows with a single function call, hereby significantly reducing overhead associated with I/O processing. Cross-channel operations support is indicated by HCA capability information. The queue pairs can be configured to work as a “sync master queue” or “sync slave queues”. The added flags are: 1. Device capability flag IB_DEVICE_CROSS_CHANNEL for the devices that can perform cross-channel operations. 2. CQ property flag IB_CQ_FLAGS_IGNORE_OVERRUN to disable CQ overrun check. This check is useless in cross-channel scenario. 3. QP property flags to indicate if queues are slave or master: * IB_QP_CREATE_MANAGED_SEND indicates that posted send work requests will not be executed immediately and requires enabling. * IB_QP_CREATE_MANAGED_RECV indicates that posted receive work requests will not be executed immediately and requires enabling. * IB_QP_CREATE_CROSS_CHANNEL declares the QP to work in cross-channel mode. If IB_QP_CREATE_MANAGED_SEND and IB_QP_CREATE_MANAGED_RECV are not provided, this QP will be sync master queue, else it will be sync slave. Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 862b8a07c028..75fcc97886de 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -201,6 +201,13 @@ enum ib_device_cap_flags { IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), IB_DEVICE_RC_IP_CSUM = (1 << 25), IB_DEVICE_RAW_IP_CSUM = (1 << 26), + /* + * Devices should set IB_DEVICE_CROSS_CHANNEL if they + * support execution of WQEs that involve synchronization + * of I/O operations with single completion queue managed + * by hardware. + */ + IB_DEVICE_CROSS_CHANNEL = (1 << 27), IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), @@ -246,6 +253,7 @@ struct ib_odp_caps { enum ib_cq_creation_flags { IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, + IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, }; struct ib_cq_init_attr { @@ -950,6 +958,9 @@ enum ib_qp_type { enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + IB_QP_CREATE_CROSS_CHANNEL = 1 << 2, + IB_QP_CREATE_MANAGED_SEND = 1 << 3, + IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, -- cgit v1.2.3 From 051f263098a90d208e2d20251bfd4834bc783214 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Dec 2015 12:16:11 +0200 Subject: IB/mlx5: Add driver cross-channel support Add support of cross-channel functionality to mlx5 driver. This includes ability to ignore overrun for CQ which intended for cross-channel, export device capability and configure the QP to be sync master/slave queues. The cross-channel enabled QP supports combination of three possible properties: * WQE processing on the receive queue of this QP * WQE processing on the send queue of this QP * WQE are supported on the send queue Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/qp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index a9ad40169191..fd1ff4110e80 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -130,6 +130,9 @@ enum { MLX5_QP_BIT_RWE = 1 << 14, MLX5_QP_BIT_RAE = 1 << 13, MLX5_QP_BIT_RIC = 1 << 4, + MLX5_QP_BIT_CC_SLAVE_RECV = 1 << 2, + MLX5_QP_BIT_CC_SLAVE_SEND = 1 << 1, + MLX5_QP_BIT_CC_MASTER = 1 << 0 }; enum { -- cgit v1.2.3 From f91e6d8941bf450f7842dfc1ed80e948aaa65e8c Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 14 Dec 2015 16:34:09 +0200 Subject: net/mlx5_core: Add setting ATOMIC endian mode HW is capable of 2 requestor endianness modes for standard 8 Bytes atomic: BE (0x0) and host endianness (0x1). Read the supported modes from hca atomic capabilities and configure HW to host endianness mode if supported. Signed-off-by: Eran Ben Elisha Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 091d8343d594..991283b51f61 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -66,6 +66,11 @@ enum { MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3 }; +enum { + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, +}; + enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, MLX5_CMD_OP_QUERY_ADAPTER = 0x101, @@ -527,21 +532,24 @@ enum { struct mlx5_ifc_atomic_caps_bits { u8 reserved_0[0x40]; - u8 atomic_req_endianness[0x1]; - u8 reserved_1[0x1f]; + u8 atomic_req_8B_endianess_mode[0x2]; + u8 reserved_1[0x4]; + u8 supported_atomic_req_8B_endianess_mode_1[0x1]; - u8 reserved_2[0x20]; + u8 reserved_2[0x19]; - u8 reserved_3[0x10]; - u8 atomic_operations[0x10]; + u8 reserved_3[0x20]; u8 reserved_4[0x10]; - u8 atomic_size_qp[0x10]; + u8 atomic_operations[0x10]; u8 reserved_5[0x10]; + u8 atomic_size_qp[0x10]; + + u8 reserved_6[0x10]; u8 atomic_size_dc[0x10]; - u8 reserved_6[0x720]; + u8 reserved_7[0x720]; }; struct mlx5_ifc_odp_cap_bits { -- cgit v1.2.3 From da7525d2a9ae9d9d9af754441befcf2560f6cac3 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 14 Dec 2015 16:34:10 +0200 Subject: IB/mlx5: Advertise atomic capabilities in query device In order to ensure IB spec atomic correctness in atomic operations, if HW is configured to host endianness, advertise IB_ATOMIC_HCA. if not, advertise IB_ATOMIC_NONE. Signed-off-by: Eran Ben Elisha Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7b9c976b42d9..53c57724c8dd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -115,6 +115,11 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, }; +enum { + MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, + MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, +}; + enum mlx5_page_fault_resume_flags { MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0, MLX5_PAGE_FAULT_RESUME_WRITE = 1 << 1, -- cgit v1.2.3 From d3cf81f9c805d599e91d1dcaebdd82ec17c299a6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 9 Dec 2015 14:12:03 +0200 Subject: IB/iser,isert: Create and use new shared header The iser RDMA_CM negotiation protocol is shared by the initiator and the target, so have a shared header for the defines and structure. Move relevant items from the initiator and target headers. Signed-off-by: Sagi Grimberg Signed-off-by: Jenny Derzhavetz Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- include/scsi/iser.h | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 include/scsi/iser.h (limited to 'include') diff --git a/include/scsi/iser.h b/include/scsi/iser.h new file mode 100644 index 000000000000..2e678fa74eca --- /dev/null +++ b/include/scsi/iser.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ISCSI_ISER_H +#define ISCSI_ISER_H + +#define ISER_ZBVA_NOT_SUP 0x80 +#define ISER_SEND_W_INV_NOT_SUP 0x40 +#define ISERT_ZBVA_NOT_USED 0x80 +#define ISERT_SEND_W_INV_NOT_USED 0x40 + +#define ISCSI_CTRL 0x10 +#define ISER_HELLO 0x20 +#define ISER_HELLORPLY 0x30 + +#define ISER_VER 0x10 +#define ISER_WSV 0x08 +#define ISER_RSV 0x04 + +/** + * struct iser_cm_hdr - iSER CM header (from iSER Annex A12) + * + * @flags: flags support (zbva, send_w_inv) + * @rsvd: reserved + */ +struct iser_cm_hdr { + u8 flags; + u8 rsvd[3]; +} __packed; + +/** + * struct iser_ctrl - iSER header of iSCSI control PDU + * + * @flags: opcode and read/write valid bits + * @rsvd: reserved + * @write_stag: write rkey + * @write_va: write virtual address + * @reaf_stag: read rkey + * @read_va: read virtual address + */ +struct iser_ctrl { + u8 flags; + u8 rsvd[3]; + __be32 write_stag; + __be64 write_va; + __be32 read_stag; + __be64 read_va; +} __packed; + +#endif /* ISCSI_ISER_H */ -- cgit v1.2.3 From f25bf1977f7a968e85fe8ab99252b8132c6cf8c4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:48:07 +0200 Subject: net/mlx4: Remove unused macro The macro mlx4_foreach_non_ib_transport_port() is not used anywhere. Remove it. Fixes: aa9a2d51a3e7 ("mlx4: Activate RoCE/SRIOV") Signed-off-by: Moni Shoua Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d3133be12d92..971037188907 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -979,10 +979,6 @@ struct mlx4_mad_ifc { for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) -#define mlx4_foreach_non_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ - if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) - #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ -- cgit v1.2.3 From ca281265c02f342fed4927b4e98e377d9318881f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Jan 2016 14:15:58 +0100 Subject: IB/mad: pass ib_mad_send_buf explicitly to the recv_handler Stop abusing wr_id and just pass the parameter explicitly. Signed-off-by: Christoph Hellwig Reviewed-by: Hal Rosenstock Reviewed-by: Ira Weiny Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/rdma/ib_mad.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index ec9b44dd3d80..0ff049bd9ad4 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -438,6 +438,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, /** * ib_mad_recv_handler - callback handler for a received MAD. * @mad_agent: MAD agent requesting the received MAD. + * @send_buf: Send buffer if found, else NULL * @mad_recv_wc: Received work completion information on the received MAD. * * MADs received in response to a send request operation will be handed to @@ -447,6 +448,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, * modify the data referenced by @mad_recv_wc. */ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc); /** -- cgit v1.2.3 From f7f4b23e27f7561330ef13f93dbe8f2dc410efa7 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 4 Jan 2016 10:49:53 +0200 Subject: IB/core: Rename rdma_addr_find_dmac_by_grh rdma_addr_find_dmac_by_grh resolves dmac, vlan_id and if_index and downsteram patch will also add hop_limit as an output parameter, thus we rename it to rdma_addr_find_l2_eth_by_grh. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 87156dcb73bb..73fd088dde56 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -130,8 +130,9 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, int rdma_addr_size(struct sockaddr *addr); int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); -int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *smac, u16 *vlan_id, int *if_index); +int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, + const union ib_gid *dgid, + u8 *smac, u16 *vlan_id, int *if_index); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { -- cgit v1.2.3 From c3efe7500add077f79d37b18e9c66df6621409b6 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 4 Jan 2016 10:49:54 +0200 Subject: IB/core: Use hop-limit from IP stack for RoCE Previously, IPV6_DEFAULT_HOPLIMIT was used as the hop limit value for RoCE. Fixing that by taking ip4_dst_hoplimit and ip6_dst_hoplimit as hop limit values. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 73fd088dde56..c34c9002460c 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -84,6 +84,7 @@ struct rdma_dev_addr { enum rdma_transport_type transport; struct net *net; enum rdma_network_type network; + int hoplimit; }; /** @@ -132,7 +133,8 @@ int rdma_addr_size(struct sockaddr *addr); int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *smac, u16 *vlan_id, int *if_index); + u8 *smac, u16 *vlan_id, int *if_index, + int *hoplimit); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { -- cgit v1.2.3 From cc886c9ff1607eda04062bdcec963e2f8e6a3eb1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:12 -0500 Subject: svcrdma: Improve allocation of struct svc_rdma_op_ctxt When the maximum payload size of NFS READ and WRITE was increased by commit cc9a903d915c ("svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD"), the size of struct svc_rdma_op_ctxt increased to over 6KB (on x86_64). That makes allocating one of these from a kmem_cache more likely to fail in situations when system memory is exhausted. Since I'm about to add a caller where this allocation must always work _and_ it cannot sleep, pre-allocate ctxts for each connection. Another motivation for this change is that NFSv4.x servers are required by specification not to drop NFS requests. Pre-allocating memory resources reduces the likelihood of a drop. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f869807a0d0e..be2804b72cd8 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -69,6 +69,7 @@ extern atomic_t rdma_stat_sq_prod; * completes. */ struct svc_rdma_op_ctxt { + struct list_head free; struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; int hdr_count; @@ -141,7 +142,10 @@ struct svcxprt_rdma { struct ib_pd *sc_pd; atomic_t sc_dma_used; - atomic_t sc_ctxt_used; + spinlock_t sc_ctxt_lock; + struct list_head sc_ctxts; + int sc_ctxt_used; + struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; -- cgit v1.2.3 From 2fe81b239dbb00d0a2fd8858ac9dd4ef4a8841ee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:20 -0500 Subject: svcrdma: Improve allocation of struct svc_rdma_req_map To ensure this allocation cannot fail and will not sleep, pre-allocate the req_map structures per-connection. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index be2804b72cd8..05bf4febad44 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -113,6 +113,7 @@ struct svc_rdma_fastreg_mr { struct list_head frmr_list; }; struct svc_rdma_req_map { + struct list_head free; unsigned long count; union { struct kvec sge[RPCSVC_MAXPAGES]; @@ -145,6 +146,8 @@ struct svcxprt_rdma { spinlock_t sc_ctxt_lock; struct list_head sc_ctxts; int sc_ctxt_used; + spinlock_t sc_map_lock; + struct list_head sc_maps; struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; @@ -223,8 +226,9 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); -extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); +extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *); +extern void svc_rdma_put_req_map(struct svcxprt_rdma *, + struct svc_rdma_req_map *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); -- cgit v1.2.3 From 71810ef3271d1a06f7002c55c7e354d8c3233762 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:28 -0500 Subject: svcrdma: Remove unused req_map and ctxt kmem_caches Clean up. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 05bf4febad44..141edbbb73b3 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -242,6 +242,7 @@ extern struct svc_xprt_class svc_rdma_bc_class; #endif /* svc_rdma.c */ +extern struct workqueue_struct *svc_rdma_wq; extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); -- cgit v1.2.3 From 39b09a1a121cb22820c374f4e92f7ca34be1b75d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:37 -0500 Subject: svcrdma: Add gfp flags to svc_rdma_post_recv() svc_rdma_post_recv() allocates pages for receive buffers on-demand. It uses GFP_KERNEL so the allocator tries hard, and may sleep. But I'm about to add a call to svc_rdma_post_recv() from a function that may not sleep. Since all svc_rdma_post_recv() call sites can tolerate its failure, allow it to fail if the page allocator returns nothing. Longer term, receive buffers, being a finite resource per-connection, should be pre-allocated and re-used. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 141edbbb73b3..729ff356c18a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -221,7 +221,7 @@ extern struct rpcrdma_read_chunk * extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode); -extern int svc_rdma_post_recv(struct svcxprt_rdma *); +extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); -- cgit v1.2.3 From ba986c96f907a513215fb7f1c0a89261c97251ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:53 -0500 Subject: svcrdma: Make map_xdr non-static Pre-requisite to use map_xdr in the backchannel code. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 729ff356c18a..aeffa30655ce 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -213,6 +213,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, u32, u32, u64, bool); /* svc_rdma_sendto.c */ +extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, + struct svc_rdma_req_map *); extern int svc_rdma_sendto(struct svc_rqst *); extern struct rpcrdma_read_chunk * svc_rdma_get_read_chunk(struct rpcrdma_msg *); -- cgit v1.2.3 From 03fe9931536fe4782e9e34f7f499d588acd2015b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:50:02 -0500 Subject: svcrdma: Define maximum number of backchannel requests Extra resources for handling backchannel requests have to be pre-allocated when a transport instance is created. Set up additional fields in svcxprt_rdma to track these resources. The max_requests fields are elements of the RPC-over-RDMA protocol, so they should be u32. To ensure that unsigned arithmetic is used everywhere, some other fields in the svcxprt_rdma struct are updated. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index aeffa30655ce..9a2c418dc690 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -51,6 +51,7 @@ /* RPC/RDMA parameters and stats */ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; +extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; extern atomic_t rdma_stat_recv; @@ -134,10 +135,11 @@ struct svcxprt_rdma { int sc_max_sge; int sc_max_sge_rd; /* max sge for read target */ - int sc_sq_depth; /* Depth of SQ */ atomic_t sc_sq_count; /* Number of SQ WR on queue */ - - int sc_max_requests; /* Depth of RQ */ + unsigned int sc_sq_depth; /* Depth of SQ */ + unsigned int sc_rq_depth; /* Depth of RQ */ + u32 sc_max_requests; /* Forward credits */ + u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ struct ib_pd *sc_pd; @@ -186,6 +188,11 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 +/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our + * current NFSv4.1 implementation supports one backchannel slot. + */ +#define RPCRDMA_MAX_BC_REQUESTS 2 + #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD /* svc_rdma_marshal.c */ -- cgit v1.2.3 From 5d252f90a800cee5bc57c76d636ae60464f7a887 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:50:10 -0500 Subject: svcrdma: Add class for RDMA backwards direction transport To support the server-side of an NFSv4.1 backchannel on RDMA connections, add a transport class that enables backward direction messages on an existing forward channel connection. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 9a2c418dc690..b13513a0caf4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -195,6 +195,11 @@ struct svcxprt_rdma { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/* svc_rdma_backchannel.c */ +extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, + struct rpcrdma_msg *rmsgp, + struct xdr_buf *rcvbuf); + /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, -- cgit v1.2.3 From 5fe1043da84887369d32459514f2c7d98ff37936 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jan 2016 23:53:41 -0800 Subject: svc_rdma: use local_dma_lkey We now alwasy have a per-PD local_dma_lkey available. Make use of that fact in svc_rdma and stop registering our own MR. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Chuck Lever Reviewed-by: Steve Wise Acked-by: J. Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index b13513a0caf4..5322fea6fe4c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -156,13 +156,11 @@ struct svcxprt_rdma { struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - struct ib_mr *sc_phys_mr; /* MR for server memory */ int (*sc_reader)(struct svcxprt_rdma *, struct svc_rqst *, struct svc_rdma_op_ctxt *, int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ - u32 sc_dma_lkey; /* local dma key */ unsigned int sc_frmr_pg_list_len; struct list_head sc_frmr_q; spinlock_t sc_frmr_q_lock; -- cgit v1.2.3 From d8ae914196d35bbc0c459aec6de588ba585a1c3e Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:32 +0200 Subject: net/mlx4: Query RoCE support Query the RoCE support from firmware using the appropriate firmware commands. Downstream patches will read these capabilities and act accordingly. Signed-off-by: Moni Shoua Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 971037188907..28cbee0df7d7 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -216,6 +216,7 @@ enum { MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, + MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, }; enum { @@ -267,12 +268,14 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_ROCE_V1_V2 = 1 << 19, MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; enum { - MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP + MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP, + MLX4_FLAG_ROCE_V1_V2 = MLX4_BMME_FLAG_ROCE_V1_V2 }; enum mlx4_event { @@ -980,9 +983,10 @@ struct mlx4_mad_ifc { if ((type) == (dev)->caps.port_mask[(port)]) #define mlx4_foreach_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \ + ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) -- cgit v1.2.3 From 7e57b85c444c3c1bf3550aa6890666fc4353bd33 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:35 +0200 Subject: IB/mlx4: Add support for setting RoCEv2 gids in hardware To tell hardware about a gid with type RoCEv2, software needs a new modifier to the SET_PORT command: MLX4_SET_PORT_ROCE_ADDR. This can replace the old method, MLX4_SET_PORT_GID_TABLE, for RoCEv1 gids. Signed-off-by: Moni Shoua Signed-off-by: Doug Ledford --- include/linux/mlx4/cmd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 58391f2e0414..116b284bc4ce 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -206,7 +206,8 @@ enum { MLX4_SET_PORT_GID_TABLE = 0x5, MLX4_SET_PORT_PRIO2TC = 0x8, MLX4_SET_PORT_SCHEDULER = 0x9, - MLX4_SET_PORT_VXLAN = 0xB + MLX4_SET_PORT_VXLAN = 0xB, + MLX4_SET_PORT_ROCE_ADDR = 0xD }; enum { -- cgit v1.2.3 From fca83006294a6356705781eee31da1658fd411a5 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:36 +0200 Subject: net/mlx4_core: Add support for configuring RoCE v2 UDP port In order to support RoCE v2, the hardware needs to be configured to classify certain UDP packets as RoCE v2 packets and pass it through its RoCE pipeline. This patch enables configuring this UDP port. Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 28cbee0df7d7..430a929f048b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1457,6 +1457,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis); +int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port); int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); -- cgit v1.2.3 From 3f723f42d9d625bb9ecfe923d19d1d42da775797 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:37 +0200 Subject: net/mlx4_core: Add support for RoCE v2 entropy In RoCE v2 we need to choose a source UDP port, we do so by using entropy over the source and dest QPNs. Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx4/qp.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index fe052e234906..cdf110d3f260 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -204,7 +204,8 @@ struct mlx4_qp_context { u32 reserved1; __be32 next_send_psn; __be32 cqn_send; - u32 reserved2[2]; + __be16 roce_entropy; + __be16 reserved2[3]; __be32 last_acked_psn; __be32 ssn; __be32 params2; @@ -487,4 +488,14 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp); +static inline u16 folded_qp(u32 q) +{ + u16 res; + + res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00); + return res; +} + +u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn); + #endif /* MLX4_QP_H */ -- cgit v1.2.3 From 7ead4bcb1b788732516755ef84ef1272d3e152eb Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:38 +0200 Subject: IB/core: Add definition for the standard RoCE V2 UDP port This will be used in hardware device driver when building QP or AH contexts. Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 75fcc97886de..284b00c8fea4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -81,6 +81,7 @@ enum ib_gid_type { IB_GID_TYPE_SIZE }; +#define ROCE_V2_UDP_DPORT 4791 struct ib_gid_attr { enum ib_gid_type gid_type; struct net_device *ndev; -- cgit v1.2.3 From 3b5daf28ac4bb9354b7d2f10ce5942cad23e979a Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:39 +0200 Subject: IB/mlx4: Support modify_qp for RoCE v2 In order to support modify_qp for RoCE v2, we need to set the gid_type in the QP context. Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx4/qp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index cdf110d3f260..587cdf943b52 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -194,7 +194,7 @@ struct mlx4_qp_context { u8 mtu_msgmax; u8 rq_size_stride; u8 sq_size_stride; - u8 rlkey; + u8 rlkey_roce_mode; __be32 usr_page; __be32 local_qpn; __be32 remote_qpn; -- cgit v1.2.3 From 3ef967a4affeef7bb3b7713dcfed6518b99737c6 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:41 +0200 Subject: IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers RoCEv2 packets are sent over IP/UDP protocols. The mlx4 driver uses a type of RAW QP to send packets for QP1 and therefore needs to build the network headers below BTH in software. This patch adds option to build QP1 packets with IP and UDP headers if RoCEv2 is requested. Signed-off-by: Moni Shoua Signed-off-by: Doug Ledford --- include/rdma/ib_pack.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index a1930819b97e..0f3daae44bf9 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -234,7 +234,7 @@ struct ib_unpacked_ip4 { __be16 frag_off; u8 ttl; u8 protocol; - __be16 check; + __sum16 check; __be32 saddr; __be32 daddr; }; @@ -280,7 +280,7 @@ void ib_unpack(const struct ib_field *desc, void *buf, void *structure); -__be16 ib_ud_ip4_csum(struct ib_ud_header *header); +__sum16 ib_ud_ip4_csum(struct ib_ud_header *header); int ib_ud_header_init(int payload_bytes, int lrh_present, -- cgit v1.2.3 From 8d7f9ecb371a15e48754fa816e3f716517df7b13 Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:12:59 +0200 Subject: net/mlx5_core: Export transport objects To be used by mlx5_ib in the following patches for implementing RAW PACKET QP. Add mlx5_core_ prefix to alloc and delloc transport_domain since they are exposed now. Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx5/transobj.h | 74 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 include/linux/mlx5/transobj.h (limited to 'include') diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h new file mode 100644 index 000000000000..376229f09499 --- /dev/null +++ b/include/linux/mlx5/transobj.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __TRANSOBJ_H__ +#define __TRANSOBJ_H__ + +#include + +int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn); +void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn); +int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqn); +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); +void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); +int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *sqn); +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); +void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tirn); +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, + int inlen); +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tisn); +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); + +int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqtn); +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen); +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); + +#endif /* __TRANSOBJ_H__ */ -- cgit v1.2.3 From e2013b212f9f201c71fc5826ce41f39ebece0852 Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:13:00 +0200 Subject: net/mlx5_core: Add RQ and SQ event handling RQ/SQ will be used to implement IB verbs QPs, so the IB QP affiliated events are affiliated also with SQs and RQs. Since SQ, RQ and QP resource numbers do not share the same name space, a queue type field was added to the event data to specify the SW object that the event is affiliated with. Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 12 +++++++++++- include/linux/mlx5/driver.h | 8 +++++--- include/linux/mlx5/qp.h | 8 ++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 48c4623ad651..b7eaccf997ff 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -223,6 +223,14 @@ enum { #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT +#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) + +enum { + MLX5_EVENT_QUEUE_TYPE_QP = 0, + MLX5_EVENT_QUEUE_TYPE_RQ = 1, + MLX5_EVENT_QUEUE_TYPE_SQ = 2, +}; + enum mlx5_event { MLX5_EVENT_TYPE_COMP = 0x0, @@ -479,7 +487,9 @@ struct mlx5_eqe_comp { }; struct mlx5_eqe_qp_srq { - __be32 reserved[6]; + __be32 reserved1[5]; + u8 type; + u8 reserved2[3]; __be32 qp_srq_n; }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 53c57724c8dd..ae8f91528b6f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -346,9 +346,11 @@ struct mlx5_core_mr { }; enum mlx5_res_type { - MLX5_RES_QP, - MLX5_RES_SRQ, - MLX5_RES_XSRQ, + MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP, + MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ, + MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ, + MLX5_RES_SRQ = 3, + MLX5_RES_XSRQ = 4, }; struct mlx5_core_rsc_common { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fd1ff4110e80..431176ec70e2 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -651,6 +651,14 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, u8 context, int error); #endif +int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *rq); +void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *rq); +int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *sq); +void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *sq); static inline const char *mlx5_qp_type_str(int type) { -- cgit v1.2.3 From 6d2f89df04b796e7dcc4f9f8dc0d8f04ad7f144b Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:13:05 +0200 Subject: IB/mlx5: Add Raw Packet QP query functionality Since Raw Packet QP is composed of RQ and SQ, the IB QP's state is derived from the sub-objects. Therefore we need to query each one of the sub-objects, and decide on the IB QP's state. Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx5/qp.h | 11 ++++++++++- include/linux/mlx5/transobj.h | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 431176ec70e2..f033c7a1490c 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -85,7 +85,16 @@ enum mlx5_qp_state { MLX5_QP_STATE_ERR = 6, MLX5_QP_STATE_SQ_DRAINING = 7, MLX5_QP_STATE_SUSPENDED = 9, - MLX5_QP_NUM_STATE + MLX5_QP_NUM_STATE, + MLX5_QP_STATE, + MLX5_QP_STATE_BAD, +}; + +enum { + MLX5_SQ_STATE_NA = MLX5_SQC_STATE_ERR + 1, + MLX5_SQ_NUM_STATE = MLX5_SQ_STATE_NA + 1, + MLX5_RQ_STATE_NA = MLX5_RQC_STATE_ERR + 1, + MLX5_RQ_NUM_STATE = MLX5_RQ_STATE_NA + 1, }; enum { diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 376229f09499..d259e4c423dd 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -41,10 +41,12 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn); int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); +int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out); int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn); int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); +int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, -- cgit v1.2.3 From 75850d0bcece42416ba81bd38e4c719f101c832d Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:13:06 +0200 Subject: IB/mlx5: Support setting Ethernet priority for Raw Packet QPs When the user changes the Address Vector(AV) in the modify QP, he provides an SL. This SL should be translated to Ethernet Priority by taking the 3 LSB bits, and modify the QP's TIS according to this Ethernet priority. Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- include/linux/mlx5/transobj.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 991283b51f61..4633b88b0c3b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4052,6 +4052,13 @@ struct mlx5_ifc_modify_tis_out_bits { u8 reserved_1[0x40]; }; +struct mlx5_ifc_modify_tis_bitmask_bits { + u8 reserved_0[0x20]; + + u8 reserved_1[0x1f]; + u8 prio[0x1]; +}; + struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; @@ -4064,7 +4071,7 @@ struct mlx5_ifc_modify_tis_in_bits { u8 reserved_3[0x20]; - u8 modify_bitmask[0x40]; + struct mlx5_ifc_modify_tis_bitmask_bits bitmask; u8 reserved_4[0x40]; diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index d259e4c423dd..88441f5ece25 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -54,6 +54,8 @@ int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn); +int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, + int inlen); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rmpn); -- cgit v1.2.3 From 427c1e7bcd7e5cd62160fcda0ce215ebbe0da3a1 Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:13:07 +0200 Subject: {IB, net}/mlx5: Move the modify QP operation table to mlx5_ib When modifying a QP, the desired operation was determined in the mlx5_core using a transition table that takes the current state, the final state, and returns the desired operation. Since this logic will be used for Raw Packet QP, move the operation table to the mlx5_ib. Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx5/qp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f033c7a1490c..5b8c89ffaa58 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -641,8 +641,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, struct mlx5_create_qp_mbox_in *in, int inlen); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, - enum mlx5_qp_state new_state, +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, struct mlx5_modify_qp_mbox_in *in, int sqd_event, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, -- cgit v1.2.3