From a3f5adaf491490089215f863a61b9422fae902f8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 27 Sep 2010 17:51:10 -0700 Subject: IB/core: Add link layer property to ports This patch allows ports to have different link layers: IB_LINK_LAYER_INFINIBAND or IB_LINK_LAYER_ETHERNET. This is required for adding IBoE (InfiniBand-over-Ethernet, aka RoCE) support. For devices that do not provide an implementation for querying the link layer property of a port, we return a default value based on the transport: RMA_TRANSPORT_IB nodes will return IB_LINK_LAYER_INFINIBAND and RDMA_TRANSPORT_IWARP nodes will return IB_LINK_LAYER_ETHERNET. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 857b3b9cf120..e04c4888d1fd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -75,6 +75,12 @@ enum rdma_transport_type { enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; +enum rdma_link_layer { + IB_LINK_LAYER_UNSPECIFIED, + IB_LINK_LAYER_INFINIBAND, + IB_LINK_LAYER_ETHERNET, +}; + enum ib_device_cap_flags { IB_DEVICE_RESIZE_MAX_WR = 1, IB_DEVICE_BAD_PKEY_CNTR = (1<<1), @@ -1010,6 +1016,8 @@ struct ib_device { int (*query_port)(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); + enum rdma_link_layer (*get_link_layer)(struct ib_device *device, + u8 port_num); int (*query_gid)(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); @@ -1222,6 +1230,9 @@ int ib_query_device(struct ib_device *device, int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); +enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, + u8 port_num); + int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); -- cgit v1.2.3 From 5a0fd09428e47fb08d5a887515d92bb2447f4b65 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 7 Oct 2010 16:24:16 +0200 Subject: IB/mlx4: Limit size of fast registration WRs Fix the limit on the size of max fast registration WRs that can be posted to match hardware capabilities. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7a7f9c1e679a..ada69389fb91 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -171,6 +171,10 @@ enum { MLX4_NUM_FEXCH = 64 * 1024, }; +enum { + MLX4_MAX_FAST_REG_PAGES = 511, +}; + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; -- cgit v1.2.3 From 3c86aa70bf677a31b71c8292e349242e26cbc743 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 13 Oct 2010 21:26:51 +0200 Subject: RDMA/cm: Add RDMA CM support for IBoE devices Add support for IBoE device binding and IP --> GID resolution. Path resolving and multicast joining are implemented within cma.c by filling in the responses and running callbacks in the CMA work queue. IP --> GID resolution always yields IPv6 link local addresses; remote GIDs are derived from the destination MAC address of the remote port. Multicast GIDs are always mapped to multicast MACs as is done in IPv6. (IPv4 multicast is enabled by translating IPv4 multicast addresses to IPv6 multicast as described in .) Some helper functions are added to ib_addr.h. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index fa0d52b8e622..904ffa92fc93 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -40,6 +40,7 @@ #include #include #include +#include struct rdma_addr_client { atomic_t refcount; @@ -63,6 +64,7 @@ struct rdma_dev_addr { unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; + enum rdma_transport_type transport; }; /** @@ -127,9 +129,31 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } +static inline void iboe_mac_to_ll(union ib_gid *gid, u8 *mac) +{ + memset(gid->raw, 0, 16); + *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000); + gid->raw[12] = 0xfe; + gid->raw[11] = 0xff; + memcpy(gid->raw + 13, mac + 3, 3); + memcpy(gid->raw + 8, mac, 3); + gid->raw[8] ^= 2; +} + +static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + iboe_mac_to_ll(gid, dev_addr->src_dev_addr); +} + static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); + if (dev_addr->transport == RDMA_TRANSPORT_IB && + dev_addr->dev_type != ARPHRD_INFINIBAND) + iboe_addr_get_sgid(dev_addr, gid); + else + memcpy(gid, dev_addr->src_dev_addr + + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) @@ -147,4 +171,77 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } +static inline enum ib_mtu iboe_get_mtu(int mtu) +{ + /* + * reduce IB headers from effective IBoE MTU. 28 stands for + * atomic header which is the biggest possible header after BTH + */ + mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; + + if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) + return IB_MTU_4096; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) + return IB_MTU_2048; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) + return IB_MTU_1024; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) + return IB_MTU_512; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) + return IB_MTU_256; + else + return 0; +} + +static inline int iboe_get_rate(struct net_device *dev) +{ + struct ethtool_cmd cmd; + + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings || + dev->ethtool_ops->get_settings(dev, &cmd)) + return IB_RATE_PORT_CURRENT; + + if (cmd.speed >= 40000) + return IB_RATE_40_GBPS; + else if (cmd.speed >= 30000) + return IB_RATE_30_GBPS; + else if (cmd.speed >= 20000) + return IB_RATE_20_GBPS; + else if (cmd.speed >= 10000) + return IB_RATE_10_GBPS; + else + return IB_RATE_PORT_CURRENT; +} + +static inline int rdma_link_local_addr(struct in6_addr *addr) +{ + if (addr->s6_addr32[0] == htonl(0xfe800000) && + addr->s6_addr32[1] == 0) + return 1; + + return 0; +} + +static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) +{ + memcpy(mac, &addr->s6_addr[8], 3); + memcpy(mac + 3, &addr->s6_addr[13], 3); + mac[0] ^= 2; +} + +static inline int rdma_is_multicast_addr(struct in6_addr *addr) +{ + return addr->s6_addr[0] == 0xff; +} + +static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) +{ + int i; + + mac[0] = 0x33; + mac[1] = 0x33; + for (i = 2; i < 6; ++i) + mac[i] = addr->s6_addr[i + 10]; +} + #endif /* IB_ADDR_H */ -- cgit v1.2.3 From ff7f5aab354dee01f29c9c00933f6d4aa590eadb Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 14:17:56 +0000 Subject: IB/pack: IBoE UD packet packing support Add support for packing IBoE packet headers. Signed-off-by: Eli Cohen [ Clean up and fix ib_ud_header_init() a bit. - Roland ] Signed-off-by: Roland Dreier --- include/rdma/ib_pack.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index cbb50f4da3dd..6b91d8e7a1fa 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -37,6 +37,7 @@ enum { IB_LRH_BYTES = 8, + IB_ETH_BYTES = 14, IB_GRH_BYTES = 40, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 @@ -210,14 +211,25 @@ struct ib_unpacked_deth { __be32 source_qpn; }; +struct ib_unpacked_eth { + u8 dmac_h[4]; + u8 dmac_l[2]; + u8 smac_h[2]; + u8 smac_l[4]; + __be16 type; +}; + struct ib_ud_header { + int lrh_present; struct ib_unpacked_lrh lrh; - int grh_present; - struct ib_unpacked_grh grh; - struct ib_unpacked_bth bth; + int eth_present; + struct ib_unpacked_eth eth; + int grh_present; + struct ib_unpacked_grh grh; + struct ib_unpacked_bth bth; struct ib_unpacked_deth deth; - int immediate_present; - __be32 immediate_data; + int immediate_present; + __be32 immediate_data; }; void ib_pack(const struct ib_field *desc, @@ -230,9 +242,11 @@ void ib_unpack(const struct ib_field *desc, void *buf, void *structure); -void ib_ud_header_init(int payload_bytes, - int grh_present, - int immediate_present, +void ib_ud_header_init(int payload_bytes, + int lrh_present, + int eth_present, + int grh_present, + int immediate_present, struct ib_ud_header *header); int ib_ud_header_pack(struct ib_ud_header *header, -- cgit v1.2.3 From bb12588a38e6db85e01dceadff7bc161fc92e7d2 Mon Sep 17 00:00:00 2001 From: David Dillow Date: Fri, 8 Oct 2010 14:40:47 -0400 Subject: IB/srp: Implement SRP_CRED_REQ and SRP_AER_REQ This patch adds support for SRP_CRED_REQ to avoid a lockup by targets that use that mechanism to return credits to the initiator. This prevents a lockup observed in the field where we would never add the credits from the SRP_CRED_REQ to our current count, and would therefore never send another command to the target. Minimal support for SRP_AER_REQ is also added, as these messages can also be used to convey additional credits to the initiator. Based upon extensive debugging and code by Bart Van Assche and a bug report by Chris Worley. Signed-off-by: David Dillow Signed-off-by: Roland Dreier --- include/scsi/srp.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/scsi/srp.h b/include/scsi/srp.h index ad178fa78f66..1ae84db4c9fb 100644 --- a/include/scsi/srp.h +++ b/include/scsi/srp.h @@ -239,4 +239,42 @@ struct srp_rsp { u8 data[0]; } __attribute__((packed)); +struct srp_cred_req { + u8 opcode; + u8 sol_not; + u8 reserved[2]; + __be32 req_lim_delta; + u64 tag; +}; + +struct srp_cred_rsp { + u8 opcode; + u8 reserved[7]; + u64 tag; +}; + +/* + * The SRP spec defines the fixed portion of the AER_REQ structure to be + * 36 bytes, so it needs to be packed to avoid having it padded to 40 bytes + * on 64-bit architectures. + */ +struct srp_aer_req { + u8 opcode; + u8 sol_not; + u8 reserved[2]; + __be32 req_lim_delta; + u64 tag; + u32 reserved2; + __be64 lun; + __be32 sense_data_len; + u32 reserved3; + u8 sense_data[0]; +} __attribute__((packed)); + +struct srp_aer_rsp { + u8 opcode; + u8 reserved[7]; + u64 tag; +}; + #endif /* SCSI_SRP_H */ -- cgit v1.2.3 From d0d68b8693bd16bfbbc93b89f1d9f3351723307c Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 4 Oct 2010 12:11:34 +0000 Subject: IB/mlx4: Signal node desc changes to SM by using FW to generate trap 144 The Node Description cannot be changed via MADs (it is read-only). Until now, it was changed in the driver via sysfs, and the new Node Description was simply inserted by the driver into MAD responses (replacing the description returned by FW). System startup scripts use the sysfs interface to change the node description at driver startup to show the hostname, etc. However, this has a race condition: the SM could discover the original FW node description rather than the system-specific description if it queried the port before the startup scripts finish running. For mlx4, we fix this with a new FW command (SET_NODE) that allows passing the new node description to FW. When this command is invoked, FW sends a trap 144 to the SM. When it gets this trap, the SM can query the node to obtain the new node description -- thus eliminating the effects of the race. This patch simply calls SET_NODE command when a new node description is entered via sysfs (thus causing trap 144 to be issued by the FW). We ignore all failures of the SET_NODE command (including those caused by using a device FW that predates the SET_NODE command), since in that case things work just as before. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/cmd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82ed..2731266e73a7 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -57,6 +57,7 @@ enum { MLX4_CMD_QUERY_PORT = 0x43, MLX4_CMD_SENSE_PORT = 0x4d, MLX4_CMD_SET_PORT = 0xc, + MLX4_CMD_SET_NODE = 0x5a, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, MLX4_CMD_UNMAP_ICM = 0xff9, -- cgit v1.2.3 From 2420b60b1dc4ed98cb1788e928bc57ff2efa1a8d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 18 Oct 2010 14:45:20 -0700 Subject: IB/uverbs: Return link layer type to userspace for query port operation Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/rdma/ib_user_verbs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index a17f77106149..fe5b05177a2c 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -205,7 +205,8 @@ struct ib_uverbs_query_port_resp { __u8 active_width; __u8 active_speed; __u8 phys_state; - __u8 reserved[3]; + __u8 link_layer; + __u8 reserved[2]; }; struct ib_uverbs_alloc_pd { -- cgit v1.2.3 From 33c87f0af60146b375220809c1cb745ac1a86edf Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 14:18:43 +0000 Subject: mlx4_core: Allow protocol drivers to find corresponding interfaces Add a mechanism for mlx4 protocol drivers to get a pointer to other drivers's device objects. For this, an exported function, mlx4_get_protocol_dev() is added, which allows a driver to get some other driver's device based on the protocol that the driver implements. Two protocols are added: MLX4_PROTOCOL_IB and MLX4_PROTOCOL_EN. This will be used in mlx4 IBoE support so that mlx4_ib can find the corresponding mlx4_en netdev. Signed-off-by: Eli Cohen [ Clean up and rename a few things. - Roland ] Signed-off-by: Roland Dreier --- include/linux/mlx4/driver.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 53c5fdb6eac4..f407cd4bfb34 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -44,15 +44,24 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_PORT_REINIT, }; +enum mlx4_protocol { + MLX4_PROTOCOL_IB, + MLX4_PROTOCOL_EN, +}; + struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); void (*event) (struct mlx4_dev *dev, void *context, enum mlx4_dev_event event, int port); + void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); struct list_head list; + enum mlx4_protocol protocol; }; int mlx4_register_interface(struct mlx4_interface *intf); void mlx4_unregister_interface(struct mlx4_interface *intf); +void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port); + #endif /* MLX4_DRIVER_H */ -- cgit v1.2.3 From 96dfa684c85d24b697f865f37a4f0c8678fc86e9 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 20 Oct 2010 21:57:02 -0700 Subject: mlx4_core: Update data structures and constants for IBoE Add fields to hardware data structures and add new constants required for IBoE support. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 3 ++- include/linux/mlx4/qp.h | 7 +++++-- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82ed..22bd8d3b84a2 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -140,6 +140,7 @@ enum { MLX4_SET_PORT_MAC_TABLE = 0x2, MLX4_SET_PORT_VLAN_TABLE = 0x3, MLX4_SET_PORT_PRIO_MAP = 0x4, + MLX4_SET_PORT_GID_TABLE = 0x5, }; struct mlx4_dev; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7a7f9c1e679a..47e163ad3d11 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -67,7 +67,8 @@ enum { MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18, MLX4_DEV_CAP_FLAG_RAW_MCAST = 1 << 19, MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1 << 20, - MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21 + MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21, + MLX4_DEV_CAP_FLAG_IBOE = 1 << 30 }; enum { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 7abe64326f72..97cfdc8d7e2f 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -112,7 +112,8 @@ struct mlx4_qp_path { u8 snooper_flags; u8 reserved3[2]; u8 counter_index; - u8 reserved4[7]; + u8 reserved4; + u8 dmac[6]; }; struct mlx4_qp_context { @@ -166,6 +167,7 @@ enum { MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, MLX4_WQE_CTRL_INS_VLAN = 1 << 6, MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, + MLX4_WQE_CTRL_FORCE_LOOPBACK = 1 << 0, }; struct mlx4_wqe_ctrl_seg { @@ -219,7 +221,8 @@ struct mlx4_wqe_datagram_seg { __be32 av[8]; __be32 dqpn; __be32 qkey; - __be32 reservd[2]; + __be16 vlan; + u8 mac[6]; }; struct mlx4_wqe_lso_seg { -- cgit v1.2.3 From fa417f7b520ee60b39f7e23528d2030af30a07d1 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 24 Oct 2010 21:08:52 -0700 Subject: IB/mlx4: Add support for IBoE Add support for IBoE to mlx4_ib. The bulk of the code is handling the new address vector fields; mlx4 needs the MAC address of a remote node to include it in a WQE (for datagrams) or in the QP context (for connected QPs). Address resolution is done by assuming all unicast GIDs are either link-local IPv6 addresses. Multicast group attach/detach needs to update the NIC's multicast filters; but since attaching a QP to a multicast group can be done before the QP is bound to a port, for IBoE we need to keep track of all multicast groups that a QP is attached too before it transitions from INIT to RTR (since it does not have a port in the INIT state). Signed-off-by: Eli Cohen [ Many things cleaned up and otherwise monkeyed with; hope I didn't introduce too many bugs. - Roland ] Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 47e163ad3d11..ca5645c43f61 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -374,6 +374,27 @@ struct mlx4_av { u8 dgid[16]; }; +struct mlx4_eth_av { + __be32 port_pd; + u8 reserved1; + u8 smac_idx; + u16 reserved2; + u8 reserved3; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 dgid[16]; + u32 reserved4[2]; + __be16 vlan; + u8 mac[6]; +}; + +union mlx4_ext_av { + struct mlx4_av ib; + struct mlx4_eth_av eth; +}; + struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; @@ -402,6 +423,12 @@ struct mlx4_init_port_param { if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \ ~(dev)->caps.port_mask) & 1 << ((port) - 1)) +#define mlx4_foreach_ib_transport_port(port, dev) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + if (((dev)->caps.port_mask & 1 << ((port) - 1)) || \ + ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + + int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); -- cgit v1.2.3 From af7bd463761c6abd8ca8d831f9cc0ac19f3b7d4b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 17:18:59 +0300 Subject: IB/core: Add VLAN support for IBoE Add 802.1q VLAN support to IBoE. The VLAN tag is encoded within the GID derived from a link local address in the following way: GID[11] GID[12] contain the VLAN ID when the GID contains a VLAN. The 3 bits user priority field of the packets are identical to the 3 bits of the SL. In case of rdma_cm apps, the TOS field is used to generate the SL field by doing a shift right of 5 bits effectively taking to 3 MS bits of the TOS field. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 43 +++++++++++++++++++++++++++++++++++++++---- include/rdma/ib_pack.h | 9 +++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 904ffa92fc93..b5fc9f39122b 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -129,21 +130,41 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } -static inline void iboe_mac_to_ll(union ib_gid *gid, u8 *mac) +static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid) { memset(gid->raw, 0, 16); *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000); - gid->raw[12] = 0xfe; - gid->raw[11] = 0xff; + if (vid < 0x1000) { + gid->raw[12] = vid & 0xff; + gid->raw[11] = vid >> 8; + } else { + gid->raw[12] = 0xfe; + gid->raw[11] = 0xff; + } memcpy(gid->raw + 13, mac + 3, 3); memcpy(gid->raw + 8, mac, 3); gid->raw[8] ^= 2; } +static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN ? + vlan_dev_vlan_id(dev) : 0xffff; +} + static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - iboe_mac_to_ll(gid, dev_addr->src_dev_addr); + struct net_device *dev; + u16 vid = 0xffff; + + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (dev) { + vid = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + } + + iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid); } static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) @@ -244,4 +265,18 @@ static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) mac[i] = addr->s6_addr[i + 10]; } +static inline u16 rdma_get_vlan_id(union ib_gid *dgid) +{ + u16 vid; + + vid = dgid->raw[11] << 8 | dgid->raw[12]; + return vid < 0x1000 ? vid : 0xffff; +} + +static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN ? + vlan_dev_real_dev(dev) : 0; +} + #endif /* IB_ADDR_H */ diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 6b91d8e7a1fa..b37fe3b10a9d 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -38,6 +38,7 @@ enum { IB_LRH_BYTES = 8, IB_ETH_BYTES = 14, + IB_VLAN_BYTES = 4, IB_GRH_BYTES = 40, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 @@ -219,11 +220,18 @@ struct ib_unpacked_eth { __be16 type; }; +struct ib_unpacked_vlan { + __be16 tag; + __be16 type; +}; + struct ib_ud_header { int lrh_present; struct ib_unpacked_lrh lrh; int eth_present; struct ib_unpacked_eth eth; + int vlan_present; + struct ib_unpacked_vlan vlan; int grh_present; struct ib_unpacked_grh grh; struct ib_unpacked_bth bth; @@ -245,6 +253,7 @@ void ib_unpack(const struct ib_field *desc, void ib_ud_header_init(int payload_bytes, int lrh_present, int eth_present, + int vlan_present, int grh_present, int immediate_present, struct ib_ud_header *header); -- cgit v1.2.3 From 4c3eb3ca13966508bcb64f39dcdef48be22f1731 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 17:19:22 +0300 Subject: IB/mlx4: Add VLAN support for IBoE This patch allows IBoE traffic to be encapsulated in 802.1Q tagged VLAN frames. The VLAN tag is encoded in the GID and derived from it by a simple computation. The netdev notifier callback is modified to catch VLAN device addition/removal and the port's GID table is updated to reflect the change, so that for each netdevice there is an entry in the GID table. When the port's GID table is exhausted, GID entries will not be added. Only children of the main interfaces can add to the GID table; if a VLAN interface is added on another VLAN interface (e.g. "vconfig add eth2.6 8"), then that interfaces will not add an entry to the GID table. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ca5645c43f61..ff9893a33e90 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -496,6 +496,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index); +int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 97cfdc8d7e2f..0eeb2a1a867c 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -109,7 +109,7 @@ struct mlx4_qp_path { __be32 tclass_flowlabel; u8 rgid[16]; u8 sched_queue; - u8 snooper_flags; + u8 vlan_index; u8 reserved3[2]; u8 counter_index; u8 reserved4; -- cgit v1.2.3