summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--net/rds/connection.c5
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rds/recv.c7
-rw-r--r--net/rds/send.c18
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_listen.c55
6 files changed, 72 insertions, 17 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 4b7715eb2111..185f73b01694 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -447,13 +447,16 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock();
+ if (conn->c_trans->t_mp_capable &&
+ cp->cp_index == 0)
+ rds_send_ping(conn, 0);
rds_queue_reconnect(cp);
} else {
rcu_read_unlock();
}
if (conn->c_trans->conn_slots_available)
- conn->c_trans->conn_slots_available(conn);
+ conn->c_trans->conn_slots_available(conn, false);
}
/* destroy a single rds_conn_path. rds_conn_destroy() iterates over
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 6d9f4a08b0ee..6e0790e4b570 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -549,7 +549,7 @@ struct rds_transport {
* messages received on the new socket are not discarded when no
* connection path was available at the time.
*/
- void (*conn_slots_available)(struct rds_connection *conn);
+ void (*conn_slots_available)(struct rds_connection *conn, bool fan_out);
int (*conn_path_connect)(struct rds_conn_path *cp);
/*
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 889a5b7935e5..4b3f9e4a8bfd 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -209,6 +209,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
bool new_with_sport_idx = false;
u32 new_peer_gen_num = 0;
int new_npaths;
+ bool fan_out;
new_npaths = conn->c_npaths;
@@ -248,7 +249,11 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
spin_lock_irqsave(&cp0->cp_lock, flags);
conn->c_cp0_mprds_catchup_tx_seq = cp0->cp_next_tx_seq;
spin_unlock_irqrestore(&cp0->cp_lock, flags);
+ fan_out = true;
+ } else {
+ fan_out = false;
}
+
/* if RDS_EXTHDR_NPATHS was not found, default to a single-path */
conn->c_npaths = max_t(int, new_npaths, 1);
@@ -257,7 +262,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
if (conn->c_npaths > 1 &&
conn->c_trans->conn_slots_available)
- conn->c_trans->conn_slots_available(conn);
+ conn->c_trans->conn_slots_available(conn, fan_out);
}
/* rds_start_mprds() will synchronously start multiple paths when appropriate.
diff --git a/net/rds/send.c b/net/rds/send.c
index 599c2cfb7a1d..6e96f108473e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1339,6 +1339,24 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
cpath = &conn->c_path[0];
}
+ /* If we're multipath capable and path 0 is down, queue reconnect
+ * and send a ping. This initiates the multipath handshake through
+ * rds_send_probe(), which sends RDS_EXTHDR_NPATHS to the peer,
+ * starting multipath capability negotiation.
+ */
+ if (conn->c_trans->t_mp_capable &&
+ !rds_conn_path_up(&conn->c_path[0])) {
+ /* test_and_set_bit() ensures that only one
+ * reconnect request is queued, and rds_send_ping()
+ * ensures that only one ping is outstanding.
+ */
+ if (!test_and_set_bit(RDS_RECONNECT_PENDING,
+ &conn->c_path[0].cp_flags))
+ queue_delayed_work(conn->c_path[0].cp_wq,
+ &conn->c_path[0].cp_conn_w, 0);
+ rds_send_ping(conn, 0);
+ }
+
rm->m_conn_path = cpath;
/* Parse any control messages the user may have included. */
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index b36af0865a07..39c86347188c 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -90,7 +90,7 @@ void rds_tcp_state_change(struct sock *sk);
struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
-void rds_tcp_conn_slots_available(struct rds_connection *conn);
+void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out);
int rds_tcp_accept_one(struct rds_tcp_net *rtn);
void rds_tcp_keepalive(struct socket *sock);
void *rds_tcp_listen_sock_def_readable(struct net *net);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index b5786227623c..6fb5c928b8fd 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -56,14 +56,8 @@ void rds_tcp_keepalive(struct socket *sock)
tcp_sock_set_keepintvl(sock->sk, keepidle);
}
-/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
- * client's ipaddr < server's ipaddr. Otherwise, close the accepted
- * socket and force a reconneect from smaller -> larger ip addr. The reason
- * we special case cp_index 0 is to allow the rds probe ping itself to itself
- * get through efficiently.
- */
-static struct rds_tcp_connection *
-rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock)
+static int
+rds_tcp_get_peer_sport(struct socket *sock)
{
union {
struct sockaddr_storage storage;
@@ -71,11 +65,9 @@ rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock)
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
} saddr;
- int sport, npaths, i_min, i_max, i;
+ int sport;
- if (conn->c_with_sport_idx &&
- kernel_getpeername(sock, &saddr.addr) >= 0) {
- /* cp->cp_index is encoded in lowest bits of source-port */
+ if (kernel_getpeername(sock, &saddr.addr) >= 0) {
switch (saddr.addr.sa_family) {
case AF_INET:
sport = ntohs(saddr.sin.sin_port);
@@ -90,6 +82,26 @@ rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock)
sport = -1;
}
+ return sport;
+}
+
+/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
+ * client's ipaddr < server's ipaddr. Otherwise, close the accepted
+ * socket and force a reconnect from smaller -> larger ip addr. The reason
+ * we special case cp_index 0 is to allow the rds probe ping itself to itself
+ * get through efficiently.
+ */
+static struct rds_tcp_connection *
+rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock)
+{
+ int sport, npaths, i_min, i_max, i;
+
+ if (conn->c_with_sport_idx)
+ /* cp->cp_index is encoded in lowest bits of source-port */
+ sport = rds_tcp_get_peer_sport(sock);
+ else
+ sport = -1;
+
npaths = max_t(int, 1, conn->c_npaths);
if (sport >= 0) {
@@ -111,10 +123,12 @@ rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock)
return NULL;
}
-void rds_tcp_conn_slots_available(struct rds_connection *conn)
+void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out)
{
struct rds_tcp_connection *tc;
struct rds_tcp_net *rtn;
+ struct socket *sock;
+ int sport, npaths;
if (rds_destroy_pending(conn))
return;
@@ -124,6 +138,21 @@ void rds_tcp_conn_slots_available(struct rds_connection *conn)
if (!rtn)
return;
+ sock = tc->t_sock;
+
+ /* During fan-out, check that the connection we already accepted
+ * in slot#0 has a source port that is 0 modulo the number of paths.
+ */
+ if (fan_out && conn->c_with_sport_idx && sock &&
+ rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0) {
+ /* cp->cp_index is encoded in lowest bits of source-port */
+ sport = rds_tcp_get_peer_sport(sock);
+ npaths = max_t(int, 1, conn->c_npaths);
+ if (sport >= 0 && sport % npaths != 0)
+ /* peer initiated with a non-#0 lane first */
+ rds_conn_path_drop(conn->c_path, 0);
+ }
+
/* As soon as a connection went down,
* it is safe to schedule a "rds_tcp_accept_one"
* attempt even if there are no connections pending: