diff options
| -rw-r--r-- | net/rds/connection.c | 5 | ||||
| -rw-r--r-- | net/rds/rds.h | 2 | ||||
| -rw-r--r-- | net/rds/recv.c | 7 | ||||
| -rw-r--r-- | net/rds/send.c | 18 | ||||
| -rw-r--r-- | net/rds/tcp.h | 2 | ||||
| -rw-r--r-- | net/rds/tcp_listen.c | 55 |
6 files changed, 72 insertions, 17 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c index 4b7715eb2111..185f73b01694 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -447,13 +447,16 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node)) { rcu_read_unlock(); + if (conn->c_trans->t_mp_capable && + cp->cp_index == 0) + rds_send_ping(conn, 0); rds_queue_reconnect(cp); } else { rcu_read_unlock(); } if (conn->c_trans->conn_slots_available) - conn->c_trans->conn_slots_available(conn); + conn->c_trans->conn_slots_available(conn, false); } /* destroy a single rds_conn_path. rds_conn_destroy() iterates over diff --git a/net/rds/rds.h b/net/rds/rds.h index 6d9f4a08b0ee..6e0790e4b570 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -549,7 +549,7 @@ struct rds_transport { * messages received on the new socket are not discarded when no * connection path was available at the time. */ - void (*conn_slots_available)(struct rds_connection *conn); + void (*conn_slots_available)(struct rds_connection *conn, bool fan_out); int (*conn_path_connect)(struct rds_conn_path *cp); /* diff --git a/net/rds/recv.c b/net/rds/recv.c index 889a5b7935e5..4b3f9e4a8bfd 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -209,6 +209,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, bool new_with_sport_idx = false; u32 new_peer_gen_num = 0; int new_npaths; + bool fan_out; new_npaths = conn->c_npaths; @@ -248,7 +249,11 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, spin_lock_irqsave(&cp0->cp_lock, flags); conn->c_cp0_mprds_catchup_tx_seq = cp0->cp_next_tx_seq; spin_unlock_irqrestore(&cp0->cp_lock, flags); + fan_out = true; + } else { + fan_out = false; } + /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ conn->c_npaths = max_t(int, new_npaths, 1); @@ -257,7 +262,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, if (conn->c_npaths > 1 && conn->c_trans->conn_slots_available) - 
conn->c_trans->conn_slots_available(conn); + conn->c_trans->conn_slots_available(conn, fan_out); } /* rds_start_mprds() will synchronously start multiple paths when appropriate. diff --git a/net/rds/send.c b/net/rds/send.c index 599c2cfb7a1d..6e96f108473e 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1339,6 +1339,24 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) cpath = &conn->c_path[0]; } + /* If we're multipath capable and path 0 is down, queue reconnect + * and send a ping. This initiates the multipath handshake through + * rds_send_probe(), which sends RDS_EXTHDR_NPATHS to the peer, + * starting multipath capability negotiation. + */ + if (conn->c_trans->t_mp_capable && + !rds_conn_path_up(&conn->c_path[0])) { + /* Ensures that only one request is queued. And + * rds_send_ping() ensures that only one ping is + * outstanding. + */ + if (!test_and_set_bit(RDS_RECONNECT_PENDING, + &conn->c_path[0].cp_flags)) + queue_delayed_work(conn->c_path[0].cp_wq, + &conn->c_path[0].cp_conn_w, 0); + rds_send_ping(conn, 0); + } + rm->m_conn_path = cpath; /* Parse any control messages the user may have included. 
*/ diff --git a/net/rds/tcp.h b/net/rds/tcp.h index b36af0865a07..39c86347188c 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -90,7 +90,7 @@ void rds_tcp_state_change(struct sock *sk); struct socket *rds_tcp_listen_init(struct net *net, bool isv6); void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); -void rds_tcp_conn_slots_available(struct rds_connection *conn); +void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out); int rds_tcp_accept_one(struct rds_tcp_net *rtn); void rds_tcp_keepalive(struct socket *sock); void *rds_tcp_listen_sock_def_readable(struct net *net); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index b5786227623c..6fb5c928b8fd 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -56,14 +56,8 @@ void rds_tcp_keepalive(struct socket *sock) tcp_sock_set_keepintvl(sock->sk, keepidle); } -/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the - * client's ipaddr < server's ipaddr. Otherwise, close the accepted - * socket and force a reconneect from smaller -> larger ip addr. The reason - * we special case cp_index 0 is to allow the rds probe ping itself to itself - * get through efficiently. 
- */ -static struct rds_tcp_connection * -rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock) +static int +rds_tcp_get_peer_sport(struct socket *sock) { union { struct sockaddr_storage storage; @@ -71,11 +65,9 @@ rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock) struct sockaddr_in sin; struct sockaddr_in6 sin6; } saddr; - int sport, npaths, i_min, i_max, i; + int sport; - if (conn->c_with_sport_idx && - kernel_getpeername(sock, &saddr.addr) >= 0) { - /* cp->cp_index is encoded in lowest bits of source-port */ + if (kernel_getpeername(sock, &saddr.addr) >= 0) { switch (saddr.addr.sa_family) { case AF_INET: sport = ntohs(saddr.sin.sin_port); @@ -90,6 +82,26 @@ rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock) sport = -1; } + return sport; +} + +/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the + * client's ipaddr < server's ipaddr. Otherwise, close the accepted + * socket and force a reconnect from smaller -> larger ip addr. The reason + * we special case cp_index 0 is to allow the rds probe ping itself to itself + * get through efficiently. 
+ */ +static struct rds_tcp_connection * +rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock) +{ + int sport, npaths, i_min, i_max, i; + + if (conn->c_with_sport_idx) + /* cp->cp_index is encoded in lowest bits of source-port */ + sport = rds_tcp_get_peer_sport(sock); + else + sport = -1; + npaths = max_t(int, 1, conn->c_npaths); if (sport >= 0) { @@ -111,10 +123,12 @@ rds_tcp_accept_one_path(struct rds_connection *conn, struct socket *sock) return NULL; } -void rds_tcp_conn_slots_available(struct rds_connection *conn) +void rds_tcp_conn_slots_available(struct rds_connection *conn, bool fan_out) { struct rds_tcp_connection *tc; struct rds_tcp_net *rtn; + struct socket *sock; + int sport, npaths; if (rds_destroy_pending(conn)) return; @@ -124,6 +138,21 @@ void rds_tcp_conn_slots_available(struct rds_connection *conn) if (!rtn) return; + sock = tc->t_sock; + + /* During fan-out, check that the connection we already + * accepted in slot#0 carried the proper source port modulo. + */ + if (fan_out && conn->c_with_sport_idx && sock && + rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0) { + /* cp->cp_index is encoded in lowest bits of source-port */ + sport = rds_tcp_get_peer_sport(sock); + npaths = max_t(int, 1, conn->c_npaths); + if (sport >= 0 && sport % npaths != 0) + /* peer initiated with a non-#0 lane first */ + rds_conn_path_drop(conn->c_path, 0); + } + /* As soon as a connection went down, * it is safe to schedule a "rds_tcp_accept_one" * attempt even if there are no connections pending: |
