summaryrefslogtreecommitdiff
path: root/net/rds/connection.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-27 12:03:20 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-27 12:03:20 -0700
commit468fc7ed5537615efe671d94248446ac24679773 (patch)
tree27bc9de792e863d6ec1630927b77ac9e7dabb38a /net/rds/connection.c
parent08fd8c17686c6b09fa410a26d516548dd80ff147 (diff)
parent36232012344b8db67052432742deaf17f82e70e6 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Unified UDP encapsulation offload methods for drivers, from Alexander Duyck. 2) Make DSA binding more sane, from Andrew Lunn. 3) Support QCA9888 chips in ath10k, from Anilkumar Kolli. 4) Several workqueue usage cleanups, from Bhaktipriya Shridhar. 5) Add XDP (eXpress Data Path), essentially running BPF programs on RX packets as soon as the device sees them, with the option to mirror the packet on TX via the same interface. From Brenden Blanco and others. 6) Allow qdisc/class stats dumps to run lockless, from Eric Dumazet. 7) Add VLAN support to b53 and bcm_sf2, from Florian Fainelli. 8) Simplify netlink conntrack entry layout, from Florian Westphal. 9) Add ipv4 forwarding support to mlxsw spectrum driver, from Ido Schimmel, Yotam Gigi, and Jiri Pirko. 10) Add SKB array infrastructure and convert tun and macvtap over to it. From Michael S Tsirkin and Jason Wang. 11) Support qdisc packet injection in pktgen, from John Fastabend. 12) Add neighbour monitoring framework to TIPC, from Jon Paul Maloy. 13) Add NV congestion control support to TCP, from Lawrence Brakmo. 14) Add GSO support to SCTP, from Marcelo Ricardo Leitner. 15) Allow GRO and RPS to function on macsec devices, from Paolo Abeni. 16) Support MPLS over IPV4, from Simon Horman. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1622 commits) xgene: Fix build warning with ACPI disabled. be2net: perform temperature query in adapter regardless of its interface state l2tp: Correctly return -EBADF from pppol2tp_getname. net/mlx5_core/health: Remove deprecated create_singlethread_workqueue net: ipmr/ip6mr: update lastuse on entry change macsec: ensure rx_sa is set when validation is disabled tipc: dump monitor attributes tipc: add a function to get the bearer name tipc: get monitor threshold for the cluster tipc: make cluster size threshold for monitoring configurable tipc: introduce constants for tipc address validation net: neigh: disallow transition to NUD_STALE if lladdr is unchanged in neigh_update() MAINTAINERS: xgene: Add driver and documentation path Documentation: dtb: xgene: Add MDIO node dtb: xgene: Add MDIO node drivers: net: xgene: ethtool: Use phy_ethtool_gset and sset drivers: net: xgene: Use exported functions drivers: net: xgene: Enable MDIO driver drivers: net: xgene: Add backward compatibility drivers: net: phy: xgene: Add MDIO driver ...
Diffstat (limited to 'net/rds/connection.c')
-rw-r--r--net/rds/connection.c329
1 files changed, 229 insertions, 100 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c
index e3b118cae81d..f5058559bb08 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -95,14 +95,16 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
* and receiving over this connection again in the future. It is up to
* the transport to have serialized this call with its send and recv.
*/
-static void rds_conn_reset(struct rds_connection *conn)
+static void rds_conn_path_reset(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
+
rdsdebug("connection %pI4 to %pI4 reset\n",
&conn->c_laddr, &conn->c_faddr);
rds_stats_inc(s_conn_reset);
- rds_send_reset(conn);
- conn->c_flags = 0;
+ rds_send_path_reset(cp);
+ cp->cp_flags = 0;
/* Do not clear next_rx_seq here, else we cannot distinguish
* retransmitted packets from new packets, and will hand all
@@ -110,6 +112,32 @@ static void rds_conn_reset(struct rds_connection *conn)
* reliability guarantees of RDS. */
}
+static void __rds_conn_path_init(struct rds_connection *conn,
+ struct rds_conn_path *cp, bool is_outgoing)
+{
+ spin_lock_init(&cp->cp_lock);
+ cp->cp_next_tx_seq = 1;
+ init_waitqueue_head(&cp->cp_waitq);
+ INIT_LIST_HEAD(&cp->cp_send_queue);
+ INIT_LIST_HEAD(&cp->cp_retrans);
+
+ cp->cp_conn = conn;
+ atomic_set(&cp->cp_state, RDS_CONN_DOWN);
+ cp->cp_send_gen = 0;
+ /* cp_outgoing is per-path. So we can only set it here
+ * for the single-path transports.
+ */
+ if (!conn->c_trans->t_mp_capable)
+ cp->cp_outgoing = (is_outgoing ? 1 : 0);
+ cp->cp_reconnect_jiffies = 0;
+ INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
+ INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);
+ INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker);
+ INIT_WORK(&cp->cp_down_w, rds_shutdown_worker);
+ mutex_init(&cp->cp_cm_lock);
+ cp->cp_flags = 0;
+}
+
/*
* There is only every one 'conn' for a given pair of addresses in the
* system at a time. They contain messages to be retransmitted and so
@@ -127,7 +155,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct hlist_head *head = rds_conn_bucket(laddr, faddr);
struct rds_transport *loop_trans;
unsigned long flags;
- int ret;
+ int ret, i;
rcu_read_lock();
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
@@ -153,13 +181,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
INIT_HLIST_NODE(&conn->c_hash_node);
conn->c_laddr = laddr;
conn->c_faddr = faddr;
- spin_lock_init(&conn->c_lock);
- conn->c_next_tx_seq = 1;
- rds_conn_net_set(conn, net);
- init_waitqueue_head(&conn->c_waitq);
- INIT_LIST_HEAD(&conn->c_send_queue);
- INIT_LIST_HEAD(&conn->c_retrans);
+ rds_conn_net_set(conn, net);
ret = rds_cong_get_maps(conn);
if (ret) {
@@ -188,6 +211,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn->c_trans = trans;
+ init_waitqueue_head(&conn->c_hs_waitq);
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ __rds_conn_path_init(conn, &conn->c_path[i],
+ is_outgoing);
+ conn->c_path[i].cp_index = i;
+ }
ret = trans->conn_alloc(conn, gfp);
if (ret) {
kmem_cache_free(rds_conn_slab, conn);
@@ -195,17 +224,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
goto out;
}
- atomic_set(&conn->c_state, RDS_CONN_DOWN);
- conn->c_send_gen = 0;
- conn->c_outgoing = (is_outgoing ? 1 : 0);
- conn->c_reconnect_jiffies = 0;
- INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
- INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
- INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker);
- INIT_WORK(&conn->c_down_w, rds_shutdown_worker);
- mutex_init(&conn->c_cm_lock);
- conn->c_flags = 0;
-
rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
conn, &laddr, &faddr,
trans->t_name ? trans->t_name : "[unknown]",
@@ -222,7 +240,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (parent) {
/* Creating passive conn */
if (parent->c_passive) {
- trans->conn_free(conn->c_transport_data);
+ trans->conn_free(conn->c_path[0].cp_transport_data);
kmem_cache_free(rds_conn_slab, conn);
conn = parent->c_passive;
} else {
@@ -236,7 +254,18 @@ static struct rds_connection *__rds_conn_create(struct net *net,
found = rds_conn_lookup(net, head, laddr, faddr, trans);
if (found) {
- trans->conn_free(conn->c_transport_data);
+ struct rds_conn_path *cp;
+ int i;
+
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ cp = &conn->c_path[i];
+ /* The ->conn_alloc invocation may have
+ * allocated resource for all paths, so all
+ * of them may have to be freed here.
+ */
+ if (cp->cp_transport_data)
+ trans->conn_free(cp->cp_transport_data);
+ }
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
@@ -267,10 +296,12 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
-void rds_conn_shutdown(struct rds_connection *conn)
+void rds_conn_shutdown(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
+
/* shut it down unless it's down already */
- if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
+ if (!rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
/*
* Quiesce the connection mgmt handlers before we start tearing
* things down. We don't hold the mutex for the entire
@@ -278,35 +309,38 @@ void rds_conn_shutdown(struct rds_connection *conn)
* deadlocking with the CM handler. Instead, the CM event
* handler is supposed to check for state DISCONNECTING
*/
- mutex_lock(&conn->c_cm_lock);
- if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
- && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
- rds_conn_error(conn, "shutdown called in state %d\n",
- atomic_read(&conn->c_state));
- mutex_unlock(&conn->c_cm_lock);
+ mutex_lock(&cp->cp_cm_lock);
+ if (!rds_conn_path_transition(cp, RDS_CONN_UP,
+ RDS_CONN_DISCONNECTING) &&
+ !rds_conn_path_transition(cp, RDS_CONN_ERROR,
+ RDS_CONN_DISCONNECTING)) {
+ rds_conn_path_error(cp,
+ "shutdown called in state %d\n",
+ atomic_read(&cp->cp_state));
+ mutex_unlock(&cp->cp_cm_lock);
return;
}
- mutex_unlock(&conn->c_cm_lock);
+ mutex_unlock(&cp->cp_cm_lock);
- wait_event(conn->c_waitq,
- !test_bit(RDS_IN_XMIT, &conn->c_flags));
- wait_event(conn->c_waitq,
- !test_bit(RDS_RECV_REFILL, &conn->c_flags));
+ wait_event(cp->cp_waitq,
+ !test_bit(RDS_IN_XMIT, &cp->cp_flags));
+ wait_event(cp->cp_waitq,
+ !test_bit(RDS_RECV_REFILL, &cp->cp_flags));
- conn->c_trans->conn_shutdown(conn);
- rds_conn_reset(conn);
+ conn->c_trans->conn_path_shutdown(cp);
+ rds_conn_path_reset(cp);
- if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
+ if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
+ RDS_CONN_DOWN)) {
/* This can happen - eg when we're in the middle of tearing
* down the connection, and someone unloads the rds module.
* Quite reproduceable with loopback connections.
* Mostly harmless.
*/
- rds_conn_error(conn,
- "%s: failed to transition to state DOWN, "
- "current state is %d\n",
- __func__,
- atomic_read(&conn->c_state));
+ rds_conn_path_error(cp, "%s: failed to transition "
+ "to state DOWN, current state "
+ "is %d\n", __func__,
+ atomic_read(&cp->cp_state));
return;
}
}
@@ -315,18 +349,47 @@ void rds_conn_shutdown(struct rds_connection *conn)
* The passive side of an IB loopback connection is never added
* to the conn hash, so we never trigger a reconnect on this
* conn - the reconnect is always triggered by the active peer. */
- cancel_delayed_work_sync(&conn->c_conn_w);
+ cancel_delayed_work_sync(&cp->cp_conn_w);
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock();
- if (conn->c_trans->t_type != RDS_TRANS_TCP ||
- conn->c_outgoing == 1)
- rds_queue_reconnect(conn);
+ rds_queue_reconnect(cp);
} else {
rcu_read_unlock();
}
}
+/* destroy a single rds_conn_path. rds_conn_destroy() iterates over
+ * all paths using rds_conn_path_destroy()
+ */
+static void rds_conn_path_destroy(struct rds_conn_path *cp)
+{
+ struct rds_message *rm, *rtmp;
+
+ if (!cp->cp_transport_data)
+ return;
+
+ rds_conn_path_drop(cp);
+ flush_work(&cp->cp_down_w);
+
+ /* make sure lingering queued work won't try to ref the conn */
+ cancel_delayed_work_sync(&cp->cp_send_w);
+ cancel_delayed_work_sync(&cp->cp_recv_w);
+
+ /* tear down queued messages */
+ list_for_each_entry_safe(rm, rtmp,
+ &cp->cp_send_queue,
+ m_conn_item) {
+ list_del_init(&rm->m_conn_item);
+ BUG_ON(!list_empty(&rm->m_sock_item));
+ rds_message_put(rm);
+ }
+ if (cp->cp_xmit_rm)
+ rds_message_put(cp->cp_xmit_rm);
+
+ cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
+}
+
/*
* Stop and free a connection.
*
@@ -336,8 +399,9 @@ void rds_conn_shutdown(struct rds_connection *conn)
*/
void rds_conn_destroy(struct rds_connection *conn)
{
- struct rds_message *rm, *rtmp;
unsigned long flags;
+ int i;
+ struct rds_conn_path *cp;
rdsdebug("freeing conn %p for %pI4 -> "
"%pI4\n", conn, &conn->c_laddr,
@@ -350,25 +414,11 @@ void rds_conn_destroy(struct rds_connection *conn)
synchronize_rcu();
/* shut the connection down */
- rds_conn_drop(conn);
- flush_work(&conn->c_down_w);
-
- /* make sure lingering queued work won't try to ref the conn */
- cancel_delayed_work_sync(&conn->c_send_w);
- cancel_delayed_work_sync(&conn->c_recv_w);
-
- /* tear down queued messages */
- list_for_each_entry_safe(rm, rtmp,
- &conn->c_send_queue,
- m_conn_item) {
- list_del_init(&rm->m_conn_item);
- BUG_ON(!list_empty(&rm->m_sock_item));
- rds_message_put(rm);
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ cp = &conn->c_path[i];
+ rds_conn_path_destroy(cp);
+ BUG_ON(!list_empty(&cp->cp_retrans));
}
- if (conn->c_xmit_rm)
- rds_message_put(conn->c_xmit_rm);
-
- conn->c_trans->conn_free(conn->c_transport_data);
/*
* The congestion maps aren't freed up here. They're
@@ -377,7 +427,6 @@ void rds_conn_destroy(struct rds_connection *conn)
*/
rds_cong_remove_conn(conn);
- BUG_ON(!list_empty(&conn->c_retrans));
kmem_cache_free(rds_conn_slab, conn);
spin_lock_irqsave(&rds_conn_lock, flags);
@@ -398,6 +447,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
unsigned int total = 0;
unsigned long flags;
size_t i;
+ int j;
len /= sizeof(struct rds_info_message);
@@ -406,23 +456,32 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
i++, head++) {
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
- if (want_send)
- list = &conn->c_send_queue;
- else
- list = &conn->c_retrans;
-
- spin_lock_irqsave(&conn->c_lock, flags);
-
- /* XXX too lazy to maintain counts.. */
- list_for_each_entry(rm, list, m_conn_item) {
- total++;
- if (total <= len)
- rds_inc_info_copy(&rm->m_inc, iter,
- conn->c_laddr,
- conn->c_faddr, 0);
+ struct rds_conn_path *cp;
+
+ for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+ cp = &conn->c_path[j];
+ if (want_send)
+ list = &cp->cp_send_queue;
+ else
+ list = &cp->cp_retrans;
+
+ spin_lock_irqsave(&cp->cp_lock, flags);
+
+ /* XXX too lazy to maintain counts.. */
+ list_for_each_entry(rm, list, m_conn_item) {
+ total++;
+ if (total <= len)
+ rds_inc_info_copy(&rm->m_inc,
+ iter,
+ conn->c_laddr,
+ conn->c_faddr,
+ 0);
+ }
+
+ spin_unlock_irqrestore(&cp->cp_lock, flags);
+ if (!conn->c_trans->t_mp_capable)
+ break;
}
-
- spin_unlock_irqrestore(&conn->c_lock, flags);
}
}
rcu_read_unlock();
@@ -484,27 +543,72 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
}
EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
-static int rds_conn_info_visitor(struct rds_connection *conn,
- void *buffer)
+void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens,
+ int (*visitor)(struct rds_conn_path *, void *),
+ size_t item_len)
+{
+ u64 buffer[(item_len + 7) / 8];
+ struct hlist_head *head;
+ struct rds_connection *conn;
+ size_t i;
+ int j;
+
+ rcu_read_lock();
+
+ lens->nr = 0;
+ lens->each = item_len;
+
+ for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
+ i++, head++) {
+ hlist_for_each_entry_rcu(conn, head, c_hash_node) {
+ struct rds_conn_path *cp;
+
+ for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+ cp = &conn->c_path[j];
+
+ /* XXX no cp_lock usage.. */
+ if (!visitor(cp, buffer))
+ continue;
+ if (!conn->c_trans->t_mp_capable)
+ break;
+ }
+
+ /* We copy as much as we can fit in the buffer,
+ * but we count all items so that the caller
+ * can resize the buffer.
+ */
+ if (len >= item_len) {
+ rds_info_copy(iter, buffer, item_len);
+ len -= item_len;
+ }
+ lens->nr++;
+ }
+ }
+ rcu_read_unlock();
+}
+
+static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
{
struct rds_info_connection *cinfo = buffer;
- cinfo->next_tx_seq = conn->c_next_tx_seq;
- cinfo->next_rx_seq = conn->c_next_rx_seq;
- cinfo->laddr = conn->c_laddr;
- cinfo->faddr = conn->c_faddr;
- strncpy(cinfo->transport, conn->c_trans->t_name,
+ cinfo->next_tx_seq = cp->cp_next_tx_seq;
+ cinfo->next_rx_seq = cp->cp_next_rx_seq;
+ cinfo->laddr = cp->cp_conn->c_laddr;
+ cinfo->faddr = cp->cp_conn->c_faddr;
+ strncpy(cinfo->transport, cp->cp_conn->c_trans->t_name,
sizeof(cinfo->transport));
cinfo->flags = 0;
- rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
+ rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags),
SENDING);
/* XXX Future: return the state rather than these funky bits */
rds_conn_info_set(cinfo->flags,
- atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
+ atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING,
CONNECTING);
rds_conn_info_set(cinfo->flags,
- atomic_read(&conn->c_state) == RDS_CONN_UP,
+ atomic_read(&cp->cp_state) == RDS_CONN_UP,
CONNECTED);
return 1;
}
@@ -513,7 +617,7 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
- rds_for_each_conn_info(sock, len, iter, lens,
+ rds_walk_conn_path_info(sock, len, iter, lens,
rds_conn_info_visitor,
sizeof(struct rds_info_connection));
}
@@ -553,10 +657,17 @@ void rds_conn_exit(void)
/*
* Force a disconnect
*/
+void rds_conn_path_drop(struct rds_conn_path *cp)
+{
+ atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+ queue_work(rds_wq, &cp->cp_down_w);
+}
+EXPORT_SYMBOL_GPL(rds_conn_path_drop);
+
void rds_conn_drop(struct rds_connection *conn)
{
- atomic_set(&conn->c_state, RDS_CONN_ERROR);
- queue_work(rds_wq, &conn->c_down_w);
+ WARN_ON(conn->c_trans->t_mp_capable);
+ rds_conn_path_drop(&conn->c_path[0]);
}
EXPORT_SYMBOL_GPL(rds_conn_drop);
@@ -564,11 +675,17 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
* If the connection is down, trigger a connect. We may have scheduled a
* delayed reconnect however - in this case we should not interfere.
*/
+void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
+{
+ if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
+ !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+}
+
void rds_conn_connect_if_down(struct rds_connection *conn)
{
- if (rds_conn_state(conn) == RDS_CONN_DOWN &&
- !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
- queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
+ WARN_ON(conn->c_trans->t_mp_capable);
+ rds_conn_path_connect_if_down(&conn->c_path[0]);
}
EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
@@ -586,3 +703,15 @@ __rds_conn_error(struct rds_connection *conn, const char *fmt, ...)
rds_conn_drop(conn);
}
+
+void
+__rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vprintk(fmt, ap);
+ va_end(ap);
+
+ rds_conn_path_drop(cp);
+}