From 73f646cec35477b5099d7e952297cb9e1855be45 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:44 -0400 Subject: tipc: delay ESTABLISH state event when link is established Link establishing, just like link teardown, is a non-atomic action, in the sense that discovering that conditions are right to establish a link, and the actual adding of the link to one of the node's send slots is done in two different lock contexts. The link FSM is designed to help bridging the gap between the two contexts in a safe manner. We have now discovered a weakness in the implementaton of this FSM. Because we directly let the link go from state LINK_ESTABLISHING to state LINK_ESTABLISHED already in the first lock context, we are unable to distinguish between a fully established link, i.e., a link that has been added to its slot, and a link that has not yet reached the second lock context. It may hence happen that a manual intervention, e.g., when disabling an interface, causes the function tipc_node_link_down() to try removing the link from the node slots, decrementing its active link counter etc, although the link was never added there in the first place. We solve this by delaying the actual state change until we reach the second lock context, inside the function tipc_node_link_up(). This makes it possible for potentail callers of __tipc_node_link_down() to know if they should proceed or not, and the problem is solved. Unforunately, the situation described above also has a second problem. Since there by necessity is a tipc_node_link_up() call pending once the node lock has been released, we must defuse that call by setting the link back from LINK_ESTABLISHING to LINK_RESET state. This forces us to make a slight modification to the link FSM, which will now look as follows. +------------------------------------+ |RESET_EVT | | | | +--------------+ | +-----------------| SYNCHING |-----------------+ | |FAILURE_EVT +--------------+ PEER_RESET_EVT| | | A | | | | | | | | | | | | | | |SYNCH_ |SYNCH_ | | | |BEGIN_EVT |END_EVT | | | | | | | V | V V | +-------------+ +--------------+ +------------+ | | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET | | +-------------+ FAILURE_ +--------------+ PEER_ +------------+ | | EVT | A RESET_EVT | | | | | | | | +----------------+ | | | RESET_EVT| |RESET_EVT | | | | | | | | | | |ESTABLISH_EVT | | | | +-------------+ | | | | | | RESET_EVT | | | | | | | | | | | V V V | | | | +-------------+ +--------------+ RESET_EVT| +--->| RESET |--------->| ESTABLISHING |<----------------+ +-------------+ PEER_ +--------------+ | A RESET_EVT | | | | | | | |FAILOVER_ |FAILOVER_ |FAILOVER_ |BEGIN_EVT |END_EVT |BEGIN_EVT | | | V | | +-------------+ | | FAILINGOVER |<----------------+ +-------------+ Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 703875fd6cde..656b5791f1a5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -317,7 +317,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, struct tipc_link *ol = node_active_link(n, 0); struct tipc_link *nl = n->links[bearer_id].link; - if (!nl || !tipc_link_is_up(nl)) + if (!nl) + return; + + tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT); + if (!tipc_link_is_up(nl)) return; n->working_links++; @@ -437,17 +441,26 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_link *l = le->link; struct tipc_media_addr *maddr; struct sk_buff_head xmitq; + if (!l) + return; + __skb_queue_head_init(&xmitq); tipc_node_lock(n); - __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); - if (delete && le->link) { - kfree(le->link); - le->link = NULL; - n->link_cnt--; + if (!tipc_link_is_establishing(l)) { + __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; + } + } else { + /* Defuse pending tipc_node_link_up() */ + tipc_link_fsm_evt(l, LINK_RESET_EVT); } tipc_node_unlock(n); @@ -579,7 +592,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_unlock(n); - if (reset) + if (reset && !tipc_link_is_reset(l)) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } @@ -686,10 +699,10 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) break; case SELF_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: - break; case NODE_SYNCH_END_EVT: - case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_BEGIN_EVT: + break; + case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_END_EVT: default: goto illegal_evt; -- cgit v1.2.3 From 282b3a056225b35024246f63feb91d769d714dad Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:45 -0400 Subject: tipc: send out RESET immediately when link goes down When a link is taken down because of a node local event, such as disabling of a bearer or an interface, we currently leave it to the peer node to discover the broken communication. The default time for such failure discovery is 1.5-2 seconds. If we instead allow the terminating link endpoint to send out a RESET message at the moment it is reset, we can achieve the impression that both endpoints are going down instantly. Since this is a very common scenario, we find it worthwhile to make this small modification. Apart from letting the link produce the said message, we also have to ensure that the interface is able to transmit it before TIPC is detached. We do this by performing the disabling of a bearer in three steps: 1) Disable reception of TIPC packets from the interface in question. 2) Take down the links, while allowing them so send out a RESET message. 3) Disable transmission of TIPC packets on the interface. Apart from this, we now have to react on the NETDEV_GOING_DOWN event, instead of as currently the NEDEV_DOWN event, to ensure that such transmission is possible during the teardown phase. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 656b5791f1a5..fba6e1af28e8 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -41,7 +41,7 @@ #include "socket.h" #include "bcast.h" #include "discover.h" - +#define pr_debug printk /* Node FSM states and events: */ enum { @@ -421,6 +421,8 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, if (!tipc_node_is_up(n)) { tipc_link_reset(l); + tipc_link_build_reset_msg(l, xmitq); + *maddr = &n->links[*bearer_id].maddr; node_lost_contact(n, &le->inputq); return; } @@ -463,7 +465,6 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_link_fsm_evt(l, LINK_RESET_EVT); } tipc_node_unlock(n); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } -- cgit v1.2.3 From c819930090fe3f74c822be765c185b3431360193 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:46 -0400 Subject: tipc: update node FSM when peer RESET message is received The change made in the previous commit revealed a small flaw in the way the node FSM is updated. When the function tipc_node_link_down() is called for the last link to a node, we should check whether this was caused by a local reset or by a received RESET message from the peer. In the latter case, we can directly issue a PEER_LOST_CONTACT_EVT to the node FSM, so that it is ready to re-establish contact. If this is not done, the peer node will sometimes have to go through a second establish cycle before the link becomes stable. We fix this in this commit by conditionally issuing the mentioned event in the function tipc_node_link_down(). We also move LINK_RESET FSM even away from the link_reset() function and into the caller function, partially because it is easier to follow the code when state changes are gathered at a limited number of locations, partially because there will be cases in future commits where we don't want the link to go RESET mode when link_reset() is called. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index fba6e1af28e8..d1f340116c84 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -41,7 +41,7 @@ #include "socket.h" #include "bcast.h" #include "discover.h" -#define pr_debug printk + /* Node FSM states and events: */ enum { @@ -420,6 +420,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, } if (!tipc_node_is_up(n)) { + if (tipc_link_peer_is_down(l)) + tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); + tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_reset(l); tipc_link_build_reset_msg(l, xmitq); *maddr = &n->links[*bearer_id].maddr; @@ -434,6 +438,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); *maddr = &n->links[tnl->bearer_id].maddr; @@ -581,6 +586,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, goto exit; } tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; @@ -863,9 +869,6 @@ static void node_lost_contact(struct tipc_node *n_ptr, tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); } - /* Prevent re-contact with node until cleanup is done */ - tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT); - /* Notify publications from this node */ n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; -- cgit v1.2.3 From 0e05498e9eae16a6d8c86543e77930ec152e655e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 22 Oct 2015 08:51:36 -0400 Subject: tipc: make link implementation independent from struct tipc_bearer In reality, the link implementation is already independent from struct tipc_bearer, in that it doesn't store any reference to it. However, we still pass on a pointer to a bearer instance in the function tipc_link_create(), just to have it extract some initialization information from it. I later commits, we need to create instances of tipc_link without having any associated struct tipc_bearer. To facilitate this, we want to extract the initialization data already in the creator function in node.c, before calling tipc_link_create(), and pass this info on as individual parameters in the call. This commit introduces this change. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 2670751d0e2e..d3f7ca202281 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -493,6 +493,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, bool link_up = false; bool accept_addr = false; bool reset = true; + char *if_name; *dupl_addr = false; *respond = false; @@ -579,7 +580,10 @@ void tipc_node_check_dest(struct net *net, u32 onode, pr_warn("Cannot establish 3rd link to %x\n", n->addr); goto exit; } - if (!tipc_link_create(n, b, mod(tipc_net(net)->random), + if_name = strchr(b->name, ':') + 1; + if (!tipc_link_create(n, if_name, b->identity, b->tolerance, + b->net_plane, b->mtu, b->priority, + b->window, mod(tipc_net(net)->random), tipc_own_addr(net), onode, &le->maddr, &le->inputq, &n->bclink.namedq, &l)) { *respond = false; -- cgit v1.2.3 From fd556f209af53b9cdc45df8c467feb235376c4df Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 22 Oct 2015 08:51:40 -0400 Subject: tipc: introduce capability bit for broadcast synchronization Until now, we have tried to support both the newer, dedicated broadcast synchronization mechanism along with the older, less safe, RESET_MSG/ ACTIVATE_MSG based one. The latter method has turned out to be a hazard in a highly dynamic cluster, so we find it safer to disable it completely when we find that the former mechanism is supported by the peer node. For this purpose, we now introduce a new capabability bit, TIPC_BCAST_SYNCH, to inform any peer nodes that dedicated broadcast syncronization is supported by the present node. The new bit is conveyed between peers in the 'capabilities' field of neighbor discovery messages. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index d3f7ca202281..28bcd7be23c6 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -584,8 +584,10 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (!tipc_link_create(n, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, b->window, mod(tipc_net(net)->random), - tipc_own_addr(net), onode, &le->maddr, - &le->inputq, &n->bclink.namedq, &l)) { + tipc_own_addr(net), onode, + n->capabilities, + &le->maddr, &le->inputq, + &n->bclink.namedq, &l)) { *respond = false; goto exit; } -- cgit v1.2.3 From 5266698661401afc5e4a1a521cf9ba10724d10dd Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 22 Oct 2015 08:51:41 -0400 Subject: tipc: let broadcast packet reception use new link receive function The code path for receiving broadcast packets is currently distinct from the unicast path. This leads to unnecessary code and data duplication, something that can be avoided with some effort. We now introduce separate per-peer tipc_link instances for handling broadcast packet reception. Each receive link keeps a pointer to the common, single, broadcast link instance, and can hence handle release and retransmission of send buffers as if they belonged to the own instance. Furthermore, we let each unicast link instance keep a reference to both the pertaining broadcast receive link, and to the common send link. This makes it possible for the unicast links to easily access data for broadcast link synchronization, as well as for carrying acknowledges for received broadcast packets. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 158 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 106 insertions(+), 52 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 28bcd7be23c6..cd924552244b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -72,7 +72,6 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete); static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); -static void node_established_contact(struct tipc_node *n_ptr); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); @@ -165,8 +164,10 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->bclink.namedq); - __skb_queue_head_init(&n_ptr->bclink.deferdq); + skb_queue_head_init(&n_ptr->bc_entry.namedq); + skb_queue_head_init(&n_ptr->bc_entry.inputq1); + __skb_queue_head_init(&n_ptr->bc_entry.arrvq); + skb_queue_head_init(&n_ptr->bc_entry.inputq2); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -177,6 +178,18 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; + if (!tipc_link_bc_create(n_ptr, tipc_own_addr(net), n_ptr->addr, + U16_MAX, tipc_bc_sndlink(net)->window, + n_ptr->capabilities, + &n_ptr->bc_entry.inputq1, + &n_ptr->bc_entry.namedq, + tipc_bc_sndlink(net), + &n_ptr->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n_ptr); + n_ptr = NULL; + goto exit; + } tipc_node_get(n_ptr); setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); n_ptr->keepalive_intv = U32_MAX; @@ -203,6 +216,7 @@ static void tipc_node_delete(struct tipc_node *node) { list_del_rcu(&node->list); hlist_del_rcu(&node->hash); + kfree(node->bc_entry.link); kfree_rcu(node, rcu); } @@ -340,8 +354,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; - tipc_link_build_bcast_sync_msg(nl, xmitq); - node_established_contact(n); + tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->action_flags |= TIPC_NOTIFY_NODE_UP; + tipc_bcast_add_peer(n->net, n->addr, nl, xmitq); return; } @@ -585,9 +600,10 @@ void tipc_node_check_dest(struct net *net, u32 onode, b->net_plane, b->mtu, b->priority, b->window, mod(tipc_net(net)->random), tipc_own_addr(net), onode, - n->capabilities, - &le->maddr, &le->inputq, - &n->bclink.namedq, &l)) { + n->capabilities, &le->maddr, + tipc_bc_sndlink(n->net), n->bc_entry.link, + &le->inputq, + &n->bc_entry.namedq, &l)) { *respond = false; goto exit; } @@ -830,58 +846,36 @@ bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) return true; } -static void node_established_contact(struct tipc_node *n_ptr) -{ - tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT); - n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; - n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); - tipc_bclink_add_node(n_ptr->net, n_ptr->addr); -} - -static void node_lost_contact(struct tipc_node *n_ptr, +static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { char addr_string[16]; struct tipc_sock_conn *conn, *safe; struct tipc_link *l; - struct list_head *conns = &n_ptr->conn_sks; + struct list_head *conns = &n->conn_sks; struct sk_buff *skb; - struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); uint i; pr_debug("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); - - /* Flush broadcast link info associated with lost node */ - if (n_ptr->bclink.recv_permitted) { - __skb_queue_purge(&n_ptr->bclink.deferdq); + tipc_addr_string_fill(addr_string, n->addr)); - if (n_ptr->bclink.reasm_buf) { - kfree_skb(n_ptr->bclink.reasm_buf); - n_ptr->bclink.reasm_buf = NULL; - } - - tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); - tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); - - n_ptr->bclink.recv_permitted = false; - } + /* Clean up broadcast state */ + tipc_bcast_remove_peer(n->net, n->addr, n->bc_entry.link); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { - l = n_ptr->links[i].link; + l = n->links[i].link; if (l) tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); } /* Notify publications from this node */ - n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; + n->action_flags |= TIPC_NOTIFY_NODE_DOWN; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tn->own_addr, + SHORT_H_SIZE, 0, tipc_own_addr(n->net), conn->peer_node, conn->port, conn->peer_port, TIPC_ERR_NO_NODE); if (likely(skb)) @@ -1085,6 +1079,67 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +/** + * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @bearer_id: id of bearer message arrived on + * + * Invoked with no locks held. + */ +void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) +{ + int rc; + struct sk_buff_head xmitq; + struct tipc_bclink_entry *be; + struct tipc_link_entry *le; + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + u32 dnode = msg_destnode(hdr); + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + /* If NACK for other node, let rcv link for that node peek into it */ + if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net))) + n = tipc_node_find(net, dnode); + else + n = tipc_node_find(net, msg_prevnode(hdr)); + if (!n) { + kfree_skb(skb); + return; + } + be = &n->bc_entry; + le = &n->links[bearer_id]; + + rc = tipc_bcast_rcv(net, be->link, skb); + + /* Broadcast link reset may happen at reassembly failure */ + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_reset_links(n); + + /* Broadcast ACKs are sent on a unicast link */ + if (rc & TIPC_LINK_SND_BC_ACK) { + tipc_node_lock(n); + tipc_link_build_ack_msg(le->link, &xmitq); + tipc_node_unlock(n); + } + + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + + /* Deliver. 'arrvq' is under inputq2's lock protection */ + if (!skb_queue_empty(&be->inputq1)) { + spin_lock_bh(&be->inputq2.lock); + spin_lock_bh(&be->inputq1.lock); + skb_queue_splice_tail_init(&be->inputq1, &be->arrvq); + spin_unlock_bh(&be->inputq1.lock); + spin_unlock_bh(&be->inputq2.lock); + tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2); + } + tipc_node_put(n); +} + /** * tipc_node_check_state - check and if necessary update node state * @skb: TIPC packet @@ -1227,6 +1282,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int usr = msg_user(hdr); int bearer_id = b->identity; struct tipc_link_entry *le; + u16 bc_ack = msg_bcast_ack(hdr); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1235,13 +1291,12 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(!tipc_msg_validate(skb))) goto discard; - /* Handle arrival of a non-unicast link packet */ + /* Handle arrival of discovery or broadcast packet */ if (unlikely(msg_non_seq(hdr))) { - if (usr == LINK_CONFIG) - tipc_disc_rcv(net, skb, b); + if (unlikely(usr == LINK_CONFIG)) + return tipc_disc_rcv(net, skb, b); else - tipc_bclink_rcv(net, skb); - return; + return tipc_node_bc_rcv(net, skb, bearer_id); } /* Locate neighboring node that sent packet */ @@ -1250,19 +1305,18 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) goto discard; le = &n->links[bearer_id]; + /* Ensure broadcast reception is in synch with peer's send state */ + if (unlikely(usr == LINK_PROTOCOL)) + tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); + else if (unlikely(n->bc_entry.link->acked != bc_ack)) + tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); + tipc_node_lock(n); /* Is reception permitted at the moment ? */ if (!tipc_node_filter_pkt(n, hdr)) goto unlock; - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) - tipc_bclink_sync_state(n, hdr); - - /* Release acked broadcast packets */ - if (unlikely(n->bclink.acked != msg_bcast_ack(hdr))) - tipc_bclink_acknowledge(n, msg_bcast_ack(hdr)); - /* Check and if necessary update node state */ if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { rc = tipc_link_rcv(le->link, skb, &xmitq); @@ -1277,8 +1331,8 @@ unlock: if (unlikely(rc & TIPC_LINK_DOWN_EVT)) tipc_node_link_down(n, bearer_id, false); - if (unlikely(!skb_queue_empty(&n->bclink.namedq))) - tipc_named_rcv(net, &n->bclink.namedq); + if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) + tipc_named_rcv(net, &n->bc_entry.namedq); if (!skb_queue_empty(&le->inputq)) tipc_sk_rcv(net, &le->inputq); -- cgit v1.2.3 From b06b281e79375fcbd9ffaec7c5fdc350b888d089 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 22 Oct 2015 08:51:42 -0400 Subject: tipc: simplify bearer level broadcast Until now, we have been keeping track of the exact set of broadcast destinations though the help structure tipc_node_map. This leads us to have to maintain a whole infrastructure for supporting this, including a pseudo-bearer and a number of functions to manipulate both the bearers and the node map correctly. Apart from the complexity, this approach is also limiting, as struct tipc_node_map only can support cluster local broadcast if we want to avoid it becoming excessively large. We want to eliminate this limitation, in order to enable introduction of scoped multicast in the future. A closer analysis reveals that it is unnecessary maintaining this "full set" overview; it is sufficient to keep a counter per bearer, indicating how many nodes can be reached via this bearer at the moment. The protocol is now robust enough to handle transitional discrepancies between the nominal number of reachable destinations, as expected by the broadcast protocol itself, and the number which is actually reachable at the moment. The initial broadcast synchronization, in conjunction with the retransmission mechanism, ensures that all packets will eventually be acknowledged by the correct set of destinations. This commit introduces these changes. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index cd924552244b..b27439097d6c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -346,6 +346,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); + tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", nl->name, nl->net_plane); @@ -356,7 +357,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); n->action_flags |= TIPC_NOTIFY_NODE_UP; - tipc_bcast_add_peer(n->net, n->addr, nl, xmitq); + tipc_bcast_add_peer(n->net, nl, xmitq); return; } @@ -443,8 +444,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, tipc_link_build_reset_msg(l, xmitq); *maddr = &n->links[*bearer_id].maddr; node_lost_contact(n, &le->inputq); + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); return; } + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ tnl = node_active_link(n, 0); @@ -860,7 +863,7 @@ static void node_lost_contact(struct tipc_node *n, tipc_addr_string_fill(addr_string, n->addr)); /* Clean up broadcast state */ - tipc_bcast_remove_peer(n->net, n->addr, n->bc_entry.link); + tipc_bcast_remove_peer(n->net, n->bc_entry.link); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { -- cgit v1.2.3 From c72fa872a23f03b2b9c17e88f3b0a8070924e5f1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 22 Oct 2015 08:51:46 -0400 Subject: tipc: eliminate link's reference to owner node With the recent commit series, we have established a one-way dependency between the link aggregation (struct tipc_node) instances and their pertaining tipc_link instances. This has enabled quite significant code and structure simplifications. In this commit, we eliminate the field 'owner', which points to an instance of struct tipc_node, from struct tipc_link, and replace it with a pointer to struct net, which is the only external reference now needed by a link instance. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index b27439097d6c..eb739d20ed46 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -178,7 +178,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; - if (!tipc_link_bc_create(n_ptr, tipc_own_addr(net), n_ptr->addr, + if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, U16_MAX, tipc_bc_sndlink(net)->window, n_ptr->capabilities, &n_ptr->bc_entry.inputq1, @@ -366,7 +366,10 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Old link <%s> becomes standby\n", ol->name); *slot0 = bearer_id; *slot1 = bearer_id; + tipc_link_set_active(nl, true); + tipc_link_set_active(ol, false); } else if (nl->priority == ol->priority) { + tipc_link_set_active(nl, true); *slot0 = bearer_id; } else { pr_debug("New link <%s> is standby\n", nl->name); @@ -599,7 +602,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, goto exit; } if_name = strchr(b->name, ':') + 1; - if (!tipc_link_create(n, if_name, b->identity, b->tolerance, + if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, b->window, mod(tipc_net(net)->random), tipc_own_addr(net), onode, -- cgit v1.2.3 From c49a0a84391bcc313b3dc2a9ceee6de684e07655 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 22 Oct 2015 08:51:47 -0400 Subject: tipc: ensure binding table initial distribution is sent via first link Correct synchronization of the broadcast link at first contact between two nodes is dependent on the assumption that the binding table "bulk" update passes via the same link as the initial broadcast syncronization message, i.e., via the first link that is established. This is not guaranteed in the current implementation. If two link come up very close to each other in time, the "bulk" may quite well pass via the second link, and hence void the guarantee of a correct initial synchronization before the broadcast link is opened. This commit makes two small changes to strengthen this guarantee. 1) We let the second established link occupy slot 1 of the "active_links" array, while the first link will retain slot 0. (This is in reality a cosmetic change, we could just as well keep the current, opposite order) 2) We let the name distributor always use link selector/slot 0 when it sends it binding table updates. The extra traffic bias on the first link caused by this change should be negligible, since binding table updates constitutes a very small fraction of the total traffic. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index eb739d20ed46..f4772f53f41a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -370,7 +370,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, tipc_link_set_active(ol, false); } else if (nl->priority == ol->priority) { tipc_link_set_active(nl, true); - *slot0 = bearer_id; + *slot1 = bearer_id; } else { pr_debug("New link <%s> is standby\n", nl->name); } -- cgit v1.2.3 From 2af5ae372a4b6d6e2d3314af0e9c865d6d64f8d3 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 22 Oct 2015 08:51:48 -0400 Subject: tipc: clean up unused code and structures After the previous changes in this series, we can now remove some unused code and structures, both in the broadcast, link aggregation and link code. There are no functional changes in this commit. Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index f4772f53f41a..7493506b069b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -606,7 +606,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, b->net_plane, b->mtu, b->priority, b->window, mod(tipc_net(net)->random), tipc_own_addr(net), onode, - n->capabilities, &le->maddr, + n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, &le->inputq, &n->bc_entry.namedq, &l)) { @@ -943,18 +943,13 @@ void tipc_node_unlock(struct tipc_node *node) publ_list = &node->publ_list; node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | - TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | - TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_BCAST_RESET); + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); spin_unlock_bh(&node->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) tipc_publ_notify(net, publ_list, addr); - if (flags & TIPC_WAKEUP_BCAST_USERS) - tipc_bclink_wakeup_users(net); - if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(net, addr); @@ -966,11 +961,6 @@ void tipc_node_unlock(struct tipc_node *node) tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); - if (flags & TIPC_BCAST_MSG_EVT) - tipc_bclink_input(net); - - if (flags & TIPC_BCAST_RESET) - tipc_node_reset_links(node); } /* Caller should hold node lock for the passed node */ -- cgit v1.2.3 From 742e038330a485350334ee5eb75dce4a9dff87cd Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 24 Oct 2015 22:56:01 +0800 Subject: tipc: link_is_bc_sndlink() can be static TO: "David S. Miller" CC: netdev@vger.kernel.org CC: Jon Maloy CC: Ying Xue CC: tipc-discussion@lists.sourceforge.net CC: linux-kernel@vger.kernel.org Signed-off-by: Fengguang Wu Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 7493506b069b..20cddec0a43c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1083,7 +1083,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, * * Invoked with no locks held. */ -void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) +static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) { int rc; struct sk_buff_head xmitq; -- cgit v1.2.3