From 07f6c4bc048a7a8939c68a668bf77474890794c5 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 7 Jan 2015 13:41:58 +0800 Subject: tipc: convert tipc reference table to use generic rhashtable Because the tipc reference table is statically allocated, the memory it requests at stack initialization is quite large even though the maximum port number is currently restricted to just 8191; that limit has nevertheless already proved insufficient in practice. If the maximum number of ports were raised to its theoretical value of 2^32, the table's memory consumption would become unacceptably large. In addition, heavy tipc users spend a considerable amount of time in tipc_sk_get() due to the read-lock on ref_table_lock. Converting the tipc reference table to the generic rhashtable resolves both of these disadvantages: the new resizable hash table avoids locking on lookup, and less memory is required initially; for example, 256 hash bucket slots are requested at the start instead of allocating all 8191 slots as in the old scheme. The hash table will grow if the number of entries exceeds 75% of the table size, up to a total table size of 1M, and it will automatically shrink if usage falls below 30%, but never below the minimum table size of 256. This commit also converts ref_table_lock to a separate mutex to protect hash table mutations on the write side. Lastly, it defers the release of the socket reference using call_rcu() to allow using an RCU read-side protected call to rhashtable_lookup(). Signed-off-by: Ying Xue Acked-by: Jon Maloy Acked-by: Erik Hugne Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S.
Miller --- net/tipc/socket.c | 480 +++++++++++++++++++----------------------------------- 1 file changed, 171 insertions(+), 309 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4731cad99d1c..701f31bbbbfb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -34,22 +34,25 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include +#include #include "core.h" #include "name_table.h" #include "node.h" #include "link.h" -#include #include "config.h" #include "socket.h" -#define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ +#define SS_LISTENING -1 /* socket is listening */ +#define SS_READY -2 /* socket is connectionless */ -#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ +#define TIPC_FWD_MSG 1 +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 +#define TIPC_MAX_PORT 0xffffffff +#define TIPC_MIN_PORT 1 /** * struct tipc_sock - TIPC socket structure @@ -59,7 +62,7 @@ * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names * @max_pkt: maximum packet size "hint" used when building messages sent by port - * @ref: unique reference to port in TIPC object registry + * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port @@ -74,6 +77,8 @@ * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @node: 
hash table node + * @rcu: rcu struct for tipc_sock */ struct tipc_sock { struct sock sk; @@ -82,7 +87,7 @@ struct tipc_sock { u32 conn_instance; int published; u32 max_pkt; - u32 ref; + u32 portid; struct tipc_msg phdr; struct list_head sock_list; struct list_head publications; @@ -95,6 +100,8 @@ struct tipc_sock { bool link_cong; uint sent_unacked; uint rcv_unacked; + struct rhash_head node; + struct rcu_head rcu; }; static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -103,16 +110,14 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); -static void tipc_sk_timeout(unsigned long ref); +static void tipc_sk_timeout(unsigned long portid); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); -static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk); -static void tipc_sk_ref_discard(u32 ref); -static struct tipc_sock *tipc_sk_get(u32 ref); -static struct tipc_sock *tipc_sk_get_next(u32 *ref); -static void tipc_sk_put(struct tipc_sock *tsk); +static struct tipc_sock *tipc_sk_lookup(u32 portid); +static int tipc_sk_insert(struct tipc_sock *tsk); +static void tipc_sk_remove(struct tipc_sock *tsk); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -174,6 +179,9 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { * - port reference */ +/* Protects tipc socket hash table mutations */ +static struct rhashtable tipc_sk_rht; + static u32 tsk_peer_node(struct tipc_sock *tsk) { return msg_destnode(&tsk->phdr); @@ -305,7 +313,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref; /* 
Validate arguments */ if (unlikely(protocol != 0)) @@ -339,24 +346,22 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -ENOMEM; tsk = tipc_sk(sk); - ref = tipc_sk_ref_acquire(tsk); - if (!ref) { - pr_warn("Socket create failed; reference table exhausted\n"); - return -ENOMEM; - } tsk->max_pkt = MAX_PKT_DEFAULT; - tsk->ref = ref; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); - msg_set_origport(msg, ref); /* Finish initializing socket data structures */ sock->ops = ops; sock->state = state; sock_init_data(sock, sk); - k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref); + if (tipc_sk_insert(tsk)) { + pr_warn("Socket create failed; port numbrer exhausted\n"); + return -EINVAL; + } + msg_set_origport(msg, tsk->portid); + k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, tsk->portid); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -442,6 +447,13 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, return ret; } +static void tipc_sk_callback(struct rcu_head *head) +{ + struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); + + sock_put(&tsk->sk); +} + /** * tipc_release - destroy a TIPC socket * @sock: socket to destroy @@ -491,7 +503,7 @@ static int tipc_release(struct socket *sock) (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; tsk->connected = 0; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(dnode, tsk->portid); } if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) tipc_link_xmit_skb(skb, dnode, 0); @@ -499,16 +511,16 @@ static int tipc_release(struct socket *sock) } tipc_sk_withdraw(tsk, 0, NULL); - tipc_sk_ref_discard(tsk->ref); k_cancel_timer(&tsk->timer); + tipc_sk_remove(tsk); if (tsk->connected) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tipc_own_addr, 
tsk_peer_port(tsk), - tsk->ref, TIPC_ERR_NO_PORT); + tsk->portid, TIPC_ERR_NO_PORT); if (skb) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + tipc_link_xmit_skb(skb, dnode, tsk->portid); + tipc_node_remove_conn(dnode, tsk->portid); } k_term_timer(&tsk->timer); @@ -518,7 +530,8 @@ static int tipc_release(struct socket *sock) /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); - sock_put(sk); + + call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; return 0; @@ -611,7 +624,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = tsk_peer_node(tsk); } else { - addr->addr.id.ref = tsk->ref; + addr->addr.id.ref = tsk->portid; addr->addr.id.node = tipc_own_addr; } @@ -946,7 +959,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, } new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->ref); + mtu = tipc_node_get_mtu(dnode, tsk->portid); __skb_queue_head_init(&head); rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); if (rc < 0) @@ -955,7 +968,7 @@ new_mtu: do { skb = skb_peek(&head); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(&head, dnode, tsk->ref); + rc = tipc_link_xmit(&head, dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -1028,7 +1041,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff_head head; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - u32 ref = tsk->ref; + u32 portid = tsk->portid; int rc = -EINVAL; long timeo; u32 dnode; @@ -1067,7 +1080,7 @@ next: goto exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(&head, dnode, ref); + rc = tipc_link_xmit(&head, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1076,7 +1089,7 @@ next: goto next; } if (rc == -EMSGSIZE) { 
- tsk->max_pkt = tipc_node_get_mtu(dnode, ref); + tsk->max_pkt = tipc_node_get_mtu(dnode, portid); goto next; } if (rc != -ELINKCONG) @@ -1130,8 +1143,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; k_start_timer(&tsk->timer, tsk->probing_interval); - tipc_node_add_conn(peer_node, tsk->ref, peer_port); - tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref); + tipc_node_add_conn(peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->portid); } /** @@ -1238,7 +1251,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, - tipc_own_addr, peer_port, tsk->ref, TIPC_OK); + tipc_own_addr, peer_port, tsk->portid, TIPC_OK); if (!skb) return; msg = buf_msg(skb); @@ -1552,7 +1565,7 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) tsk->connected = 0; /* let timer expire on it's own */ tipc_node_remove_conn(tsk_peer_node(tsk), - tsk->ref); + tsk->portid); } retval = TIPC_OK; } @@ -1743,7 +1756,7 @@ int tipc_sk_rcv(struct sk_buff *skb) u32 dnode; /* Validate destination and message */ - tsk = tipc_sk_get(dport); + tsk = tipc_sk_lookup(dport); if (unlikely(!tsk)) { rc = tipc_msg_eval(skb, &dnode); goto exit; @@ -1763,7 +1776,7 @@ int tipc_sk_rcv(struct sk_buff *skb) rc = -TIPC_ERR_OVERLOAD; } spin_unlock_bh(&sk->sk_lock.slock); - tipc_sk_put(tsk); + sock_put(sk); if (likely(!rc)) return 0; exit: @@ -2050,20 +2063,20 @@ restart: goto restart; } if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + tipc_link_xmit_skb(skb, dnode, tsk->portid); + tipc_node_remove_conn(dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tipc_own_addr, tsk_peer_port(tsk), - 
tsk->ref, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(skb, dnode, tsk->ref); + tsk->portid, TIPC_CONN_SHUTDOWN); + tipc_link_xmit_skb(skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(dnode, tsk->portid); /* fall through */ case SS_DISCONNECTING: @@ -2084,14 +2097,14 @@ restart: return res; } -static void tipc_sk_timeout(unsigned long ref) +static void tipc_sk_timeout(unsigned long portid) { struct tipc_sock *tsk; struct sock *sk; struct sk_buff *skb = NULL; u32 peer_port, peer_node; - tsk = tipc_sk_get(ref); + tsk = tipc_sk_lookup(portid); if (!tsk) return; @@ -2108,20 +2121,20 @@ static void tipc_sk_timeout(unsigned long ref) /* Previous probe not answered -> self abort */ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, tipc_own_addr, - peer_node, ref, peer_port, + peer_node, portid, peer_port, TIPC_ERR_NO_PORT); } else { skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, tipc_own_addr, - peer_port, ref, TIPC_OK); + peer_port, portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; k_start_timer(&tsk->timer, tsk->probing_interval); } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, ref); + tipc_link_xmit_skb(skb, peer_node, portid); exit: - tipc_sk_put(tsk); + sock_put(sk); } static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, @@ -2132,12 +2145,12 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, if (tsk->connected) return -EINVAL; - key = tsk->ref + tsk->pub_count + 1; - if (key == tsk->ref) + key = tsk->portid + tsk->pub_count + 1; + if (key == tsk->portid) return -EADDRINUSE; publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, - scope, tsk->ref, key); + scope, tsk->portid, key); if (unlikely(!publ)) return -EINVAL; @@ -2188,9 +2201,9 @@ static int tipc_sk_show(struct tipc_sock *tsk, char *buf, ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", 
tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), tsk->ref); + tipc_node(tipc_own_addr), tsk->portid); else - ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref); + ret = tipc_snprintf(buf, len, "%-10u:", tsk->portid); if (tsk->connected) { u32 dport = tsk_peer_port(tsk); @@ -2224,13 +2237,15 @@ static int tipc_sk_show(struct tipc_sock *tsk, char *buf, struct sk_buff *tipc_sk_socks_show(void) { + const struct bucket_table *tbl; + struct rhash_head *pos; struct sk_buff *buf; struct tlv_desc *rep_tlv; char *pb; int pb_len; struct tipc_sock *tsk; int str_len = 0; - u32 ref = 0; + int i; buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); if (!buf) @@ -2239,14 +2254,18 @@ struct sk_buff *tipc_sk_socks_show(void) pb = TLV_DATA(rep_tlv); pb_len = ULTRA_STRING_MAX_LEN; - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - str_len += tipc_sk_show(tsk, pb + str_len, - pb_len - str_len, 0); - release_sock(&tsk->sk); - tipc_sk_put(tsk); + rcu_read_lock(); + tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + str_len += tipc_sk_show(tsk, pb + str_len, + pb_len - str_len, 0); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } } + rcu_read_unlock(); + str_len += 1; /* for "\0" */ skb_put(buf, TLV_SPACE(str_len)); TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); @@ -2259,255 +2278,91 @@ struct sk_buff *tipc_sk_socks_show(void) */ void tipc_sk_reinit(void) { + const struct bucket_table *tbl; + struct rhash_head *pos; + struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref = 0; - struct tipc_sock *tsk = tipc_sk_get_next(&ref); + int i; - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - msg = &tsk->phdr; - msg_set_prevnode(msg, tipc_own_addr); - msg_set_orignode(msg, tipc_own_addr); - release_sock(&tsk->sk); - tipc_sk_put(tsk); + 
rcu_read_lock(); + tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + msg = &tsk->phdr; + msg_set_prevnode(msg, tipc_own_addr); + msg_set_orignode(msg, tipc_own_addr); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } } + rcu_read_unlock(); } -/** - * struct reference - TIPC socket reference entry - * @tsk: pointer to socket associated with reference entry - * @ref: reference value for socket (combines instance & array index info) - */ -struct reference { - struct tipc_sock *tsk; - u32 ref; -}; - -/** - * struct tipc_ref_table - table of TIPC socket reference entries - * @entries: pointer to array of reference entries - * @capacity: array index of first unusable entry - * @init_point: array index of first uninitialized entry - * @first_free: array index of first unused socket reference entry - * @last_free: array index of last unused socket reference entry - * @index_mask: bitmask for array index portion of reference values - * @start_mask: initial value for instance value portion of reference values - */ -struct ref_table { - struct reference *entries; - u32 capacity; - u32 init_point; - u32 first_free; - u32 last_free; - u32 index_mask; - u32 start_mask; -}; - -/* Socket reference table consists of 2**N entries. - * - * State Socket ptr Reference - * ----- ---------- --------- - * In use non-NULL XXXX|own index - * (XXXX changes each time entry is acquired) - * Free NULL YYYY|next free index - * (YYYY is one more than last used XXXX) - * Uninitialized NULL 0 - * - * Entry 0 is not used; this allows index 0 to denote the end of the free list. - * - * Note that a reference value of 0 does not necessarily indicate that an - * entry is uninitialized, since the last entry in the free list could also - * have a reference value of 0 (although this is unlikely). 
- */ - -static struct ref_table tipc_ref_table; - -static DEFINE_RWLOCK(ref_table_lock); - -/** - * tipc_ref_table_init - create reference table for sockets - */ -int tipc_sk_ref_table_init(u32 req_sz, u32 start) +static struct tipc_sock *tipc_sk_lookup(u32 portid) { - struct reference *table; - u32 actual_sz; - - /* account for unused entry, then round up size to a power of 2 */ - - req_sz++; - for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) { - /* do nothing */ - }; - - /* allocate table & mark all entries as uninitialized */ - table = vzalloc(actual_sz * sizeof(struct reference)); - if (table == NULL) - return -ENOMEM; - - tipc_ref_table.entries = table; - tipc_ref_table.capacity = req_sz; - tipc_ref_table.init_point = 1; - tipc_ref_table.first_free = 0; - tipc_ref_table.last_free = 0; - tipc_ref_table.index_mask = actual_sz - 1; - tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; + struct tipc_sock *tsk; - return 0; -} + rcu_read_lock(); + tsk = rhashtable_lookup(&tipc_sk_rht, &portid); + if (tsk) + sock_hold(&tsk->sk); + rcu_read_unlock(); -/** - * tipc_ref_table_stop - destroy reference table for sockets - */ -void tipc_sk_ref_table_stop(void) -{ - if (!tipc_ref_table.entries) - return; - vfree(tipc_ref_table.entries); - tipc_ref_table.entries = NULL; + return tsk; } -/* tipc_ref_acquire - create reference to a socket - * - * Register an socket pointer in the reference table. - * Returns a unique reference value that is used from then on to retrieve the - * socket pointer, or to determine if the socket has been deregistered. - */ -u32 tipc_sk_ref_acquire(struct tipc_sock *tsk) +static int tipc_sk_insert(struct tipc_sock *tsk) { - u32 index; - u32 index_mask; - u32 next_plus_upper; - u32 ref = 0; - struct reference *entry; - - if (unlikely(!tsk)) { - pr_err("Attempt to acquire ref. to non-existent obj\n"); - return 0; - } - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. 
table not found in acquisition attempt\n"); - return 0; - } - - /* Take a free entry, if available; otherwise initialize a new one */ - write_lock_bh(&ref_table_lock); - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; + u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; + u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; - if (likely(index)) { - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; - index_mask = tipc_ref_table.index_mask; - next_plus_upper = entry->ref; - tipc_ref_table.first_free = next_plus_upper & index_mask; - ref = (next_plus_upper & ~index_mask) + index; - entry->tsk = tsk; - } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { - index = tipc_ref_table.init_point++; - entry = &tipc_ref_table.entries[index]; - ref = tipc_ref_table.start_mask + index; + while (remaining--) { + portid++; + if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) + portid = TIPC_MIN_PORT; + tsk->portid = portid; + sock_hold(&tsk->sk); + if (rhashtable_lookup_insert(&tipc_sk_rht, &tsk->node)) + return 0; + sock_put(&tsk->sk); } - if (ref) { - entry->ref = ref; - entry->tsk = tsk; - } - write_unlock_bh(&ref_table_lock); - return ref; + return -1; } -/* tipc_sk_ref_discard - invalidate reference to an socket - * - * Disallow future references to an socket and free up the entry for re-use. - */ -void tipc_sk_ref_discard(u32 ref) +static void tipc_sk_remove(struct tipc_sock *tsk) { - struct reference *entry; - u32 index; - u32 index_mask; - - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. table not found during discard attempt\n"); - return; - } - - index_mask = tipc_ref_table.index_mask; - index = ref & index_mask; - entry = &tipc_ref_table.entries[index]; - - write_lock_bh(&ref_table_lock); + struct sock *sk = &tsk->sk; - if (unlikely(!entry->tsk)) { - pr_err("Attempt to discard ref. 
to non-existent socket\n"); - goto exit; - } - if (unlikely(entry->ref != ref)) { - pr_err("Attempt to discard non-existent reference\n"); - goto exit; + if (rhashtable_remove(&tipc_sk_rht, &tsk->node)) { + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); } - - /* Mark entry as unused; increment instance part of entry's - * reference to invalidate any subsequent references - */ - - entry->tsk = NULL; - entry->ref = (ref & ~index_mask) + (index_mask + 1); - - /* Append entry to free entry list */ - if (unlikely(tipc_ref_table.first_free == 0)) - tipc_ref_table.first_free = index; - else - tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; - tipc_ref_table.last_free = index; -exit: - write_unlock_bh(&ref_table_lock); } -/* tipc_sk_get - find referenced socket and return pointer to it - */ -struct tipc_sock *tipc_sk_get(u32 ref) +int tipc_sk_rht_init(void) { - struct reference *entry; - struct tipc_sock *tsk; + struct rhashtable_params rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .hashfn = jhash, + .max_shift = 20, /* 1M */ + .min_shift = 8, /* 256 */ + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + }; - if (unlikely(!tipc_ref_table.entries)) - return NULL; - read_lock_bh(&ref_table_lock); - entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask]; - tsk = entry->tsk; - if (likely(tsk && (entry->ref == ref))) - sock_hold(&tsk->sk); - else - tsk = NULL; - read_unlock_bh(&ref_table_lock); - return tsk; + return rhashtable_init(&tipc_sk_rht, &rht_params); } -/* tipc_sk_get_next - lock & return next socket after referenced one -*/ -struct tipc_sock *tipc_sk_get_next(u32 *ref) +void tipc_sk_rht_destroy(void) { - struct reference *entry; - struct tipc_sock *tsk = NULL; - uint index = *ref & tipc_ref_table.index_mask; + /* Wait for socket readers to complete */ + 
synchronize_net(); - read_lock_bh(&ref_table_lock); - while (++index < tipc_ref_table.capacity) { - entry = &tipc_ref_table.entries[index]; - if (!entry->tsk) - continue; - tsk = entry->tsk; - sock_hold(&tsk->sk); - *ref = entry->ref; - break; - } - read_unlock_bh(&ref_table_lock); - return tsk; -} - -static void tipc_sk_put(struct tipc_sock *tsk) -{ - sock_put(&tsk->sk); + rhashtable_destroy(&tipc_sk_rht); } /** @@ -2829,7 +2684,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, attrs = nla_nest_start(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->ref)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) goto attr_msg_cancel; if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) goto attr_msg_cancel; @@ -2859,22 +2714,29 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; struct tipc_sock *tsk; - u32 prev_ref = cb->args[0]; - u32 ref = prev_ref; - - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - err = __tipc_nl_add_sk(skb, cb, tsk); - release_sock(&tsk->sk); - tipc_sk_put(tsk); - if (err) - break; + const struct bucket_table *tbl; + struct rhash_head *pos; + u32 prev_portid = cb->args[0]; + u32 portid = prev_portid; + int i; - prev_ref = ref; + rcu_read_lock(); + tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + portid = tsk->portid; + err = __tipc_nl_add_sk(skb, cb, tsk); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + if (err) + break; + + prev_portid = portid; + } } + rcu_read_unlock(); - cb->args[0] = prev_ref; + cb->args[0] = prev_portid; return skb->len; } @@ -2962,12 +2824,12 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; - u32 tsk_ref = cb->args[0]; + 
u32 tsk_portid = cb->args[0]; u32 last_publ = cb->args[1]; u32 done = cb->args[2]; struct tipc_sock *tsk; - if (!tsk_ref) { + if (!tsk_portid) { struct nlattr **attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; @@ -2984,13 +2846,13 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!sock[TIPC_NLA_SOCK_REF]) return -EINVAL; - tsk_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); } if (done) return 0; - tsk = tipc_sk_get(tsk_ref); + tsk = tipc_sk_lookup(tsk_portid); if (!tsk) return -EINVAL; @@ -2999,9 +2861,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!err) done = 1; release_sock(&tsk->sk); - tipc_sk_put(tsk); + sock_put(&tsk->sk); - cb->args[0] = tsk_ref; + cb->args[0] = tsk_portid; cb->args[1] = last_publ; cb->args[2] = done; -- cgit v1.2.3 From 2f55c43788df7358be8c6e78ae2a3d3268e7afb6 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:00 +0800 Subject: tipc: remove unnecessary wrapper functions of kernel timer APIs Not only is a wrapper function such as k_term_timer() empty, but others, including k_start_timer() and k_cancel_timer(), do not return any value to their callers; moreover, no other component in the kernel does such a thing. Therefore, these timer interfaces defined in the tipc module should be purged. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/socket.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 701f31bbbbfb..e16197eb7b9f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -47,7 +47,7 @@ #define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ +#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 #define TIPC_CONN_OK 0 #define TIPC_CONN_PROBING 1 @@ -68,7 +68,7 @@ * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_interval: + * @probing_intv: * @timer: * @port: port - interacts with 'sk' and with the rest of the TIPC stack * @peer_name: the peer of the connection, if any @@ -93,7 +93,7 @@ struct tipc_sock { struct list_head publications; u32 pub_count; u32 probing_state; - u32 probing_interval; + unsigned long probing_intv; struct timer_list timer; uint conn_timeout; atomic_t dupl_rcvcnt; @@ -361,7 +361,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -EINVAL; } msg_set_origport(msg, tsk->portid); - k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, tsk->portid); + setup_timer(&tsk->timer, tipc_sk_timeout, tsk->portid); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -511,7 +511,7 @@ static int tipc_release(struct socket *sock) } tipc_sk_withdraw(tsk, 0, NULL); - k_cancel_timer(&tsk->timer); + del_timer_sync(&tsk->timer); tipc_sk_remove(tsk); if (tsk->connected) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, @@ -522,7 +522,6 @@ static int tipc_release(struct socket *sock) tipc_link_xmit_skb(skb, dnode, tsk->portid); tipc_node_remove_conn(dnode, tsk->portid); } - k_term_timer(&tsk->timer); /* 
Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -1139,10 +1138,10 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_interval = CONN_PROBING_INTERVAL; + tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - k_start_timer(&tsk->timer, tsk->probing_interval); + mod_timer(&tsk->timer, jiffies + tsk->probing_intv); tipc_node_add_conn(peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->portid); } @@ -2128,7 +2127,7 @@ static void tipc_sk_timeout(unsigned long portid) 0, peer_node, tipc_own_addr, peer_port, portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - k_start_timer(&tsk->timer, tsk->probing_interval); + mod_timer(&tsk->timer, jiffies + tsk->probing_intv); } bh_unlock_sock(sk); if (skb) -- cgit v1.2.3 From f2f2a96a20d52d65aa79bd4019af43bbfb0e1528 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:02 +0800 Subject: tipc: feed tipc sock pointer to tipc_sk_timeout routine In order to make tipc socket table aware of namespace, a networking namespace instance must be passed to tipc_sk_lookup(), allowing it to look up tipc socket instance with a given port ID from a concrete socket table. However, as now tipc_sk_timeout() only has one port ID parameter and is not namespace aware, it's unable to obtain a correct socket instance through tipc_sk_lookup() just with a port ID, especially after namespace is completely supported. If port ID is replaced with socket instance as tipc_sk_timeout()'s parameter, it's unnecessary to look up socket table. But as the timer handler - tipc_sk_timeout() is run asynchronously, socket reference must be held before its timer is launched, and must be carefully checked to identify whether the socket reference needs to be put or not when its timer is terminated. 
Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e16197eb7b9f..c58f66be7e18 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -110,7 +110,7 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); -static void tipc_sk_timeout(unsigned long portid); +static void tipc_sk_timeout(unsigned long data); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, @@ -361,7 +361,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -EINVAL; } msg_set_origport(msg, tsk->portid); - setup_timer(&tsk->timer, tipc_sk_timeout, tsk->portid); + setup_timer(&tsk->timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -475,7 +475,7 @@ static int tipc_release(struct socket *sock) struct sock *sk = sock->sk; struct tipc_sock *tsk; struct sk_buff *skb; - u32 dnode; + u32 dnode, probing_state; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -511,7 +511,9 @@ static int tipc_release(struct socket *sock) } tipc_sk_withdraw(tsk, 0, NULL); - del_timer_sync(&tsk->timer); + probing_state = tsk->probing_state; + if (del_timer_sync(&tsk->timer) && probing_state != TIPC_CONN_PROBING) + sock_put(sk); tipc_sk_remove(tsk); if (tsk->connected) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, @@ -1141,7 +1143,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, 
tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - mod_timer(&tsk->timer, jiffies + tsk->probing_intv); + if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) + sock_hold(&tsk->sk); tipc_node_add_conn(peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->portid); } @@ -2096,18 +2099,13 @@ restart: return res; } -static void tipc_sk_timeout(unsigned long portid) +static void tipc_sk_timeout(unsigned long data) { - struct tipc_sock *tsk; - struct sock *sk; + struct tipc_sock *tsk = (struct tipc_sock *)data; + struct sock *sk = &tsk->sk; struct sk_buff *skb = NULL; u32 peer_port, peer_node; - tsk = tipc_sk_lookup(portid); - if (!tsk) - return; - - sk = &tsk->sk; bh_lock_sock(sk); if (!tsk->connected) { bh_unlock_sock(sk); @@ -2120,18 +2118,19 @@ static void tipc_sk_timeout(unsigned long portid) /* Previous probe not answered -> self abort */ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, tipc_own_addr, - peer_node, portid, peer_port, + peer_node, tsk->portid, peer_port, TIPC_ERR_NO_PORT); } else { skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, tipc_own_addr, - peer_port, portid, TIPC_OK); + peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - mod_timer(&tsk->timer, jiffies + tsk->probing_intv); + if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) + sock_hold(sk); } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, portid); + tipc_link_xmit_skb(skb, peer_node, tsk->portid); exit: sock_put(sk); } -- cgit v1.2.3 From f2f9800d4955a96d92896841d8ba9b04201deaa1 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:05 +0800 Subject: tipc: make tipc node table aware of net namespace Global variables associated with node table are below: - node table list (node_htable) - node hash table list (tipc_node_list) - node table lock (node_list_lock) - node number counter 
(tipc_num_nodes) - node link number counter (tipc_num_links) To make node table support namespace, above global variables must be moved to the tipc_net structure so that each namespace keeps its own private copy. As a consequence, these variables are allocated and initialized when namespace is created, and deallocated when namespace is destroyed. After the change, functions associated with these variables have to utilize a namespace pointer to access them. So adding namespace pointer as a parameter of these functions is the major change made in the commit. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 72 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 29 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c58f66be7e18..68831453bc0e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -257,7 +257,7 @@ static void tsk_rej_rx_queue(struct sock *sk) while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -473,6 +473,7 @@ static void tipc_sk_callback(struct rcu_head *head) static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk; struct sk_buff *skb; u32 dnode, probing_state; @@ -503,10 +504,10 @@ static int tipc_release(struct socket *sock) (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; tsk->connected = 0; - tipc_node_remove_conn(dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + tipc_link_xmit_skb(net, skb, dnode, 0); } } @@ -521,8 +522,8 @@ static int tipc_release(struct socket *sock) tsk_peer_port(tsk), tsk->portid, TIPC_ERR_NO_PORT); if (skb) -
tipc_link_xmit_skb(skb, dnode, tsk->portid); - tipc_node_remove_conn(dnode, tsk->portid); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } /* Discard any remaining (connection-based) messages in receive queue */ @@ -725,6 +726,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct msghdr *msg, size_t dsz, long timeo) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; struct sk_buff_head head; uint mtu; @@ -747,7 +749,7 @@ new_mtu: return rc; do { - rc = tipc_bclink_xmit(&head); + rc = tipc_bclink_xmit(net, &head); if (likely(rc >= 0)) { rc = dsz; break; @@ -766,7 +768,7 @@ new_mtu: /* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets */ -void tipc_sk_mcast_rcv(struct sk_buff *buf) +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct tipc_port_list dports = {0, NULL, }; @@ -798,7 +800,7 @@ void tipc_sk_mcast_rcv(struct sk_buff *buf) continue; } msg_set_destport(msg, item->ports[i]); - tipc_sk_rcv(b); + tipc_sk_rcv(net, b); } } tipc_port_list_free(&dports); @@ -886,6 +888,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; struct sk_buff_head head; @@ -960,7 +963,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, } new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->portid); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); if (rc < 0) @@ -969,7 +972,7 @@ new_mtu: do { skb = skb_peek(&head); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(&head, dnode, tsk->portid); + rc = tipc_link_xmit(net, &head, 
dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -1038,6 +1041,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff_head head; @@ -1081,7 +1085,7 @@ next: goto exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(&head, dnode, portid); + rc = tipc_link_xmit(net, &head, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1090,7 +1094,8 @@ next: goto next; } if (rc == -EMSGSIZE) { - tsk->max_pkt = tipc_node_get_mtu(dnode, portid); + tsk->max_pkt = tipc_node_get_mtu(net, dnode, + portid); goto next; } if (rc != -ELINKCONG) @@ -1132,6 +1137,7 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, u32 peer_node) { + struct net *net = sock_net(&tsk->sk); struct tipc_msg *msg = &tsk->phdr; msg_set_destnode(msg, peer_node); @@ -1145,8 +1151,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, tsk->connected = 1; if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) sock_hold(&tsk->sk); - tipc_node_add_conn(peer_node, tsk->portid, peer_port); - tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->portid); + tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); } /** @@ -1245,6 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { + struct net *net = sock_net(&tsk->sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1258,7 +1265,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) return; msg = buf_msg(skb); msg_set_msgcnt(msg, ack); - tipc_link_xmit_skb(skb, 
dnode, msg_link_selector(msg)); + tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1551,6 +1558,7 @@ static void tipc_data_ready(struct sock *sk) static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); int retval = -TIPC_ERR_NO_PORT; @@ -1566,7 +1574,7 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) sock->state = SS_DISCONNECTING; tsk->connected = 0; /* let timer expire on it's own */ - tipc_node_remove_conn(tsk_peer_node(tsk), + tipc_node_remove_conn(net, tsk_peer_node(tsk), tsk->portid); } retval = TIPC_OK; @@ -1737,7 +1745,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) if ((rc < 0) && !tipc_msg_reverse(skb, &onode, -rc)) return 0; - tipc_link_xmit_skb(skb, onode, 0); + tipc_link_xmit_skb(sock_net(sk), skb, onode, 0); return 0; } @@ -1748,7 +1756,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Consumes buffer * Returns 0 if success, or errno: -EHOSTUNREACH */ -int tipc_sk_rcv(struct sk_buff *skb) +int tipc_sk_rcv(struct net *net, struct sk_buff *skb) { struct tipc_sock *tsk; struct sock *sk; @@ -1785,7 +1793,7 @@ exit: if ((rc < 0) && !tipc_msg_reverse(skb, &dnode, -rc)) return -EHOSTUNREACH; - tipc_link_xmit_skb(skb, dnode, 0); + tipc_link_xmit_skb(net, skb, dnode, 0); return (rc < 0) ? 
-EHOSTUNREACH : 0; } @@ -2042,6 +2050,7 @@ exit: static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2065,8 +2074,9 @@ restart: goto restart; } if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(skb, dnode, tsk->portid); - tipc_node_remove_conn(dnode, tsk->portid); + tipc_link_xmit_skb(net, skb, dnode, + tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, @@ -2074,11 +2084,11 @@ restart: 0, dnode, tipc_own_addr, tsk_peer_port(tsk), tsk->portid, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(skb, dnode, tsk->portid); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); /* fall through */ case SS_DISCONNECTING: @@ -2130,7 +2140,7 @@ static void tipc_sk_timeout(unsigned long data) } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, tsk->portid); + tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); exit: sock_put(sk); } @@ -2138,6 +2148,7 @@ exit: static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; u32 key; @@ -2147,7 +2158,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, if (key == tsk->portid) return -EADDRINUSE; - publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, + publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, scope, tsk->portid, key); if (unlikely(!publ)) return -EINVAL; @@ -2161,6 +2172,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = 
sock_net(&tsk->sk); struct publication *publ; struct publication *safe; int rc = -EINVAL; @@ -2175,12 +2187,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, continue; if (publ->upper != seq->upper) break; - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; break; } - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; } @@ -2492,8 +2504,9 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } -static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + struct sock *sk = sock->sk; struct tipc_sioc_ln_req lnr; void __user *argp = (void __user *)arg; @@ -2501,7 +2514,8 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer, + if (!tipc_node_get_linkname(sock_net(sk), + lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; -- cgit v1.2.3 From e05b31f4bf8994d49322e9afb004ad479a129db0 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:08 +0800 Subject: tipc: make tipc socket support net namespace Now tipc socket table is statically allocated as a global variable. Through it, we can look up one socket instance with port ID, insert a new socket instance to the table, and delete a socket from the table. But when tipc supports net namespace, each namespace must own its specific socket table. So the global variable of socket table must be redefined in tipc_net structure. 
As a consequence, a new socket table will be allocated when a new namespace is created, and a socket table will be deallocated when namespace is destroyed. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 68831453bc0e..accb02cb3527 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -115,7 +115,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); -static struct tipc_sock *tipc_sk_lookup(u32 portid); +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); @@ -179,9 +179,6 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { * - port reference */ -/* Protects tipc socket hash table mutations */ -static struct rhashtable tipc_sk_rht; - static u32 tsk_peer_node(struct tipc_sock *tsk) { return msg_destnode(&tsk->phdr); @@ -1766,7 +1763,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) u32 dnode; /* Validate destination and message */ - tsk = tipc_sk_lookup(dport); + tsk = tipc_sk_lookup(net, dport); if (unlikely(!tsk)) { rc = tipc_msg_eval(skb, &dnode); goto exit; @@ -2245,8 +2242,9 @@ static int tipc_sk_show(struct tipc_sock *tsk, char *buf, return ret; } -struct sk_buff *tipc_sk_socks_show(void) +struct sk_buff *tipc_sk_socks_show(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); const struct bucket_table *tbl; struct rhash_head *pos; struct sk_buff *buf; @@ -2265,7 +2263,7 @@ struct sk_buff *tipc_sk_socks_show(void) pb_len = ULTRA_STRING_MAX_LEN; rcu_read_lock(); - tbl =
rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); for (i = 0; i < tbl->size; i++) { rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2286,8 +2284,9 @@ struct sk_buff *tipc_sk_socks_show(void) /* tipc_sk_reinit: set non-zero address in all existing sockets * when we go from standalone to network mode. */ -void tipc_sk_reinit(void) +void tipc_sk_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); const struct bucket_table *tbl; struct rhash_head *pos; struct tipc_sock *tsk; @@ -2295,7 +2294,7 @@ void tipc_sk_reinit(void) int i; rcu_read_lock(); - tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); for (i = 0; i < tbl->size; i++) { rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2308,12 +2307,13 @@ void tipc_sk_reinit(void) rcu_read_unlock(); } -static struct tipc_sock *tipc_sk_lookup(u32 portid) +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk; rcu_read_lock(); - tsk = rhashtable_lookup(&tipc_sk_rht, &portid); + tsk = rhashtable_lookup(&tn->sk_rht, &portid); if (tsk) sock_hold(&tsk->sk); rcu_read_unlock(); @@ -2323,6 +2323,9 @@ static struct tipc_sock *tipc_sk_lookup(u32 portid) static int tipc_sk_insert(struct tipc_sock *tsk) { + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; @@ -2332,7 +2335,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk) portid = TIPC_MIN_PORT; tsk->portid = portid; sock_hold(&tsk->sk); - if (rhashtable_lookup_insert(&tipc_sk_rht, &tsk->node)) + if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node)) return 0; 
sock_put(&tsk->sk); } @@ -2343,15 +2346,17 @@ static int tipc_sk_insert(struct tipc_sock *tsk) static void tipc_sk_remove(struct tipc_sock *tsk) { struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (rhashtable_remove(&tipc_sk_rht, &tsk->node)) { + if (rhashtable_remove(&tn->sk_rht, &tsk->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } -int tipc_sk_rht_init(void) +int tipc_sk_rht_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct rhashtable_params rht_params = { .nelem_hint = 192, .head_offset = offsetof(struct tipc_sock, node), @@ -2364,15 +2369,17 @@ int tipc_sk_rht_init(void) .shrink_decision = rht_shrink_below_30, }; - return rhashtable_init(&tipc_sk_rht, &rht_params); + return rhashtable_init(&tn->sk_rht, &rht_params); } -void tipc_sk_rht_destroy(void) +void tipc_sk_rht_destroy(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + /* Wait for socket readers to complete */ synchronize_net(); - rhashtable_destroy(&tipc_sk_rht); + rhashtable_destroy(&tn->sk_rht); } /** @@ -2730,10 +2737,12 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) struct rhash_head *pos; u32 prev_portid = cb->args[0]; u32 portid = prev_portid; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); int i; rcu_read_lock(); - tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); for (i = 0; i < tbl->size; i++) { rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2839,6 +2848,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 tsk_portid = cb->args[0]; u32 last_publ = cb->args[1]; u32 done = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_sock *tsk; if (!tsk_portid) { @@ -2864,7 +2874,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback 
*cb) if (done) return 0; - tsk = tipc_sk_lookup(tsk_portid); + tsk = tipc_sk_lookup(net, tsk_portid); if (!tsk) return -EINVAL; -- cgit v1.2.3 From 4ac1c8d0ee9faf3a4be185cc4db1381fa0d81280 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:09 +0800 Subject: tipc: name tipc name table support net namespace TIPC name table is used to store the mapping relationship between TIPC service name and socket port ID. When tipc supports namespace, it allows users to publish service names only owned by a certain namespace. Therefore, every namespace must have its private name table to prevent service names published to one namespace from being contaminated by other service names in another namespace. Therefore, The name table global variable (ie, nametbl) and its lock must be moved to tipc_net structure, and a parameter of namespace must be added for necessary functions so that they can obtain name table variable defined in tipc_net structure. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index accb02cb3527..4670e1e46c89 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -778,11 +778,8 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) scope = TIPC_NODE_SCOPE; /* Create destination port list: */ - tipc_nametbl_mc_translate(msg_nametype(msg), - msg_namelower(msg), - msg_nameupper(msg), - scope, - &dports); + tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), + msg_nameupper(msg), scope, &dports); last = dports.count; if (!last) { kfree_skb(buf); @@ -943,7 +940,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_nametype(mhdr, type); msg_set_nameinst(mhdr, inst); msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); - dport = tipc_nametbl_translate(type, inst, &dnode); + dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(mhdr, dnode); msg_set_destport(mhdr, dport); if (unlikely(!dport && !dnode)) { @@ -1765,7 +1762,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) /* Validate destination and message */ tsk = tipc_sk_lookup(net, dport); if (unlikely(!tsk)) { - rc = tipc_msg_eval(skb, &dnode); + rc = tipc_msg_eval(net, skb, &dnode); goto exit; } sk = &tsk->sk; -- cgit v1.2.3 From 347475395434abb2b61bf59c2952470f37072567 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:10 +0800 Subject: tipc: make tipc node address support net namespace If net namespace is supported in tipc, each namespace will be treated as a separate tipc node. Therefore, every namespace must own its private tipc node address. This means the "tipc_own_addr" global variable of node address must be moved to tipc_net structure to satisfy the requirement. As a result, users can also assign a node address to each namespace.
Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 86 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 35 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4670e1e46c89..9b8470edc783 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -251,10 +251,11 @@ static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; u32 dnode; + struct net *net = sock_net(sk); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); + if (tipc_msg_reverse(net, skb, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(net, skb, dnode, 0); } } @@ -265,6 +266,7 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { + struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; @@ -281,10 +283,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) if (likely(orig_node == peer_node)) return true; - if (!orig_node && (peer_node == tipc_own_addr)) + if (!orig_node && (peer_node == tn->own_addr)) return true; - if (!peer_node && (orig_node == tipc_own_addr)) + if (!peer_node && (orig_node == tn->own_addr)) return true; return false; @@ -346,7 +348,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->max_pkt = MAX_PKT_DEFAULT; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; - tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + tipc_msg_init(net, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); /* Finish initializing socket data structures */ @@ -471,6 +473,7 @@ static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, 
tipc_net_id); struct tipc_sock *tsk; struct sk_buff *skb; u32 dnode, probing_state; @@ -503,7 +506,8 @@ static int tipc_release(struct socket *sock) tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) + if (tipc_msg_reverse(net, skb, &dnode, + TIPC_ERR_NO_PORT)) tipc_link_xmit_skb(net, skb, dnode, 0); } } @@ -514,9 +518,9 @@ static int tipc_release(struct socket *sock) sock_put(sk); tipc_sk_remove(tsk); if (tsk->connected) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, dnode, tipc_own_addr, - tsk_peer_port(tsk), + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tn->own_addr, tsk_peer_port(tsk), tsk->portid, TIPC_ERR_NO_PORT); if (skb) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); @@ -614,6 +618,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct tipc_sock *tsk = tipc_sk(sock->sk); + struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { @@ -624,7 +629,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.node = tsk_peer_node(tsk); } else { addr->addr.id.ref = tsk->portid; - addr->addr.id.node = tipc_own_addr; + addr->addr.id.node = tn->own_addr; } *uaddr_len = sizeof(*addr); @@ -741,7 +746,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(net, mhdr, msg, 0, dsz, mtu, &head); if (unlikely(rc < 0)) return rc; @@ -774,7 +779,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) uint i, last, dst = 0; u32 scope = TIPC_CLUSTER_SCOPE; - if (in_own_node(msg_orignode(msg))) + if (in_own_node(net, msg_orignode(msg))) scope = TIPC_NODE_SCOPE; /* Create 
destination port list: */ @@ -826,7 +831,7 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) + if (!tipc_msg_reverse(sock_net(&tsk->sk), buf, dnode, TIPC_OK)) return TIPC_OK; msg_set_type(msg, CONN_PROBE_REPLY); return TIPC_FWD_MSG; @@ -959,7 +964,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); + rc = tipc_msg_build(net, mhdr, m, 0, dsz, mtu, &head); if (rc < 0) goto exit; @@ -1074,7 +1079,7 @@ next: mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(net, mhdr, m, sent, send, mtu, &head); if (unlikely(rc < 0)) goto exit; do { @@ -1246,6 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { struct net *net = sock_net(&tsk->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1253,8 +1259,9 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; - skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, - tipc_own_addr, peer_port, tsk->portid, TIPC_OK); + skb = tipc_msg_create(net, CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tn->own_addr, peer_port, tsk->portid, + TIPC_OK); if (!skb) return; msg = buf_msg(skb); @@ -1726,6 +1733,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) int rc; u32 onode; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); uint truesize = skb->truesize; rc = filter_rcv(sk, skb); @@ -1736,10 +1744,10 @@ static int 
tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } - if ((rc < 0) && !tipc_msg_reverse(skb, &onode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(net, skb, &onode, -rc)) return 0; - tipc_link_xmit_skb(sock_net(sk), skb, onode, 0); + tipc_link_xmit_skb(net, skb, onode, 0); return 0; } @@ -1784,7 +1792,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) if (likely(!rc)) return 0; exit: - if ((rc < 0) && !tipc_msg_reverse(skb, &dnode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(net, skb, &dnode, -rc)) return -EHOSTUNREACH; tipc_link_xmit_skb(net, skb, dnode, 0); @@ -2045,6 +2053,7 @@ static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2067,15 +2076,16 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) + if (tipc_msg_reverse(net, skb, &dnode, + TIPC_CONN_SHUTDOWN)) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tipc_own_addr, + 0, dnode, tn->own_addr, tsk_peer_port(tsk), tsk->portid, TIPC_CONN_SHUTDOWN); tipc_link_xmit_skb(net, skb, dnode, tsk->portid); @@ -2107,6 +2117,8 @@ static void tipc_sk_timeout(unsigned long data) { struct tipc_sock *tsk = (struct tipc_sock *)data; struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; u32 peer_port, peer_node; @@ -2120,13 +2132,13 @@ static void tipc_sk_timeout(unsigned long data) if (tsk->probing_state == TIPC_CONN_PROBING) { /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 
0, tipc_own_addr, - peer_node, tsk->portid, peer_port, - TIPC_ERR_NO_PORT); + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, + tn->own_addr, peer_node, tsk->portid, + peer_port, TIPC_ERR_NO_PORT); } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, - 0, peer_node, tipc_own_addr, + skb = tipc_msg_create(net, CONN_MANAGER, CONN_PROBE, INT_H_SIZE, + 0, peer_node, tn->own_addr, peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) @@ -2198,14 +2210,16 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, static int tipc_sk_show(struct tipc_sock *tsk, char *buf, int len, int full_id) { + struct net *net = sock_net(&tsk->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; int ret; if (full_id) ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), tsk->portid); + tipc_zone(tn->own_addr), + tipc_cluster(tn->own_addr), + tipc_node(tn->own_addr), tsk->portid); else ret = tipc_snprintf(buf, len, "%-10u:", tsk->portid); @@ -2296,8 +2310,8 @@ void tipc_sk_reinit(struct net *net) rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); msg = &tsk->phdr; - msg_set_prevnode(msg, tipc_own_addr); - msg_set_orignode(msg, tipc_own_addr); + msg_set_prevnode(msg, tn->own_addr); + msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } } @@ -2691,6 +2705,8 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, int err; void *hdr; struct nlattr *attrs; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); @@ -2702,7 +2718,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct 
netlink_callback *cb, goto genlmsg_cancel; if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) goto attr_msg_cancel; if (tsk->connected) { -- cgit v1.2.3 From a62fbccecd62bacb4416fc427239f5b43b25d05e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:11 +0800 Subject: tipc: make subscriber server support net namespace TIPC establishes one subscriber server which allows users to subscribe their interesting name service status. After tipc supports namespace, one dedicated tipc stack instance is created for each namespace, and each instance can be deemed as one independent TIPC node. As a result, subscriber server must be built for each namespace. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9b8470edc783..2cec496ba691 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -388,7 +388,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, * * Returns 0 on success, errno otherwise */ -int tipc_sock_create_local(int type, struct socket **res) +int tipc_sock_create_local(struct net *net, int type, struct socket **res) { int rc; @@ -397,7 +397,7 @@ int tipc_sock_create_local(int type, struct socket **res) pr_err("Failed to create kernel socket\n"); return rc; } - tipc_sk_create(&init_net, *res, 0, 1); + tipc_sk_create(net, *res, 0, 1); return 0; } -- cgit v1.2.3 From 3721e9c7c194f576fbd30926e98e0abb13c641b5 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 13 Jan 2015 17:07:48 +0800 Subject: tipc: remove redundant timer defined in tipc_sock struct Remove the redundant timer defined in tipc_sock structure, instead we can directly reuse the sk_timer defined in sock structure. 
Signed-off-by: Ying Xue Acked-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2cec496ba691..c9c34a667921 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -69,7 +69,6 @@ * @pub_count: total # of publications port has made during its lifetime * @probing_state: * @probing_intv: - * @timer: * @port: port - interacts with 'sk' and with the rest of the TIPC stack * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request @@ -94,7 +93,6 @@ struct tipc_sock { u32 pub_count; u32 probing_state; unsigned long probing_intv; - struct timer_list timer; uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; @@ -360,7 +358,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -EINVAL; } msg_set_origport(msg, tsk->portid); - setup_timer(&tsk->timer, tipc_sk_timeout, (unsigned long)tsk); + setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -514,7 +512,8 @@ static int tipc_release(struct socket *sock) tipc_sk_withdraw(tsk, 0, NULL); probing_state = tsk->probing_state; - if (del_timer_sync(&tsk->timer) && probing_state != TIPC_CONN_PROBING) + if (del_timer_sync(&sk->sk_timer) && + probing_state != TIPC_CONN_PROBING) sock_put(sk); tipc_sk_remove(tsk); if (tsk->connected) { @@ -1136,7 +1135,8 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, u32 peer_node) { - struct net *net = sock_net(&tsk->sk); + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct tipc_msg *msg = &tsk->phdr; msg_set_destnode(msg, peer_node); @@ -1148,8 +1148,7 @@ static void 
tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) - sock_hold(&tsk->sk); + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); } @@ -2141,8 +2140,7 @@ static void tipc_sk_timeout(unsigned long data) 0, peer_node, tn->own_addr, peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) - sock_hold(sk); + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); } bh_unlock_sock(sk); if (skb) -- cgit v1.2.3 From 357c4774b5b08878d980847f496af38869e7aad0 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 13 Jan 2015 12:46:41 -0500 Subject: tipc: correctly handle releasing a not fully initialized sock Commit f2f9800d4955 "tipc: make tipc node table aware of net namespace" has added a dereference of sock->sk before making sure it's not NULL, which makes releasing a tipc socket NULL pointer dereference for sockets that are not fully initialized. Signed-off-by: Sasha Levin Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c9c34a667921..720fda6cc2e6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -470,8 +470,8 @@ static void tipc_sk_callback(struct rcu_head *head) static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct net *net; + struct tipc_net *tn; struct tipc_sock *tsk; struct sk_buff *skb; u32 dnode, probing_state; @@ -483,6 +483,9 @@ static int tipc_release(struct socket *sock) if (sk == NULL) return 0; + net = sock_net(sk); + tn = net_generic(net, tipc_net_id); + tsk = tipc_sk(sk); lock_sock(sk); -- cgit v1.2.3 From d6e164e3215794f9920af69cd2c6794632773478 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 16 Jan 2015 12:30:40 +0100 Subject: tipc: fix socket list regression in new nl api Commit 07f6c4bc (tipc: convert tipc reference table to use generic rhashtable) introduced a problem with port listing in the new netlink API. It broke the resume functionality resulting in a never ending loop. This was caused by starting with the first hash table every time subsequently never returning an empty skb (terminating). This patch fixes the resume mechanism by keeping a logical reference to the last hash table along with a logical reference to the socket (port) that didn't fit in the previous message. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 720fda6cc2e6..679a22082fcb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2749,29 +2749,35 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) struct tipc_sock *tsk; const struct bucket_table *tbl; struct rhash_head *pos; - u32 prev_portid = cb->args[0]; - u32 portid = prev_portid; struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); - int i; + u32 tbl_id = cb->args[0]; + u32 prev_portid = cb->args[1]; rcu_read_lock(); tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + for (; tbl_id < tbl->size; tbl_id++) { + rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); - portid = tsk->portid; + if (prev_portid && prev_portid != tsk->portid) { + spin_unlock_bh(&tsk->sk.sk_lock.slock); + continue; + } + err = __tipc_nl_add_sk(skb, cb, tsk); + if (err) { + prev_portid = tsk->portid; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + goto out; + } + prev_portid = 0; spin_unlock_bh(&tsk->sk.sk_lock.slock); - if (err) - break; - - prev_portid = portid; } } +out: rcu_read_unlock(); - - cb->args[0] = prev_portid; + cb->args[0] = tbl_id; + cb->args[1] = prev_portid; return skb->len; } -- cgit v1.2.3 From f25dcc7687d42a72de18aa41b04990a24c9e77c7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:52:29 -0500 Subject: tipc: tipc ->sendmsg() conversion This one needs to copy the same data from user potentially more than once. 
Sadly, MTU changes can trigger that ;-/ Cc: Jon Maloy Signed-off-by: Al Viro --- net/tipc/socket.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 679a22082fcb..caa4d663fd90 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -733,6 +733,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; struct sk_buff_head head; + struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -758,8 +759,10 @@ new_mtu: rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + msg->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tipc_sk(sk)->link_cong = 1; @@ -895,6 +898,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff_head head; struct sk_buff *skb; struct tipc_name_seq *seq = &dest->addr.nameseq; + struct iov_iter save; u32 mtu; long timeo; int rc; @@ -963,6 +967,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_hdr_sz(mhdr, BASIC_H_SIZE); } + save = m->msg_iter; new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); @@ -980,8 +985,10 @@ new_mtu: rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + m->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tsk->link_cong = 1; @@ -1052,6 +1059,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, long timeo; u32 dnode; uint mtu, send, sent = 0; + struct iov_iter save; /* Handle implied connection establishment */ if (unlikely(dest)) { @@ -1078,6 +1086,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, dnode = tsk_peer_node(tsk); next: + save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); __skb_queue_head_init(&head); @@ -1097,6 +1106,7 @@ next: if (rc == -EMSGSIZE) { tsk->max_pkt = 
tipc_node_get_mtu(net, dnode, portid); + m->msg_iter = save; goto next; } if (rc != -ELINKCONG) -- cgit v1.2.3 From c5898636c440da91d58f10beac00f073e68378df Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:36 -0500 Subject: tipc: reduce usage of context info in socket and link The most common usage of namespace information is when we fetch the own node address from the net structure. This leads to a lot of passing around of a parameter of type 'struct net *' between functions just to make them able to obtain this address. However, in many cases this is unnecessary. The own node address is readily available as a member of both struct tipc_sock and tipc_link, and can be fetched from there instead. The fact that the vast majority of functions in socket.c and link.c anyway are maintaining a pointer to their respective base structures makes this option even more compelling. In this commit, we introduce the inline functions tsk_own_node() and link_own_node() to make it easy for functions to fetch the node address from those structs instead of having to pass along and dereference the namespace struct. In particular, we make calls to the msg_xx() functions in msg.{h,c} context independent by directly passing them the own node address as parameter when needed. Those functions should be regarded as leaves in the code dependency tree, and it is hence desirable to keep them namespace unaware. Apart from a potential positive effect on cache behavior, these changes make it easier to introduce the changes that will follow later in this series. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 67 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 31 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index caa4d663fd90..b384e658dfeb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -177,6 +177,11 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { * - port reference */ +static u32 tsk_own_node(struct tipc_sock *tsk) +{ + return msg_prevnode(&tsk->phdr); +} + static u32 tsk_peer_node(struct tipc_sock *tsk) { return msg_destnode(&tsk->phdr); @@ -249,11 +254,11 @@ static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; u32 dnode; - struct net *net = sock_net(sk); + u32 own_node = tsk_own_node(tipc_sk(sk)); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(net, skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(net, skb, dnode, 0); + if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -305,6 +310,7 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, int kern) { + struct tipc_net *tn; const struct proto_ops *ops; socket_state state; struct sock *sk; @@ -346,7 +352,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->max_pkt = MAX_PKT_DEFAULT; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; - tipc_msg_init(net, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + tn = net_generic(sock_net(sk), tipc_net_id); + tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); /* Finish initializing socket data structures */ @@ -471,7 +478,6 @@ static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; struct net *net; - struct tipc_net *tn; struct tipc_sock *tsk; struct sk_buff *skb; u32 dnode, probing_state; @@ -484,8 +490,6 @@ static int 
tipc_release(struct socket *sock) return 0; net = sock_net(sk); - tn = net_generic(net, tipc_net_id); - tsk = tipc_sk(sk); lock_sock(sk); @@ -507,7 +511,7 @@ static int tipc_release(struct socket *sock) tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(net, skb, &dnode, + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, TIPC_ERR_NO_PORT)) tipc_link_xmit_skb(net, skb, dnode, 0); } @@ -520,9 +524,9 @@ static int tipc_release(struct socket *sock) sock_put(sk); tipc_sk_remove(tsk); if (tsk->connected) { - skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, - tn->own_addr, tsk_peer_port(tsk), + tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, TIPC_ERR_NO_PORT); if (skb) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); @@ -730,8 +734,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct msghdr *msg, size_t dsz, long timeo) { struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); - struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; + struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff_head head; struct iov_iter save = msg->msg_iter; uint mtu; @@ -749,7 +754,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); __skb_queue_head_init(&head); - rc = tipc_msg_build(net, mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head); if (unlikely(rc < 0)) return rc; @@ -836,7 +841,7 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(sock_net(&tsk->sk), buf, dnode, TIPC_OK)) + if (!tipc_msg_reverse(tsk_own_node(tsk), buf, dnode, TIPC_OK)) return TIPC_OK; msg_set_type(msg, CONN_PROBE_REPLY); return TIPC_FWD_MSG; @@ -971,7 +976,7 @@ static int tipc_sendmsg(struct 
kiocb *iocb, struct socket *sock, new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); - rc = tipc_msg_build(net, mhdr, m, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); if (rc < 0) goto exit; @@ -1090,7 +1095,7 @@ next: mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); __skb_queue_head_init(&head); - rc = tipc_msg_build(net, mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); if (unlikely(rc < 0)) goto exit; do { @@ -1263,7 +1268,6 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { struct net *net = sock_net(&tsk->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1271,9 +1275,9 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; - skb = tipc_msg_create(net, CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, - dnode, tn->own_addr, peer_port, tsk->portid, - TIPC_OK); + skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tsk_own_node(tsk), peer_port, + tsk->portid, TIPC_OK); if (!skb) return; msg = buf_msg(skb); @@ -1756,7 +1760,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } - if ((rc < 0) && !tipc_msg_reverse(net, skb, &onode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(tsk_own_node(tsk), skb, &onode, -rc)) return 0; tipc_link_xmit_skb(net, skb, onode, 0); @@ -1773,6 +1777,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) int tipc_sk_rcv(struct net *net, struct sk_buff *skb) { struct tipc_sock *tsk; + struct tipc_net *tn; struct sock *sk; u32 dport = msg_destport(buf_msg(skb)); int rc = TIPC_OK; @@ -1804,7 +1809,8 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) if (likely(!rc)) return 0; exit: - if ((rc < 0) && !tipc_msg_reverse(net, skb, &dnode, 
-rc)) + tn = net_generic(net, tipc_net_id); + if ((rc < 0) && !tipc_msg_reverse(tn->own_addr, skb, &dnode, -rc)) return -EHOSTUNREACH; tipc_link_xmit_skb(net, skb, dnode, 0); @@ -2065,7 +2071,6 @@ static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2088,16 +2093,17 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(net, skb, &dnode, + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, TIPC_CONN_SHUTDOWN)) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); - skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tn->own_addr, + 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, TIPC_CONN_SHUTDOWN); tipc_link_xmit_skb(net, skb, dnode, tsk->portid); @@ -2129,10 +2135,9 @@ static void tipc_sk_timeout(unsigned long data) { struct tipc_sock *tsk = (struct tipc_sock *)data; struct sock *sk = &tsk->sk; - struct net *net = sock_net(sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; u32 peer_port, peer_node; + u32 own_node = tsk_own_node(tsk); bh_lock_sock(sk); if (!tsk->connected) { @@ -2144,13 +2149,13 @@ static void tipc_sk_timeout(unsigned long data) if (tsk->probing_state == TIPC_CONN_PROBING) { /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, - tn->own_addr, peer_node, tsk->portid, + own_node, peer_node, tsk->portid, peer_port, TIPC_ERR_NO_PORT); } else { - skb = tipc_msg_create(net, CONN_MANAGER, CONN_PROBE, INT_H_SIZE, - 0, peer_node, tn->own_addr, + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + 
INT_H_SIZE, 0, peer_node, own_node, peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); -- cgit v1.2.3 From 1186adf7df04e3b4298943fe89d9741ab42e30ff Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:37 -0500 Subject: tipc: simplify message forwarding and rejection in socket layer Despite recent improvements, the handling of error codes and return values at reception of messages in the socket layer is still confusing. In this commit, we try to make it more comprehensible. First, we separate between the return values coming from the functions called by tipc_sk_rcv(), -those are TIPC specific error codes, and the return values returned by tipc_sk_rcv() itself. Second, we don't use the returned TIPC error code as indication for whether a buffer should be forwarded/rejected or not; instead we use the buffer pointer passed along with filter_msg(). This separation is necessary because we sometimes want to forward messages even when there is no error (i.e., protocol messages and successfully secondary looked up data messages). Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 120 ++++++++++++++++++++++++++---------------------------- 1 file changed, 58 insertions(+), 62 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b384e658dfeb..f9cd587e4090 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -818,17 +818,14 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) /** * tipc_sk_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket - * @dnode: node to send response message to, if any - * @buf: buffer containing protocol message - * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if - * (CONN_PROBE_REPLY) message should be forwarded. + * @skb: pointer to message buffer. Set to NULL if buffer is consumed. 
*/ -static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, - struct sk_buff *buf) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(*skb); int conn_cong; - + u32 dnode; + u32 own_node = tsk_own_node(tsk); /* Ignore if connection cannot be validated: */ if (!tsk_peer_msg(tsk, msg)) goto exit; @@ -841,15 +838,15 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(tsk_own_node(tsk), buf, dnode, TIPC_OK)) - return TIPC_OK; - msg_set_type(msg, CONN_PROBE_REPLY); - return TIPC_FWD_MSG; + if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) { + msg_set_type(msg, CONN_PROBE_REPLY); + return; + } } /* Do nothing if msg_type() == CONN_PROBE_REPLY */ exit: - kfree_skb(buf); - return TIPC_OK; + kfree_skb(*skb); + *skb = NULL; } static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) @@ -1568,16 +1565,16 @@ static void tipc_data_ready(struct sock *sk) /** * filter_connect - Handle all incoming messages for a connection-based socket * @tsk: TIPC socket - * @msg: message + * @skb: pointer to message buffer. Set to NULL if buffer is consumed * * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise */ -static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; - struct tipc_msg *msg = buf_msg(*buf); + struct tipc_msg *msg = buf_msg(*skb); int retval = -TIPC_ERR_NO_PORT; if (msg_mcast(msg)) @@ -1627,8 +1624,8 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) * connect() routine if sleeping. 
*/ if (msg_data_sz(msg) == 0) { - kfree_skb(*buf); - *buf = NULL; + kfree_skb(*skb); + *skb = NULL; if (waitqueue_active(sk_sleep(sk))) wake_up_interruptible(sk_sleep(sk)); } @@ -1680,32 +1677,33 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) /** * filter_rcv - validate incoming message * @sk: socket - * @buf: message + * @skb: pointer to message. Set to NULL if buffer is consumed. * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * - * Called with socket lock already taken; port lock may also be taken. + * Called with socket lock already taken * - * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message - * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded + * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected */ -static int filter_rcv(struct sock *sk, struct sk_buff *buf) +static int filter_rcv(struct sock *sk, struct sk_buff **skb) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_msg *msg = buf_msg(buf); - unsigned int limit = rcvbuf_limit(sk, buf); - u32 onode; + struct tipc_msg *msg = buf_msg(*skb); + unsigned int limit = rcvbuf_limit(sk, *skb); int rc = TIPC_OK; - if (unlikely(msg_user(msg) == CONN_MANAGER)) - return tipc_sk_proto_rcv(tsk, &onode, buf); + if (unlikely(msg_user(msg) == CONN_MANAGER)) { + tipc_sk_proto_rcv(tsk, skb); + return TIPC_OK; + } if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { - kfree_skb(buf); + kfree_skb(*skb); tsk->link_cong = 0; sk->sk_write_space(sk); + *skb = NULL; return TIPC_OK; } @@ -1717,21 +1715,22 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) if (msg_connected(msg)) return -TIPC_ERR_NO_PORT; } else { - rc = filter_connect(tsk, &buf); - if (rc != TIPC_OK || buf == NULL) + rc = filter_connect(tsk, skb); + if (rc != TIPC_OK || !*skb) return rc; } /* Reject message if there isn't room to queue it */ - if 
(sk_rmem_alloc_get(sk) + buf->truesize >= limit) + if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit) return -TIPC_ERR_OVERLOAD; /* Enqueue message */ - TIPC_SKB_CB(buf)->handle = NULL; - __skb_queue_tail(&sk->sk_receive_queue, buf); - skb_set_owner_r(buf, sk); + TIPC_SKB_CB(*skb)->handle = NULL; + __skb_queue_tail(&sk->sk_receive_queue, *skb); + skb_set_owner_r(*skb, sk); sk->sk_data_ready(sk); + *skb = NULL; return TIPC_OK; } @@ -1746,25 +1745,22 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - int rc; - u32 onode; + int err; + atomic_t *dcnt; + u32 dnode; struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); uint truesize = skb->truesize; - rc = filter_rcv(sk, skb); - - if (likely(!rc)) { - if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, &tsk->dupl_rcvcnt); + err = filter_rcv(sk, &skb); + if (likely(!skb)) { + dcnt = &tsk->dupl_rcvcnt; + if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, dcnt); return 0; } - - if ((rc < 0) && !tipc_msg_reverse(tsk_own_node(tsk), skb, &onode, -rc)) - return 0; - - tipc_link_xmit_skb(net, skb, onode, 0); - + if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); return 0; } @@ -1780,14 +1776,14 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) struct tipc_net *tn; struct sock *sk; u32 dport = msg_destport(buf_msg(skb)); - int rc = TIPC_OK; + int err = TIPC_OK; uint limit; u32 dnode; /* Validate destination and message */ tsk = tipc_sk_lookup(net, dport); if (unlikely(!tsk)) { - rc = tipc_msg_eval(net, skb, &dnode); + err = tipc_msg_eval(net, skb, &dnode); goto exit; } sk = &tsk->sk; @@ -1796,25 +1792,25 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) spin_lock_bh(&sk->sk_lock.slock); if (!sock_owned_by_user(sk)) { - rc = filter_rcv(sk, skb); + err = filter_rcv(sk, &skb); } else { if 
(sk->sk_backlog.len == 0) atomic_set(&tsk->dupl_rcvcnt, 0); limit = rcvbuf_limit(sk, skb) + atomic_read(&tsk->dupl_rcvcnt); - if (sk_add_backlog(sk, skb, limit)) - rc = -TIPC_ERR_OVERLOAD; + if (likely(!sk_add_backlog(sk, skb, limit))) + skb = NULL; + else + err = -TIPC_ERR_OVERLOAD; } spin_unlock_bh(&sk->sk_lock.slock); sock_put(sk); - if (likely(!rc)) - return 0; exit: - tn = net_generic(net, tipc_net_id); - if ((rc < 0) && !tipc_msg_reverse(tn->own_addr, skb, &dnode, -rc)) - return -EHOSTUNREACH; - - tipc_link_xmit_skb(net, skb, dnode, 0); - return (rc < 0) ? -EHOSTUNREACH : 0; + if (unlikely(skb)) { + tn = net_generic(net, tipc_net_id); + if (!err || tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + tipc_link_xmit_skb(net, skb, dnode, 0); + } + return err ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) -- cgit v1.2.3 From d570d86497eeb11410b1c096d82ade11bcdd966c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:38 -0500 Subject: tipc: enqueue arrived buffers in socket in separate function The code for enqueuing arriving buffers in the function tipc_sk_rcv() contains long code lines and currently goes to two indentation levels. As a cosmetic preparation for the next commits, we break it out into a separate function. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9cd587e4090..1d98bfcda6f6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1764,6 +1764,35 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } +/** + * tipc_sk_enqueue_skb - enqueue buffer to socket or backlog queue + * @sk: socket + * @skb: pointer to message. Set to NULL if buffer is consumed. 
+ * @dnode: if buffer should be forwarded/returned, send to this node + * + * Caller must hold socket lock + * + * Returns TIPC_OK (0) or -tipc error code + */ +static int tipc_sk_enqueue_skb(struct sock *sk, struct sk_buff **skb) +{ + unsigned int lim; + atomic_t *dcnt; + + if (unlikely(!*skb)) + return TIPC_OK; + if (!sock_owned_by_user(sk)) + return filter_rcv(sk, skb); + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (sk->sk_backlog.len) + atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, *skb) + atomic_read(dcnt); + if (unlikely(sk_add_backlog(sk, *skb, lim))) + return -TIPC_ERR_OVERLOAD; + *skb = NULL; + return TIPC_OK; +} + /** * tipc_sk_rcv - handle incoming message * @skb: buffer containing arriving message @@ -1776,8 +1805,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) struct tipc_net *tn; struct sock *sk; u32 dport = msg_destport(buf_msg(skb)); - int err = TIPC_OK; - uint limit; + int err; u32 dnode; /* Validate destination and message */ @@ -1788,20 +1816,8 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) } sk = &tsk->sk; - /* Queue message */ spin_lock_bh(&sk->sk_lock.slock); - - if (!sock_owned_by_user(sk)) { - err = filter_rcv(sk, &skb); - } else { - if (sk->sk_backlog.len == 0) - atomic_set(&tsk->dupl_rcvcnt, 0); - limit = rcvbuf_limit(sk, skb) + atomic_read(&tsk->dupl_rcvcnt); - if (likely(!sk_add_backlog(sk, skb, limit))) - skb = NULL; - else - err = -TIPC_ERR_OVERLOAD; - } + err = tipc_sk_enqueue_skb(sk, &skb); spin_unlock_bh(&sk->sk_lock.slock); sock_put(sk); exit: -- cgit v1.2.3 From e3a77561e7d326e18881ef3cb84807892b353459 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:39 -0500 Subject: tipc: split up function tipc_msg_eval() The function tipc_msg_eval() is in reality doing two related, but different tasks. First it tries to find a new destination for named messages, in case there was no first lookup, or if the first lookup failed. 
Second, it does what its name suggests, evaluating the validity of the message and its destination, and returning an appropriate error code depending on the result. This is confusing, and in this commit we choose to break it up into two functions. A new function, tipc_msg_lookup_dest(), first attempts to find a new destination, if the message is of the right type. If this lookup fails, or if the message should not be subject to a second lookup, the already existing tipc_msg_reverse() is called. This function prepares the message for rejection, if applicable. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1d98bfcda6f6..e14b2aedb212 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1739,7 +1739,7 @@ static int filter_rcv(struct sock *sk, struct sk_buff **skb) * @sk: socket * @skb: message * - * Caller must hold socket lock, but not port lock. 
+ * Caller must hold socket lock * * Returns 0 */ @@ -1805,27 +1805,31 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) struct tipc_net *tn; struct sock *sk; u32 dport = msg_destport(buf_msg(skb)); - int err; + int err = -TIPC_ERR_NO_PORT; u32 dnode; - /* Validate destination and message */ + /* Find destination */ tsk = tipc_sk_lookup(net, dport); - if (unlikely(!tsk)) { - err = tipc_msg_eval(net, skb, &dnode); - goto exit; - } - sk = &tsk->sk; - - spin_lock_bh(&sk->sk_lock.slock); - err = tipc_sk_enqueue_skb(sk, &skb); - spin_unlock_bh(&sk->sk_lock.slock); - sock_put(sk); -exit: - if (unlikely(skb)) { - tn = net_generic(net, tipc_net_id); - if (!err || tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) - tipc_link_xmit_skb(net, skb, dnode, 0); + if (likely(tsk)) { + sk = &tsk->sk; + spin_lock_bh(&sk->sk_lock.slock); + err = tipc_sk_enqueue_skb(sk, &skb); + spin_unlock_bh(&sk->sk_lock.slock); + sock_put(sk); } + if (likely(!skb)) + return 0; + if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) + goto xmit; + if (!err) { + dnode = msg_destnode(buf_msg(skb)); + goto xmit; + } + tn = net_generic(net, tipc_net_id); + if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + return -EHOSTUNREACH; +xmit: + tipc_link_xmit_skb(net, skb, dnode, dport); return err ? -EHOSTUNREACH : 0; } -- cgit v1.2.3 From 94153e36e709e78fc4e1f93dc4e4da785690c7d1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:40 -0500 Subject: tipc: use existing sk_write_queue for outgoing packet chain The list for outgoing traffic buffers from a socket is currently allocated on the stack. This forces us to initialize the queue for each sent message, something costing extra CPU cycles in the most critical data path. Later in this series we will introduce a new safe input buffer queue, something that would force us to initialize even the spinlock of the outgoing queue. A closer analysis reveals that the queue always is filled and emptied within the same lock_sock() session. 
It is therefore safe to use a queue aggregated in the socket itself for this purpose. Since there already exists a queue for this in struct sock, sk_write_queue, we introduce use of that queue in this commit. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e14b2aedb212..611a04fb0ddc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -69,8 +69,6 @@ * @pub_count: total # of publications port has made during its lifetime * @probing_state: * @probing_intv: - * @port: port - interacts with 'sk' and with the rest of the TIPC stack - * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @link_cong: non-zero if owner must sleep because of link congestion @@ -737,7 +735,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -753,13 +751,12 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bclink_xmit(net, &head); + rc = tipc_bclink_xmit(net, pktchain); if (likely(rc >= 0)) { rc = dsz; break; @@ -773,7 +770,7 @@ new_mtu: tipc_sk(sk)->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); return rc; } 
@@ -897,7 +894,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; struct sk_buff *skb; struct tipc_name_seq *seq = &dest->addr.nameseq; struct iov_iter save; @@ -972,15 +969,14 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, save = m->msg_iter; new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); if (rc < 0) goto exit; do { - skb = skb_peek(&head); + skb = skb_peek(pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(net, &head, dnode, tsk->portid); + rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -996,7 +992,7 @@ new_mtu: tsk->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); exit: if (iocb) @@ -1054,7 +1050,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); u32 portid = tsk->portid; int rc = -EINVAL; @@ -1091,13 +1087,12 @@ next: save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); if (unlikely(rc < 0)) goto exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(net, &head, dnode, portid); + rc = tipc_link_xmit(net, pktchain, dnode, portid); 
if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1117,7 +1112,7 @@ next: } rc = tipc_wait_for_sndpkt(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); exit: if (iocb) -- cgit v1.2.3 From c637c1035534867b85b78b453c38c495b58e2c5a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:41 -0500 Subject: tipc: resolve race problem at unicast message reception TIPC handles message cardinality and sequencing at the link layer, before passing messages upwards to the destination sockets. During the upcall from link to socket no locks are held. It is therefore possible, and we see it happen occasionally, that messages arriving in different threads and delivered in sequence still bypass each other before they reach the destination socket. This must not happen, since it violates the sequentiality guarantee. We solve this by adding a new input buffer queue to the link structure. Arriving messages are added safely to the tail of that queue by the link, while the head of the queue is consumed, also safely, by the receiving socket. Sequentiality is secured per socket by only allowing buffers to be dequeued inside the socket lock. Since there may be multiple simultaneous readers of the queue, we use a 'filter' parameter to reduce the risk that they peek the same buffer from the queue, hence also reducing the risk of contention on the receiving socket locks. This solves the sequentiality problem, and seems to cause no measurable performance degradation. A nice side effect of this change is that lock handling in the functions tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that will enable future simplifications of those functions. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 132 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 47 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 611a04fb0ddc..c1a4611649ab 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -41,6 +41,7 @@ #include "node.h" #include "link.h" #include "config.h" +#include "name_distr.h" #include "socket.h" #define SS_LISTENING -1 /* socket is listening */ @@ -785,10 +786,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) struct sk_buff *b; uint i, last, dst = 0; u32 scope = TIPC_CLUSTER_SCOPE; + struct sk_buff_head msgs; if (in_own_node(net, msg_orignode(msg))) scope = TIPC_NODE_SCOPE; + if (unlikely(!msg_mcast(msg))) { + pr_warn("Received non-multicast msg in multicast\n"); + kfree_skb(buf); + goto exit; + } /* Create destination port list: */ tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), msg_nameupper(msg), scope, &dports); @@ -806,9 +813,12 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) continue; } msg_set_destport(msg, item->ports[i]); - tipc_sk_rcv(net, b); + skb_queue_head_init(&msgs); + skb_queue_tail(&msgs, b); + tipc_sk_rcv(net, &msgs); } } +exit: tipc_port_list_free(&dports); } @@ -1760,71 +1770,99 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) } /** - * tipc_sk_enqueue_skb - enqueue buffer to socket or backlog queue - * @sk: socket - * @skb: pointer to message. Set to NULL if buffer is consumed. 
- * @dnode: if buffer should be forwarded/returned, send to this node + * tipc_sk_enqueue - extract all buffers with destination 'dport' from + * inputq and try adding them to socket or backlog queue + * @inputq: list of incoming buffers with potentially different destinations + * @sk: socket where the buffers should be enqueued + * @dport: port number for the socket + * @_skb: returned buffer to be forwarded or rejected, if applicable * * Caller must hold socket lock * - * Returns TIPC_OK (0) or -tipc error code + * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD + * or -TIPC_ERR_NO_PORT */ -static int tipc_sk_enqueue_skb(struct sock *sk, struct sk_buff **skb) +static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport, struct sk_buff **_skb) { unsigned int lim; atomic_t *dcnt; - - if (unlikely(!*skb)) - return TIPC_OK; - if (!sock_owned_by_user(sk)) - return filter_rcv(sk, skb); - dcnt = &tipc_sk(sk)->dupl_rcvcnt; - if (sk->sk_backlog.len) - atomic_set(dcnt, 0); - lim = rcvbuf_limit(sk, *skb) + atomic_read(dcnt); - if (unlikely(sk_add_backlog(sk, *skb, lim))) + int err; + struct sk_buff *skb; + unsigned long time_limit = jiffies + 2; + + while (skb_queue_len(inputq)) { + skb = tipc_skb_dequeue(inputq, dport); + if (unlikely(!skb)) + return TIPC_OK; + /* Return if softirq window exhausted */ + if (unlikely(time_after_eq(jiffies, time_limit))) + return TIPC_OK; + if (!sock_owned_by_user(sk)) { + err = filter_rcv(sk, &skb); + if (likely(!skb)) + continue; + *_skb = skb; + return err; + } + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (sk->sk_backlog.len) + atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); + if (likely(!sk_add_backlog(sk, skb, lim))) + continue; + *_skb = skb; return -TIPC_ERR_OVERLOAD; - *skb = NULL; + } return TIPC_OK; } /** - * tipc_sk_rcv - handle incoming message - * @skb: buffer containing arriving message - * Consumes buffer - * Returns 0 if success, or errno: -EHOSTUNREACH + * 
tipc_sk_rcv - handle a chain of incoming buffers + * @inputq: buffer list containing the buffers + * Consumes all buffers in list until inputq is empty + * Note: may be called in multiple threads referring to the same queue + * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH + * Only node local calls check the return value, sending single-buffer queues */ -int tipc_sk_rcv(struct net *net, struct sk_buff *skb) +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + u32 dnode, dport = 0; + int err = -TIPC_ERR_NO_PORT; + struct sk_buff *skb; struct tipc_sock *tsk; struct tipc_net *tn; struct sock *sk; - u32 dport = msg_destport(buf_msg(skb)); - int err = -TIPC_ERR_NO_PORT; - u32 dnode; - /* Find destination */ - tsk = tipc_sk_lookup(net, dport); - if (likely(tsk)) { - sk = &tsk->sk; - spin_lock_bh(&sk->sk_lock.slock); - err = tipc_sk_enqueue_skb(sk, &skb); - spin_unlock_bh(&sk->sk_lock.slock); - sock_put(sk); - } - if (likely(!skb)) - return 0; - if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) - goto xmit; - if (!err) { - dnode = msg_destnode(buf_msg(skb)); - goto xmit; - } - tn = net_generic(net, tipc_net_id); - if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) - return -EHOSTUNREACH; + while (skb_queue_len(inputq)) { + skb = NULL; + dport = tipc_skb_peek_port(inputq, dport); + tsk = tipc_sk_lookup(net, dport); + if (likely(tsk)) { + sk = &tsk->sk; + if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { + err = tipc_sk_enqueue(inputq, sk, dport, &skb); + spin_unlock_bh(&sk->sk_lock.slock); + dport = 0; + } + sock_put(sk); + } else { + skb = tipc_skb_dequeue(inputq, dport); + } + if (likely(!skb)) + continue; + if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) + goto xmit; + if (!err) { + dnode = msg_destnode(buf_msg(skb)); + goto xmit; + } + tn = net_generic(net, tipc_net_id); + if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + continue; xmit: - tipc_link_xmit_skb(net, skb, dnode, dport); + tipc_link_xmit_skb(net, skb, dnode, 
dport); + } return err ? -EHOSTUNREACH : 0; } -- cgit v1.2.3 From 3c724acdd5049907555a831f814bfd5927c3350c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:43 -0500 Subject: tipc: simplify socket multicast reception The structure 'tipc_port_list' is used to collect port numbers representing multicast destination sockets on a receiving node. The list is not based on a standard linked list, and is in reality optimized for the uncommon case that there is more than one multicast destination per node. This makes the list handling unnecessarily complex, and as a consequence, even the socket multicast reception becomes more complex. In this commit, we replace 'tipc_port_list' with a new 'struct tipc_plist', which is based on a standard list. We give the new list stack (push/pop) semantics, something that simplifies the implementation of the function tipc_sk_mcast_rcv(). Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c1a4611649ab..26aec8414ac1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2014, Ericsson AB + * Copyright (c) 2001-2007, 2012-2015, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. 
* @@ -778,48 +778,42 @@ new_mtu: /* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets */ -void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); - struct tipc_port_list dports = {0, NULL, }; - struct tipc_port_list *item; - struct sk_buff *b; - uint i, last, dst = 0; + struct tipc_msg *msg = buf_msg(skb); + struct tipc_plist dports; + struct sk_buff *cskb; + u32 portid; u32 scope = TIPC_CLUSTER_SCOPE; - struct sk_buff_head msgs; + struct sk_buff_head msgq; + uint hsz = skb_headroom(skb) + msg_hdr_sz(msg); + + skb_queue_head_init(&msgq); + tipc_plist_init(&dports); if (in_own_node(net, msg_orignode(msg))) scope = TIPC_NODE_SCOPE; if (unlikely(!msg_mcast(msg))) { pr_warn("Received non-multicast msg in multicast\n"); - kfree_skb(buf); goto exit; } /* Create destination port list: */ tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), msg_nameupper(msg), scope, &dports); - last = dports.count; - if (!last) { - kfree_skb(buf); - return; - } - - for (item = &dports; item; item = item->next) { - for (i = 0; i < PLSIZE && ++dst <= last; i++) { - b = (dst != last) ? 
skb_clone(buf, GFP_ATOMIC) : buf; - if (!b) { - pr_warn("Failed do clone mcast rcv buffer\n"); - continue; - } - msg_set_destport(msg, item->ports[i]); - skb_queue_head_init(&msgs); - skb_queue_tail(&msgs, b); - tipc_sk_rcv(net, &msgs); + portid = tipc_plist_pop(&dports); + for (; portid; portid = tipc_plist_pop(&dports)) { + cskb = __pskb_copy(skb, hsz, GFP_ATOMIC); + if (!cskb) { + pr_warn("Failed do clone mcast rcv buffer\n"); + continue; } + msg_set_destport(buf_msg(cskb), portid); + skb_queue_tail(&msgq, cskb); } + tipc_sk_rcv(net, &msgq); exit: - tipc_port_list_free(&dports); + kfree_skb(skb); } /** -- cgit v1.2.3 From cb1b728096f54e7408d60fb571944bed00c5b771 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:44 -0500 Subject: tipc: eliminate race condition at multicast reception In a previous commit in this series we resolved a race problem during unicast message reception. Here, we resolve the same problem at multicast reception. We apply the same technique: an input queue serializing the delivery of arriving buffers. The main difference is that here we do it in two steps. First, the broadcast link feeds arriving buffers into the tail of an arrival queue, which head is consumed at the socket level, and where destination lookup is performed. Second, if the lookup is successful, the resulting buffer clones are fed into a second queue, the input queue. This queue is consumed at reception in the socket just like in the unicast case. Both queues are protected by the same lock, -the one of the input queue. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 72 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 28 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 26aec8414ac1..66666805b53c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -776,44 +776,60 @@ new_mtu: return rc; } -/* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets +/** + * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @arrvq: queue with arriving messages, to be cloned after destination lookup + * @inputq: queue with cloned messages, delivered to socket after dest lookup + * + * Multi-threaded: parallel calls with reference to same queues may occur */ -void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *skb) +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq) { - struct tipc_msg *msg = buf_msg(skb); + struct tipc_msg *msg; struct tipc_plist dports; - struct sk_buff *cskb; u32 portid; u32 scope = TIPC_CLUSTER_SCOPE; - struct sk_buff_head msgq; - uint hsz = skb_headroom(skb) + msg_hdr_sz(msg); + struct sk_buff_head tmpq; + uint hsz; + struct sk_buff *skb, *_skb; - skb_queue_head_init(&msgq); + __skb_queue_head_init(&tmpq); tipc_plist_init(&dports); - if (in_own_node(net, msg_orignode(msg))) - scope = TIPC_NODE_SCOPE; - - if (unlikely(!msg_mcast(msg))) { - pr_warn("Received non-multicast msg in multicast\n"); - goto exit; - } - /* Create destination port list: */ - tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), - msg_nameupper(msg), scope, &dports); - portid = tipc_plist_pop(&dports); - for (; portid; portid = tipc_plist_pop(&dports)) { - cskb = __pskb_copy(skb, hsz, GFP_ATOMIC); - if (!cskb) { - pr_warn("Failed do clone mcast rcv buffer\n"); - continue; + skb = tipc_skb_peek(arrvq, &inputq->lock); + for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { + msg = buf_msg(skb); + hsz = 
skb_headroom(skb) + msg_hdr_sz(msg); + + if (in_own_node(net, msg_orignode(msg))) + scope = TIPC_NODE_SCOPE; + + /* Create destination port list and message clones: */ + tipc_nametbl_mc_translate(net, + msg_nametype(msg), msg_namelower(msg), + msg_nameupper(msg), scope, &dports); + portid = tipc_plist_pop(&dports); + for (; portid; portid = tipc_plist_pop(&dports)) { + _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + if (_skb) { + msg_set_destport(buf_msg(_skb), portid); + __skb_queue_tail(&tmpq, _skb); + continue; + } + pr_warn("Failed to clone mcast rcv buffer\n"); } - msg_set_destport(buf_msg(cskb), portid); - skb_queue_tail(&msgq, cskb); + /* Append to inputq if not already done by other thread */ + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + skb_queue_splice_tail_init(&tmpq, inputq); + kfree_skb(__skb_dequeue(arrvq)); + } + spin_unlock_bh(&inputq->lock); + __skb_queue_purge(&tmpq); + kfree_skb(skb); } - tipc_sk_rcv(net, &msgq); -exit: - kfree_skb(skb); + tipc_sk_rcv(net, inputq); } /** -- cgit v1.2.3 From 51a00daf7369b581e5241c5cae5924886deda261 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Sun, 8 Feb 2015 11:10:50 -0500 Subject: tipc: fix bug in socket reception function In commit c637c1035534867b85b78b453c38c495b58e2c5a ("tipc: resolve race problem at unicast message reception") we introduced a time limit for how long the function tipc_sk_enqueue() would be allowed to execute its loop. Unfortunately, the test for when this limit is passed was put in the wrong place, resulting in a lost message when the test is true. We fix this by moving the test to before we dequeue the next buffer from the input queue. Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 66666805b53c..4a98d15a1323 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1802,12 +1802,11 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, unsigned long time_limit = jiffies + 2; while (skb_queue_len(inputq)) { + if (unlikely(time_after_eq(jiffies, time_limit))) + return TIPC_OK; skb = tipc_skb_dequeue(inputq, dport); if (unlikely(!skb)) return TIPC_OK; - /* Return if softirq window exhausted */ - if (unlikely(time_after_eq(jiffies, time_limit))) - return TIPC_OK; if (!sock_owned_by_user(sk)) { err = filter_rcv(sk, &skb); if (likely(!skb)) -- cgit v1.2.3 From bfb3e5dd8dfd84dfd13649393abab63e43267b00 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:03 +0100 Subject: tipc: move and rename the legacy nl api to "nl compat" The new netlink API is no longer "v2" but rather the standard API and the legacy API is now "nl compat". We split them into separate start/stop and put them in different files in order to further distinguish them. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4a98d15a1323..d76c171f7b7e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2783,7 +2783,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); if (!hdr) goto msg_cancel; @@ -2864,7 +2864,7 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, struct nlattr *attrs; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); if (!hdr) goto msg_cancel; -- cgit v1.2.3 From 487d2a3a1326d339ce273ffbcd03247f2b7b052e Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:11 +0100 Subject: tipc: convert legacy nl socket dump to nl compat Convert socket (port) listing to compat dumpit call. If a socket (port) has publications a second dumpit call is issued to collect them and format them into the legacy buffer before continuing to process the sockets (ports). Command converted in this patch: TIPC_CMD_SHOW_PORTS Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 85 ------------------------------------------------------- 1 file changed, 85 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d76c171f7b7e..e77d738bb771 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2281,91 +2281,6 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, return rc; } -static int tipc_sk_show(struct tipc_sock *tsk, char *buf, - int len, int full_id) -{ - struct net *net = sock_net(&tsk->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct publication *publ; - int ret; - - if (full_id) - ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tn->own_addr), - tipc_cluster(tn->own_addr), - tipc_node(tn->own_addr), tsk->portid); - else - ret = tipc_snprintf(buf, len, "%-10u:", tsk->portid); - - if (tsk->connected) { - u32 dport = tsk_peer_port(tsk); - u32 destnode = tsk_peer_node(tsk); - - ret += tipc_snprintf(buf + ret, len - ret, - " connected to <%u.%u.%u:%u>", - tipc_zone(destnode), - tipc_cluster(destnode), - tipc_node(destnode), dport); - if (tsk->conn_type != 0) - ret += tipc_snprintf(buf + ret, len - ret, - " via {%u,%u}", tsk->conn_type, - tsk->conn_instance); - } else if (tsk->published) { - ret += tipc_snprintf(buf + ret, len - ret, " bound to"); - list_for_each_entry(publ, &tsk->publications, pport_list) { - if (publ->lower == publ->upper) - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u}", publ->type, - publ->lower); - else - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u,%u}", publ->type, - publ->lower, publ->upper); - } - } - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -struct sk_buff *tipc_sk_socks_show(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - const struct bucket_table *tbl; - struct rhash_head *pos; - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - struct tipc_sock *tsk; - int str_len = 0; - int i; - - buf = 
tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - rcu_read_lock(); - tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { - spin_lock_bh(&tsk->sk.sk_lock.slock); - str_len += tipc_sk_show(tsk, pb + str_len, - pb_len - str_len, 0); - spin_unlock_bh(&tsk->sk.sk_lock.slock); - } - } - rcu_read_unlock(); - - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - /* tipc_sk_reinit: set non-zero address in all existing sockets * when we go from standalone to network mode. */ -- cgit v1.2.3 From 22ae7cff509f3bb22caaa0003f67eeb93d338fed Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:18 +0100 Subject: tipc: nl compat add noop and remove legacy nl framework Add TIPC_CMD_NOOP to compat layer and remove the old framework. All legacy nl commands are now converted to the compat layer in netlink_compat.c. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e77d738bb771..f73e975af80b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -40,7 +40,6 @@ #include "name_table.h" #include "node.h" #include "link.h" -#include "config.h" #include "name_distr.h" #include "socket.h" -- cgit v1.2.3