summaryrefslogtreecommitdiff
path: root/net/tipc/socket.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-15 09:00:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-15 09:00:47 -0700
commit6c373ca89399c5a3f7ef210ad8f63dc3437da345 (patch)
tree74d1ec65087df1da1021b43ac51acc1ee8601809 /net/tipc/socket.c
parentbb0fd7ab0986105765d11baa82e619c618a235aa (diff)
parent9f9151412dd7aae0e3f51a89ae4a1f8755fdb4d0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add BQL support to via-rhine, from Tino Reichardt. 2) Integrate SWITCHDEV layer support into the DSA layer, so DSA drivers can support hw switch offloading. From Floria Fainelli. 3) Allow 'ip address' commands to initiate multicast group join/leave, from Madhu Challa. 4) Many ipv4 FIB lookup optimizations from Alexander Duyck. 5) Support EBPF in cls_bpf classifier and act_bpf action, from Daniel Borkmann. 6) Remove the ugly compat support in ARP for ugly layers like ax25, rose, etc. And use this to clean up the neigh layer, then use it to implement MPLS support. All from Eric Biederman. 7) Support L3 forwarding offloading in switches, from Scott Feldman. 8) Collapse the LOCAL and MAIN ipv4 FIB tables when possible, to speed up route lookups even further. From Alexander Duyck. 9) Many improvements and bug fixes to the rhashtable implementation, from Herbert Xu and Thomas Graf. In particular, in the case where an rhashtable user bulk adds a large number of items into an empty table, we expand the table much more sanely. 10) Don't make the tcp_metrics hash table per-namespace, from Eric Biederman. 11) Extend EBPF to access SKB fields, from Alexei Starovoitov. 12) Split out new connection request sockets so that they can be established in the main hash table. Much less false sharing since hash lookups go direct to the request sockets instead of having to go first to the listener then to the request socks hashed underneath. From Eric Dumazet. 13) Add async I/O support for crytpo AF_ALG sockets, from Tadeusz Struk. 14) Support stable privacy address generation for RFC7217 in IPV6. From Hannes Frederic Sowa. 15) Hash network namespace into IP frag IDs, also from Hannes Frederic Sowa. 16) Convert PTP get/set methods to use 64-bit time, from Richard Cochran. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1816 commits) fm10k: Bump driver version to 0.15.2 fm10k: corrected VF multicast update fm10k: mbx_update_max_size does not drop all oversized messages fm10k: reset head instead of calling update_max_size fm10k: renamed mbx_tx_dropped to mbx_tx_oversized fm10k: update xcast mode before synchronizing multicast addresses fm10k: start service timer on probe fm10k: fix function header comment fm10k: comment next_vf_mbx flow fm10k: don't handle mailbox events in iov_event path and always process mailbox fm10k: use separate workqueue for fm10k driver fm10k: Set PF queues to unlimited bandwidth during virtualization fm10k: expose tx_timeout_count as an ethtool stat fm10k: only increment tx_timeout_count in Tx hang path fm10k: remove extraneous "Reset interface" message fm10k: separate PF only stats so that VF does not display them fm10k: use hw->mac.max_queues for stats fm10k: only show actual queues, not the maximum in hardware fm10k: allow creation of VLAN on default vid fm10k: fix unused warnings ...
Diffstat (limited to 'net/tipc/socket.c')
-rw-r--r--net/tipc/socket.c272
1 files changed, 102 insertions, 170 deletions
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b4d4467d0bb0..ee90d74d7516 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -35,7 +35,6 @@
*/
#include <linux/rhashtable.h>
-#include <linux/jhash.h>
#include "core.h"
#include "name_table.h"
#include "node.h"
@@ -74,6 +73,7 @@
* @link_cong: non-zero if owner must sleep because of link congestion
* @sent_unacked: # messages sent by socket, and not yet acked by peer
* @rcv_unacked: # messages read by user, but not yet acked back to peer
+ * @remote: 'connected' peer for dgram/rdm
* @node: hash table node
* @rcu: rcu struct for tipc_sock
*/
@@ -96,6 +96,7 @@ struct tipc_sock {
bool link_cong;
uint sent_unacked;
uint rcv_unacked;
+ struct sockaddr_tipc remote;
struct rhash_head node;
struct rcu_head rcu;
};
@@ -114,13 +115,14 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
+static int __tipc_send_stream(struct socket *sock, struct msghdr *m,
+ size_t dsz);
+static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
-
static struct proto tipc_proto;
-static struct proto tipc_proto_kern;
static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
[TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
@@ -130,6 +132,8 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
[TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG }
};
+static const struct rhashtable_params tsk_rht_params;
+
/*
* Revised TIPC socket locking policy:
*
@@ -338,11 +342,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
}
/* Allocate socket's protocol area */
- if (!kern)
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
- else
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern);
-
+ sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
if (sk == NULL)
return -ENOMEM;
@@ -380,75 +380,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
return 0;
}
-/**
- * tipc_sock_create_local - create TIPC socket from inside TIPC module
- * @type: socket type - SOCK_RDM or SOCK_SEQPACKET
- *
- * We cannot use sock_creat_kern here because it bumps module user count.
- * Since socket owner and creator is the same module we must make sure
- * that module count remains zero for module local sockets, otherwise
- * we cannot do rmmod.
- *
- * Returns 0 on success, errno otherwise
- */
-int tipc_sock_create_local(struct net *net, int type, struct socket **res)
-{
- int rc;
-
- rc = sock_create_lite(AF_TIPC, type, 0, res);
- if (rc < 0) {
- pr_err("Failed to create kernel socket\n");
- return rc;
- }
- tipc_sk_create(net, *res, 0, 1);
-
- return 0;
-}
-
-/**
- * tipc_sock_release_local - release socket created by tipc_sock_create_local
- * @sock: the socket to be released.
- *
- * Module reference count is not incremented when such sockets are created,
- * so we must keep it from being decremented when they are released.
- */
-void tipc_sock_release_local(struct socket *sock)
-{
- tipc_release(sock);
- sock->ops = NULL;
- sock_release(sock);
-}
-
-/**
- * tipc_sock_accept_local - accept a connection on a socket created
- * with tipc_sock_create_local. Use this function to avoid that
- * module reference count is inadvertently incremented.
- *
- * @sock: the accepting socket
- * @newsock: reference to the new socket to be created
- * @flags: socket flags
- */
-
-int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
- int flags)
-{
- struct sock *sk = sock->sk;
- int ret;
-
- ret = sock_create_lite(sk->sk_family, sk->sk_type,
- sk->sk_protocol, newsock);
- if (ret < 0)
- return ret;
-
- ret = tipc_accept(sock, *newsock, flags);
- if (ret < 0) {
- sock_release(*newsock);
- return ret;
- }
- (*newsock)->ops = sock->ops;
- return ret;
-}
-
static void tipc_sk_callback(struct rcu_head *head)
{
struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);
@@ -892,7 +823,6 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
/**
* tipc_sendmsg - send message in connectionless manner
- * @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
* @dsz: amount of user data to be sent
@@ -904,9 +834,21 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
*
* Returns the number of bytes sent on success, or errno otherwise
*/
-static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
+static int tipc_sendmsg(struct socket *sock,
struct msghdr *m, size_t dsz)
{
+ struct sock *sk = sock->sk;
+ int ret;
+
+ lock_sock(sk);
+ ret = __tipc_sendmsg(sock, m, dsz);
+ release_sock(sk);
+
+ return ret;
+}
+
+static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
+{
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -915,49 +857,40 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
u32 dnode, dport;
struct sk_buff_head *pktchain = &sk->sk_write_queue;
struct sk_buff *skb;
- struct tipc_name_seq *seq = &dest->addr.nameseq;
+ struct tipc_name_seq *seq;
struct iov_iter save;
u32 mtu;
long timeo;
int rc;
- if (unlikely(!dest))
- return -EDESTADDRREQ;
-
- if (unlikely((m->msg_namelen < sizeof(*dest)) ||
- (dest->family != AF_TIPC)))
- return -EINVAL;
-
if (dsz > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
-
- if (iocb)
- lock_sock(sk);
-
+ if (unlikely(!dest)) {
+ if (tsk->connected && sock->state == SS_READY)
+ dest = &tsk->remote;
+ else
+ return -EDESTADDRREQ;
+ } else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
+ dest->family != AF_TIPC) {
+ return -EINVAL;
+ }
if (unlikely(sock->state != SS_READY)) {
- if (sock->state == SS_LISTENING) {
- rc = -EPIPE;
- goto exit;
- }
- if (sock->state != SS_UNCONNECTED) {
- rc = -EISCONN;
- goto exit;
- }
- if (tsk->published) {
- rc = -EOPNOTSUPP;
- goto exit;
- }
+ if (sock->state == SS_LISTENING)
+ return -EPIPE;
+ if (sock->state != SS_UNCONNECTED)
+ return -EISCONN;
+ if (tsk->published)
+ return -EOPNOTSUPP;
if (dest->addrtype == TIPC_ADDR_NAME) {
tsk->conn_type = dest->addr.name.name.type;
tsk->conn_instance = dest->addr.name.name.instance;
}
}
-
+ seq = &dest->addr.nameseq;
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
if (dest->addrtype == TIPC_ADDR_MCAST) {
- rc = tipc_sendmcast(sock, seq, m, dsz, timeo);
- goto exit;
+ return tipc_sendmcast(sock, seq, m, dsz, timeo);
} else if (dest->addrtype == TIPC_ADDR_NAME) {
u32 type = dest->addr.name.name.type;
u32 inst = dest->addr.name.name.instance;
@@ -972,10 +905,8 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
dport = tipc_nametbl_translate(net, type, inst, &dnode);
msg_set_destnode(mhdr, dnode);
msg_set_destport(mhdr, dport);
- if (unlikely(!dport && !dnode)) {
- rc = -EHOSTUNREACH;
- goto exit;
- }
+ if (unlikely(!dport && !dnode))
+ return -EHOSTUNREACH;
} else if (dest->addrtype == TIPC_ADDR_ID) {
dnode = dest->addr.id.node;
msg_set_type(mhdr, TIPC_DIRECT_MSG);
@@ -990,7 +921,7 @@ new_mtu:
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
if (rc < 0)
- goto exit;
+ return rc;
do {
skb = skb_peek(pktchain);
@@ -1013,9 +944,6 @@ new_mtu:
if (rc)
__skb_queue_purge(pktchain);
} while (!rc);
-exit:
- if (iocb)
- release_sock(sk);
return rc;
}
@@ -1052,7 +980,6 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
/**
* tipc_send_stream - send stream-oriented data
- * @iocb: (unused)
* @sock: socket structure
* @m: data to send
* @dsz: total length of data to be transmitted
@@ -1062,8 +989,19 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
* Returns the number of bytes sent on success (or partial success),
* or errno if no data sent
*/
-static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t dsz)
+static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
+{
+ struct sock *sk = sock->sk;
+ int ret;
+
+ lock_sock(sk);
+ ret = __tipc_send_stream(sock, m, dsz);
+ release_sock(sk);
+
+ return ret;
+}
+
+static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
@@ -1080,7 +1018,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
/* Handle implied connection establishment */
if (unlikely(dest)) {
- rc = tipc_sendmsg(iocb, sock, m, dsz);
+ rc = __tipc_sendmsg(sock, m, dsz);
if (dsz && (dsz == rc))
tsk->sent_unacked = 1;
return rc;
@@ -1088,15 +1026,11 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
if (dsz > (uint)INT_MAX)
return -EMSGSIZE;
- if (iocb)
- lock_sock(sk);
-
if (unlikely(sock->state != SS_CONNECTED)) {
if (sock->state == SS_DISCONNECTING)
- rc = -EPIPE;
+ return -EPIPE;
else
- rc = -ENOTCONN;
- goto exit;
+ return -ENOTCONN;
}
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
@@ -1108,7 +1042,7 @@ next:
send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
if (unlikely(rc < 0))
- goto exit;
+ return rc;
do {
if (likely(!tsk_conn_cong(tsk))) {
rc = tipc_link_xmit(net, pktchain, dnode, portid);
@@ -1133,15 +1067,12 @@ next:
if (rc)
__skb_queue_purge(pktchain);
} while (!rc);
-exit:
- if (iocb)
- release_sock(sk);
+
return sent ? sent : rc;
}
/**
* tipc_send_packet - send a connection-oriented message
- * @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
* @dsz: length of data to be transmitted
@@ -1150,13 +1081,12 @@ exit:
*
* Returns the number of bytes sent on success, or errno otherwise
*/
-static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t dsz)
+static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
{
if (dsz > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
- return tipc_send_stream(iocb, sock, m, dsz);
+ return tipc_send_stream(sock, m, dsz);
}
/* tipc_sk_finish_conn - complete the setup of a connection
@@ -1317,12 +1247,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
break;
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- break;
err = -EAGAIN;
if (!timeo)
break;
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ break;
}
finish_wait(sk_sleep(sk), &wait);
*timeop = timeo;
@@ -1331,7 +1261,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
/**
* tipc_recvmsg - receive packet-oriented message
- * @iocb: (unused)
* @m: descriptor for message info
* @buf_len: total size of user buffer area
* @flags: receive flags
@@ -1341,8 +1270,8 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
*
* Returns size of returned message data, errno otherwise
*/
-static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
+ int flags)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -1426,7 +1355,6 @@ exit:
/**
* tipc_recv_stream - receive stream-oriented data
- * @iocb: (unused)
* @m: descriptor for message info
* @buf_len: total size of user buffer area
* @flags: receive flags
@@ -1436,8 +1364,8 @@ exit:
*
* Returns size of returned message data, errno otherwise
*/
-static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
+ size_t buf_len, int flags)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -1909,17 +1837,26 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
int destlen, int flags)
{
struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
struct msghdr m = {NULL,};
- long timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout;
+ long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
socket_state previous;
- int res;
+ int res = 0;
lock_sock(sk);
- /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
+ /* DGRAM/RDM connect(), just save the destaddr */
if (sock->state == SS_READY) {
- res = -EOPNOTSUPP;
+ if (dst->family == AF_UNSPEC) {
+ memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc));
+ tsk->connected = 0;
+ } else if (destlen != sizeof(struct sockaddr_tipc)) {
+ res = -EINVAL;
+ } else {
+ memcpy(&tsk->remote, dest, destlen);
+ tsk->connected = 1;
+ }
goto exit;
}
@@ -1947,7 +1884,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
if (!timeout)
m.msg_flags = MSG_DONTWAIT;
- res = tipc_sendmsg(NULL, sock, &m, 0);
+ res = __tipc_sendmsg(sock, &m, 0);
if ((res < 0) && (res != -EWOULDBLOCK))
goto exit;
@@ -2027,12 +1964,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
err = -EINVAL;
if (sock->state != SS_LISTENING)
break;
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- break;
err = -EAGAIN;
if (!timeo)
break;
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ break;
}
finish_wait(sk_sleep(sk), &wait);
return err;
@@ -2103,7 +2040,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
struct msghdr m = {NULL,};
tsk_advance_rx_queue(sk);
- tipc_send_packet(NULL, new_sock, &m, 0);
+ __tipc_send_stream(new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
@@ -2154,7 +2091,6 @@ restart:
TIPC_CONN_SHUTDOWN))
tipc_link_xmit_skb(net, skb, dnode,
tsk->portid);
- tipc_node_remove_conn(net, dnode, tsk->portid);
} else {
dnode = tsk_peer_node(tsk);
@@ -2312,7 +2248,7 @@ static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
struct tipc_sock *tsk;
rcu_read_lock();
- tsk = rhashtable_lookup(&tn->sk_rht, &portid);
+ tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
if (tsk)
sock_hold(&tsk->sk);
rcu_read_unlock();
@@ -2334,7 +2270,8 @@ static int tipc_sk_insert(struct tipc_sock *tsk)
portid = TIPC_MIN_PORT;
tsk->portid = portid;
sock_hold(&tsk->sk);
- if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node))
+ if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
+ tsk_rht_params))
return 0;
sock_put(&tsk->sk);
}
@@ -2347,26 +2284,27 @@ static void tipc_sk_remove(struct tipc_sock *tsk)
struct sock *sk = &tsk->sk;
struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
- if (rhashtable_remove(&tn->sk_rht, &tsk->node)) {
+ if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
}
+static const struct rhashtable_params tsk_rht_params = {
+ .nelem_hint = 192,
+ .head_offset = offsetof(struct tipc_sock, node),
+ .key_offset = offsetof(struct tipc_sock, portid),
+ .key_len = sizeof(u32), /* portid */
+ .max_size = 1048576,
+ .min_size = 256,
+ .automatic_shrinking = true,
+};
+
int tipc_sk_rht_init(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct rhashtable_params rht_params = {
- .nelem_hint = 192,
- .head_offset = offsetof(struct tipc_sock, node),
- .key_offset = offsetof(struct tipc_sock, portid),
- .key_len = sizeof(u32), /* portid */
- .hashfn = jhash,
- .max_shift = 20, /* 1M */
- .min_shift = 8, /* 256 */
- };
- return rhashtable_init(&tn->sk_rht, &rht_params);
+ return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
}
void tipc_sk_rht_destroy(struct net *net)
@@ -2609,12 +2547,6 @@ static struct proto tipc_proto = {
.sysctl_rmem = sysctl_tipc_rmem
};
-static struct proto tipc_proto_kern = {
- .name = "TIPC",
- .obj_size = sizeof(struct tipc_sock),
- .sysctl_rmem = sysctl_tipc_rmem
-};
-
/**
* tipc_socket_init - initialize TIPC socket interface
*