summary | refs | log | tree | commit | diff
path: root/net/unix
diff options
context:
space:
mode:
Diffstat (limited to 'net/unix')
-rw-r--r-- net/unix/af_unix.c | 85
-rw-r--r-- net/unix/diag.c | 3
-rw-r--r-- net/unix/scm.c | 13
-rw-r--r-- net/unix/sysctl_net_unix.c | 3
-rw-r--r-- net/unix/unix_bpf.c | 20
5 files changed, 80 insertions, 44 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 86930a8ed012..30b178ebba60 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -116,6 +116,7 @@
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>
+#include <linux/bpf-cgroup.h>
#include "scm.h"
@@ -212,8 +213,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
}
#endif /* CONFIG_SECURITY_NETWORK */
-#define unix_peer(sk) (unix_sk(sk)->peer)
-
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
return unix_peer(osk) == sk;
@@ -680,7 +679,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
* What the above comment does talk about? --ANK(980817)
*/
- if (unix_tot_inflight)
+ if (READ_ONCE(unix_tot_inflight))
unix_gc(); /* Garbage collect fds */
}
@@ -1345,13 +1344,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
unix_state_lock(sk1);
return;
}
- if (sk1 < sk2) {
- unix_state_lock(sk1);
- unix_state_lock_nested(sk2);
- } else {
- unix_state_lock(sk2);
- unix_state_lock_nested(sk1);
- }
+ if (sk1 > sk2)
+ swap(sk1, sk2);
+
+ unix_state_lock(sk1);
+ unix_state_lock_nested(sk2, U_LOCK_SECOND);
}
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1381,6 +1378,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
+ err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
+ if (err)
+ goto out;
+
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
!unix_sk(sk)->addr) {
@@ -1490,6 +1491,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (err)
goto out;
+ err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
+ if (err)
+ goto out;
+
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
@@ -1584,7 +1589,7 @@ restart:
goto out_unlock;
}
- unix_state_lock_nested(sk);
+ unix_state_lock_nested(sk, U_LOCK_SECOND);
if (sk->sk_state != st) {
unix_state_unlock(sk);
@@ -1770,6 +1775,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
} else {
err = addr->len;
memcpy(sunaddr, addr->name, addr->len);
+
+ if (peer)
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
+ CGROUP_UNIX_GETPEERNAME);
+ else
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
+ CGROUP_UNIX_GETSOCKNAME);
}
sock_put(sk);
out:
@@ -1922,6 +1934,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
err = unix_validate_addr(sunaddr, msg->msg_namelen);
if (err)
goto out;
+
+ err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
+ msg->msg_name,
+ &msg->msg_namelen,
+ NULL);
+ if (err)
+ goto out;
} else {
sunaddr = NULL;
err = -ENOTCONN;
@@ -2390,9 +2409,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
EPOLLOUT | EPOLLWRNORM |
EPOLLWRBAND);
- if (msg->msg_name)
+ if (msg->msg_name) {
unix_copy_addr(msg, skb->sk);
+ BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
+ msg->msg_name,
+ &msg->msg_namelen);
+ }
+
if (size > skb->len - skip)
size = skb->len - skip;
else if (size < skb->len - skip)
@@ -2553,15 +2577,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
WRITE_ONCE(u->oob_skb, NULL);
-
+ else
+ skb_get(oob_skb);
unix_state_unlock(sk);
chunk = state->recv_actor(oob_skb, 0, chunk, state);
- if (!(state->flags & MSG_PEEK)) {
+ if (!(state->flags & MSG_PEEK))
UNIXCB(oob_skb).consumed += 1;
- kfree_skb(oob_skb);
- }
+
+ consume_skb(oob_skb);
mutex_unlock(&u->iolock);
@@ -2744,6 +2769,11 @@ unlock:
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
state->msg->msg_name);
unix_copy_addr(state->msg, skb->sk);
+
+ BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
+ state->msg->msg_name,
+ &state->msg->msg_namelen);
+
sunaddr = NULL;
}
@@ -3311,7 +3341,7 @@ static const struct seq_operations unix_seq_ops = {
.show = unix_seq_show,
};
-#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+#ifdef CONFIG_BPF_SYSCALL
struct bpf_unix_iter_state {
struct seq_net_private p;
unsigned int cur_sk;
@@ -3573,7 +3603,7 @@ static struct pernet_operations unix_net_ops = {
.exit = unix_net_exit,
};
-#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
struct unix_sock *unix_sk, uid_t uid)
@@ -3673,7 +3703,7 @@ static int __init af_unix_init(void)
register_pernet_subsys(&unix_net_ops);
unix_bpf_build_proto();
-#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
bpf_iter_register();
#endif
@@ -3681,20 +3711,5 @@ out:
return rc;
}
-static void __exit af_unix_exit(void)
-{
- sock_unregister(PF_UNIX);
- proto_unregister(&unix_dgram_proto);
- proto_unregister(&unix_stream_proto);
- unregister_pernet_subsys(&unix_net_ops);
-}
-
-/* Earlier than device_initcall() so that other drivers invoking
- request_module() don't end up in a loop when modprobe tries
- to use a UNIX socket. But later than subsys_initcall() because
- we depend on stuff initialised there */
+/* Later than subsys_initcall() because we depend on stuff initialised there */
fs_initcall(af_unix_init);
-module_exit(af_unix_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_UNIX);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 616b55c5b890..be19827eca36 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
* queue lock. With the other's queue locked it's
* OK to lock the state.
*/
- unix_state_lock_nested(req);
+ unix_state_lock_nested(req, U_LOCK_DIAG);
peer = unix_sk(req)->peer;
buf[i++] = (peer ? sock_i_ino(peer) : 0);
unix_state_unlock(req);
@@ -339,4 +339,5 @@ static void __exit unix_diag_exit(void)
module_init(unix_diag_init);
module_exit(unix_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UNIX socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */);
diff --git a/net/unix/scm.c b/net/unix/scm.c
index f9152881d77f..822ce0d0d791 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -29,15 +29,14 @@ struct sock *unix_get_socket(struct file *filp)
/* Socket ? */
if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct sock *s = sock->sk;
/* PF_UNIX ? */
- if (s && sock->ops && sock->ops->family == PF_UNIX)
+ if (s && ops && ops->family == PF_UNIX)
u_sock = s;
- } else {
- /* Could be an io_uring instance */
- u_sock = io_uring_get_socket(filp);
}
+
return u_sock;
}
EXPORT_SYMBOL(unix_get_socket);
@@ -63,7 +62,7 @@ void unix_inflight(struct user_struct *user, struct file *fp)
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
- user->unix_inflight++;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
spin_unlock(&unix_gc_lock);
}
@@ -84,7 +83,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
- user->unix_inflight--;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
spin_unlock(&unix_gc_lock);
}
@@ -98,7 +97,7 @@ static inline bool too_many_unix_fds(struct task_struct *p)
{
struct user_struct *user = current_user();
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
return false;
}
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 500129aa710c..3e84b31c355a 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -36,7 +36,8 @@ int __net_init unix_sysctl_register(struct net *net)
table[0].data = &net->unx.sysctl_max_dgram_qlen;
}
- net->unx.ctl = register_net_sysctl(net, "net/unix", table);
+ net->unx.ctl = register_net_sysctl_sz(net, "net/unix", table,
+ ARRAY_SIZE(unix_table));
if (net->unx.ctl == NULL)
goto err_reg;
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 2f9d8271c6ec..bd84785bf8d6 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -159,12 +159,32 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re
int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
+ struct sock *sk_pair;
+
+ /* Restore does not decrement the sk_pair reference yet because we must
+ * keep a reference to the socket until after an RCU grace period
+ * and any pending sends have completed.
+ */
if (restore) {
sk->sk_write_space = psock->saved_write_space;
sock_replace_proto(sk, psock->sk_proto);
return 0;
}
+ /* psock_update_sk_prot can be called multiple times if psock is
+ * added to multiple maps and/or slots in the same map. There is
+ * also an edge case where replacing a psock with itself can trigger
+ * an extra psock_update_sk_prot during the insert process. So it
+ * must be safe to do multiple calls. Here we need to ensure we don't
+ * increment the refcnt through sock_hold many times. There will only
+ * be a single matching destroy operation.
+ */
+ if (!psock->sk_pair) {
+ sk_pair = unix_peer(sk);
+ sock_hold(sk_pair);
+ psock->sk_pair = sk_pair;
+ }
+
unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
sock_replace_proto(sk, &unix_stream_bpf_prot);
return 0;