diff options
Diffstat (limited to 'net')
44 files changed, 615 insertions, 461 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 09f8ced9f8bb..61461b9fa134 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1548,7 +1548,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int count = iov_iter_count(to); - int rsize, received, non_zc = 0; + u32 rsize, received; + bool non_zc = false; char *dataptr; *err = 0; @@ -1571,7 +1572,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, 0, 11, "dqd", fid->fid, offset, rsize); } else { - non_zc = 1; + non_zc = true; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); } @@ -1592,11 +1593,13 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, return 0; } if (rsize < received) { - pr_err("bogus RREAD count (%d > %d)\n", received, rsize); - received = rsize; + pr_err("bogus RREAD count (%u > %u)\n", received, rsize); + *err = -EIO; + p9_req_put(clnt, req); + return 0; } - p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %u\n", received); if (non_zc) { int n = copy_to_iter(dataptr, received, to); @@ -1623,9 +1626,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) *err = 0; while (iov_iter_count(from)) { - int count = iov_iter_count(from); - int rsize = fid->iounit; - int written; + size_t count = iov_iter_count(from); + u32 rsize = fid->iounit; + u32 written; if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; @@ -1633,7 +1636,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (count < rsize) rsize = count; - p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %u (/%zu)\n", fid->fid, offset, rsize, count); /* Don't bother zerocopy for small IO (< 1024) */ @@ -1659,11 +1662,14 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) break; } if (rsize < written) { - pr_err("bogus RWRITE count (%d > %d)\n", written, rsize); - written = rsize; + pr_err("bogus RWRITE count (%u > %u)\n", written, rsize); + *err = -EIO; + iov_iter_revert(from, count - iov_iter_count(from)); + p9_req_put(clnt, req); + break; } - p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %u\n", written); p9_req_put(clnt, req); iov_iter_revert(from, count - written - iov_iter_count(from)); @@ -1712,7 +1718,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) if (written > len) { pr_err("bogus RWRITE count (%d > %u)\n", written, len); - written = len; + written = -EIO; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); @@ -2098,7 +2104,8 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize, non_zc = 0; + int err, non_zc = 0; + u32 rsize; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; @@ -2107,7 +2114,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) iov_iter_kvec(&to, ITER_DEST, &kv, 1, count); - p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %u\n", fid->fid, offset, count); clnt = fid->clnt; @@ -2142,11 +2149,12 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) goto free_and_error; } if (rsize < count) { - pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize); - count = rsize; + pr_err("bogus RREADDIR count (%u > %u)\n", count, rsize); + err = -EIO; + goto free_and_error; } - p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %u\n", count); if (non_zc) memmove(data, dataptr, count); diff --git a/net/9p/error.c b/net/9p/error.c index 8da744494b68..8ba8afc91482 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -16,6 +16,7 @@ #include <linux/list.h> #include <linux/jhash.h> #include <linux/errno.h> +#include <linux/hashtable.h> #include <net/9p/9p.h> /** @@ -33,8 +34,8 @@ struct errormap { struct hlist_node list; }; -#define ERRHASHSZ 32 -static struct hlist_head hash_errmap[ERRHASHSZ]; +#define ERRHASH_BITS 5 +static DEFINE_HASHTABLE(hash_errmap, ERRHASH_BITS); /* FixMe - reduce to a reasonable size */ static struct errormap errmap[] = { @@ -176,18 +177,14 @@ static struct errormap errmap[] = { int p9_error_init(void) { struct errormap *c; - int bucket; - - /* initialize hash table */ - for (bucket = 0; bucket < ERRHASHSZ; bucket++) - INIT_HLIST_HEAD(&hash_errmap[bucket]); + u32 hash; /* load initial error map into hash table */ for (c = errmap; c->name; c++) { c->namelen = strlen(c->name); - bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; + hash = jhash(c->name, c->namelen, 0); INIT_HLIST_NODE(&c->list); - hlist_add_head(&c->list, &hash_errmap[bucket]); + hash_add(hash_errmap, &c->list, hash); } return 1; @@ -205,12 +202,12 @@ int p9_errstr2errno(char *errstr, int len) { int errno; struct errormap *c; - int bucket; + u32 hash; errno = 0; c = NULL; - bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, &hash_errmap[bucket], list) { + hash = jhash(errstr, len, 0); + hash_for_each_possible(hash_errmap, c, list, hash) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 196060dc6138..339ec4e54778 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> +#include <linux/in6.h> #include <linux/module.h> #include <linux/net.h> #include <linux/ipv6.h> @@ -191,12 +192,13 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_lock(&m->req_lock); - if (m->err) { + if (READ_ONCE(m->err)) { spin_unlock(&m->req_lock); return; } - m->err = err; + WRITE_ONCE(m->err, err); + ASSERT_EXCLUSIVE_WRITER(m->err); list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); @@ -283,7 +285,7 @@ static void p9_read_work(struct work_struct *work) m = container_of(work, struct p9_conn, rq); - if (m->err < 0) + if (READ_ONCE(m->err) < 0) return; p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset); @@ -450,7 +452,7 @@ static void p9_write_work(struct work_struct *work) m = container_of(work, struct p9_conn, wq); - if (m->err < 0) { + if (READ_ONCE(m->err) < 0) { clear_bit(Wworksched, &m->wsched); return; } @@ -622,7 +624,7 @@ static void p9_poll_mux(struct p9_conn *m) __poll_t n; int err = -ECONNRESET; - if (m->err < 0) + if (READ_ONCE(m->err) < 0) return; n = p9_fd_poll(m->client, NULL, &err); @@ -665,6 +667,7 @@ static void p9_poll_mux(struct p9_conn *m) static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { __poll_t n; + int err; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; @@ -673,9 +676,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) spin_lock(&m->req_lock); - if (m->err < 0) { + err = READ_ONCE(m->err); + if (err < 0) { spin_unlock(&m->req_lock); - return m->err; + return err; } WRITE_ONCE(req->status, REQ_STATUS_UNSENT); @@ -954,64 +958,55 @@ static void p9_fd_close(struct p9_client *client) kfree(ts); } -/* - * stolen from NFS - maybe should be made a generic function? - */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} - static int p9_bind_privport(struct socket *sock) { - struct sockaddr_in cl; + struct sockaddr_storage stor = { 0 }; int port, err = -EINVAL; - memset(&cl, 0, sizeof(cl)); - cl.sin_family = AF_INET; - cl.sin_addr.s_addr = htonl(INADDR_ANY); + stor.ss_family = sock->ops->family; + if (stor.ss_family == AF_INET) + ((struct sockaddr_in *)&stor)->sin_addr.s_addr = htonl(INADDR_ANY); + else + ((struct sockaddr_in6 *)&stor)->sin6_addr = in6addr_any; for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { - cl.sin_port = htons((ushort)port); - err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (stor.ss_family == AF_INET) + ((struct sockaddr_in *)&stor)->sin_port = htons((ushort)port); + else + ((struct sockaddr_in6 *)&stor)->sin6_port = htons((ushort)port); + err = kernel_bind(sock, (struct sockaddr *)&stor, sizeof(stor)); if (err != -EADDRINUSE) break; } return err; } - static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; + char port_str[6]; struct socket *csocket; - struct sockaddr_in sin_server; + struct sockaddr_storage stor = { 0 }; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; - if (addr == NULL || valid_ipaddr4(addr) < 0) + if (!addr) return -EINVAL; + sprintf(port_str, "%u", opts.port); + err = inet_pton_with_scope(current->nsproxy->net_ns, AF_UNSPEC, addr, + port_str, &stor); + if (err < 0) + return err; + csocket = NULL; client->trans_opts.tcp.port = opts.port; client->trans_opts.tcp.privport = opts.privport; - sin_server.sin_family = AF_INET; - sin_server.sin_addr.s_addr = in_aton(addr); - sin_server.sin_port = htons(opts.port); - err = __sock_create(current->nsproxy->net_ns, PF_INET, + err = __sock_create(current->nsproxy->net_ns, stor.ss_family, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", @@ -1030,8 +1025,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) } err = READ_ONCE(csocket->ops)->connect(csocket, - (struct sockaddr *)&sin_server, - sizeof(struct sockaddr_in), 0); + (struct sockaddr *)&stor, + sizeof(stor), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", __func__, task_pid_nr(current), addr); diff --git a/net/core/Makefile b/net/core/Makefile index a10c3bd96798..b2a76ce33932 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -45,5 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o obj-$(CONFIG_NET_TEST) += net_test.o obj-$(CONFIG_NET_DEVMEM) += devmem.o -obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o +obj-$(CONFIG_DEBUG_NET) += lock_debug.o obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o diff --git a/net/core/dev.c b/net/core/dev.c index be17e0660144..0608605cfc24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1771,6 +1771,7 @@ void netif_disable_lro(struct net_device *dev) netdev_unlock_ops(lower_dev); } } +EXPORT_IPV6_MOD(netif_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device @@ -1858,7 +1859,9 @@ static int call_netdevice_register_net_notifiers(struct notifier_block *nb, int err; for_each_netdev(net, dev) { + netdev_lock_ops(dev); err = call_netdevice_register_notifiers(nb, dev); + netdev_unlock_ops(dev); if (err) goto rollback; } @@ -10284,7 +10287,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) goto unlock; } + netdev_lock_ops(dev); err = dev_xdp_attach_link(dev, &extack, link); + netdev_unlock_ops(dev); rtnl_unlock(); if (err) { @@ -11045,7 +11050,9 @@ int register_netdevice(struct net_device *dev) memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. */ + netdev_lock_ops(dev); ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); + netdev_unlock_ops(dev); ret = notifier_to_errno(ret); if (ret) { /* Expect explicit free_netdev() on failure */ @@ -12057,7 +12064,7 @@ void unregister_netdev(struct net_device *dev) } EXPORT_SYMBOL(unregister_netdev); -int netif_change_net_namespace(struct net_device *dev, struct net *net, +int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack) { @@ -12142,11 +12149,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net, * And now a mini version of register_netdevice unregister_netdevice. */ + netdev_lock_ops(dev); /* If device is running close it first. */ netif_close(dev); - /* And unlink it from device chain */ unlist_netdevice(dev); + netdev_unlock_ops(dev); synchronize_net(); @@ -12208,11 +12216,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net, err = netdev_change_owner(dev, net_old, net); WARN_ON(err); + netdev_lock_ops(dev); /* Add the device back in the hashes */ list_netdevice(dev); - /* Notify protocols, that a new device appeared. */ call_netdevice_notifiers(NETDEV_REGISTER, dev); + netdev_unlock_ops(dev); /* * Prevent userspace races by waiting until the network diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 8dbc60612100..90bafb0b1b8c 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -117,13 +117,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - int ret; - - netdev_lock_ops(dev); - ret = netif_change_net_namespace(dev, net, pat, 0, NULL); - netdev_unlock_ops(dev); - - return ret; + return __dev_change_net_namespace(dev, net, pat, 0, NULL); } EXPORT_SYMBOL_GPL(dev_change_net_namespace); diff --git a/net/core/devmem.c b/net/core/devmem.c index ee145a2aa41c..6e27a47d0493 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -8,7 +8,6 @@ */ #include <linux/dma-buf.h> -#include <linux/ethtool_netlink.h> #include <linux/genalloc.h> #include <linux/mm.h> #include <linux/netdevice.h> @@ -117,21 +116,19 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int rxq_idx; - int err; if (binding->list.next) list_del(&binding->list); xa_for_each(&binding->bound_rxqs, xa_idx, rxq) { - WARN_ON(rxq->mp_params.mp_priv != binding); - - rxq->mp_params.mp_priv = NULL; - rxq->mp_params.mp_ops = NULL; + const struct pp_memory_provider_params mp_params = { + .mp_priv = binding, + .mp_ops = &dmabuf_devmem_ops, + }; rxq_idx = get_netdev_rx_queue_index(rxq); - err = netdev_rx_queue_restart(binding->dev, rxq_idx); - WARN_ON(err && err != -ENETDOWN); + __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); @@ -143,57 +140,28 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack) { + struct pp_memory_provider_params mp_params = { + .mp_priv = binding, + .mp_ops = &dmabuf_devmem_ops, + }; struct netdev_rx_queue *rxq; u32 xa_idx; int err; - if (rxq_idx >= dev->real_num_rx_queues) { - NL_SET_ERR_MSG(extack, "rx queue index out of range"); - return -ERANGE; - } - - if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { - NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); - return -EINVAL; - } - - if (dev->cfg->hds_thresh) { - NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); - return -EINVAL; - } + err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack); + if (err) + return err; rxq = __netif_get_rx_queue(dev, rxq_idx); - if (rxq->mp_params.mp_ops) { - NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); - return -EEXIST; - } - -#ifdef CONFIG_XDP_SOCKETS - if (rxq->pool) { - NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); - return -EBUSY; - } -#endif - err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b, GFP_KERNEL); if (err) - return err; - - rxq->mp_params.mp_priv = binding; - rxq->mp_params.mp_ops = &dmabuf_devmem_ops; - - err = netdev_rx_queue_restart(dev, rxq_idx); - if (err) - goto err_xa_erase; + goto err_close_rxq; return 0; -err_xa_erase: - rxq->mp_params.mp_priv = NULL; - rxq->mp_params.mp_ops = NULL; - xa_erase(&binding->bound_rxqs, xa_idx); - +err_close_rxq: + __net_mp_close_rxq(dev, rxq_idx, &mp_params); return err; } diff --git a/net/core/dst.c b/net/core/dst.c index c99b95cf9cbb..795ca07e28a4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -165,6 +165,14 @@ static void dst_count_dec(struct dst_entry *dst) void dst_release(struct dst_entry *dst) { if (dst && rcuref_put(&dst->__rcuref)) { +#ifdef CONFIG_DST_CACHE + if (dst->flags & DST_METADATA) { + struct metadata_dst *md_dst = (struct metadata_dst *)dst; + + if (md_dst->type == METADATA_IP_TUNNEL) + dst_cache_reset_now(&md_dst->u.tun_info.dst_cache); + } +#endif dst_count_dec(dst); call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu); } diff --git a/net/core/filter.c b/net/core/filter.c index 46ae8eb7a03c..bc6828761a47 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8137,6 +8137,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_load_bytes_relative_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_netns_cookie: + return &bpf_get_netns_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; case BPF_FUNC_perf_event_output: @@ -9697,7 +9699,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, queue_mapping): if (type == BPF_WRITE) { - u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); + u32 offset = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) { *insn++ = BPF_JMP_A(0); /* noop */ @@ -9706,7 +9708,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, if (BPF_CLASS(si->code) == BPF_STX) *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); - *insn++ = BPF_EMIT_STORE(BPF_H, si, off); + *insn++ = BPF_EMIT_STORE(BPF_H, si, offset); } else { *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, diff --git a/net/core/rtnl_net_debug.c b/net/core/lock_debug.c index 7ecd28cc1c22..b7f22dc92a6f 100644 --- a/net/core/rtnl_net_debug.c +++ b/net/core/lock_debug.c @@ -6,10 +6,11 @@ #include <linux/notifier.h> #include <linux/rtnetlink.h> #include <net/net_namespace.h> +#include <net/netdev_lock.h> #include <net/netns/generic.h> -static int rtnl_net_debug_event(struct notifier_block *nb, - unsigned long event, void *ptr) +int netdev_debug_event(struct notifier_block *nb, unsigned long event, + void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); @@ -17,11 +18,13 @@ static int rtnl_net_debug_event(struct notifier_block *nb, /* Keep enum and don't add default to trigger -Werror=switch */ switch (cmd) { + case NETDEV_REGISTER: case NETDEV_UP: + netdev_ops_assert_locked(dev); + fallthrough; case NETDEV_DOWN: case NETDEV_REBOOT: case NETDEV_CHANGE: - case NETDEV_REGISTER: case NETDEV_UNREGISTER: case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: @@ -66,6 +69,7 @@ static int rtnl_net_debug_event(struct notifier_block *nb, return NOTIFY_DONE; } +EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL"); static int rtnl_net_debug_net_id; @@ -74,7 +78,7 @@ static int __net_init rtnl_net_debug_net_init(struct net *net) struct notifier_block *nb; nb = net_generic(net, rtnl_net_debug_net_id); - nb->notifier_call = rtnl_net_debug_event; + nb->notifier_call = netdev_debug_event; return register_netdevice_notifier_net(net, nb); } @@ -95,14 +99,14 @@ static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = { }; static struct notifier_block rtnl_net_debug_block = { - .notifier_call = rtnl_net_debug_event, + .notifier_call = netdev_debug_event, }; static int __init rtnl_net_debug_init(void) { int ret; - ret = register_pernet_device(&rtnl_net_debug_net_ops); + ret = register_pernet_subsys(&rtnl_net_debug_net_ops); if (ret) return ret; diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fd1cfa9707dc..5d7af50fe702 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -874,12 +874,6 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock; } - if (dev_xdp_prog_count(netdev)) { - NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached"); - err = -EEXIST; - goto err_unlock; - } - binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); @@ -951,12 +945,14 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) { struct net_devmem_dmabuf_binding *binding; struct net_devmem_dmabuf_binding *temp; + struct net_device *dev; mutex_lock(&priv->lock); list_for_each_entry_safe(binding, temp, &priv->bindings, list) { - netdev_lock(binding->dev); + dev = binding->dev; + netdev_lock(dev); net_devmem_unbind_dmabuf(binding); - netdev_unlock(binding->dev); + netdev_unlock(dev); } mutex_unlock(&priv->lock); } diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 3af716f77a13..d126f10197bf 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/ethtool_netlink.h> #include <linux/netdevice.h> #include <net/netdev_lock.h> #include <net/netdev_queues.h> @@ -86,8 +87,9 @@ err_free_new_mem: } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); -static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *p) +int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) { struct netdev_rx_queue *rxq; int ret; @@ -95,16 +97,41 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, if (!netdev_need_ops_lock(dev)) return -EOPNOTSUPP; - if (ifq_idx >= dev->real_num_rx_queues) + if (rxq_idx >= dev->real_num_rx_queues) return -EINVAL; - ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues); + rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); - rxq = __netif_get_rx_queue(dev, ifq_idx); - if (rxq->mp_params.mp_ops) + if (rxq_idx >= dev->real_num_rx_queues) { + NL_SET_ERR_MSG(extack, "rx queue index out of range"); + return -ERANGE; + } + if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); + return -EINVAL; + } + if (dev->cfg->hds_thresh) { + NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); + return -EINVAL; + } + if (dev_xdp_prog_count(dev)) { + NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached"); return -EEXIST; + } + + rxq = __netif_get_rx_queue(dev, rxq_idx); + if (rxq->mp_params.mp_ops) { + NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); + return -EEXIST; + } +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) { + NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); + return -EBUSY; + } +#endif rxq->mp_params = *p; - ret = netdev_rx_queue_restart(dev, ifq_idx); + ret = netdev_rx_queue_restart(dev, rxq_idx); if (ret) { rxq->mp_params.mp_ops = NULL; rxq->mp_params.mp_priv = NULL; @@ -112,21 +139,22 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, return ret; } -int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, +int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, struct pp_memory_provider_params *p) { int ret; netdev_lock(dev); - ret = __net_mp_open_rxq(dev, ifq_idx, p); + ret = __net_mp_open_rxq(dev, rxq_idx, p, NULL); netdev_unlock(dev); return ret; } -static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *old_p) +void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, + const struct pp_memory_provider_params *old_p) { struct netdev_rx_queue *rxq; + int err; if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) return; @@ -146,7 +174,8 @@ static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, rxq->mp_params.mp_ops = NULL; rxq->mp_params.mp_priv = NULL; - WARN_ON(netdev_rx_queue_restart(dev, ifq_idx)); + err = netdev_rx_queue_restart(dev, ifq_idx); + WARN_ON(err && err != -ENETDOWN); } void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5a24a30dfc2d..c23852835050 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1177,6 +1177,9 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, /* IFLA_VF_STATS_TX_DROPPED */ nla_total_size_64bit(sizeof(__u64))); } + if (dev->netdev_ops->ndo_get_vf_guid) + size += num_vfs * 2 * + nla_total_size(sizeof(struct ifla_vf_guid)); return size; } else return 0; @@ -3022,8 +3025,6 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, char ifname[IFNAMSIZ]; int err; - netdev_lock_ops(dev); - err = validate_linkmsg(dev, tb, extack); if (err < 0) goto errout; @@ -3039,14 +3040,16 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); - err = netif_change_net_namespace(dev, tgt_net, pat, + err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex, extack); if (err) - goto errout; + return err; status |= DO_SETLINK_MODIFIED; } + netdev_lock_ops(dev); + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 754f60fb6e25..77e5705ac799 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -281,7 +281,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (!in_dev->arp_parms) goto out_kfree; if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) - dev_disable_lro(dev); + netif_disable_lro(dev); /* Reference in_dev->dev */ netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); /* Account for reference dev->ip_ptr (below) */ diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index a3676155be78..f65d2f727381 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -416,7 +416,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, skb_dst_update_pmtu_no_confirm(skb, mtu); - if (!reply || skb->pkt_type == PACKET_HOST) + if (!reply) return 0; if (skb->protocol == htons(ETH_P_IP)) @@ -451,7 +451,7 @@ static const struct nla_policy geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = { [LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, - [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, + [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 127 }, }; static const struct nla_policy diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ea8de00f669d..6edc441b3702 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1525,25 +1525,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) __tcp_cleanup_rbuf(sk, copied); } -/* private version of sock_rfree() avoiding one atomic_sub() */ -void tcp_sock_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - unsigned int len = skb->truesize; - - sock_owned_by_me(sk); - atomic_set(&sk->sk_rmem_alloc, - atomic_read(&sk->sk_rmem_alloc) - len); - - sk_forward_alloc_add(sk, len); - sk_mem_reclaim(sk); -} - static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); - if (likely(skb->destructor == tcp_sock_rfree)) { - tcp_sock_rfree(skb); + if (likely(skb->destructor == sock_rfree)) { + sock_rfree(skb); skb->destructor = NULL; skb->sk = NULL; return skb_attempt_defer_free(skb); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ca40665145c6..1a6b1bc54245 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); sk_forced_mem_schedule(sk, skb->truesize); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); TCP_SKB_CB(skb)->seq++; TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e1f952fbac48..a35018e2d0ba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5171,7 +5171,7 @@ end: if (tcp_is_sack(tp)) tcp_grow_window(sk, skb, false); skb_condense(skb); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); } } @@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { tcp_add_receive_queue(sk, skb); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); } return eaten; } @@ -5504,7 +5504,7 @@ skip_this: __skb_queue_before(list, skb, nskb); else __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ - tcp_skb_set_owner_r(nskb, sk); + skb_set_owner_r(nskb, sk); mptcp_skb_ext_move(nskb, skb); /* Copy data, releasing collapsed skbs. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d0bffcfa56d8..2742cc7602bb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1625,12 +1625,12 @@ static bool udp_skb_has_head_state(struct sk_buff *skb) } /* fully reclaim rmem/fwd memory allocated for skb */ -static void udp_rmem_release(struct sock *sk, int size, int partial, - bool rx_queue_lock_held) +static void udp_rmem_release(struct sock *sk, unsigned int size, + int partial, bool rx_queue_lock_held) { struct udp_sock *up = udp_sk(sk); struct sk_buff_head *sk_queue; - int amt; + unsigned int amt; if (likely(partial)) { up->forward_deficit += size; @@ -1650,10 +1650,8 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (!rx_queue_lock_held) spin_lock(&sk_queue->lock); - - sk_forward_alloc_add(sk, size); - amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1); - sk_forward_alloc_add(sk, -amt); + amt = (size + sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1); + sk_forward_alloc_add(sk, size - amt); if (amt) __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT); @@ -1725,17 +1723,25 @@ static int udp_rmem_schedule(struct sock *sk, int size) int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { struct sk_buff_head *list = &sk->sk_receive_queue; - int rmem, err = -ENOMEM; + unsigned int rmem, rcvbuf; spinlock_t *busy = NULL; - int size, rcvbuf; + int size, err = -ENOMEM; - /* Immediately drop when the receive queue is full. - * Always allow at least one packet. - */ rmem = atomic_read(&sk->sk_rmem_alloc); rcvbuf = READ_ONCE(sk->sk_rcvbuf); - if (rmem > rcvbuf) - goto drop; + size = skb->truesize; + + /* Immediately drop when the receive queue is full. + * Cast to unsigned int performs the boundary check for INT_MAX. + */ + if (rmem + size > rcvbuf) { + if (rcvbuf > INT_MAX >> 1) + goto drop; + + /* Always allow at least one packet for small buffer. */ + if (rmem > rcvbuf) + goto drop; + } /* Under mem pressure, it might be helpful to help udp_recvmsg() * having linear skbs : @@ -1745,10 +1751,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) */ if (rmem > (rcvbuf >> 1)) { skb_condense(skb); - + size = skb->truesize; busy = busylock_acquire(sk); } - size = skb->truesize; + udp_set_dev_scratch(skb); atomic_add(size, &sk->sk_rmem_alloc); @@ -1835,7 +1841,7 @@ EXPORT_IPV6_MOD_GPL(skb_consume_udp); static struct sk_buff *__first_packet_length(struct sock *sk, struct sk_buff_head *rcvq, - int *total) + unsigned int *total) { struct sk_buff *skb; @@ -1868,8 +1874,8 @@ static int first_packet_length(struct sock *sk) { struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue; struct sk_buff_head *sk_queue = &sk->sk_receive_queue; + unsigned int total = 0; struct sk_buff *skb; - int total = 0; int res; spin_lock_bh(&rcvq->lock); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ac8cc1076536..c3b908fccbc1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -80,6 +80,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/l3mdev.h> +#include <net/netdev_lock.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> #include <linux/netconf.h> @@ -377,6 +378,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) int err = -ENOMEM; ASSERT_RTNL(); + netdev_ops_assert_locked(dev); if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL); @@ -402,7 +404,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) return ERR_PTR(err); } if (ndev->cnf.forwarding) - dev_disable_lro(dev); + netif_disable_lro(dev); /* We refer to the device */ netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL); @@ -3152,10 +3154,12 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_net_lock(net); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); + netdev_lock_ops(dev); if (dev) err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL); else err = -ENODEV; + netdev_unlock_ops(dev); rtnl_net_unlock(net); return err; } @@ -5026,9 +5030,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!dev) { NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface"); err = -ENODEV; - goto unlock; + goto unlock_rtnl; } + netdev_lock_ops(dev); idev = ipv6_find_idev(dev); if (IS_ERR(idev)) { err = PTR_ERR(idev); @@ -5065,6 +5070,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, in6_ifa_put(ifa); unlock: + netdev_unlock_ops(dev); +unlock_rtnl: rtnl_net_unlock(net); return err; @@ -5784,6 +5791,27 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, } } +static int inet6_fill_ifla6_stats_attrs(struct sk_buff *skb, + struct inet6_dev *idev) +{ + struct nlattr *nla; + + nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); + if (!nla) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); + + nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); + if (!nla) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, u32 ext_filter_mask) { @@ -5806,18 +5834,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, /* XXX - MC not implemented */ - if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS) - return 0; - - nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (!nla) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); - - nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (!nla) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) { + if (inet6_fill_ifla6_stats_attrs(skb, idev) < 0) + goto nla_put_failure; + } nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); if (!nla) @@ -6503,7 +6523,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, if (idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); + netdev_lock_ops(idev->dev); addrconf_init_auto_addrs(idev->dev); + netdev_unlock_ops(idev->dev); } } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; @@ -6515,7 +6537,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); + netdev_lock_ops(idev->dev); addrconf_init_auto_addrs(idev->dev); + netdev_unlock_ops(idev->dev); } } } diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index dbcea9fee626..62618a058b8f 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1072,8 +1072,13 @@ static int calipso_sock_getattr(struct sock *sk, struct ipv6_opt_hdr *hop; int opt_len, len, ret_val = -ENOMSG, offset; unsigned char *opt; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + + if (!pinfo) + return -EAFNOSUPPORT; + txopts = txopt_get(pinfo); if (!txopts || !txopts->hopopt) goto done; @@ -1125,8 +1130,13 @@ static int calipso_sock_setattr(struct sock *sk, { int ret_val; struct ipv6_opt_hdr *old, *new; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + + if (!pinfo) + return -EAFNOSUPPORT; + txopts = txopt_get(pinfo); old = NULL; if (txopts) old = txopts->hopopt; @@ -1153,8 +1163,13 @@ static int calipso_sock_setattr(struct sock *sk, static void calipso_sock_delattr(struct sock *sk) { struct ipv6_opt_hdr *new_hop; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + + if (!pinfo) + return; + txopts = txopt_get(pinfo); if (!txopts || !txopts->hopopt) goto done; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3406a0d45bd..ab12b816ab94 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -412,12 +412,37 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } +static struct fib6_info * +rt6_multipath_first_sibling_rcu(const struct fib6_info *rt) +{ + struct fib6_info *iter; + struct fib6_node *fn; + + fn = rcu_dereference(rt->fib6_node); + if (!fn) + goto out; + iter = rcu_dereference(fn->leaf); + if (!iter) + goto out; + + while (iter) { + if (iter->fib6_metric == rt->fib6_metric && + rt6_qualify_for_ecmp(iter)) + return iter; + iter = rcu_dereference(iter->fib6_next); + } + +out: + return NULL; +} + void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { - struct fib6_info *match = res->f6i; + struct fib6_info *first, *match = res->f6i; struct fib6_info *sibling; + int hash; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; @@ -440,16 +465,25 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, return; } - if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) + first = rt6_multipath_first_sibling_rcu(match); + if (!first) goto out; - list_for_each_entry_rcu(sibling, &match->fib6_siblings, + hash = fl6->mp_hash; + if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && + rt6_score_route(first->fib6_nh, first->fib6_flags, oif, + strict) >= 0) { + match = first; + goto out; + } + + list_for_each_entry_rcu(sibling, &first->fib6_siblings, fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); - if (fl6->mp_hash > nh_upper_bound) + if (hash > nh_upper_bound) continue; if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) break; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index b9f492ddf93b..83c629529b57 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -33,7 +33,7 @@ struct mpls_dev { #define MPLS_INC_STATS_LEN(mdev, len, pkts_field, bytes_field) \ do { \ - __typeof__(*(mdev)->stats) *ptr = \ + TYPEOF_UNQUAL(*(mdev)->stats) *ptr = \ raw_cpu_ptr((mdev)->stats); \ local_bh_disable(); \ u64_stats_update_begin(&ptr->syncp); \ @@ -45,7 +45,7 @@ struct mpls_dev { #define MPLS_INC_STATS(mdev, field) \ do { \ - __typeof__(*(mdev)->stats) *ptr = \ + TYPEOF_UNQUAL(*(mdev)->stats) *ptr = \ raw_cpu_ptr((mdev)->stats); \ local_bh_disable(); \ u64_stats_update_begin(&ptr->syncp); \ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index efe8d86496db..409bd415ef1d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -754,8 +754,6 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req = mptcp_subflow_rsk(req); msk = subflow_req->msk; - if (!msk) - return false; subflow_generate_hmac(READ_ONCE(msk->remote_key), READ_ONCE(msk->local_key), @@ -850,12 +848,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); - if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) || - !subflow_hmac_valid(req, &mp_opt) || - !mptcp_can_accept_new_subflow(subflow_req->msk)) { - SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); + if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK)) fallback = true; - } } create_child: @@ -905,6 +899,13 @@ create_child: goto dispose_child; } + if (!subflow_hmac_valid(req, &mp_opt) || + !mptcp_can_accept_new_subflow(subflow_req->msk)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + goto dispose_child; + } + /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c2df81b7e950..a133e1c175ce 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2839,11 +2839,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, err = nft_netdev_register_hooks(ctx->net, &hook.list); if (err < 0) goto err_hooks; + + unregister = true; } } - unregister = true; - if (nla[NFTA_CHAIN_COUNTERS]) { if (!nft_is_base_chain(chain)) { err = -EOPNOTSUPP; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 8bfac4185ac7..abb0c8ec6371 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -309,7 +309,8 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, nft_setelem_expr_foreach(expr, elem_expr, size) { if (expr->ops->gc && - expr->ops->gc(read_pnet(&set->net), expr)) + expr->ops->gc(read_pnet(&set->net), expr) && + set->flags & NFT_SET_EVAL) return true; } diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 681301b46aa4..0c63d1367cf7 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -335,13 +335,13 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = { [NFTA_TUNNEL_KEY_GENEVE_CLASS] = { .type = NLA_U16 }, [NFTA_TUNNEL_KEY_GENEVE_TYPE] = { .type = NLA_U8 }, - [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, + [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 127 }, }; static int nft_tunnel_obj_geneve_init(const struct nlattr *attr, struct nft_tunnel_opts *opts) { - struct geneve_opt *opt = (struct geneve_opt *)opts->u.data + opts->len; + struct geneve_opt *opt = (struct geneve_opt *)(opts->u.data + opts->len); struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1]; int err, data_len; @@ -625,7 +625,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, if (!inner) goto failure; while (opts->len > offset) { - opt = (struct geneve_opt *)opts->u.data + offset; + opt = (struct geneve_opt *)(opts->u.data + offset); if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS, opt->opt_class) || nla_put_u8(skb, NFTA_TUNNEL_KEY_GENEVE_TYPE, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 704c858cf209..61fea7baae5d 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -947,12 +947,6 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, pskb_trim(skb, ovs_mac_header_len(key)); } - /* Need to set the pkt_type to involve the routing layer. The - * packet movement through the OVS datapath doesn't generally - * use routing, but this is needed for tunnel cases. - */ - skb->pkt_type = PACKET_OUTGOING; - if (likely(!mru || (skb->len <= mru + vport->dev->hard_header_len))) { ovs_vport_send(vport, skb, ovs_key_mac_proto(key)); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index ae5dea7c48a8..2cef4b08befb 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -68,7 +68,7 @@ geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, - .len = 128 }, + .len = 127 }, }; static const struct nla_policy diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 03505673d523..099ff6a3e1f5 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -766,7 +766,7 @@ geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = { [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, - .len = 128 }, + .len = 127 }, }; static const struct nla_policy diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index defb05c1fba4..f74a097f54ae 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1267,38 +1267,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev, struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); -#ifdef CONFIG_MODULES - if (ops == NULL && kind != NULL) { - char name[IFNAMSIZ]; - if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { - /* We dropped the RTNL semaphore in order to - * perform the module load. So, even if we - * succeeded in loading the module we have to - * tell the caller to replay the request. We - * indicate this using -EAGAIN. - * We replay the request because the device may - * go away in the mean time. - */ - netdev_unlock_ops(dev); - rtnl_unlock(); - request_module(NET_SCH_ALIAS_PREFIX "%s", name); - rtnl_lock(); - netdev_lock_ops(dev); - ops = qdisc_lookup_ops(kind); - if (ops != NULL) { - /* We will try again qdisc_lookup_ops, - * so don't keep a reference. - */ - module_put(ops->owner); - err = -EAGAIN; - goto err_out; - } - } - } -#endif - - err = -ENOENT; if (!ops) { + err = -ENOENT; NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } @@ -1623,8 +1593,7 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack, struct net_device *dev, struct nlattr *tca[TCA_MAX + 1], - struct tcmsg *tcm, - bool *replay) + struct tcmsg *tcm) { struct Qdisc *q = NULL; struct Qdisc *p = NULL; @@ -1789,13 +1758,8 @@ create_n_graft2: tcm->tcm_parent, tcm->tcm_handle, tca, &err, extack); } - if (q == NULL) { - if (err == -EAGAIN) { - *replay = true; - return 0; - } + if (!q) return err; - } graft: err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); @@ -1808,6 +1772,27 @@ graft: return 0; } +static void request_qdisc_module(struct nlattr *kind) +{ + struct Qdisc_ops *ops; + char name[IFNAMSIZ]; + + if (!kind) + return; + + ops = qdisc_lookup_ops(kind); + if (ops) { + module_put(ops->owner); + return; + } + + if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { + rtnl_unlock(); + request_module(NET_SCH_ALIAS_PREFIX "%s", name); + rtnl_lock(); + } +} + /* * Create/change qdisc. */ @@ -1818,27 +1803,23 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct tcmsg *tcm; - bool replay; int err; -replay: - /* Reinit, just in case something touches this. */ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; + request_qdisc_module(tca[TCA_KIND]); + tcm = nlmsg_data(n); dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; - replay = false; netdev_lock_ops(dev); - err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm, &replay); + err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm); netdev_unlock_ops(dev); - if (replay) - goto replay; return err; } diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 20ff7386b74b..f485f62ab721 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -123,8 +123,6 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Check to update highest and lowest priorities. */ if (skb_queue_empty(lp_qdisc)) { if (q->lowest_prio == q->highest_prio) { - /* The incoming packet is the only packet in queue. */ - BUG_ON(sch->q.qlen != 1); q->lowest_prio = prio; q->highest_prio = prio; } else { @@ -156,7 +154,6 @@ static struct sk_buff *skbprio_dequeue(struct Qdisc *sch) /* Update highest priority field. */ if (skb_queue_empty(hpq)) { if (q->lowest_prio == q->highest_prio) { - BUG_ON(sch->q.qlen); q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; } else { diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 8e1e97be4df7..ee3eac338a9d 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -525,6 +525,8 @@ static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, return ret; } +static DEFINE_MUTEX(sctp_sysctl_mutex); + static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -549,6 +551,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, if (new_value > max || new_value < min) return -EINVAL; + mutex_lock(&sctp_sysctl_mutex); net->sctp.udp_port = new_value; sctp_udp_sock_stop(net); if (new_value) { @@ -561,6 +564,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, lock_sock(sk); sctp_sk(sk)->udp_port = htons(net->sctp.udp_port); release_sock(sk); + mutex_unlock(&sctp_sysctl_mutex); } return ret; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 9a27201638e2..8f2d65c1e831 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -138,60 +138,6 @@ out: return ret; } -/** - * krb5_decrypt - simple decryption of an RPCSEC GSS payload - * @tfm: initialized cipher transform - * @iv: pointer to an IV - * @in: ciphertext to decrypt - * @out: OUT: plaintext - * @length: length of input and output buffers, in bytes - * - * @iv may be NULL to force the use of an all-zero IV. - * The buffer containing the IV must be as large as the - * cipher's ivsize. - * - * Return values: - * %0: @in successfully decrypted into @out - * negative errno: @in not decrypted - */ -u32 -krb5_decrypt( - struct crypto_sync_skcipher *tfm, - void * iv, - void * in, - void * out, - int length) -{ - u32 ret = -EINVAL; - struct scatterlist sg[1]; - u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); - - if (length % crypto_sync_skcipher_blocksize(tfm) != 0) - goto out; - - if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { - dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", - crypto_sync_skcipher_ivsize(tfm)); - goto out; - } - if (iv) - memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm)); - - memcpy(out, in, length); - sg_init_one(sg, out, length); - - skcipher_request_set_sync_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, length, local_iv); - - ret = crypto_skcipher_decrypt(req); - skcipher_request_zero(req); -out: - dprintk("RPC: gss_k5decrypt returns %d\n",ret); - return ret; -} - static int checksummer(struct scatterlist *sg, void *data) { @@ -202,96 +148,6 @@ checksummer(struct scatterlist *sg, void *data) return crypto_ahash_update(req); } -/* - * checksum the plaintext data and hdrlen bytes of the token header - * The checksum is performed over the first 8 bytes of the - * gss token header and then over the data body - */ -u32 -make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *cksumkey, - unsigned int usage, struct xdr_netobj *cksumout) -{ - struct crypto_ahash *tfm; - struct ahash_request *req; - struct scatterlist sg[1]; - int err = -1; - u8 *checksumdata; - unsigned int checksumlen; - - if (cksumout->len < kctx->gk5e->cksumlength) { - dprintk("%s: checksum buffer length, %u, too small for %s\n", - __func__, cksumout->len, kctx->gk5e->name); - return GSS_S_FAILURE; - } - - checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL); - if (checksumdata == NULL) - return GSS_S_FAILURE; - - tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto out_free_cksum; - - req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!req) - goto out_free_ahash; - - ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); - - checksumlen = crypto_ahash_digestsize(tfm); - - if (cksumkey != NULL) { - err = crypto_ahash_setkey(tfm, cksumkey, - kctx->gk5e->keylength); - if (err) - goto out; - } - - err = crypto_ahash_init(req); - if (err) - goto out; - sg_init_one(sg, header, hdrlen); - ahash_request_set_crypt(req, sg, NULL, hdrlen); - err = crypto_ahash_update(req); - if (err) - goto out; - err = xdr_process_buf(body, body_offset, body->len - body_offset, - checksummer, req); - if (err) - goto out; - ahash_request_set_crypt(req, NULL, checksumdata, 0); - err = crypto_ahash_final(req); - if (err) - goto out; - - switch (kctx->gk5e->ctype) { - case CKSUMTYPE_RSA_MD5: - err = krb5_encrypt(kctx->seq, NULL, checksumdata, - checksumdata, checksumlen); - if (err) - goto out; - memcpy(cksumout->data, - checksumdata + checksumlen - kctx->gk5e->cksumlength, - kctx->gk5e->cksumlength); - break; - case CKSUMTYPE_HMAC_SHA1_DES3: - memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength); - break; - default: - BUG(); - break; - } - cksumout->len = kctx->gk5e->cksumlength; -out: - ahash_request_free(req); -out_free_ahash: - crypto_free_ahash(tfm); -out_free_cksum: - kfree(checksumdata); - return err ? GSS_S_FAILURE : 0; -} - /** * gss_krb5_checksum - Compute the MAC for a GSS Wrap or MIC token * @tfm: an initialized hash transform diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h index a47e9ec228a5..8769e9e705bf 100644 --- a/net/sunrpc/auth_gss/gss_krb5_internal.h +++ b/net/sunrpc/auth_gss/gss_krb5_internal.h @@ -155,10 +155,6 @@ static inline int krb5_derive_key(struct krb5_ctx *kctx, void krb5_make_confounder(u8 *p, int conflen); -u32 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *cksumkey, - unsigned int usage, struct xdr_netobj *cksumout); - u32 gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen, const struct xdr_buf *body, int body_offset, struct xdr_netobj *cksumout); @@ -166,9 +162,6 @@ u32 gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen, u32 krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); -u32 krb5_decrypt(struct crypto_sync_skcipher *key, void *iv, void *in, - void *out, int length); - int xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7ce5e28a6c03..004cdb59f010 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1536,9 +1536,13 @@ static ssize_t write_flush(struct file *file, const char __user *buf, * or by one second if it has already reached the current time. * Newly added cache entries will always have ->last_refresh greater * that ->flush_time, so they don't get flushed prematurely. + * + * If someone frequently calls the flush interface, we should + * immediately clean the corresponding cache_detail instead of + * continuously accumulating nextcheck. */ - if (cd->flush_time >= now) + if (cd->flush_time >= now && cd->flush_time < (now + 5)) now = cd->flush_time + 1; cd->flush_time = now; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2fe88ea79a70..6f75862d9782 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -270,9 +270,6 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt, old = rcu_dereference_protected(clnt->cl_xprt, lockdep_is_held(&clnt->cl_lock)); - if (!xprt_bound(xprt)) - clnt->cl_autobind = 1; - clnt->cl_timeout = timeout; rcu_assign_pointer(clnt->cl_xprt, xprt); spin_unlock(&clnt->cl_lock); @@ -512,6 +509,8 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, clnt->cl_discrtry = 1; if (!(args->flags & RPC_CLNT_CREATE_QUIET)) clnt->cl_chatty = 1; + if (args->flags & RPC_CLNT_CREATE_NETUNREACH_FATAL) + clnt->cl_netunreach_fatal = 1; return clnt; } @@ -662,6 +661,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_noretranstimeo = clnt->cl_noretranstimeo; new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; + new->cl_netunreach_fatal = clnt->cl_netunreach_fatal; new->cl_principal = clnt->cl_principal; new->cl_max_connect = clnt->cl_max_connect; return new; @@ -1195,6 +1195,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; + if (clnt->cl_netunreach_fatal) + task->tk_flags |= RPC_TASK_NETUNREACH_FATAL; atomic_inc(&clnt->cl_task_count); } @@ -2102,14 +2104,17 @@ call_bind_status(struct rpc_task *task) case -EPROTONOSUPPORT: trace_rpcb_bind_version_err(task); goto retry_timeout; + case -ENETDOWN: + case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + break; + fallthrough; case -ECONNREFUSED: /* connection problems */ case -ECONNRESET: case -ECONNABORTED: case -ENOTCONN: case -EHOSTDOWN: - case -ENETDOWN: case -EHOSTUNREACH: - case -ENETUNREACH: case -EPIPE: trace_rpcb_unreachable_err(task); if (!RPC_IS_SOFTCONN(task)) { @@ -2191,19 +2196,22 @@ call_connect_status(struct rpc_task *task) task->tk_status = 0; switch (status) { + case -ENETDOWN: + case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + break; + fallthrough; case -ECONNREFUSED: case -ECONNRESET: /* A positive refusal suggests a rebind is needed. */ - if (RPC_IS_SOFTCONN(task)) - break; if (clnt->cl_autobind) { rpc_force_rebind(clnt); + if (RPC_IS_SOFTCONN(task)) + break; goto out_retry; } fallthrough; case -ECONNABORTED: - case -ENETDOWN: - case -ENETUNREACH: case -EHOSTUNREACH: case -EPIPE: case -EPROTO: @@ -2455,10 +2463,13 @@ call_status(struct rpc_task *task) trace_rpc_call_status(task); task->tk_status = 0; switch(status) { - case -EHOSTDOWN: case -ENETDOWN: - case -EHOSTUNREACH: case -ENETUNREACH: + if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL) + goto out_exit; + fallthrough; + case -EHOSTDOWN: + case -EHOSTUNREACH: case -EPERM: if (RPC_IS_SOFTCONN(task)) goto out_exit; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 102c3818bc54..53bcca365fb1 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -820,9 +820,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) } trace_rpcb_setport(child, map->r_status, map->r_port); - xprt->ops->set_port(xprt, map->r_port); - if (map->r_port) + if (map->r_port) { + xprt->ops->set_port(xprt, map->r_port); xprt_set_bound(xprt); + } } /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9b45fbdc90ca..73bc39281ef5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -276,6 +276,8 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode) { + if (unlikely(current->flags & PF_EXITING)) + return -EINTR; schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 5c8ecdaaa985..09434e1143c5 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -59,6 +59,16 @@ static struct kobject *rpc_sysfs_object_alloc(const char *name, return NULL; } +static inline struct rpc_clnt * +rpc_sysfs_client_kobj_get_clnt(struct kobject *kobj) +{ + struct rpc_sysfs_client *c = container_of(kobj, + struct rpc_sysfs_client, kobject); + struct rpc_clnt *ret = c->clnt; + + return refcount_inc_not_zero(&ret->cl_count) ? ret : NULL; +} + static inline struct rpc_xprt * rpc_sysfs_xprt_kobj_get_xprt(struct kobject *kobj) { @@ -86,6 +96,51 @@ rpc_sysfs_xprt_switch_kobj_get_xprt(struct kobject *kobj) return xprt_switch_get(x->xprt_switch); } +static ssize_t rpc_sysfs_clnt_version_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj); + ssize_t ret; + + if (!clnt) + return sprintf(buf, "<closed>\n"); + + ret = sprintf(buf, "%u", clnt->cl_vers); + refcount_dec(&clnt->cl_count); + return ret; +} + +static ssize_t rpc_sysfs_clnt_program_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj); + ssize_t ret; + + if (!clnt) + return sprintf(buf, "<closed>\n"); + + ret = sprintf(buf, "%s", clnt->cl_program->name); + refcount_dec(&clnt->cl_count); + return ret; +} + +static ssize_t rpc_sysfs_clnt_max_connect_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj); + ssize_t ret; + + if (!clnt) + return sprintf(buf, "<closed>\n"); + + ret = sprintf(buf, "%u\n", clnt->cl_max_connect); + refcount_dec(&clnt->cl_count); + return ret; +} + static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -129,6 +184,31 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, return ret; } +static const char *xprtsec_strings[] = { + [RPC_XPRTSEC_NONE] = "none", + [RPC_XPRTSEC_TLS_ANON] = "tls-anon", + [RPC_XPRTSEC_TLS_X509] = "tls-x509", +}; + +static ssize_t rpc_sysfs_xprt_xprtsec_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + ssize_t ret; + + if (!xprt) { + ret = sprintf(buf, "<closed>\n"); + goto out; + } + + ret = sprintf(buf, "%s\n", xprtsec_strings[xprt->xprtsec.policy]); + xprt_put(xprt); +out: + return ret; + +} + static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -206,6 +286,14 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, return ret; } +static ssize_t rpc_sysfs_xprt_del_xprt_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "# delete this xprt\n"); +} + + static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -225,6 +313,55 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, return ret; } +static ssize_t rpc_sysfs_xprt_switch_add_xprt_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "# add one xprt to this xprt_switch\n"); +} + +static ssize_t rpc_sysfs_xprt_switch_add_xprt_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rpc_xprt_switch *xprt_switch = + rpc_sysfs_xprt_switch_kobj_get_xprt(kobj); + struct xprt_create xprt_create_args; + struct rpc_xprt *xprt, *new; + + if (!xprt_switch) + return 0; + + xprt = rpc_xprt_switch_get_main_xprt(xprt_switch); + if (!xprt) + goto out; + + xprt_create_args.ident = xprt->xprt_class->ident; + xprt_create_args.net = xprt->xprt_net; + xprt_create_args.dstaddr = (struct sockaddr *)&xprt->addr; + xprt_create_args.addrlen = xprt->addrlen; + xprt_create_args.servername = xprt->servername; + xprt_create_args.bc_xprt = xprt->bc_xprt; + xprt_create_args.xprtsec = xprt->xprtsec; + xprt_create_args.connect_timeout = xprt->connect_timeout; + xprt_create_args.reconnect_timeout = xprt->max_reconnect_timeout; + + new = xprt_create_transport(&xprt_create_args); + if (IS_ERR_OR_NULL(new)) { + count = PTR_ERR(new); + goto out_put_xprt; + } + + rpc_xprt_switch_add_xprt(xprt_switch, new); + xprt_put(new); + +out_put_xprt: + xprt_put(xprt); +out: + xprt_switch_put(xprt_switch); + return count; +} + static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -335,6 +472,40 @@ out_put: return count; } +static ssize_t rpc_sysfs_xprt_del_xprt(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj); + + if (!xprt || !xps) { + count = 0; + goto out; + } + + if (xprt->main) { + count = -EINVAL; + goto release_tasks; + } + + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { + count = -EINTR; + goto out_put; + } + + xprt_set_offline_locked(xprt, xps); + xprt_delete_locked(xprt, xps); + +release_tasks: + xprt_release_write(xprt, NULL); +out_put: + xprt_put(xprt); + xprt_switch_put(xps); +out: + return count; +} + int rpc_sysfs_init(void) { rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, kernel_kobj); @@ -398,23 +569,48 @@ static const void *rpc_sysfs_xprt_namespace(const struct kobject *kobj) kobject)->xprt->xprt_net; } +static struct kobj_attribute rpc_sysfs_clnt_version = __ATTR(rpc_version, + 0444, rpc_sysfs_clnt_version_show, NULL); + +static struct kobj_attribute rpc_sysfs_clnt_program = __ATTR(program, + 0444, rpc_sysfs_clnt_program_show, NULL); + +static struct kobj_attribute rpc_sysfs_clnt_max_connect = __ATTR(max_connect, + 0444, rpc_sysfs_clnt_max_connect_show, NULL); + +static struct attribute *rpc_sysfs_rpc_clnt_attrs[] = { + &rpc_sysfs_clnt_version.attr, + &rpc_sysfs_clnt_program.attr, + &rpc_sysfs_clnt_max_connect.attr, + NULL, +}; +ATTRIBUTE_GROUPS(rpc_sysfs_rpc_clnt); + static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr, 0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store); static struct kobj_attribute rpc_sysfs_xprt_srcaddr = __ATTR(srcaddr, 0644, rpc_sysfs_xprt_srcaddr_show, NULL); +static struct kobj_attribute rpc_sysfs_xprt_xprtsec = __ATTR(xprtsec, + 0644, rpc_sysfs_xprt_xprtsec_show, NULL); + static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info, 0444, rpc_sysfs_xprt_info_show, NULL); static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state, 0644, rpc_sysfs_xprt_state_show, rpc_sysfs_xprt_state_change); +static struct kobj_attribute rpc_sysfs_xprt_del = __ATTR(del_xprt, + 0644, rpc_sysfs_xprt_del_xprt_show, rpc_sysfs_xprt_del_xprt); + static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_dstaddr.attr, &rpc_sysfs_xprt_srcaddr.attr, + &rpc_sysfs_xprt_xprtsec.attr, &rpc_sysfs_xprt_info.attr, &rpc_sysfs_xprt_change_state.attr, + &rpc_sysfs_xprt_del.attr, NULL, }; ATTRIBUTE_GROUPS(rpc_sysfs_xprt); @@ -422,14 +618,20 @@ ATTRIBUTE_GROUPS(rpc_sysfs_xprt); static struct kobj_attribute rpc_sysfs_xprt_switch_info = __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); +static struct kobj_attribute rpc_sysfs_xprt_switch_add_xprt = + __ATTR(add_xprt, 0644, rpc_sysfs_xprt_switch_add_xprt_show, + rpc_sysfs_xprt_switch_add_xprt_store); + static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { &rpc_sysfs_xprt_switch_info.attr, + &rpc_sysfs_xprt_switch_add_xprt.attr, NULL, }; ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); static const struct kobj_type rpc_sysfs_client_type = { .release = rpc_sysfs_client_release, + .default_groups = rpc_sysfs_rpc_clnt_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_client_namespace, }; diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 7e98d4dd9f10..4c5e08b0aa64 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -92,6 +92,27 @@ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, xprt_put(xprt); } +/** + * rpc_xprt_switch_get_main_xprt - Get the 'main' xprt for an xprt switch. + * @xps: pointer to struct rpc_xprt_switch. + */ +struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps) +{ + struct rpc_xprt_iter xpi; + struct rpc_xprt *xprt; + + xprt_iter_init_listall(&xpi, xps); + + xprt = xprt_iter_get_next(&xpi); + while (xprt && !xprt->main) { + xprt_put(xprt); + xprt = xprt_iter_get_next(&xpi); + } + + xprt_iter_destroy(&xpi); + return xprt; +} + static DEFINE_IDA(rpc_xprtswitch_ids); void xprt_multipath_cleanup_ids(void) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c3fbf0779d4a..aca8bdf65d72 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -621,7 +621,8 @@ static void __svc_rdma_free(struct work_struct *work) /* Destroy the CM ID */ rdma_destroy_id(rdma->sc_cm_id); - rpcrdma_rn_unregister(device, &rdma->sc_rn); + if (!test_bit(XPT_LISTENER, &rdma->sc_xprt.xpt_flags)) + rpcrdma_rn_unregister(device, &rdma->sc_rn); kfree(rdma); } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7e3db87ae433..fc6afbc8d680 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1551,7 +1551,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, timeout = vsk->connect_timeout; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) { + /* If the socket is already closing or it is in an error state, there + * is no point in waiting. + */ + while (sk->sk_state != TCP_ESTABLISHED && + sk->sk_state != TCP_CLOSING && sk->sk_err == 0) { if (flags & O_NONBLOCK) { /* If we're not going to block, we schedule a timeout * function to generate a timeout on the connection diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index e5d104ce7b82..5696af45bcf7 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -806,8 +806,11 @@ static int __xsk_generic_xmit(struct sock *sk) * if there is space in it. This avoids having to implement * any buffering in the Tx path. */ - if (xsk_cq_reserve_addr_locked(xs->pool, desc.addr)) + err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr); + if (err) { + err = -EAGAIN; goto out; + } skb = xsk_build_skb(xs, &desc); if (IS_ERR(skb)) { |
