summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c21
-rw-r--r--net/core/devmem.c11
-rw-r--r--net/core/failover.c6
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/gro.c7
-rw-r--r--net/core/netmem_priv.h23
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/page_pool.c24
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skbuff.c56
-rw-r--r--net/core/skmsg.c9
11 files changed, 114 insertions, 52 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 8bfa8313ef62..0c6c270d9f7d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6862,9 +6862,9 @@ static void skb_defer_free_flush(void)
#if defined(CONFIG_NET_RX_BUSY_POLL)
-static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
+static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout)
{
- if (!skip_schedule) {
+ if (!timeout) {
gro_normal_list(&napi->gro);
__napi_schedule(napi);
return;
@@ -6874,6 +6874,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
gro_flush_normal(&napi->gro, HZ >= 1000);
clear_bit(NAPI_STATE_SCHED, &napi->state);
+ hrtimer_start(&napi->timer, ns_to_ktime(timeout),
+ HRTIMER_MODE_REL_PINNED);
}
enum {
@@ -6885,8 +6887,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
unsigned flags, u16 budget)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
- bool skip_schedule = false;
- unsigned long timeout;
+ unsigned long timeout = 0;
int rc;
/* Busy polling means there is a high chance device driver hard irq
@@ -6906,10 +6907,12 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
- timeout = napi_get_gro_flush_timeout(napi);
- if (napi->defer_hard_irqs_count && timeout) {
- hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
- skip_schedule = true;
+ if (napi->defer_hard_irqs_count) {
+ /* A short enough gro flush timeout and long enough
+ * poll can result in timer firing too early.
+ * Timer will be armed later if necessary.
+ */
+ timeout = napi_get_gro_flush_timeout(napi);
}
}
@@ -6924,7 +6927,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
trace_napi_poll(napi, rc, budget);
netpoll_poll_unlock(have_poll_lock);
if (rc == budget)
- __busy_poll_stop(napi, skip_schedule);
+ __busy_poll_stop(napi, timeout);
bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
}
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 468344739db2..4f71de44c0fb 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -241,6 +241,11 @@ net_devmem_bind_dmabuf(struct net_device *dev,
}
if (direction == DMA_TO_DEVICE) {
+ if (!IS_ALIGNED(dmabuf->size, PAGE_SIZE)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "TX dma-buf size must be a multiple of PAGE_SIZE");
+ goto err_unmap;
+ }
binding->tx_vec = kvmalloc_objs(struct net_iov *,
dmabuf->size / PAGE_SIZE);
if (!binding->tx_vec) {
@@ -267,6 +272,12 @@ net_devmem_bind_dmabuf(struct net_device *dev,
size_t len = sg_dma_len(sg);
struct net_iov *niov;
+ if (!IS_ALIGNED(len, PAGE_SIZE)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "dma-buf SG length must be PAGE_SIZE aligned");
+ goto err_free_chunks;
+ }
+
owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
dev_to_node(&dev->dev));
if (!owner) {
diff --git a/net/core/failover.c b/net/core/failover.c
index 11bb183c7a1b..e43c59cd6868 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -12,6 +12,7 @@
#include <uapi/linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <net/netdev_lock.h>
#include <net/failover.h>
static LIST_HEAD(failover_list);
@@ -221,8 +222,11 @@ failover_existing_slave_register(struct net_device *failover_dev)
for_each_netdev(net, dev) {
if (netif_is_failover(dev))
continue;
- if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
+ if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) {
+ netdev_lock_ops(dev);
failover_slave_register(dev);
+ netdev_unlock_ops(dev);
+ }
}
rtnl_unlock();
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 9590877b0714..80439767e0ee 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2869,7 +2869,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
psge->length = start - offset;
rsge.length -= psge->length;
- rsge.offset += start;
+ rsge.offset += start - offset;
sk_msg_iter_var_next(i);
sg_unmark_end(psge);
diff --git a/net/core/gro.c b/net/core/gro.c
index 31d21de5b15a..a84753983467 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -109,6 +109,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (p->pp_recycle != skb->pp_recycle)
return -ETOOMANYREFS;
+ if (skb_zcopy(p) || skb_zcopy(skb))
+ return -ETOOMANYREFS;
+
if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) ||
NAPI_GRO_CB(skb)->flush))
return -E2BIG;
@@ -213,10 +216,12 @@ done:
p->data_len += len;
p->truesize += delta_truesize;
p->len += len;
+ skb_shinfo(p)->flags |= skbinfo->flags & SKBFL_SHARED_FRAG;
if (lp != p) {
lp->data_len += len;
lp->truesize += delta_truesize;
lp->len += len;
+ skb_shinfo(lp)->flags |= skbinfo->flags & SKBFL_SHARED_FRAG;
}
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
@@ -244,6 +249,8 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
p->truesize += skb->truesize;
p->len += skb->len;
+ skb_shinfo(p)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
index 3e6fde8f1726..23175cb2bd86 100644
--- a/net/core/netmem_priv.h
+++ b/net/core/netmem_priv.h
@@ -8,18 +8,21 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
}
-static inline bool netmem_is_pp(netmem_ref netmem)
+static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
+{
+ netmem_to_nmdesc(netmem)->pp_magic |= pp_magic;
+}
+
+static inline void netmem_clear_pp_magic(netmem_ref netmem)
{
- struct page *page;
+ WARN_ON_ONCE(netmem_to_nmdesc(netmem)->pp_magic & PP_DMA_INDEX_MASK);
- /* XXX: Now that the offset of page_type is shared between
- * struct page and net_iov, just cast the netmem to struct page
- * unconditionally by clearing NET_IOV if any, no matter whether
- * it comes from struct net_iov or struct page. This should be
- * adjusted once the offset is no longer shared.
- */
- page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
- return PageNetpp(page);
+ netmem_to_nmdesc(netmem)->pp_magic = 0;
+}
+
+static inline bool netmem_is_pp(netmem_ref netmem)
+{
+ return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE;
}
static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 84faace50ac2..3f4a17fa5713 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -319,6 +319,8 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
lockdep_assert_irqs_disabled();
dev = np->dev;
+ /* npinfo->txq belongs to np->dev, so retries must stay bound to it. */
+ skb->dev = dev;
rcu_read_lock();
npinfo = rcu_dereference_bh(dev->npinfo);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 6e576dec80db..8171d1173221 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -707,18 +707,8 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
{
- struct page *page;
-
netmem_set_pp(netmem, pool);
-
- /* XXX: Now that the offset of page_type is shared between
- * struct page and net_iov, just cast the netmem to struct page
- * unconditionally by clearing NET_IOV if any, no matter whether
- * it comes from struct net_iov or struct page. This should be
- * adjusted once the offset is no longer shared.
- */
- page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
- __SetPageNetpp(page);
+ netmem_or_pp_magic(netmem, PP_SIGNATURE);
/* Ensuring all pages have been split into one fragment initially:
* page_pool_set_pp_info() is only called once for every page when it
@@ -733,17 +723,7 @@ void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
void page_pool_clear_pp_info(netmem_ref netmem)
{
- struct page *page;
-
- /* XXX: Now that the offset of page_type is shared between
- * struct page and net_iov, just cast the netmem to struct page
- * unconditionally by clearing NET_IOV if any, no matter whether
- * it comes from struct net_iov or struct page. This should be
- * adjusted once the offset is no longer shared.
- */
- page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
- __ClearPageNetpp(page);
-
+ netmem_clear_pp_magic(netmem);
netmem_set_pp(netmem, NULL);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index df042da422ef..511c25bf6f2a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -6328,8 +6328,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
0, &filters, &idxattr, &prividx, extack);
if (err < 0) {
- /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
- WARN_ON(err == -EMSGSIZE);
+ /* -EMSGSIZE implies BUG in if_nlmsg_stats_size
+ * or a too big nested attribute.
+ */
kfree_skb(nskb);
} else {
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7dad68e3b518..c02f0a507ba8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2248,6 +2248,7 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
skb_frag_ref(skb, i);
}
skb_shinfo(n)->nr_frags = i;
+ skb_shinfo(n)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
}
if (skb_has_frag_list(skb)) {
@@ -2786,6 +2787,8 @@ done:
skb->data_len = 0;
skb_set_tail_pointer(skb, len);
}
+ if (!skb_shinfo(skb)->nr_frags && !skb_has_frag_list(skb))
+ skb->unreadable = 0;
if (!skb->sk || skb->destructor == sock_edemux)
skb_condense(skb);
@@ -2793,16 +2796,37 @@ done:
}
EXPORT_SYMBOL(___pskb_trim);
+static int pskb_trim_rcsum_complete(struct sk_buff *skb, unsigned int len)
+{
+ int delta = skb->len - len;
+
+ if (skb_frags_readable(skb)) {
+ skb->csum = csum_block_sub(skb->csum,
+ skb_checksum(skb, len, delta, 0),
+ len);
+ return 0;
+ }
+
+ if (len > skb_headlen(skb))
+ return -EFAULT;
+
+ /* The trimmed bytes are unreadable, but the remaining packet can be
+ * checksummed by software after trimming.
+ */
+ skb->ip_summed = CHECKSUM_NONE;
+ return 0;
+}
+
/* Note : use pskb_trim_rcsum() instead of calling this directly
*/
int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
{
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- int delta = skb->len - len;
+ int err;
- skb->csum = csum_block_sub(skb->csum,
- skb_checksum(skb, len, delta, 0),
- len);
+ err = pskb_trim_rcsum_complete(skb, len);
+ if (err)
+ return err;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len;
int offset = skb_checksum_start_offset(skb) + skb->csum_offset;
@@ -4349,6 +4373,8 @@ onlymerged:
tgt->ip_summed = CHECKSUM_PARTIAL;
skb->ip_summed = CHECKSUM_PARTIAL;
+ skb_shinfo(tgt)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+
skb_len_add(skb, -shiftlen);
skb_len_add(tgt, shiftlen);
@@ -4959,7 +4985,8 @@ normal:
skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
+ skb_shinfo(nskb)->flags |= (skb_shinfo(head_skb)->flags |
+ skb_shinfo(frag_skb)->flags) &
SKBFL_SHARED_FRAG;
if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
@@ -4976,6 +5003,9 @@ normal:
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
+
+ skb_shinfo(nskb)->flags |= skb_shinfo(frag_skb)->flags & SKBFL_SHARED_FRAG;
+
if (!skb_headlen(list_skb)) {
BUG_ON(!nfrags);
} else {
@@ -6200,6 +6230,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
from_shinfo->frags,
from_shinfo->nr_frags * sizeof(skb_frag_t));
to_shinfo->nr_frags += from_shinfo->nr_frags;
+ if (from_shinfo->nr_frags)
+ to_shinfo->flags |= from_shinfo->flags & SKBFL_SHARED_FRAG;
if (!skb_cloned(from))
from_shinfo->nr_frags = 0;
@@ -6791,6 +6823,11 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
skb->len -= off;
+ /* Remove SKBFL_MANAGED_FRAG_REFS instead of trying to honour it
+ * while refcounting frags below.
+ */
+ skb_zcopy_downgrade_managed(skb);
+
memcpy((struct skb_shared_info *)(data + size),
skb_shinfo(skb),
offsetof(struct skb_shared_info,
@@ -6801,6 +6838,8 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
skb_kfree_head(data);
return -ENOMEM;
}
+ if (skb_zcopy(skb))
+ net_zcopy_get(skb_zcopy(skb));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
skb_frag_ref(skb, i);
if (skb_has_frag_list(skb))
@@ -6902,6 +6941,11 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
return -ENOMEM;
size = SKB_WITH_OVERHEAD(size);
+ /* Remove SKBFL_MANAGED_FRAG_REFS instead of trying to honour it
+ * while refcounting frags below.
+ */
+ skb_zcopy_downgrade_managed(skb);
+
memcpy((struct skb_shared_info *)(data + size),
skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
if (skb_orphan_frags(skb, gfp_mask)) {
@@ -6944,6 +6988,8 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb_kfree_head(data);
return -ENOMEM;
}
+ if (skb_zcopy(skb))
+ net_zcopy_get(skb_zcopy(skb));
skb_release_data(skb, SKB_CONSUMED);
skb->head = data;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6187a83bd741..e1850caf1a71 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1268,12 +1268,19 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
const struct proto_ops *ops = NULL;
+ struct sk_psock *psock;
struct socket *sock;
int copied;
trace_sk_data_ready(sk);
rcu_read_lock();
+ psock = sk_psock(sk);
+ if (psock && tls_sw_has_ctx_rx(sk)) {
+ psock->saved_data_ready(sk);
+ rcu_read_unlock();
+ return;
+ }
sock = READ_ONCE(sk->sk_socket);
if (likely(sock))
ops = READ_ONCE(sock->ops);
@@ -1283,8 +1290,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
copied = ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
- struct sk_psock *psock;
-
rcu_read_lock();
psock = sk_psock(sk);
if (psock)