diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2023-05-28 14:55:30 +0100 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2023-05-28 14:55:30 +0100 |
| commit | 8a29f74b7498de8b23bbbbc665b9c14ad07175d4 (patch) | |
| tree | 39245b2f585068bf220da7b2291ec24cfdb11f8e /net/core | |
| parent | c21c0f9a20a963f5a1874657a4e3d657503f7815 (diff) | |
| parent | 7877cb91f1081754a1487c144d85dc0d2e2e7fc4 (diff) | |
Merge v6.4-rc4 into char-misc-next
We need the binder fixes in here for future changes and testing.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net/core')
| -rw-r--r-- | net/core/datagram.c | 15 | ||||
| -rw-r--r-- | net/core/dev.c | 2 | ||||
| -rw-r--r-- | net/core/page_pool.c | 28 | ||||
| -rw-r--r-- | net/core/skbuff.c | 8 | ||||
| -rw-r--r-- | net/core/skmsg.c | 81 | ||||
| -rw-r--r-- | net/core/sock_map.c | 3 | ||||
| -rw-r--r-- | net/core/stream.c | 12 |
7 files changed, 88 insertions, 61 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index 5662dff3d381..176eb5834746 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -807,18 +807,21 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; __poll_t mask; + u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); - if (sk->sk_shutdown & RCV_SHUTDOWN) + shutdown = READ_ONCE(sk->sk_shutdown); + if (shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; - if (sk->sk_shutdown == SHUTDOWN_MASK) + if (shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; /* readable? */ @@ -827,10 +830,12 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, /* Connection-based need to check for termination and startup */ if (connection_based(sk)) { - if (sk->sk_state == TCP_CLOSE) + int state = READ_ONCE(sk->sk_state); + + if (state == TCP_CLOSE) mask |= EPOLLHUP; /* connection hasn't started yet? */ - if (sk->sk_state == TCP_SYN_SENT) + if (state == TCP_SYN_SENT) return mask; } diff --git a/net/core/dev.c b/net/core/dev.c index 735096d42c1d..b3c13e041935 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3335,7 +3335,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) type = eth->h_proto; } - return __vlan_get_protocol(skb, type, depth); + return vlan_get_protocol_and_depth(skb, type, depth); } /* openvswitch calls this on rx path, so we need a different check. diff --git a/net/core/page_pool.c b/net/core/page_pool.c index e212e9d7edcb..a3e12a61d456 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -134,6 +134,29 @@ EXPORT_SYMBOL(page_pool_ethtool_stats_get); #define recycle_stat_add(pool, __stat, val) #endif +static bool page_pool_producer_lock(struct page_pool *pool) + __acquires(&pool->ring.producer_lock) +{ + bool in_softirq = in_softirq(); + + if (in_softirq) + spin_lock(&pool->ring.producer_lock); + else + spin_lock_bh(&pool->ring.producer_lock); + + return in_softirq; +} + +static void page_pool_producer_unlock(struct page_pool *pool, + bool in_softirq) + __releases(&pool->ring.producer_lock) +{ + if (in_softirq) + spin_unlock(&pool->ring.producer_lock); + else + spin_unlock_bh(&pool->ring.producer_lock); +} + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -617,6 +640,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count) { int i, bulk_len = 0; + bool in_softirq; for (i = 0; i < count; i++) { struct page *page = virt_to_head_page(data[i]); @@ -635,7 +659,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, return; /* Bulk producer into ptr_ring page_pool cache */ - page_pool_ring_lock(pool); + in_softirq = page_pool_producer_lock(pool); for (i = 0; i < bulk_len; i++) { if (__ptr_ring_produce(&pool->ring, data[i])) { /* ring full */ @@ -644,7 +668,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, } } recycle_stat_add(pool, ring, i); - page_pool_ring_unlock(pool); + page_pool_producer_unlock(pool, in_softirq); /* Hopefully all pages was return into ptr_ring */ if (likely(i == bulk_len)) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 26a586007d8b..cea28d30abb5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5224,8 +5224,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, } else { skb = skb_clone(orig_skb, GFP_ATOMIC); - if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) + if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) { + kfree_skb(skb); return; + } } if (!skb) return; @@ -5298,7 +5300,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); u32 csum_start = skb_headroom(skb) + (u32)start; - if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) { + if (unlikely(csum_start >= U16_MAX || csum_end > skb_headlen(skb))) { net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", start, off, skb_headroom(skb), skb_headlen(skb)); return false; @@ -5306,7 +5308,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = csum_start; skb->csum_offset = off; - skb_set_transport_header(skb, start); + skb->transport_header = csum_start; return true; } EXPORT_SYMBOL_GPL(skb_partial_csum_set); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index f81883759d38..a9060e1f0e43 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, msg_rx = sk_psock_peek_msg(psock); } out: - if (psock->work_state.skb && copied > 0) - schedule_work(&psock->work); return copied; } EXPORT_SYMBOL_GPL(sk_msg_recvmsg); @@ -624,42 +622,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, static void sk_psock_skb_state(struct sk_psock *psock, struct sk_psock_work_state *state, - struct sk_buff *skb, int len, int off) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { - state->skb = skb; state->len = len; state->off = off; - } else { - sock_drop(psock->sk, skb); } spin_unlock_bh(&psock->ingress_lock); } static void sk_psock_backlog(struct work_struct *work) { - struct sk_psock *psock = container_of(work, struct sk_psock, work); + struct delayed_work *dwork = to_delayed_work(work); + struct sk_psock *psock = container_of(dwork, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; struct sk_buff *skb = NULL; + u32 len = 0, off = 0; bool ingress; - u32 len, off; int ret; mutex_lock(&psock->work_mutex); - if (unlikely(state->skb)) { - spin_lock_bh(&psock->ingress_lock); - skb = state->skb; + if (unlikely(state->len)) { len = state->len; off = state->off; - state->skb = NULL; - spin_unlock_bh(&psock->ingress_lock); } - if (skb) - goto start; - while ((skb = skb_dequeue(&psock->ingress_skb))) { + while ((skb = skb_peek(&psock->ingress_skb))) { len = skb->len; off = 0; if (skb_bpf_strparser(skb)) { @@ -668,7 +657,6 @@ static void sk_psock_backlog(struct work_struct *work) off = stm->offset; len = stm->full_len; } -start: ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); do { @@ -678,22 +666,28 @@ start: len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { - sk_psock_skb_state(psock, state, skb, - len, off); + sk_psock_skb_state(psock, state, len, off); + + /* Delay slightly to prioritize any + * other work that might be here. + */ + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + schedule_delayed_work(&psock->work, 1); goto end; } /* Hard errors break pipe and stop xmit. */ sk_psock_report_error(psock, ret ? -ret : EPIPE); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); - sock_drop(psock->sk, skb); goto end; } off += ret; len -= ret; } while (len); - if (!ingress) + skb = skb_dequeue(&psock->ingress_skb); + if (!ingress) { kfree_skb(skb); + } } end: mutex_unlock(&psock->work_mutex); @@ -734,7 +728,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) INIT_LIST_HEAD(&psock->link); spin_lock_init(&psock->link_lock); - INIT_WORK(&psock->work, sk_psock_backlog); + INIT_DELAYED_WORK(&psock->work, sk_psock_backlog); mutex_init(&psock->work_mutex); INIT_LIST_HEAD(&psock->ingress_msg); spin_lock_init(&psock->ingress_lock); @@ -786,11 +780,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } - kfree_skb(psock->work_state.skb); - /* We null the skb here to ensure that calls to sk_psock_backlog - * do not pick up the free'd skb. - */ - psock->work_state.skb = NULL; __sk_psock_purge_ingress_msg(psock); } @@ -809,7 +798,6 @@ void sk_psock_stop(struct sk_psock *psock) spin_lock_bh(&psock->ingress_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); sk_psock_cork_free(psock); - __sk_psock_zap_ingress(psock); spin_unlock_bh(&psock->ingress_lock); } @@ -823,7 +811,8 @@ static void sk_psock_destroy(struct work_struct *work) sk_psock_done_strp(psock); - cancel_work_sync(&psock->work); + cancel_delayed_work_sync(&psock->work); + __sk_psock_zap_ingress(psock); mutex_destroy(&psock->work_mutex); psock_progs_drop(&psock->progs); @@ -938,7 +927,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) } skb_queue_tail(&psock_other->ingress_skb, skb); - schedule_work(&psock_other->work); + schedule_delayed_work(&psock_other->work, 0); spin_unlock_bh(&psock_other->ingress_lock); return 0; } @@ -990,10 +979,8 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, err = -EIO; sk_other = psock->sk; if (sock_flag(sk_other, SOCK_DEAD) || - !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { - skb_bpf_redirect_clear(skb); + !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) goto out_free; - } skb_bpf_set_ingress(skb); @@ -1018,22 +1005,23 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { skb_queue_tail(&psock->ingress_skb, skb); - schedule_work(&psock->work); + schedule_delayed_work(&psock->work, 0); err = 0; } spin_unlock_bh(&psock->ingress_lock); - if (err < 0) { - skb_bpf_redirect_clear(skb); + if (err < 0) goto out_free; - } } break; case __SK_REDIRECT: + tcp_eat_skb(psock->sk, skb); err = sk_psock_skb_redirect(psock, skb); break; case __SK_DROP: default: out_free: + skb_bpf_redirect_clear(skb); + tcp_eat_skb(psock->sk, skb); sock_drop(psock->sk, skb); } @@ -1049,7 +1037,7 @@ static void sk_psock_write_space(struct sock *sk) psock = sk_psock(sk); if (likely(psock)) { if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) - schedule_work(&psock->work); + schedule_delayed_work(&psock->work, 0); write_space = psock->saved_write_space; } rcu_read_unlock(); @@ -1078,8 +1066,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); - if (ret == SK_PASS) - skb_bpf_set_strparser(skb); + skb_bpf_set_strparser(skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } @@ -1183,12 +1170,11 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) int ret = __SK_DROP; int len = skb->len; - skb_get(skb); - rcu_read_lock(); psock = sk_psock(sk); if (unlikely(!psock)) { len = 0; + tcp_eat_skb(sk, skb); sock_drop(sk, skb); goto out; } @@ -1212,12 +1198,21 @@ out: static void sk_psock_verdict_data_ready(struct sock *sk) { struct socket *sock = sk->sk_socket; + int copied; trace_sk_data_ready(sk); if (unlikely(!sock || !sock->ops || !sock->ops->read_skb)) return; - sock->ops->read_skb(sk, sk_psock_verdict_recv); + copied = sock->ops->read_skb(sk, sk_psock_verdict_recv); + if (copied >= 0) { + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + psock->saved_data_ready(sk); + rcu_read_unlock(); + } } void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 7c189c2e2fbf..00afb66cd095 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1644,9 +1644,10 @@ void sock_map_close(struct sock *sk, long timeout) rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); - cancel_work_sync(&psock->work); + cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); } + /* Make sure we do not recurse. This is a bug. * Leak the socket instead of crashing on a stack overflow. */ diff --git a/net/core/stream.c b/net/core/stream.c index 434446ab14c5..f5c4e47df165 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -73,8 +73,8 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, - !sk->sk_err && - !((1 << sk->sk_state) & + !READ_ONCE(sk->sk_err) && + !((1 << READ_ONCE(sk->sk_state)) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait); remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending--; @@ -87,9 +87,9 @@ EXPORT_SYMBOL(sk_stream_wait_connect); * sk_stream_closing - Return 1 if we still have things to send in our buffers. * @sk: socket to verify */ -static inline int sk_stream_closing(struct sock *sk) +static int sk_stream_closing(const struct sock *sk) { - return (1 << sk->sk_state) & + return (1 << READ_ONCE(sk->sk_state)) & (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK); } @@ -142,8 +142,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, sk->sk_err || - (sk->sk_shutdown & SEND_SHUTDOWN) || + sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || + (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || (sk_stream_memory_free(sk) && !vm_wait), &wait); sk->sk_write_pending--; |
