summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2025-11-21 18:02:10 +0100
committerJakub Kicinski <kuba@kernel.org>2025-11-24 19:49:42 -0800
commit0eeb372deebce6c25b9afc09e35d6c75a744299a (patch)
tree805dffcd98cb483d63d892b2784a15293788c0c5
parent38a4a469c850fc007d6fe2429b1f7f492e50e7ad (diff)
mptcp: handle first subflow closing consistently
Currently, as soon as the PM closes a subflow, the msk stops accepting data from it, even if the TCP socket could be still formally open in the incoming direction, with the notable exception of the first subflow. The root cause of such behavior is that code currently piggy back two separate semantic on the subflow->disposable bit: the subflow context must be released and that the subflow must stop accepting incoming data. The first subflow is never disposed, so it also never stop accepting incoming data. Use a separate bit to mark the latter status and set such bit in __mptcp_close_ssk() for all subflows. Beyond making per subflow behaviour more consistent this will also simplify the next patch. Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Mat Martineau <martineau@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Link: https://patch.msgid.link/20251121-net-next-mptcp-memcg-backlog-imp-v1-11-1f34b6c1e0b1@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/mptcp/protocol.c14
-rw-r--r--net/mptcp/protocol.h3
2 files changed, 11 insertions, 6 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ba1237853ebf..d22f792f4760 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -851,10 +851,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
struct mptcp_sock *msk = mptcp_sk(sk);
/* The peer can send data while we are shutting down this
- * subflow at msk destruction time, but we must avoid enqueuing
+ * subflow at subflow destruction time, but we must avoid enqueuing
* more data to the msk receive queue
*/
- if (unlikely(subflow->disposable))
+ if (unlikely(subflow->closing))
return;
mptcp_data_lock(sk);
@@ -2437,6 +2437,13 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_sock *msk = mptcp_sk(sk);
bool dispose_it, need_push = false;
+ /* Do not pass RX data to the msk, even if the subflow socket is not
+ * going to be freed (i.e. even for the first subflow on graceful
+ * subflow close.
+ */
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ subflow->closing = 1;
+
/* If the first subflow moved to a close state before accept, e.g. due
* to an incoming reset or listener shutdown, the subflow socket is
* already deleted by inet_child_forget() and the mptcp socket can't
@@ -2447,7 +2454,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
/* ensure later check in mptcp_worker() will dispose the msk */
sock_set_flag(sk, SOCK_DEAD);
mptcp_set_close_tout(sk, tcp_jiffies32 - (mptcp_close_timeout(sk) + 1));
- lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
goto out_release;
}
@@ -2456,8 +2462,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (dispose_it)
list_del(&subflow->node);
- lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
-
if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE)
tcp_set_state(ssk, TCP_CLOSE);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3d2892cc0ef2..d30806b287d2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -536,12 +536,13 @@ struct mptcp_subflow_context {
send_infinite_map : 1,
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
+ closing : 1, /* must not pass rx data to msk anymore */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
mpc_drop : 1, /* the MPC option has been dropped in a rtx */
- __unused : 9;
+ __unused : 8;
bool data_avail;
bool scheduled;
bool pm_listener; /* a listener managed by the kernel PM? */