summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthieu Baerts (NGI0) <matttbe@kernel.org>2025-09-25 12:32:50 +0200
committerJakub Kicinski <kuba@kernel.org>2025-09-26 17:44:05 -0700
commit539f6b9de39ec5d827b16f6f5c8f3cfd58669e93 (patch)
treef7267a7e3154da1970c4853869ef77f185386979
parentf596293314b25fc494acb42f40ec256e4662d04f (diff)
mptcp: pm: in-kernel: add laminar endpoints
Currently, upon the reception of an ADD_ADDR (and when the fullmesh flag is not used), the in-kernel PM will create new subflows using the local address the routing configuration will pick. It would be easier to pick local addresses from a selected list of endpoints, and use it only once, than relying on routing rules. Use case: both the client (C) and the server (S) have two addresses (a and b). The client establishes the connection between C(a) and S(a). Once established, the server announces its additional address S(b). Once received, the client connects to it using its second address C(b). Compared to a situation without the 'laminar' endpoint for C(b), the client didn't use this address C(b) to establish a subflow to the server's primary address S(a). So at the end, we have: C S C(a) --- S(a) C(b) --- S(b) In case of a 3rd address on each side (C(c) and S(c)), upon the reception of an ADD_ADDR with S(c), the client should not pick C(b) because it has already been used. C(c) should then be used. Note that this situation is currently possible if C doesn't add any endpoint, but configure the routing in order to pick C(b) for the route to S(b), and pick C(c) for the route to S(c). That doesn't sound very practical because it means knowing in advance the IP addresses that will be used and announced by the server. 'laminar', like the idea of laminar flows: the different subflows don't mix with each other on an endpoint, unlike the "turbulent" way traffic is mixed by 'fullmesh'. In the code, the new endpoint type is added. Similar to the other subflow types, an MPTCP_INFO counter is added. While at it, hole are now commented in struct mptcp_info, to remember next time that these holes can no longer be used. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/503 Reviewed-by: Mat Martineau <martineau@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Link: https://patch.msgid.link/20250925-net-next-mptcp-c-flag-laminar-v1-15-ad126cc47c6b@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/uapi/linux/mptcp.h6
-rw-r--r--net/mptcp/pm_kernel.c82
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/sockopt.c2
4 files changed, 90 insertions, 1 deletions
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 5ec996977b3f..87cfab874e24 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -39,6 +39,7 @@
#define MPTCP_PM_ADDR_FLAG_BACKUP _BITUL(2)
#define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3)
#define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4)
+#define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5)
struct mptcp_info {
__u8 mptcpi_subflows;
@@ -51,6 +52,7 @@ struct mptcp_info {
#define mptcpi_endp_signal_max mptcpi_add_addr_signal_max
__u8 mptcpi_add_addr_accepted_max;
#define mptcpi_limit_add_addr_accepted mptcpi_add_addr_accepted_max
+ /* 16-bit hole that can no longer be filled */
__u32 mptcpi_flags;
__u32 mptcpi_token;
__u64 mptcpi_write_seq;
@@ -60,13 +62,15 @@ struct mptcp_info {
__u8 mptcpi_local_addr_max;
#define mptcpi_endp_subflow_max mptcpi_local_addr_max
__u8 mptcpi_csum_enabled;
+ /* 8-bit hole that can no longer be filled */
__u32 mptcpi_retransmits;
__u64 mptcpi_bytes_retrans;
__u64 mptcpi_bytes_sent;
__u64 mptcpi_bytes_received;
__u64 mptcpi_bytes_acked;
__u8 mptcpi_subflows_total;
- __u8 reserved[3];
+ __u8 mptcpi_endp_laminar_max;
+ __u8 reserved[2];
__u32 mptcpi_last_data_sent;
__u32 mptcpi_last_data_recv;
__u32 mptcpi_last_ack_recv;
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index 55dbf89d19b8..e0f44dc232aa 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -21,6 +21,7 @@ struct pm_nl_pernet {
u8 endpoints;
u8 endp_signal_max;
u8 endp_subflow_max;
+ u8 endp_laminar_max;
u8 limit_add_addr_accepted;
u8 limit_extra_subflows;
u8 next_id;
@@ -61,6 +62,14 @@ u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk)
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_subflow_max);
+u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk)
+{
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
+
+ return READ_ONCE(pernet->endp_laminar_max);
+}
+EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_laminar_max);
+
u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
@@ -459,6 +468,66 @@ fill_local_addresses_vec_fullmesh(struct mptcp_sock *msk,
}
static unsigned int
+fill_local_laminar_endp(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
+ struct mptcp_pm_local *locals)
+{
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_pm_local *local;
+ int found = 0;
+
+ /* Forbid creation of new subflows matching existing ones, possibly
+ * already created by 'subflow' endpoints
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ if ((1 << inet_sk_state_load(ssk)) &
+ (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING |
+ TCPF_CLOSE))
+ continue;
+
+ __set_bit(subflow_get_local_id(subflow), unavail_id);
+ }
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR))
+ continue;
+
+ if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
+ continue;
+
+ if (test_bit(mptcp_endp_get_local_id(msk, &entry->addr),
+ unavail_id))
+ continue;
+
+ local = &locals[0];
+ local->addr = entry->addr;
+ local->flags = entry->flags;
+ local->ifindex = entry->ifindex;
+
+ if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+
+ if (local->addr.id != msk->mpc_endpoint_id)
+ msk->pm.local_addr_used++;
+ }
+
+ msk->pm.extra_subflows++;
+ found = 1;
+ break;
+ }
+ rcu_read_unlock();
+
+ return found;
+}
+
+static unsigned int
fill_local_addresses_vec_c_flag(struct mptcp_sock *msk,
struct mptcp_addr_info *remote,
struct mptcp_pm_local *locals)
@@ -532,6 +601,10 @@ fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
if (i)
return i;
+ /* If there is at least one MPTCP endpoint with a laminar flag */
+ if (mptcp_pm_get_endp_laminar_max(msk))
+ return fill_local_laminar_endp(msk, remote, locals);
+
/* Special case: peer sets the C flag, accept one ADD_ADDR if default
* limits are used -- accepting no ADD_ADDR -- and use subflow endpoints
*/
@@ -707,6 +780,10 @@ find_next:
addr_max = pernet->endp_subflow_max;
WRITE_ONCE(pernet->endp_subflow_max, addr_max + 1);
}
+ if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
+ addr_max = pernet->endp_laminar_max;
+ WRITE_ONCE(pernet->endp_laminar_max, addr_max + 1);
+ }
pernet->endpoints++;
if (!entry->addr.port)
@@ -1100,6 +1177,10 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
addr_max = pernet->endp_subflow_max;
WRITE_ONCE(pernet->endp_subflow_max, addr_max - 1);
}
+ if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
+ addr_max = pernet->endp_laminar_max;
+ WRITE_ONCE(pernet->endp_laminar_max, addr_max - 1);
+ }
pernet->endpoints--;
list_del_rcu(&entry->list);
@@ -1182,6 +1263,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet)
{
WRITE_ONCE(pernet->endp_signal_max, 0);
WRITE_ONCE(pernet->endp_subflow_max, 0);
+ WRITE_ONCE(pernet->endp_laminar_max, 0);
pernet->endpoints = 0;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0cd3333cafaf..371084a3fc22 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -1182,6 +1182,7 @@ void mptcp_pm_worker(struct mptcp_sock *msk);
void __mptcp_pm_kernel_worker(struct mptcp_sock *msk);
u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk);
u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk);
+u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk);
u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk);
u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 92a2a2742627..a28a48385885 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -980,6 +980,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
mptcp_pm_get_limit_add_addr_accepted(msk);
info->mptcpi_endp_subflow_max =
mptcp_pm_get_endp_subflow_max(msk);
+ info->mptcpi_endp_laminar_max =
+ mptcp_pm_get_endp_laminar_max(msk);
}
if (__mptcp_check_fallback(msk))