From 68889dfd547bd8eabc5a98b58475d7b901cf5129 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:09 +0000 Subject: mptcp: Fix up subflow's memcg when CONFIG_SOCK_CGROUP_DATA=n. When sk_alloc() allocates a socket, mem_cgroup_sk_alloc() sets sk->sk_memcg based on the current task. MPTCP subflow socket creation is triggered from userspace or an in-kernel worker. In the latter case, sk->sk_memcg is not what we want. So, we fix it up from the parent socket's sk->sk_memcg in mptcp_attach_cgroup(). Although the code is placed under #ifdef CONFIG_MEMCG, it is buried under #ifdef CONFIG_SOCK_CGROUP_DATA. The two configs are orthogonal. If CONFIG_MEMCG is enabled without CONFIG_SOCK_CGROUP_DATA, the subflow's memory usage is not charged correctly. Let's move the code out of the wrong ifdef guard. Note that sk->sk_memcg is freed in sk_prot_free() and the parent sk holds the refcnt of memcg->css here, so we don't need to use css_tryget(). Fixes: 3764b0c5651e3 ("mptcp: attach subflow socket to parent cgroup") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Matthieu Baerts (NGI0) Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 785173aa0739..25921fbec685 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1604,6 +1604,7 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); +void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1661,6 +1662,11 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg); #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; static inline void mem_cgroup_sk_free(struct sock *sk) { }; + +static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) +{ +} + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; -- cgit v1.2.3 From f7161b234f2ec7f18999009c4becc04eeb6b12a7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:14 +0000 Subject: net-memcg: Introduce mem_cgroup_from_sk(). We will store a flag in the lowest bit of sk->sk_memcg. Then, directly dereferencing sk->sk_memcg will be illegal, and we do not want to allow touching the raw sk->sk_memcg in many places. Let's introduce mem_cgroup_from_sk(). Other places accessing the raw sk->sk_memcg will be converted later. Note that we cannot define the helper as an inline function in memcontrol.h as we cannot access any fields of struct sock there due to circular dependency, so it is placed in sock.h. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c8a4b283df6f..811f95ea8d00 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2594,6 +2594,18 @@ static inline gfp_t gfp_memcg_charge(void) return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) +{ + return sk->sk_memcg; +} +#else +static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) +{ + return NULL; +} +#endif + static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : READ_ONCE(sk->sk_rcvtimeo); -- cgit v1.2.3 From 43049b0db03823c2cd003ca7d3dddcd3924da8dc Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:15 +0000 Subject: net-memcg: Introduce mem_cgroup_sk_enabled(). The socket memcg feature is enabled by a static key and only works for non-root cgroup. We check both conditions in many places. Let's factorise it as a helper function. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-8-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/proto_memory.h | 2 +- include/net/sock.h | 10 ++++++++++ include/net/tcp.h | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h index a6ab2f4f5e28..859e63de81c4 100644 --- a/include/net/proto_memory.h +++ b/include/net/proto_memory.h @@ -31,7 +31,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) if (!sk->sk_prot->memory_pressure) return false; - if (mem_cgroup_sockets_enabled && sk->sk_memcg && + if (mem_cgroup_sk_enabled(sk) && mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; diff --git a/include/net/sock.h b/include/net/sock.h index 811f95ea8d00..3efdf680401d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2599,11 +2599,21 @@ static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) { return sk->sk_memcg; } + +static inline bool mem_cgroup_sk_enabled(const struct sock *sk) +{ + return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk); +} #else static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) { return NULL; } + +static inline bool mem_cgroup_sk_enabled(const struct sock *sk) +{ + return false; +} #endif static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) diff --git a/include/net/tcp.h b/include/net/tcp.h index 526a26e7a150..9f01b6be6444 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -275,7 +275,7 @@ extern unsigned long tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) { - if (mem_cgroup_sockets_enabled && sk->sk_memcg && + if (mem_cgroup_sk_enabled(sk) && mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; -- cgit v1.2.3 From bb178c6bc08525d758a57775458d644304011bf8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:16 +0000 Subject: net-memcg: Pass struct sock to mem_cgroup_sk_(un)?charge(). We will store a flag in the lowest bit of sk->sk_memcg. Then, we cannot pass the raw pointer to mem_cgroup_charge_skmem() and mem_cgroup_uncharge_skmem(). Let's pass struct sock to the functions. While at it, they are renamed to match other functions starting with mem_cgroup_sk_. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-9-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 25921fbec685..0837d3de3a68 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1596,15 +1596,16 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, - gfp_t gfp_mask); -void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) + void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); +bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages, + gfp_t gfp_mask); +void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1660,13 +1661,31 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 -static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; -static inline void mem_cgroup_sk_free(struct sock *sk) { }; + +static inline void mem_cgroup_sk_alloc(struct sock *sk) +{ +} + +static inline void mem_cgroup_sk_free(struct sock *sk) +{ +} static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) { } +static inline bool mem_cgroup_sk_charge(const struct sock *sk, + unsigned int nr_pages, + gfp_t gfp_mask) +{ + return false; +} + +static inline void mem_cgroup_sk_uncharge(const struct sock *sk, + unsigned int nr_pages) +{ +} + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; -- cgit v1.2.3 From b2ffd10cddde47cc6830e4981e91e3215def62b1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:17 +0000 Subject: net-memcg: Pass struct sock to mem_cgroup_sk_under_memory_pressure(). We will store a flag in the lowest bit of sk->sk_memcg. Then, we cannot pass the raw pointer to mem_cgroup_under_socket_pressure(). Let's pass struct sock to it and rename the function to match other functions starting with mem_cgroup_sk_. Note that the helper is moved to sock.h to use mem_cgroup_from_sk(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-10-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 18 ------------------ include/net/proto_memory.h | 2 +- include/net/sock.h | 22 ++++++++++++++++++++++ include/net/tcp.h | 2 +- 4 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0837d3de3a68..fb27e3d2fdac 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1642,19 +1642,6 @@ static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) } #endif -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ -#ifdef CONFIG_MEMCG_V1 - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) - return !!memcg->tcpmem_pressure; -#endif /* CONFIG_MEMCG_V1 */ - do { - if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) - return true; - } while ((memcg = parent_mem_cgroup(memcg))); - return false; -} - int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); @@ -1686,11 +1673,6 @@ static inline void mem_cgroup_sk_uncharge(const struct sock *sk, { } -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ - return false; -} - static inline void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h index 859e63de81c4..8e91a8fa31b5 100644 --- a/include/net/proto_memory.h +++ b/include/net/proto_memory.h @@ -32,7 +32,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) return false; if (mem_cgroup_sk_enabled(sk) && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) + mem_cgroup_sk_under_memory_pressure(sk)) return true; return !!READ_ONCE(*sk->sk_prot->memory_pressure); diff --git a/include/net/sock.h b/include/net/sock.h index 3efdf680401d..3bc4d566f7d0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2604,6 +2604,23 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk) { return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk); } + +static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) +{ + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + +#ifdef CONFIG_MEMCG_V1 + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return !!memcg->tcpmem_pressure; +#endif /* CONFIG_MEMCG_V1 */ + + do { + if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) + return true; + } while ((memcg = parent_mem_cgroup(memcg))); + + return false; +} #else static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) { @@ -2614,6 +2631,11 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk) { return false; } + +static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) +{ + return false; +} #endif static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9f01b6be6444..2936b8175950 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -276,7 +276,7 @@ extern unsigned long tcp_memory_pressure; static inline bool tcp_under_memory_pressure(const struct sock *sk) { if (mem_cgroup_sk_enabled(sk) && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) + mem_cgroup_sk_under_memory_pressure(sk)) return true; return READ_ONCE(tcp_memory_pressure); -- cgit v1.2.3 From bf64002c94fc330b996bc438f3d1b6bd3d781659 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:18 +0000 Subject: net: Define sk_memcg under CONFIG_MEMCG. Except for sk_clone_lock(), all accesses to sk->sk_memcg is done under CONFIG_MEMCG. As a bonus, let's define sk->sk_memcg under CONFIG_MEMCG. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-11-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 3bc4d566f7d0..1c49ea13af4a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -443,7 +443,9 @@ struct sock { __cacheline_group_begin(sock_read_rxtx); int sk_err; struct socket *sk_socket; +#ifdef CONFIG_MEMCG struct mem_cgroup *sk_memcg; +#endif #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif -- cgit v1.2.3