summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorKuniyuki Iwashima <kuniyu@google.com>2025-10-14 23:54:55 +0000
committerMartin KaFai Lau <martin.lau@kernel.org>2025-10-16 12:04:47 -0700
commit7c268eaeec6388b7bee36aef3fb5e62c9222ad3b (patch)
tree1529d1d9e676c103e0ea8c0354984f74a592db14 /include
parent4a997d49d92ad9dda603f60881faa6c800d435e9 (diff)
net: Allow opt-out from global protocol memory accounting.
Some protocols (e.g., TCP, UDP) implement memory accounting for socket buffers and charge memory to per-protocol global counters pointed to by sk->sk_proto->memory_allocated. Sometimes, system processes do not want that limitation. For a similar purpose, there is SO_RESERVE_MEM for sockets under memcg. Also, by opting out of the per-protocol accounting, sockets under memcg can avoid paying costs for two orthogonal memory accounting mechanisms. A microbenchmark result is in the subsequent bpf patch. Let's allow opt-out from the per-protocol memory accounting if sk->sk_bypass_prot_mem is true. sk->sk_bypass_prot_mem and sk->sk_prot are placed in the same cache line, and sk_has_account() always fetches sk->sk_prot before accessing sk->sk_bypass_prot_mem, so there is no extra cache miss for this patch. The following patches will set sk->sk_bypass_prot_mem to true, and then, the per-protocol memory accounting will be skipped. Note that this does NOT disable memcg, but rather the per-protocol one. Another option not to use the hole in struct sock_common is create sk_prot variants like tcp_prot_bypass, but this would complicate SOCKMAP logic, tcp_bpf_prots etc. Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Roman Gushchin <roman.gushchin@linux.dev> Link: https://patch.msgid.link/20251014235604.3057003-3-kuniyu@google.com
Diffstat (limited to 'include')
-rw-r--r--include/net/proto_memory.h3
-rw-r--r--include/net/sock.h3
-rw-r--r--include/net/tcp.h3
3 files changed, 9 insertions, 0 deletions
diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h
index 8e91a8fa31b5..ad6d703ce6fe 100644
--- a/include/net/proto_memory.h
+++ b/include/net/proto_memory.h
@@ -35,6 +35,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_sk_under_memory_pressure(sk))
return true;
+ if (sk->sk_bypass_prot_mem)
+ return false;
+
return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
diff --git a/include/net/sock.h b/include/net/sock.h
index 30ac2eb4ef9b..415e7381aa50 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -118,6 +118,7 @@ typedef __u64 __bitwise __addrpair;
* @skc_reuseport: %SO_REUSEPORT setting
* @skc_ipv6only: socket is IPV6 only
* @skc_net_refcnt: socket is using net ref counting
+ * @skc_bypass_prot_mem: bypass the per-protocol memory accounting for skb
* @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
@@ -174,6 +175,7 @@ struct sock_common {
unsigned char skc_reuseport:1;
unsigned char skc_ipv6only:1;
unsigned char skc_net_refcnt:1;
+ unsigned char skc_bypass_prot_mem:1;
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
@@ -381,6 +383,7 @@ struct sock {
#define sk_reuseport __sk_common.skc_reuseport
#define sk_ipv6only __sk_common.skc_ipv6only
#define sk_net_refcnt __sk_common.skc_net_refcnt
+#define sk_bypass_prot_mem __sk_common.skc_bypass_prot_mem
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1e547138f4fb..439e327fdbfa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -303,6 +303,9 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk)
mem_cgroup_sk_under_memory_pressure(sk))
return true;
+ if (sk->sk_bypass_prot_mem)
+ return false;
+
return READ_ONCE(tcp_memory_pressure);
}
/*