From 20b5c30398639b458371c228abfda829854b61c5 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:18:08 -0800 Subject: Revert "gfp: add __GFP_NOACCOUNT" This reverts commit 8f4fc071b192 ("gfp: add __GFP_NOACCOUNT"). Black-list kmem accounting policy (aka __GFP_NOACCOUNT) turned out to be fragile and difficult to maintain, because there seem to be many more allocations that should not be accounted than those that should be. Besides, false accounting an allocation might result in much worse consequences than not accounting at all, namely increased memory consumption due to pinned dead kmem caches. So it was decided to switch to the white-list policy. This patch reverts bits introducing the black-list policy. The white-list policy will be introduced later in the series. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Cc: Greg Thelen Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index cd0e2413c358..2103f36b3bd3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -773,8 +773,6 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) return true; - if (gfp & __GFP_NOACCOUNT) - return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; return false; -- cgit v1.2.3 From a9bb7e620efdfd29b6d1c238041173e411670996 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:18:12 -0800 Subject: memcg: only account kmem allocations marked as __GFP_ACCOUNT Black-list kmem accounting policy (aka __GFP_NOACCOUNT) turned out to be fragile and difficult to maintain, because there seem to be many more allocations that should not be accounted than those that should be. Besides, false accounting an allocation might result in much worse consequences than not accounting at all, namely increased memory consumption due to pinned dead kmem caches. So this patch switches kmem accounting to the white-policy: now only those kmem allocations that are marked as __GFP_ACCOUNT are accounted to memcg. Currently, no kmem allocations are marked like this. The following patches will mark several kmem allocations that are known to be easily triggered from userspace and therefore should be accounted to memcg. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Cc: Greg Thelen Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2103f36b3bd3..c9d9a8e7b45f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -773,6 +773,8 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) return true; + if (!(gfp & __GFP_ACCOUNT)) + return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; return false; -- cgit v1.2.3 From 230e9fc2860450fbb1f33bdcf9093d92d7d91f5b Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:18:15 -0800 Subject: slab: add SLAB_ACCOUNT flag Currently, if we want to account all objects of a particular kmem cache, we have to pass __GFP_ACCOUNT to each kmem_cache_alloc call, which is inconvenient. This patch introduces SLAB_ACCOUNT flag which if passed to kmem_cache_create will force accounting for every allocation from this cache even if __GFP_ACCOUNT is not passed. This patch does not make any of the existing caches use this flag - it will be done later in the series. Note, a cache with SLAB_ACCOUNT cannot be merged with a cache w/o SLAB_ACCOUNT, because merged caches share the same kmem_cache struct and hence cannot have different sets of SLAB_* flags. Thus using this flag will probably reduce the number of merged slabs even if kmem accounting is not used (only compiled in). Signed-off-by: Vladimir Davydov Suggested-by: Tejun Heo Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Greg Thelen Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c9d9a8e7b45f..5c97265c1c6e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -766,15 +766,13 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } -struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); +struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); void __memcg_kmem_put_cache(struct kmem_cache *cachep); -static inline bool __memcg_kmem_bypass(gfp_t gfp) +static inline bool __memcg_kmem_bypass(void) { if (!memcg_kmem_enabled()) return true; - if (!(gfp & __GFP_ACCOUNT)) - return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; return false; @@ -791,7 +789,9 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) static __always_inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) { - if (__memcg_kmem_bypass(gfp)) + if (__memcg_kmem_bypass()) + return 0; + if (!(gfp & __GFP_ACCOUNT)) return 0; return __memcg_kmem_charge(page, gfp, order); } @@ -810,16 +810,15 @@ static __always_inline void memcg_kmem_uncharge(struct page *page, int order) /** * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation * @cachep: the original global kmem cache - * @gfp: allocation flags. * * All memory allocated from a per-memcg cache is charged to the owner memcg. */ static __always_inline struct kmem_cache * memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { - if (__memcg_kmem_bypass(gfp)) + if (__memcg_kmem_bypass()) return cachep; - return __memcg_kmem_get_cache(cachep); + return __memcg_kmem_get_cache(cachep, gfp); } static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) -- cgit v1.2.3 From 9ee11ba4251dddf1b0e507d184b25b1bd7820773 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:19:41 -0800 Subject: memcg: do not allow to disable tcp accounting after limit is set There are two bits defined for cg_proto->flags - MEMCG_SOCK_ACTIVATED and MEMCG_SOCK_ACTIVE - both are set in tcp_update_limit, but the former is never cleared while the latter can be cleared by unsetting the limit. This allows to disable tcp socket accounting for new sockets after it was enabled by writing -1 to memory.kmem.tcp.limit_in_bytes while still guaranteeing that memcg_socket_limit_enabled static key will be decremented on memcg destruction. This functionality looks dubious, because it is not clear what a use case would be. By enabling tcp accounting a user accepts the price. If they then find the performance degradation unacceptable, they can always restart their workload with tcp accounting disabled. It does not seem there is any need to flip it while the workload is running. Besides, it contradicts to how kmem accounting API works: writing whatever to memory.kmem.limit_in_bytes enables kmem accounting for the cgroup in question, after which it cannot be disabled. Therefore one might expect that writing -1 to memory.kmem.tcp.limit_in_bytes just enables socket accounting w/o limiting it, which might be useful by itself, but it isn't true. Since this API peculiarity is not documented anywhere, I propose to drop it. This will allow to simplify the code by dropping cg_proto->flags. Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5c97265c1c6e..78a1ec2e23fc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -85,22 +85,12 @@ enum mem_cgroup_events_target { MEM_CGROUP_NTARGETS, }; -/* - * Bits in struct cg_proto.flags - */ -enum cg_proto_flags { - /* Currently active and new sockets should be assigned to cgroups */ - MEMCG_SOCK_ACTIVE, - /* It was ever activated; we must disarm static keys on destruction */ - MEMCG_SOCK_ACTIVATED, -}; - struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ struct percpu_counter sockets_allocated; /* Current number of sockets. */ int memory_pressure; + bool active; long sysctl_mem[3]; - unsigned long flags; /* * memcg field is used to find which memcg we belong directly * Each memcg struct can hold more than one cg_proto, so container_of -- cgit v1.2.3 From 7d828602e5ef3297a69392a2d31264e4ab9c8bb7 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:20:56 -0800 Subject: mm: memcontrol: export root_mem_cgroup A later patch will need this symbol in files other than memcontrol.c, so export it now and replace mem_cgroup_root_css at the same time. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: David S. Miller Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 78a1ec2e23fc..d0c724f53691 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -265,7 +265,8 @@ struct mem_cgroup { struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ }; -extern struct cgroup_subsys_state *mem_cgroup_root_css; + +extern struct mem_cgroup *root_mem_cgroup; /** * mem_cgroup_events - count memory events against a cgroup -- cgit v1.2.3 From 3d596f7b907b0281b997cf30c92994a71ad0a1a9 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:05 -0800 Subject: net: tcp_memcontrol: protect all tcp_memcontrol calls by jump-label Move the jump-label from sock_update_memcg() and sock_release_memcg() to the callsite, and so eliminate those function calls when socket accounting is not enabled. This also eliminates the need for dummy functions because the calls will be optimized away if the Kconfig options are not enabled. Signed-off-by: Johannes Weiner Acked-by: David S. Miller Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d0c724f53691..85c437b0cbc0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -694,17 +694,8 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); -#else -static inline void sock_update_memcg(struct sock *sk) -{ -} -static inline void sock_release_memcg(struct sock *sk) -{ -} -#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */ #ifdef CONFIG_MEMCG_KMEM extern struct static_key memcg_kmem_enabled_key; -- cgit v1.2.3 From af95d7df4059cfeab7e7c244f3564214aada7dad Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:08 -0800 Subject: net: tcp_memcontrol: remove dead per-memcg count of allocated sockets The number of allocated sockets is used for calculations in the soft limit phase, where packets are accepted but the socket is under memory pressure. Since there is no soft limit phase in tcp_memcontrol, and memory pressure is only entered when packets are already dropped, this is actually dead code. Remove it. As this is the last user of parent_cg_proto(), remove that too. Signed-off-by: Johannes Weiner Acked-by: David S. Miller Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 85c437b0cbc0..15acc04ebdd3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -87,7 +87,6 @@ enum mem_cgroup_events_target { struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ - struct percpu_counter sockets_allocated; /* Current number of sockets. */ int memory_pressure; bool active; long sysctl_mem[3]; -- cgit v1.2.3 From 80f23124f57c77915a7b4201d8dcba38a38b23f0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:11 -0800 Subject: net: tcp_memcontrol: simplify the per-memcg limit access tcp_memcontrol replicates the global sysctl_mem limit array per cgroup, but it only ever sets these entries to the value of the memory_allocated page_counter limit. Use the latter directly. Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 15acc04ebdd3..6c91c1b73951 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -89,7 +89,6 @@ struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ int memory_pressure; bool active; - long sysctl_mem[3]; /* * memcg field is used to find which memcg we belong directly * Each memcg struct can hold more than one cg_proto, so container_of -- cgit v1.2.3 From e805605c721021879a1469bdae45c6f80bc985f4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:14 -0800 Subject: net: tcp_memcontrol: sanitize tcp memory accounting callbacks There won't be a tcp control soft limit, so integrating the memcg code into the global skmem limiting scheme complicates things unnecessarily. Replace this with simple and clear charge and uncharge calls--hidden behind a jump label--to account skb memory. Note that this is not purely aesthetic: as a result of shoehorning the per-memcg code into the same memory accounting functions that handle the global level, the old code would compare the per-memcg consumption against the smaller of the per-memcg limit and the global limit. This allowed the total consumption of multiple sockets to exceed the global limit, as long as the individual sockets stayed within bounds. After this change, the code will always compare the per-memcg consumption to the per-memcg limit, and the global consumption to the global limit, and thus close this loophole. Without a soft limit, the per-memcg memory pressure state in sockets is generally questionable. However, we did it until now, so we continue to enter it when the hard limit is hit, and packets are dropped, to let other sockets in the cgroup know that they shouldn't grow their transmit windows, either. However, keep it simple in the new callback model and leave memory pressure lazily when the next packet is accepted (as opposed to doing it synchroneously when packets are processed). When packets are dropped, network performance will already be in the toilet, so that should be a reasonable trade-off. As described above, consumption is now checked on the per-memcg level and the global level separately. Likewise, memory pressure states are maintained on both the per-memcg level and the global level, and a socket is considered under pressure when either level asserts as much. Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6c91c1b73951..e4e77bd1dd39 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -660,12 +660,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) } #endif /* CONFIG_MEMCG */ -enum { - UNDER_LIMIT, - SOFT_LIMIT, - OVER_LIMIT, -}; - #ifdef CONFIG_CGROUP_WRITEBACK struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); @@ -694,6 +688,19 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, struct sock; void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); +bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages); +void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages); +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) +static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto) +{ + return proto->memory_pressure; +} +#else +static inline bool mem_cgroup_under_pressure(struct cg_proto *proto) +{ + return false; +} +#endif #ifdef CONFIG_MEMCG_KMEM extern struct static_key memcg_kmem_enabled_key; -- cgit v1.2.3 From baac50bbc3cdfd184ebf586b1704edbfcee866df Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:17 -0800 Subject: net: tcp_memcontrol: simplify linkage between socket and page counter There won't be any separate counters for socket memory consumed by protocols other than TCP in the future. Remove the indirection and link sockets directly to their owning memory cgroup. Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4e77bd1dd39..7c085e4636ba 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -89,16 +89,6 @@ struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ int memory_pressure; bool active; - /* - * memcg field is used to find which memcg we belong directly - * Each memcg struct can hold more than one cg_proto, so container_of - * won't really cut. - * - * The elegant solution would be having an inverse function to - * proto_cgroup in struct proto, but that means polluting the structure - * for everybody, instead of just for memcg users. - */ - struct mem_cgroup *memcg; }; #ifdef CONFIG_MEMCG @@ -688,15 +678,15 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, struct sock; void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); -bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages); -void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages); +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); +void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) -static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto) +static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { - return proto->memory_pressure; + return memcg->tcp_mem.memory_pressure; } #else -static inline bool mem_cgroup_under_pressure(struct cg_proto *proto) +static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; } -- cgit v1.2.3 From 80e95fe0fdcde2812c341ad4209d62dc1a7af53b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:20 -0800 Subject: mm: memcontrol: generalize the socket accounting jump label The unified hierarchy memory controller is going to use this jump label as well to control the networking callbacks. Move it to the memory controller code and give it a more generic name. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7c085e4636ba..03090e8e7fff 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -681,11 +681,14 @@ void sock_release_memcg(struct sock *sk); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) +extern struct static_key memcg_sockets_enabled_key; +#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return memcg->tcp_mem.memory_pressure; } #else +#define mem_cgroup_sockets_enabled 0 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; -- cgit v1.2.3 From f7e1cb6ec51b041335b5ad4dd7aefb37a56d79a6 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:29 -0800 Subject: mm: memcontrol: account socket memory in unified hierarchy memory controller Socket memory can be a significant share of overall memory consumed by common workloads. In order to provide reasonable resource isolation in the unified hierarchy, this type of memory needs to be included in the tracking/accounting of a cgroup under active memory resource control. Overhead is only incurred when a non-root control group is created AND the memory controller is instructed to track and account the memory footprint of that group. cgroup.memory=nosocket can be specified on the boot commandline to override any runtime configuration and forcibly exclude socket memory from active memory resource control. Signed-off-by: Johannes Weiner Acked-by: David S. Miller Reviewed-by: Vladimir Davydov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 03090e8e7fff..a355f61a2ed3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -170,6 +170,9 @@ struct mem_cgroup { unsigned long low; unsigned long high; + /* Range enforcement for interrupt charges */ + struct work_struct high_work; + unsigned long soft_limit; /* vmpressure notifications */ @@ -680,12 +683,16 @@ void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) +#if defined(CONFIG_MEMCG) && defined(CONFIG_INET) extern struct static_key memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { +#ifdef CONFIG_MEMCG_KMEM return memcg->tcp_mem.memory_pressure; +#else + return false; +#endif } #else #define mem_cgroup_sockets_enabled 0 -- cgit v1.2.3 From 8e8ae645249b85c8ed6c178557f8db8613a6bcc7 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:32 -0800 Subject: mm: memcontrol: hook up vmpressure to socket pressure Let the networking stack know when a memcg is under reclaim pressure so that it can clamp its transmit windows accordingly. Whenever the reclaim efficiency of a cgroup's LRU lists drops low enough for a MEDIUM or HIGH vmpressure event to occur, assert a pressure state in the socket and tcp memory code that tells it to curb consumption growth from sockets associated with said control group. Traditionally, vmpressure reports for the entire subtree of a memcg under pressure, which drops useful information on the individual groups reclaimed. However, it's too late to change the userinterface, so add a second reporting mode that reports on the level of reclaim instead of at the level of pressure, and use that report for sockets. vmpressure events are naturally edge triggered, so for hysteresis assert socket pressure for a second to allow for subsequent vmpressure events to occur before letting the socket code return to normal. This will likely need finetuning for a wider variety of workloads, but for now stick to the vmpressure presets and keep hysteresis simple. Signed-off-by: Johannes Weiner Acked-by: David S. Miller Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a355f61a2ed3..c5a51039df57 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -249,6 +249,10 @@ struct mem_cgroup { struct wb_domain cgwb_domain; #endif +#ifdef CONFIG_INET + unsigned long socket_pressure; +#endif + /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; @@ -290,18 +294,34 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); -struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ return css ? container_of(css, struct mem_cgroup, css) : NULL; } +#define mem_cgroup_from_counter(counter, member) \ + container_of(counter, struct mem_cgroup, member) + struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *, struct mem_cgroup_reclaim_cookie *); void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); +/** + * parent_mem_cgroup - find the accounting parent of a memcg + * @memcg: memcg whose parent to find + * + * Returns the parent memcg, or NULL if this is the root or the memory + * controller is in legacy no-hierarchy mode. + */ +static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) +{ + if (!memcg->memory.parent) + return NULL; + return mem_cgroup_from_counter(memcg->memory.parent, memory); +} + static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root) { @@ -689,10 +709,14 @@ extern struct static_key memcg_sockets_enabled_key; static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { #ifdef CONFIG_MEMCG_KMEM - return memcg->tcp_mem.memory_pressure; -#else - return false; + if (memcg->tcp_mem.memory_pressure) + return true; #endif + do { + if (time_before(jiffies, memcg->socket_pressure)) + return true; + } while ((memcg = parent_mem_cgroup(memcg))); + return false; } #else #define mem_cgroup_sockets_enabled 0 -- cgit v1.2.3 From ef12947c9c5a96af549c49f10e5503f0612a397c Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:34 -0800 Subject: mm: memcontrol: switch to the updated jump-label API According to the direct use of struct static_key is deprecated. Update the socket and slab accounting code accordingly. Signed-off-by: Johannes Weiner Acked-by: David S. Miller Reported-by: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c5a51039df57..2292468f2a30 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -704,8 +704,8 @@ void sock_release_memcg(struct sock *sk); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #if defined(CONFIG_MEMCG) && defined(CONFIG_INET) -extern struct static_key memcg_sockets_enabled_key; -#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key) +extern struct static_key_false memcg_sockets_enabled_key; +#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { #ifdef CONFIG_MEMCG_KMEM @@ -727,7 +727,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) #endif #ifdef CONFIG_MEMCG_KMEM -extern struct static_key memcg_kmem_enabled_key; +extern struct static_key_false memcg_kmem_enabled_key; extern int memcg_nr_cache_ids; void memcg_get_cache_ids(void); @@ -743,7 +743,7 @@ void memcg_put_cache_ids(void); static inline bool memcg_kmem_enabled(void) { - return static_key_false(&memcg_kmem_enabled_key); + return static_branch_unlikely(&memcg_kmem_enabled_key); } static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) -- cgit v1.2.3