diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/filemap.c | 15 | ||||
-rw-r--r-- | mm/memcontrol.c | 419 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/oom_kill.c | 75 | ||||
-rw-r--r-- | mm/page_alloc.c | 22 | ||||
-rw-r--r-- | mm/rmap.c | 4 | ||||
-rw-r--r-- | mm/shmem.c | 71 | ||||
-rw-r--r-- | mm/shmem_acl.c | 171 | ||||
-rw-r--r-- | mm/truncate.c | 6 |
10 files changed, 433 insertions, 353 deletions
diff --git a/mm/Makefile b/mm/Makefile index 82131d0f8d85..7a68d2ab5560 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o -obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_KSM) += ksm.o diff --git a/mm/filemap.c b/mm/filemap.c index 8b4d88f9249e..96ac6b0eb6cb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2240,7 +2240,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, size_t count, ssize_t written) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; ssize_t status; struct iov_iter i; @@ -2252,15 +2251,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, *ppos = pos + status; } - /* - * If we get here for O_DIRECT writes then we must have fallen through - * to buffered writes (block instantiation inside i_size). So we sync - * the file data here, to try to honour O_DIRECT expectations. - */ - if (unlikely(file->f_flags & O_DIRECT) && written) - status = filemap_write_and_wait_range(mapping, - pos, pos + written - 1); - return written ? written : status; } EXPORT_SYMBOL(generic_file_buffered_write); @@ -2359,10 +2349,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, * semantics. */ endbyte = pos + written_buffered - written - 1; - err = do_sync_mapping_range(file->f_mapping, pos, endbyte, - SYNC_FILE_RANGE_WAIT_BEFORE| - SYNC_FILE_RANGE_WRITE| - SYNC_FILE_RANGE_WAIT_AFTER); + err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); if (err == 0) { written = written_buffered; invalidate_mapping_pages(mapping, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9eee80d6d490..488b644e0e8e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -38,6 +38,7 @@ #include <linux/vmalloc.h> #include <linux/mm_inline.h> #include <linux/page_cgroup.h> +#include <linux/cpu.h> #include "internal.h" #include <asm/uaccess.h> @@ -54,7 +55,6 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/ #define do_swap_account (0) #endif -static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */ #define SOFTLIMIT_EVENTS_THRESH (1000) /* @@ -66,7 +66,7 @@ enum mem_cgroup_stat_index { */ MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ - MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ @@ -275,6 +275,7 @@ enum charge_type { static void mem_cgroup_get(struct mem_cgroup *mem); static void mem_cgroup_put(struct mem_cgroup *mem); static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); +static void drain_all_stock_async(void); static struct mem_cgroup_per_zone * mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) @@ -763,7 +764,13 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) task_unlock(task); if (!curr) return 0; - if (curr->use_hierarchy) + /* + * We should check use_hierarchy of "mem" not "curr". Because checking + * use_hierarchy of "curr" here make this function true if hierarchy is + * enabled in "curr" and "curr" is a child of "mem" in *cgroup* + * hierarchy(even if use_hierarchy is disabled in "mem"). + */ + if (mem->use_hierarchy) ret = css_is_ancestor(&curr->css, &mem->css); else ret = (curr == mem); @@ -1012,7 +1019,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) static char memcg_name[PATH_MAX]; int ret; - if (!memcg) + if (!memcg || !p) return; @@ -1142,6 +1149,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, victim = mem_cgroup_select_victim(root_mem); if (victim == root_mem) { loop++; + if (loop >= 1) + drain_all_stock_async(); if (loop >= 2) { /* * If we have not been able to reclaim @@ -1228,7 +1237,7 @@ static void record_last_oom(struct mem_cgroup *mem) * Currently used to update mapped file statistics, but the routine can be * generalized to update other statistics as well. */ -void mem_cgroup_update_mapped_file_stat(struct page *page, int val) +void mem_cgroup_update_file_mapped(struct page *page, int val) { struct mem_cgroup *mem; struct mem_cgroup_stat *stat; @@ -1236,9 +1245,6 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val) int cpu; struct page_cgroup *pc; - if (!page_is_file_cache(page)) - return; - pc = lookup_page_cgroup(page); if (unlikely(!pc)) return; @@ -1258,12 +1264,139 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val) stat = &mem->stat; cpustat = &stat->cpustat[cpu]; - __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val); + __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, val); done: unlock_page_cgroup(pc); } /* + * size of first charge trial. "32" comes from vmscan.c's magic value. + * TODO: maybe necessary to use big numbers in big irons. + */ +#define CHARGE_SIZE (32 * PAGE_SIZE) +struct memcg_stock_pcp { + struct mem_cgroup *cached; /* this never be root cgroup */ + int charge; + struct work_struct work; +}; +static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); +static atomic_t memcg_drain_count; + +/* + * Try to consume stocked charge on this cpu. If success, PAGE_SIZE is consumed + * from local stock and true is returned. If the stock is 0 or charges from a + * cgroup which is not current target, returns false. This stock will be + * refilled. + */ +static bool consume_stock(struct mem_cgroup *mem) +{ + struct memcg_stock_pcp *stock; + bool ret = true; + + stock = &get_cpu_var(memcg_stock); + if (mem == stock->cached && stock->charge) + stock->charge -= PAGE_SIZE; + else /* need to call res_counter_charge */ + ret = false; + put_cpu_var(memcg_stock); + return ret; +} + +/* + * Returns stocks cached in percpu to res_counter and reset cached information. + */ +static void drain_stock(struct memcg_stock_pcp *stock) +{ + struct mem_cgroup *old = stock->cached; + + if (stock->charge) { + res_counter_uncharge(&old->res, stock->charge); + if (do_swap_account) + res_counter_uncharge(&old->memsw, stock->charge); + } + stock->cached = NULL; + stock->charge = 0; +} + +/* + * This must be called under preempt disabled or must be called by + * a thread which is pinned to local cpu. + */ +static void drain_local_stock(struct work_struct *dummy) +{ + struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock); + drain_stock(stock); +} + +/* + * Cache charges(val) which is from res_counter, to local per_cpu area. + * This will be consumed by consumt_stock() function, later. + */ +static void refill_stock(struct mem_cgroup *mem, int val) +{ + struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); + + if (stock->cached != mem) { /* reset if necessary */ + drain_stock(stock); + stock->cached = mem; + } + stock->charge += val; + put_cpu_var(memcg_stock); +} + +/* + * Tries to drain stocked charges in other cpus. This function is asynchronous + * and just put a work per cpu for draining localy on each cpu. Caller can + * expects some charges will be back to res_counter later but cannot wait for + * it. + */ +static void drain_all_stock_async(void) +{ + int cpu; + /* This function is for scheduling "drain" in asynchronous way. + * The result of "drain" is not directly handled by callers. Then, + * if someone is calling drain, we don't have to call drain more. + * Anyway, WORK_STRUCT_PENDING check in queue_work_on() will catch if + * there is a race. We just do loose check here. + */ + if (atomic_read(&memcg_drain_count)) + return; + /* Notify other cpus that system-wide "drain" is running */ + atomic_inc(&memcg_drain_count); + get_online_cpus(); + for_each_online_cpu(cpu) { + struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); + schedule_work_on(cpu, &stock->work); + } + put_online_cpus(); + atomic_dec(&memcg_drain_count); + /* We don't wait for flush_work */ +} + +/* This is a synchronous drain interface. */ +static void drain_all_stock_sync(void) +{ + /* called when force_empty is called */ + atomic_inc(&memcg_drain_count); + schedule_on_each_cpu(drain_local_stock); + atomic_dec(&memcg_drain_count); +} + +static int __cpuinit memcg_stock_cpu_callback(struct notifier_block *nb, + unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct memcg_stock_pcp *stock; + + if (action != CPU_DEAD) + return NOTIFY_OK; + stock = &per_cpu(memcg_stock, cpu); + drain_stock(stock); + return NOTIFY_OK; +} + +/* * Unlike exported interface, "oom" parameter is added. if oom==true, * oom-killer can be invoked. */ @@ -1274,6 +1407,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, struct mem_cgroup *mem, *mem_over_limit; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; struct res_counter *fail_res; + int csize = CHARGE_SIZE; if (unlikely(test_thread_flag(TIF_MEMDIE))) { /* Don't account this! */ @@ -1298,23 +1432,25 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, return 0; VM_BUG_ON(css_is_removed(&mem->css)); + if (mem_cgroup_is_root(mem)) + goto done; while (1) { int ret = 0; unsigned long flags = 0; - if (mem_cgroup_is_root(mem)) - goto done; - ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); + if (consume_stock(mem)) + goto charged; + + ret = res_counter_charge(&mem->res, csize, &fail_res); if (likely(!ret)) { if (!do_swap_account) break; - ret = res_counter_charge(&mem->memsw, PAGE_SIZE, - &fail_res); + ret = res_counter_charge(&mem->memsw, csize, &fail_res); if (likely(!ret)) break; /* mem+swap counter fails */ - res_counter_uncharge(&mem->res, PAGE_SIZE); + res_counter_uncharge(&mem->res, csize); flags |= MEM_CGROUP_RECLAIM_NOSWAP; mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); @@ -1323,6 +1459,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); + /* reduce request size and retry */ + if (csize > PAGE_SIZE) { + csize = PAGE_SIZE; + continue; + } if (!(gfp_mask & __GFP_WAIT)) goto nomem; @@ -1344,14 +1485,15 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (!nr_retries--) { if (oom) { - mutex_lock(&memcg_tasklist); mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); - mutex_unlock(&memcg_tasklist); record_last_oom(mem_over_limit); } goto nomem; } } + if (csize > PAGE_SIZE) + refill_stock(mem, csize - PAGE_SIZE); +charged: /* * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. * if they exceeds softlimit. @@ -1366,6 +1508,21 @@ nomem: } /* + * Somemtimes we have to undo a charge we got by try_charge(). + * This function is for that and do uncharge, put css's refcnt. + * gotten by try_charge(). + */ +static void mem_cgroup_cancel_charge(struct mem_cgroup *mem) +{ + if (!mem_cgroup_is_root(mem)) { + res_counter_uncharge(&mem->res, PAGE_SIZE); + if (do_swap_account) + res_counter_uncharge(&mem->memsw, PAGE_SIZE); + } + css_put(&mem->css); +} + +/* * A helper function to get mem_cgroup from ID. must be called under * rcu_read_lock(). The caller must check css_is_removed() or some if * it's concern. (dropping refcnt from swap can be called against removed @@ -1428,12 +1585,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, lock_page_cgroup(pc); if (unlikely(PageCgroupUsed(pc))) { unlock_page_cgroup(pc); - if (!mem_cgroup_is_root(mem)) { - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); - } - css_put(&mem->css); + mem_cgroup_cancel_charge(mem); return; } @@ -1466,27 +1618,22 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, } /** - * mem_cgroup_move_account - move account of the page + * __mem_cgroup_move_account - move account of the page * @pc: page_cgroup of the page. * @from: mem_cgroup which the page is moved from. * @to: mem_cgroup which the page is moved to. @from != @to. * * The caller must confirm following. * - page is not on LRU (isolate_page() is useful.) - * - * returns 0 at success, - * returns -EBUSY when lock is busy or "pc" is unstable. + * - the pc is locked, used, and ->mem_cgroup points to @from. * * This function does "uncharge" from old cgroup but doesn't do "charge" to * new cgroup. It should be done by a caller. */ -static int mem_cgroup_move_account(struct page_cgroup *pc, +static void __mem_cgroup_move_account(struct page_cgroup *pc, struct mem_cgroup *from, struct mem_cgroup *to) { - struct mem_cgroup_per_zone *from_mz, *to_mz; - int nid, zid; - int ret = -EBUSY; struct page *page; int cpu; struct mem_cgroup_stat *stat; @@ -1494,38 +1641,27 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, VM_BUG_ON(from == to); VM_BUG_ON(PageLRU(pc->page)); - - nid = page_cgroup_nid(pc); - zid = page_cgroup_zid(pc); - from_mz = mem_cgroup_zoneinfo(from, nid, zid); - to_mz = mem_cgroup_zoneinfo(to, nid, zid); - - if (!trylock_page_cgroup(pc)) - return ret; - - if (!PageCgroupUsed(pc)) - goto out; - - if (pc->mem_cgroup != from) - goto out; + VM_BUG_ON(!PageCgroupLocked(pc)); + VM_BUG_ON(!PageCgroupUsed(pc)); + VM_BUG_ON(pc->mem_cgroup != from); if (!mem_cgroup_is_root(from)) res_counter_uncharge(&from->res, PAGE_SIZE); mem_cgroup_charge_statistics(from, pc, false); page = pc->page; - if (page_is_file_cache(page) && page_mapped(page)) { + if (page_mapped(page) && !PageAnon(page)) { cpu = smp_processor_id(); /* Update mapped_file data for mem_cgroup "from" */ stat = &from->stat; cpustat = &stat->cpustat[cpu]; - __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, + __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, -1); /* Update mapped_file data for mem_cgroup "to" */ stat = &to->stat; cpustat = &stat->cpustat[cpu]; - __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, + __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, 1); } @@ -1536,15 +1672,28 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, css_get(&to->css); pc->mem_cgroup = to; mem_cgroup_charge_statistics(to, pc, true); - ret = 0; -out: - unlock_page_cgroup(pc); /* * We charges against "to" which may not have any tasks. Then, "to" * can be under rmdir(). But in current implementation, caller of * this function is just force_empty() and it's garanteed that * "to" is never removed. So, we don't check rmdir status here. */ +} + +/* + * check whether the @pc is valid for moving account and call + * __mem_cgroup_move_account() + */ +static int mem_cgroup_move_account(struct page_cgroup *pc, + struct mem_cgroup *from, struct mem_cgroup *to) +{ + int ret = -EINVAL; + lock_page_cgroup(pc); + if (PageCgroupUsed(pc) && pc->mem_cgroup == from) { + __mem_cgroup_move_account(pc, from, to); + ret = 0; + } + unlock_page_cgroup(pc); return ret; } @@ -1566,45 +1715,27 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, if (!pcg) return -EINVAL; + ret = -EBUSY; + if (!get_page_unless_zero(page)) + goto out; + if (isolate_lru_page(page)) + goto put; parent = mem_cgroup_from_cont(pcg); - - ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page); if (ret || !parent) - return ret; - - if (!get_page_unless_zero(page)) { - ret = -EBUSY; - goto uncharge; - } - - ret = isolate_lru_page(page); - - if (ret) - goto cancel; + goto put_back; ret = mem_cgroup_move_account(pc, child, parent); - + if (!ret) + css_put(&parent->css); /* drop extra refcnt by try_charge() */ + else + mem_cgroup_cancel_charge(parent); /* does css_put */ +put_back: putback_lru_page(page); - if (!ret) { - put_page(page); - /* drop extra refcnt by try_charge() */ - css_put(&parent->css); - return 0; - } - -cancel: +put: put_page(page); -uncharge: - /* drop extra refcnt by try_charge() */ - css_put(&parent->css); - /* uncharge if move fails */ - if (!mem_cgroup_is_root(parent)) { - res_counter_uncharge(&parent->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&parent->memsw, PAGE_SIZE); - } +out: return ret; } @@ -1821,14 +1952,53 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) return; if (!mem) return; - if (!mem_cgroup_is_root(mem)) { - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); - } - css_put(&mem->css); + mem_cgroup_cancel_charge(mem); } +static void +__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype) +{ + struct memcg_batch_info *batch = NULL; + bool uncharge_memsw = true; + /* If swapout, usage of swap doesn't decrease */ + if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) + uncharge_memsw = false; + /* + * do_batch > 0 when unmapping pages or inode invalidate/truncate. + * In those cases, all pages freed continously can be expected to be in + * the same cgroup and we have chance to coalesce uncharges. + * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) + * because we want to do uncharge as soon as possible. + */ + if (!current->memcg_batch.do_batch || test_thread_flag(TIF_MEMDIE)) + goto direct_uncharge; + + batch = ¤t->memcg_batch; + /* + * In usual, we do css_get() when we remember memcg pointer. + * But in this case, we keep res->usage until end of a series of + * uncharges. Then, it's ok to ignore memcg's refcnt. + */ + if (!batch->memcg) + batch->memcg = mem; + /* + * In typical case, batch->memcg == mem. This means we can + * merge a series of uncharges to an uncharge of res_counter. + * If not, we uncharge res_counter ony by one. + */ + if (batch->memcg != mem) + goto direct_uncharge; + /* remember freed charge and uncharge it later */ + batch->bytes += PAGE_SIZE; + if (uncharge_memsw) + batch->memsw_bytes += PAGE_SIZE; + return; +direct_uncharge: + res_counter_uncharge(&mem->res, PAGE_SIZE); + if (uncharge_memsw) + res_counter_uncharge(&mem->memsw, PAGE_SIZE); + return; +} /* * uncharge if !page_mapped(page) @@ -1877,12 +2047,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) break; } - if (!mem_cgroup_is_root(mem)) { - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account && - (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); - } + if (!mem_cgroup_is_root(mem)) + __do_uncharge(mem, ctype); if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) mem_cgroup_swap_statistics(mem, true); mem_cgroup_charge_statistics(mem, pc, false); @@ -1928,6 +2094,50 @@ void mem_cgroup_uncharge_cache_page(struct page *page) __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); } +/* + * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate. + * In that cases, pages are freed continuously and we can expect pages + * are in the same memcg. All these calls itself limits the number of + * pages freed at once, then uncharge_start/end() is called properly. + * This may be called prural(2) times in a context, + */ + +void mem_cgroup_uncharge_start(void) +{ + current->memcg_batch.do_batch++; + /* We can do nest. */ + if (current->memcg_batch.do_batch == 1) { + current->memcg_batch.memcg = NULL; + current->memcg_batch.bytes = 0; + current->memcg_batch.memsw_bytes = 0; + } +} + +void mem_cgroup_uncharge_end(void) +{ + struct memcg_batch_info *batch = ¤t->memcg_batch; + + if (!batch->do_batch) + return; + + batch->do_batch--; + if (batch->do_batch) /* If stacked, do nothing. */ + return; + + if (!batch->memcg) + return; + /* + * This "batch->memcg" is valid without any css_get/put etc... + * bacause we hide charges behind us. + */ + if (batch->bytes) + res_counter_uncharge(&batch->memcg->res, batch->bytes); + if (batch->memsw_bytes) + res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes); + /* forget this pointer (for sanity check) */ + batch->memcg = NULL; +} + #ifdef CONFIG_SWAP /* * called after __delete_from_swap_cache() and drop "page" account. @@ -2103,7 +2313,6 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val) { int retry_count; - int progress; u64 memswlimit; int ret = 0; int children = mem_cgroup_count_children(memcg); @@ -2147,8 +2356,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, if (!ret) break; - progress = mem_cgroup_hierarchical_reclaim(memcg, NULL, - GFP_KERNEL, + mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, MEM_CGROUP_RECLAIM_SHRINK); curusage = res_counter_read_u64(&memcg->res, RES_USAGE); /* Usage is reduced ? */ @@ -2387,6 +2595,7 @@ move_account: goto out; /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); + drain_all_stock_sync(); ret = 0; for_each_node_state(node, N_HIGH_MEMORY) { for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { @@ -2544,6 +2753,7 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) val += idx_val; mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_SWAPOUT, &idx_val); + val += idx_val; val <<= PAGE_SHIFT; } else val = res_counter_read_u64(&mem->memsw, name); @@ -2663,7 +2873,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) enum { MCS_CACHE, MCS_RSS, - MCS_MAPPED_FILE, + MCS_FILE_MAPPED, MCS_PGPGIN, MCS_PGPGOUT, MCS_SWAP, @@ -2707,8 +2917,8 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) s->stat[MCS_CACHE] += val * PAGE_SIZE; val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); s->stat[MCS_RSS] += val * PAGE_SIZE; - val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_MAPPED_FILE); - s->stat[MCS_MAPPED_FILE] += val * PAGE_SIZE; + val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_FILE_MAPPED); + s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT); s->stat[MCS_PGPGIN] += val; val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); @@ -3100,11 +3310,18 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) /* root ? */ if (cont->parent == NULL) { + int cpu; enable_swap_cgroup(); parent = NULL; root_mem_cgroup = mem; if (mem_cgroup_soft_limit_tree_init()) goto free_out; + for_each_possible_cpu(cpu) { + struct memcg_stock_pcp *stock = + &per_cpu(memcg_stock, cpu); + INIT_WORK(&stock->work, drain_local_stock); + } + hotcpu_notifier(memcg_stock_cpu_callback, 0); } else { parent = mem_cgroup_from_cont(cont->parent); @@ -3173,12 +3390,10 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct task_struct *p, bool threadgroup) { - mutex_lock(&memcg_tasklist); /* * FIXME: It's better to move charges of this process from old * memcg to new memcg. But it's just on TODO-List now. */ - mutex_unlock(&memcg_tasklist); } struct cgroup_subsys mem_cgroup_subsys = { diff --git a/mm/memory.c b/mm/memory.c index db09106ed44b..09e4b1be7b67 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -956,6 +956,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, details = NULL; BUG_ON(addr >= end); + mem_cgroup_uncharge_start(); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { @@ -968,6 +969,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, zap_work, details); } while (pgd++, addr = next, (addr != end && *zap_work > 0)); tlb_end_vma(tlb, vma); + mem_cgroup_uncharge_end(); return addr; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 492c98624fc1..f52481b1c1e5 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -196,27 +196,46 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) /* * Determine the type of allocation constraint. */ -static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, - gfp_t gfp_mask) -{ #ifdef CONFIG_NUMA +static enum oom_constraint constrained_alloc(struct zonelist *zonelist, + gfp_t gfp_mask, nodemask_t *nodemask) +{ struct zone *zone; struct zoneref *z; enum zone_type high_zoneidx = gfp_zone(gfp_mask); - nodemask_t nodes = node_states[N_HIGH_MEMORY]; - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) - if (cpuset_zone_allowed_softwall(zone, gfp_mask)) - node_clear(zone_to_nid(zone), nodes); - else - return CONSTRAINT_CPUSET; + /* + * Reach here only when __GFP_NOFAIL is used. So, we should avoid + * to kill current.We have to random task kill in this case. + * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. + */ + if (gfp_mask & __GFP_THISNODE) + return CONSTRAINT_NONE; - if (!nodes_empty(nodes)) + /* + * The nodemask here is a nodemask passed to alloc_pages(). Now, + * cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy + * feature. mempolicy is an only user of nodemask here. + * check mempolicy's nodemask contains all N_HIGH_MEMORY + */ + if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) return CONSTRAINT_MEMORY_POLICY; -#endif + + /* Check this allocation failure is caused by cpuset's wall function */ + for_each_zone_zonelist_nodemask(zone, z, zonelist, + high_zoneidx, nodemask) + if (!cpuset_zone_allowed_softwall(zone, gfp_mask)) + return CONSTRAINT_CPUSET; return CONSTRAINT_NONE; } +#else +static enum oom_constraint constrained_alloc(struct zonelist *zonelist, + gfp_t gfp_mask, nodemask_t *nodemask) +{ + return CONSTRAINT_NONE; +} +#endif /* * Simple selection loop. We chose the process with the highest @@ -337,7 +356,8 @@ static void dump_tasks(const struct mem_cgroup *mem) } while_each_thread(g, p); } -static void dump_header(gfp_t gfp_mask, int order, struct mem_cgroup *mem) +static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, + struct mem_cgroup *mem) { pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " "oom_adj=%d\n", @@ -346,12 +366,14 @@ static void dump_header(gfp_t gfp_mask, int order, struct mem_cgroup *mem) cpuset_print_task_mems_allowed(current); task_unlock(current); dump_stack(); - mem_cgroup_print_oom_info(mem, current); + mem_cgroup_print_oom_info(mem, p); show_mem(); if (sysctl_oom_dump_tasks) dump_tasks(mem); } +#define K(x) ((x) << (PAGE_SHIFT-10)) + /* * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO @@ -365,15 +387,23 @@ static void __oom_kill_task(struct task_struct *p, int verbose) return; } + task_lock(p); if (!p->mm) { WARN_ON(1); - printk(KERN_WARNING "tried to kill an mm-less task!\n"); + printk(KERN_WARNING "tried to kill an mm-less task %d (%s)!\n", + task_pid_nr(p), p->comm); + task_unlock(p); return; } if (verbose) - printk(KERN_ERR "Killed process %d (%s)\n", - task_pid_nr(p), p->comm); + printk(KERN_ERR "Killed process %d (%s) " + "vsz:%lukB, anon-rss:%lukB, file-rss:%lukB\n", + task_pid_nr(p), p->comm, + K(p->mm->total_vm), + K(get_mm_counter(p->mm, anon_rss)), + K(get_mm_counter(p->mm, file_rss))); + task_unlock(p); /* * We give our sacrificial lamb high priority and access to @@ -411,7 +441,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, struct task_struct *c; if (printk_ratelimit()) - dump_header(gfp_mask, order, mem); + dump_header(p, gfp_mask, order, mem); /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -547,7 +577,7 @@ retry: /* Found nothing?!?! Either we hang forever, or we panic. */ if (!p) { read_unlock(&tasklist_lock); - dump_header(gfp_mask, order, NULL); + dump_header(NULL, gfp_mask, order, NULL); panic("Out of memory and no killable processes...\n"); } @@ -603,7 +633,8 @@ rest_and_return: * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) +void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, + int order, nodemask_t *nodemask) { unsigned long freed = 0; enum oom_constraint constraint; @@ -614,7 +645,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) return; if (sysctl_panic_on_oom == 2) { - dump_header(gfp_mask, order, NULL); + dump_header(NULL, gfp_mask, order, NULL); panic("out of memory. Compulsory panic_on_oom is selected.\n"); } @@ -622,7 +653,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) * Check if there were limitations on the allocation (only relevant for * NUMA) that may require different handling. */ - constraint = constrained_alloc(zonelist, gfp_mask); + constraint = constrained_alloc(zonelist, gfp_mask, nodemask); read_lock(&tasklist_lock); switch (constraint) { @@ -633,7 +664,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) case CONSTRAINT_NONE: if (sysctl_panic_on_oom) { - dump_header(gfp_mask, order, NULL); + dump_header(NULL, gfp_mask, order, NULL); panic("out of memory. panic_on_oom is selected\n"); } /* Fall-through */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6867b4d391fd..74af449b1f1d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1654,12 +1654,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, if (page) goto out; - /* The OOM killer will not help higher order allocs */ - if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL)) - goto out; - + if (!(gfp_mask & __GFP_NOFAIL)) { + /* The OOM killer will not help higher order allocs */ + if (order > PAGE_ALLOC_COSTLY_ORDER) + goto out; + /* + * GFP_THISNODE contains __GFP_NORETRY and we never hit this. + * Sanity check for bare calls of __GFP_THISNODE, not real OOM. + * The caller should handle page allocation failure by itself if + * it specifies __GFP_THISNODE. + * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER. + */ + if (gfp_mask & __GFP_THISNODE) + goto out; + } /* Exhausted what can be done so it's blamo time */ - out_of_memory(zonelist, gfp_mask, order); + out_of_memory(zonelist, gfp_mask, order, nodemask); out: clear_zonelist_oom(zonelist, gfp_mask); @@ -3123,7 +3133,7 @@ static int __cpuinit process_zones(int cpu) if (percpu_pagelist_fraction) setup_pagelist_highmark(zone_pcp(zone, cpu), - (zone->present_pages / percpu_pagelist_fraction)); + (zone->present_pages / percpu_pagelist_fraction)); } return 0; diff --git a/mm/rmap.c b/mm/rmap.c index 98135dbd25ba..278cd277bdec 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -721,7 +721,7 @@ void page_add_file_rmap(struct page *page) { if (atomic_inc_and_test(&page->_mapcount)) { __inc_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_update_mapped_file_stat(page, 1); + mem_cgroup_update_file_mapped(page, 1); } } @@ -753,8 +753,8 @@ void page_remove_rmap(struct page *page) __dec_zone_page_state(page, NR_ANON_PAGES); } else { __dec_zone_page_state(page, NR_FILE_MAPPED); + mem_cgroup_update_file_mapped(page, -1); } - mem_cgroup_update_mapped_file_stat(page, -1); /* * It would be tidy to reset the PageAnon mapping here, * but that might overwrite a racing page_add_anon_rmap diff --git a/mm/shmem.c b/mm/shmem.c index 4fb41c83daca..f8485062f3ba 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -29,7 +29,6 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/swap.h> -#include <linux/ima.h> static struct vfsmount *shm_mnt; @@ -42,6 +41,7 @@ static struct vfsmount *shm_mnt; #include <linux/xattr.h> #include <linux/exportfs.h> +#include <linux/posix_acl.h> #include <linux/generic_acl.h> #include <linux/mman.h> #include <linux/string.h> @@ -810,7 +810,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) error = inode_setattr(inode, attr); #ifdef CONFIG_TMPFS_POSIX_ACL if (!error && (attr->ia_valid & ATTR_MODE)) - error = generic_acl_chmod(inode, &shmem_acl_ops); + error = generic_acl_chmod(inode); #endif if (page) page_cache_release(page); @@ -1824,11 +1824,13 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) return error; } } - error = shmem_acl_init(inode, dir); +#ifdef CONFIG_TMPFS_POSIX_ACL + error = generic_acl_init(inode, dir); if (error) { iput(inode); return error; } +#endif if (dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; if (S_ISDIR(mode)) @@ -2043,27 +2045,28 @@ static const struct inode_operations shmem_symlink_inode_operations = { * filesystem level, though. */ -static size_t shmem_xattr_security_list(struct inode *inode, char *list, +static size_t shmem_xattr_security_list(struct dentry *dentry, char *list, size_t list_len, const char *name, - size_t name_len) + size_t name_len, int handler_flags) { - return security_inode_listsecurity(inode, list, list_len); + return security_inode_listsecurity(dentry->d_inode, list, list_len); } -static int shmem_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) +static int shmem_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int handler_flags) { if (strcmp(name, "") == 0) return -EINVAL; - return xattr_getsecurity(inode, name, buffer, size); + return xattr_getsecurity(dentry->d_inode, name, buffer, size); } -static int shmem_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +static int shmem_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int handler_flags) { if (strcmp(name, "") == 0) return -EINVAL; - return security_inode_setsecurity(inode, name, value, size, flags); + return security_inode_setsecurity(dentry->d_inode, name, value, + size, flags); } static struct xattr_handler shmem_xattr_security_handler = { @@ -2074,8 +2077,8 @@ static struct xattr_handler shmem_xattr_security_handler = { }; static struct xattr_handler *shmem_xattr_handlers[] = { - &shmem_xattr_acl_access_handler, - &shmem_xattr_acl_default_handler, + &generic_acl_access_handler, + &generic_acl_default_handler, &shmem_xattr_security_handler, NULL }; @@ -2454,7 +2457,7 @@ static const struct inode_operations shmem_inode_operations = { .getxattr = generic_getxattr, .listxattr = generic_listxattr, .removexattr = generic_removexattr, - .check_acl = shmem_check_acl, + .check_acl = generic_check_acl, #endif }; @@ -2477,7 +2480,7 @@ static const struct inode_operations shmem_dir_inode_operations = { .getxattr = generic_getxattr, .listxattr = generic_listxattr, .removexattr = generic_removexattr, - .check_acl = shmem_check_acl, + .check_acl = generic_check_acl, #endif }; @@ -2488,7 +2491,7 @@ static const struct inode_operations shmem_special_inode_operations = { .getxattr = generic_getxattr, .listxattr = generic_listxattr, .removexattr = generic_removexattr, - .check_acl = shmem_check_acl, + .check_acl = generic_check_acl, #endif }; @@ -2626,7 +2629,8 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags int error; struct file *file; struct inode *inode; - struct dentry *dentry, *root; + struct path path; + struct dentry *root; struct qstr this; if (IS_ERR(shm_mnt)) @@ -2643,38 +2647,35 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags this.len = strlen(name); this.hash = 0; /* will go */ root = shm_mnt->mnt_root; - dentry = d_alloc(root, &this); - if (!dentry) + path.dentry = d_alloc(root, &this); + if (!path.dentry) goto put_memory; - - error = -ENFILE; - file = get_empty_filp(); - if (!file) - goto put_dentry; + path.mnt = mntget(shm_mnt); error = -ENOSPC; inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags); if (!inode) - goto close_file; + goto put_dentry; - d_instantiate(dentry, inode); + d_instantiate(path.dentry, inode); inode->i_size = size; inode->i_nlink = 0; /* It is unlinked */ - init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ, - &shmem_file_operations); - #ifndef CONFIG_MMU error = ramfs_nommu_expand_for_mapping(inode, size); if (error) - goto close_file; + goto put_dentry; #endif - ima_counts_get(file); + + error = -ENFILE; + file = alloc_file(&path, FMODE_WRITE | FMODE_READ, + &shmem_file_operations); + if (!file) + goto put_dentry; + return file; -close_file: - put_filp(file); put_dentry: - dput(dentry); + path_put(&path); put_memory: shmem_unacct_size(flags, size); return ERR_PTR(error); diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c deleted file mode 100644 index df2c87fdae50..000000000000 --- a/mm/shmem_acl.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * mm/shmem_acl.c - * - * (C) 2005 Andreas Gruenbacher <agruen@suse.de> - * - * This file is released under the GPL. - */ - -#include <linux/fs.h> -#include <linux/shmem_fs.h> -#include <linux/xattr.h> -#include <linux/generic_acl.h> - -/** - * shmem_get_acl - generic_acl_operations->getacl() operation - */ -static struct posix_acl * -shmem_get_acl(struct inode *inode, int type) -{ - struct posix_acl *acl = NULL; - - spin_lock(&inode->i_lock); - switch(type) { - case ACL_TYPE_ACCESS: - acl = posix_acl_dup(inode->i_acl); - break; - - case ACL_TYPE_DEFAULT: - acl = posix_acl_dup(inode->i_default_acl); - break; - } - spin_unlock(&inode->i_lock); - - return acl; -} - -/** - * shmem_set_acl - generic_acl_operations->setacl() operation - */ -static void -shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl) -{ - struct posix_acl *free = NULL; - - spin_lock(&inode->i_lock); - switch(type) { - case ACL_TYPE_ACCESS: - free = inode->i_acl; - inode->i_acl = posix_acl_dup(acl); - break; - - case ACL_TYPE_DEFAULT: - free = inode->i_default_acl; - inode->i_default_acl = posix_acl_dup(acl); - break; - } - spin_unlock(&inode->i_lock); - posix_acl_release(free); -} - -struct generic_acl_operations shmem_acl_ops = { - .getacl = shmem_get_acl, - .setacl = shmem_set_acl, -}; - -/** - * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access, - * shmem_xattr_acl_access_handler - plumbing code to implement the - * system.posix_acl_access xattr using the generic acl functions. - */ - -static size_t -shmem_list_acl_access(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) -{ - return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, - list, list_size); -} - -static int -shmem_get_acl_access(struct inode *inode, const char *name, void *buffer, - size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer, - size); -} - -static int -shmem_set_acl_access(struct inode *inode, const char *name, const void *value, - size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value, - size); -} - -struct xattr_handler shmem_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .list = shmem_list_acl_access, - .get = shmem_get_acl_access, - .set = shmem_set_acl_access, -}; - -/** - * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default, - * shmem_xattr_acl_default_handler - plumbing code to implement the - * system.posix_acl_default xattr using the generic acl functions. - */ - -static size_t -shmem_list_acl_default(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) -{ - return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, - list, list_size); -} - -static int -shmem_get_acl_default(struct inode *inode, const char *name, void *buffer, - size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer, - size); -} - -static int -shmem_set_acl_default(struct inode *inode, const char *name, const void *value, - size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value, - size); -} - -struct xattr_handler shmem_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .list = shmem_list_acl_default, - .get = shmem_get_acl_default, - .set = shmem_set_acl_default, -}; - -/** - * shmem_acl_init - Inizialize the acl(s) of a new inode - */ -int -shmem_acl_init(struct inode *inode, struct inode *dir) -{ - return generic_acl_init(inode, dir, &shmem_acl_ops); -} - -/** - * shmem_check_acl - check_acl() callback for generic_permission() - */ -int -shmem_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS); - - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - return -EAGAIN; -} diff --git a/mm/truncate.c b/mm/truncate.c index 2c147a7e5f2c..342deee22684 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -272,6 +272,7 @@ void truncate_inode_pages_range(struct address_space *mapping, pagevec_release(&pvec); break; } + mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -286,6 +287,7 @@ void truncate_inode_pages_range(struct address_space *mapping, unlock_page(page); } pagevec_release(&pvec); + mem_cgroup_uncharge_end(); } } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -327,6 +329,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, pagevec_init(&pvec, 0); while (next <= end && pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; pgoff_t index; @@ -354,6 +357,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, break; } pagevec_release(&pvec); + mem_cgroup_uncharge_end(); cond_resched(); } return ret; @@ -428,6 +432,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, while (next <= end && !wrapped && pagevec_lookup(&pvec, mapping, next, min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; pgoff_t page_index; @@ -477,6 +482,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, unlock_page(page); } pagevec_release(&pvec); + mem_cgroup_uncharge_end(); cond_resched(); } return ret; |