From 390722baa7fc447b0a4f0c3c3f537ed056dbc944 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:38:57 -0800 Subject: mm: don't mark_page_accessed in shmem_fault Following "mm: don't mark_page_accessed in fault path", which now places a mark_page_accessed() in zap_pte_range(), we should remove the mark_page_accessed() from shmem_fault(). Signed-off-by: Hugh Dickins Cc: Nick Piggin Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index f1b0d4871f3a..24f18fdee6e3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1444,7 +1444,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); - mark_page_accessed(vmf->page); return ret | VM_FAULT_LOCKED; } -- cgit v1.2.3 From 853ac43ab194f5051b27a55060215d696dc9480d Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Tue, 6 Jan 2009 14:40:20 -0800 Subject: shmem: unify regular and tiny shmem tiny-shmem shares most of its 130 lines of code with shmem and tends to break when particular bits of shmem get modified. Unifying saves code and makes keeping these two in sync much easier. before: 14367 392 24 14783 39bf mm/shmem.o 396 72 8 476 1dc mm/tiny-shmem.o after: 14367 392 24 14783 39bf mm/shmem.o 412 72 8 492 1ec mm/shmem.o tiny Signed-off-by: Matt Mackall Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 10 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 24f18fdee6e3..5941f9801363 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -14,31 +14,39 @@ * Copyright (c) 2004, Luke Kenneth Casson Leighton * Copyright (c) 2004 Red Hat, Inc., James Morris * + * tiny-shmem: + * Copyright (c) 2004, 2008 Matt Mackall + * * This file is released under the GPL. */ +#include +#include +#include +#include +#include +#include +#include +#include + +static struct vfsmount *shm_mnt; + +#ifdef CONFIG_SHMEM /* * This virtual memory filesystem is heavily based on the ramfs. It * extends ramfs by the ability to use swap and honor resource limits * which makes it a completely usable filesystem. */ -#include -#include -#include #include #include #include -#include #include -#include -#include #include #include #include #include #include -#include #include #include #include @@ -2485,7 +2493,6 @@ static struct file_system_type tmpfs_fs_type = { .get_sb = shmem_get_sb, .kill_sb = kill_litter_super, }; -static struct vfsmount *shm_mnt; static int __init init_tmpfs(void) { @@ -2524,7 +2531,51 @@ out4: shm_mnt = ERR_PTR(error); return error; } -module_init(init_tmpfs) + +#else /* !CONFIG_SHMEM */ + +/* + * tiny-shmem: simple shmemfs and tmpfs using ramfs code + * + * This is intended for small system where the benefits of the full + * shmem code (swap-backed and resource-limited) are outweighed by + * their complexity. On systems without swap this code should be + * effectively equivalent, but much lighter weight. + */ + +#include + +static struct file_system_type tmpfs_fs_type = { + .name = "tmpfs", + .get_sb = ramfs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int __init init_tmpfs(void) +{ + BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); + + shm_mnt = kern_mount(&tmpfs_fs_type); + BUG_ON(IS_ERR(shm_mnt)); + + return 0; +} + +int shmem_unuse(swp_entry_t entry, struct page *page) +{ + return 0; +} + +#define shmem_file_operations ramfs_file_operations +#define shmem_vm_ops generic_file_vm_ops +#define shmem_get_inode ramfs_get_inode +#define shmem_acct_size(a, b) 0 +#define shmem_unacct_size(a, b) do {} while (0) +#define SHMEM_MAX_BYTES LLONG_MAX + +#endif /* CONFIG_SHMEM */ + +/* common code */ /** * shmem_file_setup - get an unlinked file living in tmpfs @@ -2568,12 +2619,20 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) if (!inode) goto close_file; +#ifdef CONFIG_SHMEM SHMEM_I(inode)->flags = flags & VM_ACCOUNT; +#endif d_instantiate(dentry, inode); inode->i_size = size; inode->i_nlink = 0; /* It is unlinked */ init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ, - &shmem_file_operations); + &shmem_file_operations); + +#ifndef CONFIG_MMU + error = ramfs_nommu_expand_for_mapping(inode, size); + if (error) + goto close_file; +#endif return file; close_file: @@ -2605,3 +2664,5 @@ int shmem_zero_setup(struct vm_area_struct *vma) vma->vm_ops = &shmem_vm_ops; return 0; } + +module_init(init_tmpfs) -- cgit v1.2.3 From bced0520fe462bb94021dcabd32e99630c171be2 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:49 -0800 Subject: memcg: fix gfp_mask of callers of charge Fix misuse of gfp_kernel. Now, most of callers of mem_cgroup_charge_xxx functions uses GFP_KERNEL. I think that this is from the fact that page_cgroup *was* dynamically allocated. But now, we allocate all page_cgroup at boot. And mem_cgroup_try_to_free_pages() reclaim memory from GFP_HIGHUSER_MOVABLE + specified GFP_RECLAIM_MASK. * This is because we just want to reduce memory usage. "Where we should reclaim from ?" is not a problem in memcg. This patch modifies gfp masks to be GFP_HIGUSER_MOVABLE if possible. Note: This patch is not for fixing behavior but for showing sane information in source code. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Daisuke Nishimura Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 5941f9801363..bd9b4ea307b2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -928,8 +928,8 @@ found: error = 1; if (!inode) goto out; - /* Precharge page using GFP_KERNEL while we can wait */ - error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); + /* Charge page using GFP_HIGHUSER_MOVABLE while we can wait */ + error = mem_cgroup_cache_charge(page, current->mm, GFP_HIGHUSER_MOVABLE); if (error) goto out; error = radix_tree_preload(GFP_KERNEL); @@ -1379,7 +1379,7 @@ repeat: /* Precharge page while we can wait, compensate after */ error = mem_cgroup_cache_charge(filepage, current->mm, - gfp & ~__GFP_HIGHMEM); + GFP_HIGHUSER_MOVABLE); if (error) { page_cache_release(filepage); shmem_unacct_blocks(info->flags, 1); -- cgit v1.2.3 From d13d144309d2e5a3e6ad978b16c1d0226ddc9231 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:56 -0800 Subject: memcg: handle swap caches SwapCache support for memory resource controller (memcg) Before mem+swap controller, memcg itself should handle SwapCache in proper way. This is cut-out from it. In current memcg, SwapCache is just leaked and the user can create tons of SwapCache. This is a leak of account and should be handled. SwapCache accounting is done as following. charge (anon) - charged when it's mapped. (because of readahead, charge at add_to_swap_cache() is not sane) uncharge (anon) - uncharged when it's dropped from swapcache and fully unmapped. means it's not uncharged at unmap. Note: delete from swap cache at swap-in is done after rmap information is established. charge (shmem) - charged at swap-in. this prevents charge at add_to_page_cache(). uncharge (shmem) - uncharged when it's dropped from swapcache and not on shmem's radix-tree. at migration, check against 'old page' is modified to handle shmem. Comparing to the old version discussed (and caused troubles), we have advantages of - PCG_USED bit. - simple migrating handling. So, situation is much easier than several months ago, maybe. [hugh@veritas.com: memcg: handle swap caches build fix] Reviewed-by: Daisuke Nishimura Tested-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index bd9b4ea307b2..adf5c3eedbc9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -928,8 +928,12 @@ found: error = 1; if (!inode) goto out; - /* Charge page using GFP_HIGHUSER_MOVABLE while we can wait */ - error = mem_cgroup_cache_charge(page, current->mm, GFP_HIGHUSER_MOVABLE); + /* + * Charge page using GFP_HIGHUSER_MOVABLE while we can wait. + * charged back to the user(not to caller) when swap account is used. + */ + error = mem_cgroup_cache_charge_swapin(page, + current->mm, GFP_HIGHUSER_MOVABLE, true); if (error) goto out; error = radix_tree_preload(GFP_KERNEL); @@ -1266,6 +1270,16 @@ repeat: goto repeat; } wait_on_page_locked(swappage); + /* + * We want to avoid charge at add_to_page_cache(). + * charge against this swap cache here. + */ + if (mem_cgroup_cache_charge_swapin(swappage, + current->mm, gfp, false)) { + page_cache_release(swappage); + error = -ENOMEM; + goto failed; + } page_cache_release(swappage); goto repeat; } -- cgit v1.2.3 From 2c26fdd70c3094fa3e84caf9ef434911933d5477 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:10 -0800 Subject: memcg: revert gfp mask fix My patch, memcg-fix-gfp_mask-of-callers-of-charge.patch changed gfp_mask of callers of charge to be GFP_HIGHUSER_MOVABLE for showing what will happen at memory reclaim. But in recent discussion, it's NACKed because it sounds ugly. This patch is for reverting it and add some clean up to gfp_mask of callers of charge. No behavior change but need review before generating HUNK in deep queue. This patch also adds explanation to meaning of gfp_mask passed to charge functions in memcontrol.h. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Daisuke Nishimura Cc: Hugh Dickins Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index adf5c3eedbc9..bbb7b043c986 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -932,8 +932,8 @@ found: * Charge page using GFP_HIGHUSER_MOVABLE while we can wait. * charged back to the user(not to caller) when swap account is used. */ - error = mem_cgroup_cache_charge_swapin(page, - current->mm, GFP_HIGHUSER_MOVABLE, true); + error = mem_cgroup_cache_charge_swapin(page, current->mm, GFP_KERNEL, + true); if (error) goto out; error = radix_tree_preload(GFP_KERNEL); @@ -1275,7 +1275,7 @@ repeat: * charge against this swap cache here. */ if (mem_cgroup_cache_charge_swapin(swappage, - current->mm, gfp, false)) { + current->mm, gfp & GFP_RECLAIM_MASK, false)) { page_cache_release(swappage); error = -ENOMEM; goto failed; @@ -1393,7 +1393,7 @@ repeat: /* Precharge page while we can wait, compensate after */ error = mem_cgroup_cache_charge(filepage, current->mm, - GFP_HIGHUSER_MOVABLE); + GFP_KERNEL); if (error) { page_cache_release(filepage); shmem_unacct_blocks(info->flags, 1); -- cgit v1.2.3 From b5a84319a4343a0db753436fd8147e61eaafa7ea Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:08:35 -0800 Subject: memcg: fix shmem's swap accounting Now, you can see following even when swap accounting is enabled. 1. Create Group 01, and 02. 2. allocate a "file" on tmpfs by a task under 01. 3. swap out the "file" (by memory pressure) 4. Read "file" from a task in group 02. 5. the charge of "file" is moved to group 02. This is not ideal behavior. This is because SwapCache which was loaded by read-ahead is not taken into account.. This is a patch to fix shmem's swapcache behavior. - remove mem_cgroup_cache_charge_swapin(). - Add SwapCache handler routine to mem_cgroup_cache_charge(). By this, shmem's file cache is charged at add_to_page_cache() with GFP_NOWAIT. - pass the page of swapcache to shrink_mem_cgroup. Signed-off-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Paul Menage Cc: Li Zefan Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index bbb7b043c986..5d0de96c9789 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -929,11 +929,11 @@ found: if (!inode) goto out; /* - * Charge page using GFP_HIGHUSER_MOVABLE while we can wait. - * charged back to the user(not to caller) when swap account is used. + * Charge page using GFP_KERNEL while we can wait. + * Charged back to the user(not to caller) when swap account is used. + * add_to_page_cache() will be called with GFP_NOWAIT. */ - error = mem_cgroup_cache_charge_swapin(page, current->mm, GFP_KERNEL, - true); + error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; error = radix_tree_preload(GFP_KERNEL); @@ -1270,16 +1270,6 @@ repeat: goto repeat; } wait_on_page_locked(swappage); - /* - * We want to avoid charge at add_to_page_cache(). - * charge against this swap cache here. - */ - if (mem_cgroup_cache_charge_swapin(swappage, - current->mm, gfp & GFP_RECLAIM_MASK, false)) { - page_cache_release(swappage); - error = -ENOMEM; - goto failed; - } page_cache_release(swappage); goto repeat; } @@ -1334,15 +1324,19 @@ repeat: } else { shmem_swp_unmap(entry); spin_unlock(&info->lock); - unlock_page(swappage); - page_cache_release(swappage); if (error == -ENOMEM) { /* allow reclaim from this memory cgroup */ - error = mem_cgroup_shrink_usage(current->mm, + error = mem_cgroup_shrink_usage(swappage, + current->mm, gfp); - if (error) + if (error) { + unlock_page(swappage); + page_cache_release(swappage); goto failed; + } } + unlock_page(swappage); + page_cache_release(swappage); goto repeat; } } else if (sgp == SGP_READ && !filepage) { -- cgit v1.2.3 From fc8744adc870a8d4366908221508bb113d8b72ee Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 31 Jan 2009 15:08:56 -0800 Subject: Stop playing silly games with the VM_ACCOUNT flag The mmap_region() code would temporarily set the VM_ACCOUNT flag for anonymous shared mappings just to inform shmem_zero_setup() that it should enable accounting for the resulting shm object. It would then clear the flag after calling ->mmap (for the /dev/zero case) or doing shmem_zero_setup() (for the MAP_ANON case). This just resulted in vma merge issues, but also made for just unnecessary confusion. Use the already-existing VM_NORESERVE flag for this instead, and let shmem_{zero|file}_setup() just figure it out from that. This also happens to make it obvious that the new DRI2 GEM layer uses a non-reserving backing store for its object allocation - which is quite possibly not intentional. But since I didn't want to change semantics in this patch, I left it alone, and just updated the caller to use the new flag semantics. Signed-off-by: Linus Torvalds --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 5d0de96c9789..19d566ccdeea 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2628,7 +2628,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) goto close_file; #ifdef CONFIG_SHMEM - SHMEM_I(inode)->flags = flags & VM_ACCOUNT; + SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT; #endif d_instantiate(dentry, inode); inode->i_size = size; -- cgit v1.2.3