From 894bc310419ac95f4fa4142dc364401a7e607f65 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:39 -0700 Subject: Unevictable LRU Infrastructure When the system contains lots of mlocked or otherwise unevictable pages, the pageout code (kswapd) can spend lots of time scanning over these pages. Worse still, the presence of lots of unevictable pages can confuse kswapd into thinking that more aggressive pageout modes are required, resulting in all kinds of bad behaviour. Infrastructure to manage pages excluded from reclaim--i.e., hidden from vmscan. Based on a patch by Larry Woodman of Red Hat. Reworked to maintain "unevictable" pages on a separate per-zone LRU list, to "hide" them from vmscan. Kosaki Motohiro added the support for the memory controller unevictable lru list. Pages on the unevictable list have both PG_unevictable and PG_lru set. Thus, PG_unevictable is analogous to and mutually exclusive with PG_active--it specifies which LRU list the page is on. The unevictable infrastructure is enabled by a new mm Kconfig option [CONFIG_]UNEVICTABLE_LRU. A new function 'page_evictable(page, vma)' in vmscan.c tests whether or not a page may be evictable. Subsequent patches will add the various !evictable tests. We'll want to keep these tests light-weight for use in shrink_active_list() and, possibly, the fault path. To avoid races between tasks putting pages [back] onto an LRU list and tasks that might be moving the page from non-evictable to evictable state, the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()' -- tests the "evictability" of a page after placing it on the LRU, before dropping the reference. If the page has become unevictable, putback_lru_page() will redo the 'putback', thus moving the page to the unevictable list. This way, we avoid "stranding" evictable pages on the unevictable list. [akpm@linux-foundation.org: fix fallout from out-of-order merge] [riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build] [nishimura@mxp.nes.nec.co.jp: remove redundant mapping check] [kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework] [kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c] [kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure] [kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch] [kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Debugged-by: Benjamin Kidwell Signed-off-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) (limited to 'mm/swap.c') diff --git a/mm/swap.c b/mm/swap.c index 0b1974a08974..fee6b973f143 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -115,7 +115,7 @@ static void pagevec_move_tail(struct pagevec *pvec) zone = pagezone; spin_lock(&zone->lru_lock); } - if (PageLRU(page) && !PageActive(page)) { + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int lru = page_is_file_cache(page); list_move_tail(&page->lru, &zone->lru[lru].list); pgmoved++; @@ -136,7 +136,7 @@ static void pagevec_move_tail(struct pagevec *pvec) void rotate_reclaimable_page(struct page *page) { if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && - PageLRU(page)) { + !PageUnevictable(page) && PageLRU(page)) { struct pagevec *pvec; unsigned long flags; @@ -157,7 +157,7 @@ void activate_page(struct page *page) struct zone *zone = page_zone(page); spin_lock_irq(&zone->lru_lock); - if (PageLRU(page) && !PageActive(page)) { + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int file = page_is_file_cache(page); int lru = LRU_BASE + file; del_page_from_lru_list(zone, page, lru); @@ -166,7 +166,7 @@ void activate_page(struct page *page) lru += LRU_ACTIVE; add_page_to_lru_list(zone, page, lru); __count_vm_event(PGACTIVATE); - mem_cgroup_move_lists(page, true); + mem_cgroup_move_lists(page, lru); zone->recent_rotated[!!file]++; zone->recent_scanned[!!file]++; @@ -183,7 +183,8 @@ void activate_page(struct page *page) */ void mark_page_accessed(struct page *page) { - if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) { + if (!PageActive(page) && !PageUnevictable(page) && + PageReferenced(page) && PageLRU(page)) { activate_page(page); ClearPageReferenced(page); } else if (!PageReferenced(page)) { @@ -211,13 +212,38 @@ void __lru_cache_add(struct page *page, enum lru_list lru) void lru_cache_add_lru(struct page *page, enum lru_list lru) { if (PageActive(page)) { + VM_BUG_ON(PageUnevictable(page)); ClearPageActive(page); + } else if (PageUnevictable(page)) { + VM_BUG_ON(PageActive(page)); + ClearPageUnevictable(page); } - VM_BUG_ON(PageLRU(page) || PageActive(page)); + VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); __lru_cache_add(page, lru); } +/** + * add_page_to_unevictable_list - add a page to the unevictable list + * @page: the page to be added to the unevictable list + * + * Add page directly to its zone's unevictable list. To avoid races with + * tasks that might be making the page evictable, through eg. munlock, + * munmap or exit, while it's not on the lru, we want to add the page + * while it's locked or otherwise "invisible" to other tasks. This is + * difficult to do when using the pagevec cache, so bypass that. + */ +void add_page_to_unevictable_list(struct page *page) +{ + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + SetPageUnevictable(page); + SetPageLRU(page); + add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); + spin_unlock_irq(&zone->lru_lock); +} + /* * Drain pages out of the cpu's pagevecs. * Either "cpu" is the current CPU, and preemption has already been @@ -316,6 +342,7 @@ void release_pages(struct page **pages, int nr, int cold) if (PageLRU(page)) { struct zone *pagezone = page_zone(page); + if (pagezone != zone) { if (zone) spin_unlock_irqrestore(&zone->lru_lock, @@ -392,6 +419,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) { int i; struct zone *zone = NULL; + VM_BUG_ON(is_unevictable_lru(lru)); for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; @@ -403,6 +431,8 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) zone = pagezone; spin_lock_irq(&zone->lru_lock); } + VM_BUG_ON(PageActive(page)); + VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); if (is_active_lru(lru)) -- cgit v1.2.3