diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-11 18:05:37 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-11 18:05:37 -0800 |
commit | 608ff1a210ab0e8b969399039bf8e18693605910 (patch) | |
tree | faea7bb1764461c73d0953089bd5439d91733a03 /include | |
parent | 414a6750e59b0b687034764c464e9ddecac0f7a6 (diff) | |
parent | 74d42d8fe146e870c52bde3b1c692f86cc8ff844 (diff) |
Merge branch 'akpm' (Andrew's patchbomb)
Merge misc updates from Andrew Morton:
"About half of most of MM. Going very early this time due to
uncertainty over the coreautounifiednumasched things. I'll send the
other half of most of MM tomorrow. The rest of MM awaits a slab merge
from Pekka."
* emailed patches from Andrew Morton: (71 commits)
memory_hotplug: ensure every online node has NORMAL memory
memory_hotplug: handle empty zone when online_movable/online_kernel
mm, memory-hotplug: dynamic configure movable memory and portion memory
drivers/base/node.c: cleanup node_state_attr[]
bootmem: fix wrong call parameter for free_bootmem()
avr32, kconfig: remove HAVE_ARCH_BOOTMEM
mm: cma: remove watermark hacks
mm: cma: skip watermarks check for already isolated blocks in split_free_page()
mm, oom: fix race when specifying a thread as the oom origin
mm, oom: change type of oom_score_adj to short
mm: cleanup register_node()
mm, mempolicy: remove duplicate code
mm/vmscan.c: try_to_freeze() returns boolean
mm: introduce putback_movable_pages()
virtio_balloon: introduce migration primitives to balloon pages
mm: introduce compaction and migration for ballooned pages
mm: introduce a common interface for balloon pages mobility
mm: redefine address_space.assoc_mapping
mm: adjust address_space_operations.migratepage() return code
arch/sparc/kernel/sys_sparc_64.c: s/COLOUR/COLOR/
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/balloon_compaction.h | 272 | ||||
-rw-r--r-- | include/linux/bootmem.h | 4 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | include/linux/gfp.h | 2 | ||||
-rw-r--r-- | include/linux/huge_mm.h | 4 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 7 | ||||
-rw-r--r-- | include/linux/kernel.h | 14 | ||||
-rw-r--r-- | include/linux/memory.h | 1 | ||||
-rw-r--r-- | include/linux/memory_hotplug.h | 13 | ||||
-rw-r--r-- | include/linux/migrate.h | 19 | ||||
-rw-r--r-- | include/linux/mm.h | 31 | ||||
-rw-r--r-- | include/linux/mm_types.h | 19 | ||||
-rw-r--r-- | include/linux/mmzone.h | 9 | ||||
-rw-r--r-- | include/linux/node.h | 3 | ||||
-rw-r--r-- | include/linux/oom.h | 21 | ||||
-rw-r--r-- | include/linux/page-isolation.h | 10 | ||||
-rw-r--r-- | include/linux/pagemap.h | 16 | ||||
-rw-r--r-- | include/linux/sched.h | 7 | ||||
-rw-r--r-- | include/linux/shm.h | 15 | ||||
-rw-r--r-- | include/linux/types.h | 1 | ||||
-rw-r--r-- | include/linux/writeback.h | 9 | ||||
-rw-r--r-- | include/trace/events/oom.h | 4 | ||||
-rw-r--r-- | include/trace/events/task.h | 8 | ||||
-rw-r--r-- | include/uapi/asm-generic/mman-common.h | 11 | ||||
-rw-r--r-- | include/uapi/asm-generic/mman.h | 2 |
25 files changed, 444 insertions, 60 deletions
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h new file mode 100644 index 000000000000..f7f1d7169b11 --- /dev/null +++ b/include/linux/balloon_compaction.h @@ -0,0 +1,272 @@ +/* + * include/linux/balloon_compaction.h + * + * Common interface definitions for making balloon pages movable by compaction. + * + * Despite being perfectly possible to perform ballooned pages migration, they + * make a special corner case to compaction scans because balloon pages are not + * enlisted at any LRU list like the other pages we do compact / migrate. + * + * As the page isolation scanning step a compaction thread does is a lockless + * procedure (from a page standpoint), it might bring some racy situations while + * performing balloon page compaction. In order to sort out these racy scenarios + * and safely perform balloon's page compaction and migration we must, always, + * ensure following these three simple rules: + * + * i. when updating a balloon's page ->mapping element, strictly do it under + * the following lock order, independently of the far superior + * locking scheme (lru_lock, balloon_lock): + * +-page_lock(page); + * +--spin_lock_irq(&b_dev_info->pages_lock); + * ... page->mapping updates here ... + * + * ii. before isolating or dequeueing a balloon page from the balloon device + * pages list, the page reference counter must be raised by one and the + * extra refcount must be dropped when the page is enqueued back into + * the balloon device page list, thus a balloon page keeps its reference + * counter raised only while it is under our special handling; + * + * iii. after the lockless scan step have selected a potential balloon page for + * isolation, re-test the page->mapping flags and the page ref counter + * under the proper page lock, to ensure isolating a valid balloon page + * (not yet isolated, nor under release procedure) + * + * The functions provided by this interface are placed to help on coping with + * the aforementioned balloon page corner case, as well as to ensure the simple + * set of exposed rules are satisfied while we are dealing with balloon pages + * compaction / migration. + * + * Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com> + */ +#ifndef _LINUX_BALLOON_COMPACTION_H +#define _LINUX_BALLOON_COMPACTION_H +#include <linux/pagemap.h> +#include <linux/page-flags.h> +#include <linux/migrate.h> +#include <linux/gfp.h> +#include <linux/err.h> + +/* + * Balloon device information descriptor. + * This struct is used to allow the common balloon compaction interface + * procedures to find the proper balloon device holding memory pages they'll + * have to cope for page compaction / migration, as well as it serves the + * balloon driver as a page book-keeper for its registered balloon devices. + */ +struct balloon_dev_info { + void *balloon_device; /* balloon device descriptor */ + struct address_space *mapping; /* balloon special page->mapping */ + unsigned long isolated_pages; /* # of isolated pages for migration */ + spinlock_t pages_lock; /* Protection to pages list */ + struct list_head pages; /* Pages enqueued & handled to Host */ +}; + +extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info); +extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info); +extern struct balloon_dev_info *balloon_devinfo_alloc( + void *balloon_dev_descriptor); + +static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info) +{ + kfree(b_dev_info); +} + +/* + * balloon_page_free - release a balloon page back to the page free lists + * @page: ballooned page to be set free + * + * This function must be used to properly set free an isolated/dequeued balloon + * page at the end of a sucessful page migration, or at the balloon driver's + * page release procedure. + */ +static inline void balloon_page_free(struct page *page) +{ + /* + * Balloon pages always get an extra refcount before being isolated + * and before being dequeued to help on sorting out fortuite colisions + * between a thread attempting to isolate and another thread attempting + * to release the very same balloon page. + * + * Before we handle the page back to Buddy, lets drop its extra refcnt. + */ + put_page(page); + __free_page(page); +} + +#ifdef CONFIG_BALLOON_COMPACTION +extern bool balloon_page_isolate(struct page *page); +extern void balloon_page_putback(struct page *page); +extern int balloon_page_migrate(struct page *newpage, + struct page *page, enum migrate_mode mode); +extern struct address_space +*balloon_mapping_alloc(struct balloon_dev_info *b_dev_info, + const struct address_space_operations *a_ops); + +static inline void balloon_mapping_free(struct address_space *balloon_mapping) +{ + kfree(balloon_mapping); +} + +/* + * page_flags_cleared - helper to perform balloon @page ->flags tests. + * + * As balloon pages are obtained from buddy and we do not play with page->flags + * at driver level (exception made when we get the page lock for compaction), + * we can safely identify a ballooned page by checking if the + * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared. This approach also + * helps us skip ballooned pages that are locked for compaction or release, thus + * mitigating their racy check at balloon_page_movable() + */ +static inline bool page_flags_cleared(struct page *page) +{ + return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP); +} + +/* + * __is_movable_balloon_page - helper to perform @page mapping->flags tests + */ +static inline bool __is_movable_balloon_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + return mapping_balloon(mapping); +} + +/* + * balloon_page_movable - test page->mapping->flags to identify balloon pages + * that can be moved by compaction/migration. + * + * This function is used at core compaction's page isolation scheme, therefore + * most pages exposed to it are not enlisted as balloon pages and so, to avoid + * undesired side effects like racing against __free_pages(), we cannot afford + * holding the page locked while testing page->mapping->flags here. + * + * As we might return false positives in the case of a balloon page being just + * released under us, the page->mapping->flags need to be re-tested later, + * under the proper page lock, at the functions that will be coping with the + * balloon page case. + */ +static inline bool balloon_page_movable(struct page *page) +{ + /* + * Before dereferencing and testing mapping->flags, let's make sure + * this is not a page that uses ->mapping in a different way + */ + if (page_flags_cleared(page) && !page_mapped(page) && + page_count(page) == 1) + return __is_movable_balloon_page(page); + + return false; +} + +/* + * balloon_page_insert - insert a page into the balloon's page list and make + * the page->mapping assignment accordingly. + * @page : page to be assigned as a 'balloon page' + * @mapping : allocated special 'balloon_mapping' + * @head : balloon's device page list head + * + * Caller must ensure the page is locked and the spin_lock protecting balloon + * pages list is held before inserting a page into the balloon device. + */ +static inline void balloon_page_insert(struct page *page, + struct address_space *mapping, + struct list_head *head) +{ + page->mapping = mapping; + list_add(&page->lru, head); +} + +/* + * balloon_page_delete - delete a page from balloon's page list and clear + * the page->mapping assignement accordingly. + * @page : page to be released from balloon's page list + * + * Caller must ensure the page is locked and the spin_lock protecting balloon + * pages list is held before deleting a page from the balloon device. + */ +static inline void balloon_page_delete(struct page *page) +{ + page->mapping = NULL; + list_del(&page->lru); +} + +/* + * balloon_page_device - get the b_dev_info descriptor for the balloon device + * that enqueues the given page. + */ +static inline struct balloon_dev_info *balloon_page_device(struct page *page) +{ + struct address_space *mapping = page->mapping; + if (likely(mapping)) + return mapping->private_data; + + return NULL; +} + +static inline gfp_t balloon_mapping_gfp_mask(void) +{ + return GFP_HIGHUSER_MOVABLE; +} + +static inline bool balloon_compaction_check(void) +{ + return true; +} + +#else /* !CONFIG_BALLOON_COMPACTION */ + +static inline void *balloon_mapping_alloc(void *balloon_device, + const struct address_space_operations *a_ops) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void balloon_mapping_free(struct address_space *balloon_mapping) +{ + return; +} + +static inline void balloon_page_insert(struct page *page, + struct address_space *mapping, + struct list_head *head) +{ + list_add(&page->lru, head); +} + +static inline void balloon_page_delete(struct page *page) +{ + list_del(&page->lru); +} + +static inline bool balloon_page_movable(struct page *page) +{ + return false; +} + +static inline bool balloon_page_isolate(struct page *page) +{ + return false; +} + +static inline void balloon_page_putback(struct page *page) +{ + return; +} + +static inline int balloon_page_migrate(struct page *newpage, + struct page *page, enum migrate_mode mode) +{ + return 0; +} + +static inline gfp_t balloon_mapping_gfp_mask(void) +{ + return GFP_HIGHUSER; +} + +static inline bool balloon_compaction_check(void) +{ + return false; +} +#endif /* CONFIG_BALLOON_COMPACTION */ +#endif /* _LINUX_BALLOON_COMPACTION_H */ diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 6d6795d46a75..7b74452c5317 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -51,8 +51,8 @@ extern unsigned long free_all_bootmem(void); extern void free_bootmem_node(pg_data_t *pgdat, unsigned long addr, unsigned long size); -extern void free_bootmem(unsigned long addr, unsigned long size); -extern void free_bootmem_late(unsigned long addr, unsigned long size); +extern void free_bootmem(unsigned long physaddr, unsigned long size); +extern void free_bootmem_late(unsigned long physaddr, unsigned long size); /* * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, diff --git a/include/linux/fs.h b/include/linux/fs.h index 75fe9a134803..408fb1e77a0a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -418,7 +418,7 @@ struct address_space { struct backing_dev_info *backing_dev_info; /* device readahead, etc */ spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ - struct address_space *assoc_mapping; /* ditto */ + void *private_data; /* ditto */ } __attribute__((aligned(sizeof(long)))); /* * On most architectures that alignment is already the case; but diff --git a/include/linux/gfp.h b/include/linux/gfp.h index d0a79678f169..31e8041274f6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -266,7 +266,7 @@ static inline enum zone_type gfp_zone(gfp_t flags) static inline int gfp_zonelist(gfp_t flags) { - if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE)) + if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE)) return 1; return 0; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b31cb7da0346..1af477552459 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -8,6 +8,10 @@ extern int do_huge_pmd_anonymous_page(struct mm_struct *mm, extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *vma); +extern void huge_pmd_set_accessed(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long address, pmd_t *pmd, + pmd_t orig_pmd, int dirty); extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, pmd_t orig_pmd); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 225164842ab6..3e7fa1acf09c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -183,7 +183,8 @@ extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, unsigned long addr, size_t size, vm_flags_t acct, - struct user_struct **user, int creat_flags); + struct user_struct **user, int creat_flags, + int page_size_log); static inline int is_file_hugepages(struct file *file) { @@ -195,12 +196,14 @@ static inline int is_file_hugepages(struct file *file) return 0; } + #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) 0 static inline struct file * hugetlb_file_setup(const char *name, unsigned long addr, size_t size, - vm_flags_t acctflag, struct user_struct **user, int creat_flags) + vm_flags_t acctflag, struct user_struct **user, int creat_flags, + int page_size_log) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7d8dfc7392f1..dd9900cabf89 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -687,20 +687,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) -/* This helps us to avoid #ifdef CONFIG_NUMA */ -#ifdef CONFIG_NUMA -#define NUMA_BUILD 1 -#else -#define NUMA_BUILD 0 -#endif - -/* This helps us avoid #ifdef CONFIG_COMPACTION */ -#ifdef CONFIG_COMPACTION -#define COMPACTION_BUILD 1 -#else -#define COMPACTION_BUILD 0 -#endif - /* This helps us to avoid #ifdef CONFIG_SYMBOL_PREFIX */ #ifdef CONFIG_SYMBOL_PREFIX #define SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX diff --git a/include/linux/memory.h b/include/linux/memory.h index ff9a9f8e0ed9..a09216d0dcc7 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -53,6 +53,7 @@ int arch_get_memory_phys_device(unsigned long start_pfn); struct memory_notify { unsigned long start_pfn; unsigned long nr_pages; + int status_change_nid_normal; int status_change_nid; }; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 95573ec4ee6c..4a45c4e50025 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -26,6 +26,13 @@ enum { MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, }; +/* Types for control the zone type of onlined memory */ +enum { + ONLINE_KEEP, + ONLINE_KERNEL, + ONLINE_MOVABLE, +}; + /* * pgdat resizing functions */ @@ -46,6 +53,10 @@ void pgdat_resize_init(struct pglist_data *pgdat) } /* * Zone resizing functions + * + * Note: any attempt to resize a zone should has pgdat_resize_lock() + * zone_span_writelock() both held. This ensure the size of a zone + * can't be changed while pgdat_resize_lock() held. */ static inline unsigned zone_span_seqbegin(struct zone *zone) { @@ -71,7 +82,7 @@ extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); /* VM interface that may be used by firmware interface */ -extern int online_pages(unsigned long, unsigned long); +extern int online_pages(unsigned long, unsigned long, int); extern void __offline_isolated_pages(unsigned long, unsigned long); typedef void (*online_page_callback_t)(struct page *page); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ce7e6671968b..0b5865c61efd 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -7,9 +7,27 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); +/* + * Return values from addresss_space_operations.migratepage(): + * - negative errno on page migration failure; + * - zero on page migration success; + * + * The balloon page migration introduces this special case where a 'distinct' + * return code is used to flag a successful page migration to unmap_and_move(). + * This approach is necessary because page migration can race against balloon + * deflation procedure, and for such case we could introduce a nasty page leak + * if a successfully migrated balloon page gets released concurrently with + * migration's unmap_and_move() wrap-up steps. + */ +#define MIGRATEPAGE_SUCCESS 0 +#define MIGRATEPAGE_BALLOON_SUCCESS 1 /* special ret code for balloon page + * sucessful migration case. + */ + #ifdef CONFIG_MIGRATION extern void putback_lru_pages(struct list_head *l); +extern void putback_movable_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t x, @@ -33,6 +51,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, #else static inline void putback_lru_pages(struct list_head *l) {} +static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, enum migrate_mode mode) { return -ENOSYS; } diff --git a/include/linux/mm.h b/include/linux/mm.h index bcaab4e6fe91..4af4f0b1be4c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1456,6 +1456,37 @@ extern unsigned long vm_mmap(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +struct vm_unmapped_area_info { +#define VM_UNMAPPED_AREA_TOPDOWN 1 + unsigned long flags; + unsigned long length; + unsigned long low_limit; + unsigned long high_limit; + unsigned long align_mask; + unsigned long align_offset; +}; + +extern unsigned long unmapped_area(struct vm_unmapped_area_info *info); +extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info); + +/* + * Search for an unmapped address range. + * + * We are looking for a range that: + * - does not intersect with any VMA; + * - is contained within the [low_limit, high_limit) interval; + * - is at least the desired size. + * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) + */ +static inline unsigned long +vm_unmapped_area(struct vm_unmapped_area_info *info) +{ + if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN)) + return unmapped_area(info); + else + return unmapped_area_topdown(info); +} + /* truncate.c */ extern void truncate_inode_pages(struct address_space *, loff_t); extern void truncate_inode_pages_range(struct address_space *, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 31f8a3af7d94..7ade2731b5d6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -224,7 +224,8 @@ struct vm_region { * library, the executable area etc). */ struct vm_area_struct { - struct mm_struct * vm_mm; /* The address space we belong to. */ + /* The first cache line has the info for VMA tree walking. */ + unsigned long vm_start; /* Our start address within vm_mm. */ unsigned long vm_end; /* The first byte after our end address within vm_mm. */ @@ -232,11 +233,22 @@ struct vm_area_struct { /* linked list of VM areas per task, sorted by address */ struct vm_area_struct *vm_next, *vm_prev; + struct rb_node vm_rb; + + /* + * Largest free memory gap in bytes to the left of this VMA. + * Either between this VMA and vma->vm_prev, or between one of the + * VMAs below us in the VMA rbtree and its ->vm_prev. This helps + * get_unmapped_area find a free area of the right size. + */ + unsigned long rb_subtree_gap; + + /* Second cache line starts here. */ + + struct mm_struct *vm_mm; /* The address space we belong to. */ pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, see mm.h. */ - struct rb_node vm_rb; - /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree, or @@ -322,6 +334,7 @@ struct mm_struct { unsigned long task_size; /* size of task vm space */ unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ + unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a23923ba8263..0c0b1d608a69 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -63,10 +63,8 @@ enum { #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) -# define cma_wmark_pages(zone) zone->min_cma_pages #else # define is_migrate_cma(migratetype) false -# define cma_wmark_pages(zone) 0 #endif #define for_each_migratetype_order(order, type) \ @@ -383,13 +381,6 @@ struct zone { /* see spanned/present_pages for more description */ seqlock_t span_seqlock; #endif -#ifdef CONFIG_CMA - /* - * CMA needs to increase watermark levels during the allocation - * process to make sure that the system is not starved. - */ - unsigned long min_cma_pages; -#endif struct free_area free_area[MAX_ORDER]; #ifndef CONFIG_SPARSEMEM diff --git a/include/linux/node.h b/include/linux/node.h index 624e53cecc02..2115ad5d6f19 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -27,10 +27,9 @@ struct node { }; struct memory_block; -extern struct node node_devices[]; +extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); -extern int register_node(struct node *, int, struct node *); extern void unregister_node(struct node *node); #ifdef CONFIG_NUMA extern int register_one_node(int nid); diff --git a/include/linux/oom.h b/include/linux/oom.h index fb9826847b89..da60007075b5 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -29,8 +29,23 @@ enum oom_scan_t { OOM_SCAN_SELECT, /* always select this thread first */ }; -extern void compare_swap_oom_score_adj(int old_val, int new_val); -extern int test_set_oom_score_adj(int new_val); +/* Thread is the potential origin of an oom condition; kill first on oom */ +#define OOM_FLAG_ORIGIN ((__force oom_flags_t)0x1) + +static inline void set_current_oom_origin(void) +{ + current->signal->oom_flags |= OOM_FLAG_ORIGIN; +} + +static inline void clear_current_oom_origin(void) +{ + current->signal->oom_flags &= ~OOM_FLAG_ORIGIN; +} + +static inline bool oom_task_origin(const struct task_struct *p) +{ + return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN); +} extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, @@ -49,8 +64,6 @@ extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, bool force_kill); -extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, - int order); extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, nodemask_t *mask, bool force_kill); diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 76a9539cfd3f..a92061e08d48 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -2,7 +2,8 @@ #define __LINUX_PAGEISOLATION_H -bool has_unmovable_pages(struct zone *zone, struct page *page, int count); +bool has_unmovable_pages(struct zone *zone, struct page *page, int count, + bool skip_hwpoisoned_pages); void set_pageblock_migratetype(struct page *page, int migratetype); int move_freepages_block(struct zone *zone, struct page *page, int migratetype); @@ -21,7 +22,7 @@ int move_freepages(struct zone *zone, */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype); + unsigned migratetype, bool skip_hwpoisoned_pages); /* * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. @@ -34,12 +35,13 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, /* * Test all pages in [start_pfn, end_pfn) are isolated or not. */ -int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); +int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, + bool skip_hwpoisoned_pages); /* * Internal functions. Changes pageblock's migrate type. */ -int set_migratetype_isolate(struct page *page); +int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages); void unset_migratetype_isolate(struct page *page, unsigned migratetype); struct page *alloc_migrate_target(struct page *page, unsigned long private, int **resultp); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e42c762f0dc7..6da609d14c15 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -24,6 +24,7 @@ enum mapping_flags { AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */ AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ + AS_BALLOON_MAP = __GFP_BITS_SHIFT + 4, /* balloon page special map */ }; static inline void mapping_set_error(struct address_space *mapping, int error) @@ -53,6 +54,21 @@ static inline int mapping_unevictable(struct address_space *mapping) return !!mapping; } +static inline void mapping_set_balloon(struct address_space *mapping) +{ + set_bit(AS_BALLOON_MAP, &mapping->flags); +} + +static inline void mapping_clear_balloon(struct address_space *mapping) +{ + clear_bit(AS_BALLOON_MAP, &mapping->flags); +} + +static inline int mapping_balloon(struct address_space *mapping) +{ + return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; diff --git a/include/linux/sched.h b/include/linux/sched.h index 0dd42a02df2e..3e387df065fc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -631,9 +631,10 @@ struct signal_struct { struct rw_semaphore group_rwsem; #endif - int oom_score_adj; /* OOM kill score adjustment */ - int oom_score_adj_min; /* OOM kill score adjustment minimum value. - * Only settable by CAP_SYS_RESOURCE. */ + oom_flags_t oom_flags; + short oom_score_adj; /* OOM kill score adjustment */ + short oom_score_adj_min; /* OOM kill score adjustment min value. + * Only settable by CAP_SYS_RESOURCE. */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations diff --git a/include/linux/shm.h b/include/linux/shm.h index bcf8a6a3ec00..429c1995d756 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -29,6 +29,21 @@ struct shmid_kernel /* private to the kernel */ #define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ #define SHM_NORESERVE 010000 /* don't check for reservations */ +/* Bits [26:31] are reserved */ + +/* + * When SHM_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define SHM_HUGE_SHIFT 26 +#define SHM_HUGE_MASK 0x3f +#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) + #ifdef CONFIG_SYSVIPC long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); diff --git a/include/linux/types.h b/include/linux/types.h index 1cc0e4b9a048..4d118ba11349 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -156,6 +156,7 @@ typedef u32 dma_addr_t; #endif typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; +typedef unsigned __bitwise__ oom_flags_t; #ifdef CONFIG_PHYS_ADDR_T_64BIT typedef u64 phys_addr_t; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 50c3e8fa06a8..b82a83aba311 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -161,14 +161,7 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi, unsigned long start_time); void page_writeback_init(void); -void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, - unsigned long nr_pages_dirtied); - -static inline void -balance_dirty_pages_ratelimited(struct address_space *mapping) -{ - balance_dirty_pages_ratelimited_nr(mapping, 1); -} +void balance_dirty_pages_ratelimited(struct address_space *mapping); typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index dd4ba3b92002..1e974983757e 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -14,7 +14,7 @@ TRACE_EVENT(oom_score_adj_update, TP_STRUCT__entry( __field( pid_t, pid) __array( char, comm, TASK_COMM_LEN ) - __field( int, oom_score_adj) + __field( short, oom_score_adj) ), TP_fast_assign( @@ -23,7 +23,7 @@ TRACE_EVENT(oom_score_adj_update, __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("pid=%d comm=%s oom_score_adj=%d", + TP_printk("pid=%d comm=%s oom_score_adj=%hd", __entry->pid, __entry->comm, __entry->oom_score_adj) ); diff --git a/include/trace/events/task.h b/include/trace/events/task.h index b53add02e929..102a646e1996 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h @@ -15,7 +15,7 @@ TRACE_EVENT(task_newtask, __field( pid_t, pid) __array( char, comm, TASK_COMM_LEN) __field( unsigned long, clone_flags) - __field( int, oom_score_adj) + __field( short, oom_score_adj) ), TP_fast_assign( @@ -25,7 +25,7 @@ TRACE_EVENT(task_newtask, __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("pid=%d comm=%s clone_flags=%lx oom_score_adj=%d", + TP_printk("pid=%d comm=%s clone_flags=%lx oom_score_adj=%hd", __entry->pid, __entry->comm, __entry->clone_flags, __entry->oom_score_adj) ); @@ -40,7 +40,7 @@ TRACE_EVENT(task_rename, __field( pid_t, pid) __array( char, oldcomm, TASK_COMM_LEN) __array( char, newcomm, TASK_COMM_LEN) - __field( int, oom_score_adj) + __field( short, oom_score_adj) ), TP_fast_assign( @@ -50,7 +50,7 @@ TRACE_EVENT(task_rename, __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%d", + TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%hd", __entry->pid, __entry->oldcomm, __entry->newcomm, __entry->oom_score_adj) ); diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index d030d2c2647a..4164529a94f9 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -55,4 +55,15 @@ /* compatibility flags */ #define MAP_FILE 0 +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + #endif /* __ASM_GENERIC_MMAN_COMMON_H */ diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 32c8bd6a196d..e9fe6fd2a074 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -13,6 +13,8 @@ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ +/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ + #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ |