summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/buffer_head.h6
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--include/linux/memory.h94
-rw-r--r--include/linux/memory_hotplug.h104
-rw-r--r--include/linux/mempolicy.h7
-rw-r--r--include/linux/mm.h150
-rw-r--r--include/linux/mmzone.h28
-rw-r--r--include/linux/rmap.h4
-rw-r--r--include/linux/rwsem-spinlock.h5
-rw-r--r--include/linux/sched.h65
-rw-r--r--include/linux/vmalloc.h8
11 files changed, 396 insertions, 77 deletions
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 88af42f5e04a..c937d6e65502 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -126,8 +126,8 @@ BUFFER_FNS(Eopnotsupp, eopnotsupp)
/* If we *know* page->private refers to buffer_heads */
#define page_buffers(page) \
({ \
- BUG_ON(!PagePrivate(page)); \
- ((struct buffer_head *)(page)->private); \
+ BUG_ON(!PagePrivate(page)); \
+ ((struct buffer_head *)page_private(page)); \
})
#define page_has_buffers(page) PagePrivate(page)
@@ -219,7 +219,7 @@ static inline void attach_page_buffers(struct page *page,
{
page_cache_get(page);
SetPagePrivate(page);
- page->private = (unsigned long)head;
+ set_page_private(page, (unsigned long)head);
}
static inline void get_bh(struct buffer_head *bh)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d664330d900e..0cea162b08c0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -16,7 +16,6 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
-void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
int hugetlb_report_meminfo(char *);
@@ -87,7 +86,6 @@ static inline unsigned long hugetlb_total_pages(void)
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
-#define zap_hugepage_range(vma, start, len) BUG()
#define unmap_hugepage_range(vma, start, end) BUG()
#define is_hugepage_mem_enough(size) 0
#define hugetlb_report_meminfo(buf) 0
diff --git a/include/linux/memory.h b/include/linux/memory.h
new file mode 100644
index 000000000000..0def328ab5cf
--- /dev/null
+++ b/include/linux/memory.h
@@ -0,0 +1,94 @@
+/*
+ * include/linux/memory.h - generic memory definition
+ *
+ * This is mainly for topological representation. We define the
+ * basic "struct memory_block" here, which can be embedded in per-arch
+ * definitions or NUMA information.
+ *
+ * Basic handling of the devices is done in drivers/base/memory.c
+ * and system devices are handled in drivers/base/sys.c.
+ *
+ * Memory block are exported via sysfs in the class/memory/devices/
+ * directory.
+ *
+ */
+#ifndef _LINUX_MEMORY_H_
+#define _LINUX_MEMORY_H_
+
+#include <linux/sysdev.h>
+#include <linux/node.h>
+#include <linux/compiler.h>
+
+#include <asm/semaphore.h>
+
+struct memory_block {
+ unsigned long phys_index;
+ unsigned long state;
+ /*
+ * This serializes all state change requests. It isn't
+ * held during creation because the control files are
+ * created long after the critical areas during
+ * initialization.
+ */
+ struct semaphore state_sem;
+ int phys_device; /* to which fru does this belong? */
+ void *hw; /* optional pointer to fw/hw data */
+ int (*phys_callback)(struct memory_block *);
+ struct sys_device sysdev;
+};
+
+/* These states are exposed to userspace as text strings in sysfs */
+#define MEM_ONLINE (1<<0) /* exposed to userspace */
+#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */
+#define MEM_OFFLINE (1<<2) /* exposed to userspace */
+
+/*
+ * All of these states are currently kernel-internal for notifying
+ * kernel components and architectures.
+ *
+ * For MEM_MAPPING_INVALID, all notifier chains with priority >0
+ * are called before pfn_to_page() becomes invalid. The priority=0
+ * entry is reserved for the function that actually makes
+ * pfn_to_page() stop working. Any notifiers that want to be called
+ * after that should have priority <0.
+ */
+#define MEM_MAPPING_INVALID (1<<3)
+
+#ifndef CONFIG_MEMORY_HOTPLUG
+static inline int memory_dev_init(void)
+{
+ return 0;
+}
+static inline int register_memory_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline void unregister_memory_notifier(struct notifier_block *nb)
+{
+}
+#else
+extern int register_memory(struct memory_block *, struct mem_section *section, struct node *);
+extern int register_new_memory(struct mem_section *);
+extern int unregister_memory_section(struct mem_section *);
+extern int memory_dev_init(void);
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
+
+#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
+
+extern int invalidate_phys_mapping(unsigned long, unsigned long);
+struct notifier_block;
+
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
+
+extern struct sysdev_class memory_sysdev_class;
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#define hotplug_memory_notifier(fn, pri) { \
+ static struct notifier_block fn##_mem_nb = \
+ { .notifier_call = fn, .priority = pri }; \
+ register_memory_notifier(&fn##_mem_nb); \
+}
+
+#endif /* _LINUX_MEMORY_H_ */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
new file mode 100644
index 000000000000..01f03bc06eff
--- /dev/null
+++ b/include/linux/memory_hotplug.h
@@ -0,0 +1,104 @@
+#ifndef __LINUX_MEMORY_HOTPLUG_H
+#define __LINUX_MEMORY_HOTPLUG_H
+
+#include <linux/mmzone.h>
+#include <linux/spinlock.h>
+#include <linux/mmzone.h>
+#include <linux/notifier.h>
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * pgdat resizing functions
+ */
+static inline
+void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
+{
+ spin_lock_irqsave(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
+{
+ spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_init(struct pglist_data *pgdat)
+{
+ spin_lock_init(&pgdat->node_size_lock);
+}
+/*
+ * Zone resizing functions
+ */
+static inline unsigned zone_span_seqbegin(struct zone *zone)
+{
+ return read_seqbegin(&zone->span_seqlock);
+}
+static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
+{
+ return read_seqretry(&zone->span_seqlock, iv);
+}
+static inline void zone_span_writelock(struct zone *zone)
+{
+ write_seqlock(&zone->span_seqlock);
+}
+static inline void zone_span_writeunlock(struct zone *zone)
+{
+ write_sequnlock(&zone->span_seqlock);
+}
+static inline void zone_seqlock_init(struct zone *zone)
+{
+ seqlock_init(&zone->span_seqlock);
+}
+extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
+extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
+extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
+/* need some defines for these for archs that don't support it */
+extern void online_page(struct page *page);
+/* VM interface that may be used by firmware interface */
+extern int add_memory(u64 start, u64 size);
+extern int remove_memory(u64 start, u64 size);
+extern int online_pages(unsigned long, unsigned long);
+
+/* reasonably generic interface to expand the physical pages in a zone */
+extern int __add_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages);
+#else /* ! CONFIG_MEMORY_HOTPLUG */
+/*
+ * Stub functions for when hotplug is off
+ */
+static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
+
+static inline unsigned zone_span_seqbegin(struct zone *zone)
+{
+ return 0;
+}
+static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
+{
+ return 0;
+}
+static inline void zone_span_writelock(struct zone *zone) {}
+static inline void zone_span_writeunlock(struct zone *zone) {}
+static inline void zone_seqlock_init(struct zone *zone) {}
+
+static inline int mhp_notimplemented(const char *func)
+{
+ printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
+ dump_stack();
+ return -ENOSYS;
+}
+
+static inline int __add_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ return mhp_notimplemented(__FUNCTION__);
+}
+#endif /* ! CONFIG_MEMORY_HOTPLUG */
+static inline int __remove_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ printk(KERN_WARNING "%s() called, not yet supported\n", __FUNCTION__);
+ dump_stack();
+ return -ENOSYS;
+}
+#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 58385ee1c0ac..7af8cb836e78 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -27,10 +27,10 @@
#include <linux/config.h>
#include <linux/mmzone.h>
-#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
+#include <linux/nodemask.h>
struct vm_area_struct;
@@ -47,8 +47,7 @@ struct vm_area_struct;
* Locking policy for interlave:
* In process context there is no locking because only the process accesses
* its own state. All vma manipulation is somewhat protected by a down_read on
- * mmap_sem. For allocating in the interleave policy the page_table_lock
- * must be also aquired to protect il_next.
+ * mmap_sem.
*
* Freeing policy:
* When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd.
@@ -63,7 +62,7 @@ struct mempolicy {
union {
struct zonelist *zonelist; /* bind */
short preferred_node; /* preferred */
- DECLARE_BITMAP(nodes, MAX_NUMNODES); /* interleave */
+ nodemask_t nodes; /* interleave */
/* undefined for default */
} v;
};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e1649578fb0c..5c1fb0a2e806 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
-#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */
+#define VM_RESERVED 0x00080000 /* Pages managed in a special way */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
@@ -226,13 +226,18 @@ struct page {
* to show when page is mapped
* & limit reverse map searches.
*/
- unsigned long private; /* Mapping-private opaque data:
+ union {
+ unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
* swp_entry_t if PageSwapCache
* When page is free, this indicates
* order in the buddy system.
*/
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+ spinlock_t ptl;
+#endif
+ } u;
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
@@ -260,6 +265,9 @@ struct page {
#endif /* WANT_PAGE_VIRTUAL */
};
+#define page_private(page) ((page)->u.private)
+#define set_page_private(page, v) ((page)->u.private = (v))
+
/*
* FIXME: take this include out, include page-flags.h in
* files which need it (119 of them)
@@ -311,17 +319,17 @@ extern void FASTCALL(__page_cache_release(struct page *));
#ifdef CONFIG_HUGETLB_PAGE
-static inline int page_count(struct page *p)
+static inline int page_count(struct page *page)
{
- if (PageCompound(p))
- p = (struct page *)p->private;
- return atomic_read(&(p)->_count) + 1;
+ if (PageCompound(page))
+ page = (struct page *)page_private(page);
+ return atomic_read(&page->_count) + 1;
}
static inline void get_page(struct page *page)
{
if (unlikely(PageCompound(page)))
- page = (struct page *)page->private;
+ page = (struct page *)page_private(page);
atomic_inc(&page->_count);
}
@@ -338,7 +346,7 @@ static inline void get_page(struct page *page)
static inline void put_page(struct page *page)
{
- if (!PageReserved(page) && put_page_testzero(page))
+ if (put_page_testzero(page))
__page_cache_release(page);
}
@@ -587,7 +595,7 @@ static inline int PageAnon(struct page *page)
static inline pgoff_t page_index(struct page *page)
{
if (unlikely(PageSwapCache(page)))
- return page->private;
+ return page_private(page);
return page->index;
}
@@ -682,7 +690,7 @@ struct zap_details {
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb,
struct vm_area_struct *start_vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *);
@@ -704,10 +712,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
}
extern int vmtruncate(struct inode * inode, loff_t offset);
-extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
-extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
@@ -723,6 +727,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
int __set_page_dirty_buffers(struct page *page);
int __set_page_dirty_nobuffers(struct page *page);
@@ -759,38 +764,83 @@ struct shrinker;
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);
-/*
- * On a two-level or three-level page table, this ends up being trivial. Thus
- * the inlining and the symmetry break with pte_alloc_map() that does all
- * of this out-of-line.
- */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+
/*
* The following ifdef needed to get the 4level-fixup.h header to work.
* Remove it when 4level-fixup.h has been removed.
*/
-#ifdef CONFIG_MMU
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
- if (pgd_none(*pgd))
- return __pud_alloc(mm, pgd, address);
- return pud_offset(pgd, address);
+ return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
+ NULL: pud_offset(pgd, address);
}
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
- if (pud_none(*pud))
- return __pmd_alloc(mm, pud, address);
- return pmd_offset(pud, address);
+ return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
+ NULL: pmd_offset(pud, address);
}
-#endif
-#endif /* CONFIG_MMU */
+#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * We tuck a spinlock to guard each pagetable page into its struct page,
+ * at page->private, with BUILD_BUG_ON to make sure that this will not
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
+#define __pte_lockptr(page) &((page)->u.ptl)
+#define pte_lock_init(_page) do { \
+ spin_lock_init(__pte_lockptr(_page)); \
+} while (0)
+#define pte_lock_deinit(page) ((page)->mapping = NULL)
+#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+#else
+/*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+#define pte_lock_init(page) do {} while (0)
+#define pte_lock_deinit(page) do {} while (0)
+#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define pte_offset_map_lock(mm, pmd, address, ptlp) \
+({ \
+ spinlock_t *__ptl = pte_lockptr(mm, pmd); \
+ pte_t *__pte = pte_offset_map(pmd, address); \
+ *(ptlp) = __ptl; \
+ spin_lock(__ptl); \
+ __pte; \
+})
+
+#define pte_unmap_unlock(pte, ptl) do { \
+ spin_unlock(ptl); \
+ pte_unmap(pte); \
+} while (0)
+
+#define pte_alloc_map(mm, pmd, address) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ NULL: pte_offset_map(pmd, address))
+
+#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
+
+#define pte_alloc_kernel(pmd, address) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ NULL: pte_offset_kernel(pmd, address))
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, pg_data_t *pgdat,
unsigned long * zones_size, unsigned long zone_start_pfn,
unsigned long *zholes_size);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
+extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
@@ -834,6 +884,7 @@ extern int split_vma(struct mm_struct *,
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
struct rb_node **, struct rb_node *);
+extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff);
extern void exit_mmap(struct mm_struct *);
@@ -894,7 +945,8 @@ void handle_ra_miss(struct address_space *mapping,
unsigned long max_sane_readahead(unsigned long nr);
/* Do stack extension */
-extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
+extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -917,40 +969,28 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}
-extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
+struct page *vmalloc_to_page(void *addr);
+unsigned long vmalloc_to_pfn(void *addr);
+int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t);
-extern struct page * vmalloc_to_page(void *addr);
-extern unsigned long vmalloc_to_pfn(void *addr);
-extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
- int write);
-extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
-int remap_pfn_range(struct vm_area_struct *, unsigned long,
- unsigned long, unsigned long, pgprot_t);
+struct page *follow_page(struct mm_struct *, unsigned long address,
+ unsigned int foll_flags);
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#define FOLL_TOUCH 0x02 /* mark page accessed */
+#define FOLL_GET 0x04 /* do get_page on page */
+#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
#ifdef CONFIG_PROC_FS
-void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
#else
-static inline void __vm_stat_account(struct mm_struct *mm,
+static inline void vm_stat_account(struct mm_struct *mm,
unsigned long flags, struct file *file, long pages)
{
}
#endif /* CONFIG_PROC_FS */
-static inline void vm_stat_account(struct vm_area_struct *vma)
-{
- __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
- vma_pages(vma));
-}
-
-static inline void vm_stat_unaccount(struct vm_area_struct *vma)
-{
- __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
- -vma_pages(vma));
-}
-
-/* update per process rss and vm hiwater data */
-extern void update_mem_hiwater(struct task_struct *tsk);
-
#ifndef CONFIG_DEBUG_PAGEALLOC
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7519eb4191e7..f5fa3082fd6a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -12,6 +12,7 @@
#include <linux/threads.h>
#include <linux/numa.h>
#include <linux/init.h>
+#include <linux/seqlock.h>
#include <asm/atomic.h>
/* Free memory management - zoned buddy allocator. */
@@ -137,6 +138,10 @@ struct zone {
* free areas of different sizes
*/
spinlock_t lock;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* see spanned/present_pages for more description */
+ seqlock_t span_seqlock;
+#endif
struct free_area free_area[MAX_ORDER];
@@ -220,6 +225,16 @@ struct zone {
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
+ /*
+ * zone_start_pfn, spanned_pages and present_pages are all
+ * protected by span_seqlock. It is a seqlock because it has
+ * to be read outside of zone->lock, and it is done in the main
+ * allocator path. But, it is written quite infrequently.
+ *
+ * The lock is declared along with zone->lock because it is
+ * frequently read in proximity to zone->lock. It's good to
+ * give them a chance of being in the same cacheline.
+ */
unsigned long spanned_pages; /* total size, including holes */
unsigned long present_pages; /* amount of memory (excluding holes) */
@@ -273,6 +288,16 @@ typedef struct pglist_data {
struct page *node_mem_map;
#endif
struct bootmem_data *bdata;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Must be held any time you expect node_start_pfn, node_present_pages
+ * or node_spanned_pages stay constant. Holding this will also
+ * guarantee that any pfn_valid() stays that way.
+ *
+ * Nests above zone->lock and zone->size_seqlock.
+ */
+ spinlock_t node_size_lock;
+#endif
unsigned long node_start_pfn;
unsigned long node_present_pages; /* total number of physical pages */
unsigned long node_spanned_pages; /* total size of physical page
@@ -293,6 +318,8 @@ typedef struct pglist_data {
#endif
#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
+#include <linux/memory_hotplug.h>
+
extern struct pglist_data *pgdat_list;
void __get_zone_counts(unsigned long *active, unsigned long *inactive,
@@ -509,6 +536,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
+extern int __section_nr(struct mem_section* ms);
/*
* We use the lower bits of the mem_map pointer to store
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e80fb7ee6efd..35b30e6c8cf8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -95,8 +95,8 @@ int try_to_unmap(struct page *);
/*
* Called from mm/filemap_xip.c to unmap empty zero page
*/
-pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long);
-
+pte_t *page_check_address(struct page *, struct mm_struct *,
+ unsigned long, spinlock_t **);
/*
* Used by swapoff to help locate where page is expected in vma.
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index b52a2af25f1f..f30f805080ae 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -61,5 +61,10 @@ extern void FASTCALL(__up_read(struct rw_semaphore *sem));
extern void FASTCALL(__up_write(struct rw_semaphore *sem));
extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem));
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->activity != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_RWSEM_SPINLOCK_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 27519df0f987..1c30bc308ef1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -249,6 +249,36 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
extern void arch_unmap_area(struct mm_struct *, unsigned long);
extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+#ifdef ATOMIC64_INIT
+#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
+typedef atomic64_t mm_counter_t;
+#else /* !ATOMIC64_INIT */
+/*
+ * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
+ * that is, at 16TB if using 4kB page size.
+ */
+#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
+typedef atomic_t mm_counter_t;
+#endif /* !ATOMIC64_INIT */
+
+#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
#define get_mm_counter(mm, member) ((mm)->_##member)
#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
@@ -256,6 +286,20 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#define dec_mm_counter(mm, member) (mm)->_##member--
typedef unsigned long mm_counter_t;
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define get_mm_rss(mm) \
+ (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
+#define update_hiwater_rss(mm) do { \
+ unsigned long _rss = get_mm_rss(mm); \
+ if ((mm)->hiwater_rss < _rss) \
+ (mm)->hiwater_rss = _rss; \
+} while (0)
+#define update_hiwater_vm(mm) do { \
+ if ((mm)->hiwater_vm < (mm)->total_vm) \
+ (mm)->hiwater_vm = (mm)->total_vm; \
+} while (0)
+
struct mm_struct {
struct vm_area_struct * mmap; /* list of VMAs */
struct rb_root mm_rb;
@@ -279,15 +323,20 @@ struct mm_struct {
* by mmlist_lock
*/
+ /* Special counters, in some configurations protected by the
+ * page_table_lock, in other configurations by being atomic.
+ */
+ mm_counter_t _file_rss;
+ mm_counter_t _anon_rss;
+
+ unsigned long hiwater_rss; /* High-watermark of RSS usage */
+ unsigned long hiwater_vm; /* High-water virtual memory usage */
+
+ unsigned long total_vm, locked_vm, shared_vm, exec_vm;
+ unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
unsigned long arg_start, arg_end, env_start, env_end;
- unsigned long total_vm, locked_vm, shared_vm;
- unsigned long exec_vm, stack_vm, reserved_vm, def_flags, nr_ptes;
-
- /* Special counters protected by the page_table_lock */
- mm_counter_t _rss;
- mm_counter_t _anon_rss;
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
@@ -308,11 +357,7 @@ struct mm_struct {
/* aio bits */
rwlock_t ioctx_list_lock;
struct kioctx *ioctx_list;
-
struct kioctx default_kioctx;
-
- unsigned long hiwater_rss; /* High-water RSS usage */
- unsigned long hiwater_vm; /* High-water virtual memory usage */
};
struct sighand_struct {
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3701a0673d2c..1d5577b2b752 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -32,10 +32,14 @@ struct vm_struct {
* Highlevel APIs for driver use
*/
extern void *vmalloc(unsigned long size);
+extern void *vmalloc_node(unsigned long size, int node);
extern void *vmalloc_exec(unsigned long size);
extern void *vmalloc_32(unsigned long size);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
-extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot);
+extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
+ pgprot_t prot);
+extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
+ pgprot_t prot, int node);
extern void vfree(void *addr);
extern void *vmap(struct page **pages, unsigned int count,
@@ -48,6 +52,8 @@ extern void vunmap(void *addr);
extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
unsigned long start, unsigned long end);
+extern struct vm_struct *get_vm_area_node(unsigned long size,
+ unsigned long flags, int node);
extern struct vm_struct *remove_vm_area(void *addr);
extern struct vm_struct *__remove_vm_area(void *addr);
extern int map_vm_area(struct vm_struct *area, pgprot_t prot,