From 58cb65487e92b47448d00a711c9f5922137d5678 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 9 Oct 2014 15:25:54 -0700
Subject: proc/maps: make vm_is_stack() logic namespace-friendly

- Rename vm_is_stack() to task_of_stack() and change it to return
  "struct task_struct *" rather than the global (and thus wrong in
  general) pid_t.

- Add the new pid_of_stack() helper which calls task_of_stack() and
  uses the right namespace to report the correct pid_t.

  Unfortunately we need to define this helper twice, in task_mmu.c
  and in task_nommu.c. perhaps it makes sense to add fs/proc/util.c
  and move at least pid_of_stack/task_of_stack there to avoid the
  code duplication.

- Change show_map_vma() and show_numa_map() to use the new helper.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Ungerer <gerg@uclinux.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0f4196a0bc20..28df70774b81 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1247,8 +1247,8 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
 		!vma_growsup(vma->vm_next, addr);
 }
 
-extern pid_t
-vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);
+extern struct task_struct *task_of_stack(struct task_struct *task,
+				struct vm_area_struct *vma, bool in_group);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
-- 
cgit v1.2.3


From 07f361b2bee38896df8be17d8c3f8af3f3610606 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 9 Oct 2014 15:26:00 -0700
Subject: mm/slab_common: move kmem_cache definition to internal header

We don't need to keep kmem_cache definition in include/linux/slab.h if we
don't need to inline kmem_cache_size().  According to my code inspection,
this function is only called at lc_create() in lib/lru_cache.c which may
be called at initialization phase of something, so we don't need to inline
it.  Therfore, move it to slab_common.c and move kmem_cache definition to
internal header.

After this change, we can change kmem_cache definition easily without full
kernel build.  For instance, we can turn on/off CONFIG_SLUB_STATS without
full kernel build.

[akpm@linux-foundation.org: export kmem_cache_size() to modules]
[rdunlap@infradead.org: add header files to fix kmemcheck.c build errors]
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 42 +-----------------------------------------
 1 file changed, 1 insertion(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 1d9abb7d22a0..9062e4ad1787 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -158,31 +158,6 @@ size_t ksize(const void *);
 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
 #endif
 
-#ifdef CONFIG_SLOB
-/*
- * Common fields provided in kmem_cache by all slab allocators
- * This struct is either used directly by the allocator (SLOB)
- * or the allocator must include definitions for all fields
- * provided in kmem_cache_common in their definition of kmem_cache.
- *
- * Once we can do anonymous structs (C11 standard) we could put a
- * anonymous struct definition in these allocators so that the
- * separate allocations in the kmem_cache structure of SLAB and
- * SLUB is no longer needed.
- */
-struct kmem_cache {
-	unsigned int object_size;/* The original size of the object */
-	unsigned int size;	/* The aligned/padded/added on size  */
-	unsigned int align;	/* Alignment as calculated */
-	unsigned long flags;	/* Active flags on the slab */
-	const char *name;	/* Slab name for sysfs */
-	int refcount;		/* Use counter */
-	void (*ctor)(void *);	/* Called on object slot creation */
-	struct list_head list;	/* List of all slab caches on the system */
-};
-
-#endif /* CONFIG_SLOB */
-
 /*
  * Kmalloc array related definitions
  */
@@ -363,14 +338,6 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
 }
 #endif /* CONFIG_TRACING */
 
-#ifdef CONFIG_SLAB
-#include <linux/slab_def.h>
-#endif
-
-#ifdef CONFIG_SLUB
-#include <linux/slub_def.h>
-#endif
-
 extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
 
 #ifdef CONFIG_TRACING
@@ -650,14 +617,7 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
-/*
- * Determine the size of a slab object
- */
-static inline unsigned int kmem_cache_size(struct kmem_cache *s)
-{
-	return s->object_size;
-}
-
+unsigned int kmem_cache_size(struct kmem_cache *s);
 void __init kmem_cache_init_late(void);
 
 #endif	/* _LINUX_SLAB_H */
-- 
cgit v1.2.3


From 61f47105a2c9c60e950ca808b7560f776f9bfa31 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 9 Oct 2014 15:26:02 -0700
Subject: mm/sl[ao]b: always track caller in kmalloc_(node_)track_caller()

Now, we track caller if tracing or slab debugging is enabled.  If they are
disabled, we could save one argument passing overhead by calling
__kmalloc(_node)().  But, I think that it would be marginal.  Furthermore,
default slab allocator, SLUB, doesn't use this technique so I think that
it's okay to change this situation.

After this change, we can turn on/off CONFIG_DEBUG_SLAB without full
kernel build and remove some complicated '#if' defintion.  It looks more
benefitial to me.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9062e4ad1787..c265bec6a57d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -549,37 +549,15 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
  * allocator where we care about the real place the memory allocation
  * request comes from.
  */
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) || \
-	(defined(CONFIG_SLAB) && defined(CONFIG_TRACING)) || \
-	(defined(CONFIG_SLOB) && defined(CONFIG_TRACING))
 extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
 #define kmalloc_track_caller(size, flags) \
 	__kmalloc_track_caller(size, flags, _RET_IP_)
-#else
-#define kmalloc_track_caller(size, flags) \
-	__kmalloc(size, flags)
-#endif /* DEBUG_SLAB */
 
 #ifdef CONFIG_NUMA
-/*
- * kmalloc_node_track_caller is a special version of kmalloc_node that
- * records the calling function of the routine calling it for slab leak
- * tracking instead of just the calling function (confusing, eh?).
- * It's useful when the call to kmalloc_node comes from a widely-used
- * standard allocator where we care about the real place the memory
- * allocation request comes from.
- */
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) || \
-	(defined(CONFIG_SLAB) && defined(CONFIG_TRACING)) || \
-	(defined(CONFIG_SLOB) && defined(CONFIG_TRACING))
 extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node_track_caller(size, flags, node, \
 			_RET_IP_)
-#else
-#define kmalloc_node_track_caller(size, flags, node) \
-	__kmalloc_node(size, flags, node)
-#endif
 
 #else /* CONFIG_NUMA */
 
-- 
cgit v1.2.3


From ad2c8144418c6a81cefe65379fd47bbe8344cef2 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 9 Oct 2014 15:26:13 -0700
Subject: topology: add support for node_to_mem_node() to determine the
 fallback node

Anton noticed (http://www.spinics.net/lists/linux-mm/msg67489.html) that
on ppc LPARs with memoryless nodes, a large amount of memory was consumed
by slabs and was marked unreclaimable.  He tracked it down to slab
deactivations in the SLUB core when we allocate remotely, leading to poor
efficiency always when memoryless nodes are present.

After much discussion, Joonsoo provided a few patches that help
significantly.  They don't resolve the problem altogether:

 - memory hotplug still needs testing, that is when a memoryless node
   becomes memory-ful, we want to dtrt
 - there are other reasons for going off-node than memoryless nodes,
   e.g., fully exhausted local nodes

Neither case is resolved with this series, but I don't think that should
block their acceptance, as they can be explored/resolved with follow-on
patches.

The series consists of:

[1/3] topology: add support for node_to_mem_node() to determine the
      fallback node

[2/3] slub: fallback to node_to_mem_node() node if allocating on
      memoryless node

      - Joonsoo's patches to cache the nearest node with memory for each
        NUMA node

[3/3] Partial revert of 81c98869faa5 (""kthread: ensure locality of
      task_struct allocations")

 - At Tejun's request, keep the knowledge of memoryless node fallback
   to the allocator core.

This patch (of 3):

We need to determine the fallback node in slub allocator if the allocation
target node is memoryless node.  Without it, the SLUB wrongly select the
node which has no memory and can't use a partial slab, because of node
mismatch.  Introduced function, node_to_mem_node(X), will return a node Y
with memory that has the nearest distance.  If X is memoryless node, it
will return nearest distance node, but, if X is normal node, it will
return itself.

We will use this function in following patch to determine the fallback
node.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Han Pingtian <hanpt@linux.vnet.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Anton Blanchard <anton@samba.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/topology.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index dda6ee521e74..909b6e43b694 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -119,11 +119,20 @@ static inline int numa_node_id(void)
  * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem().
  */
 DECLARE_PER_CPU(int, _numa_mem_);
+extern int _node_numa_mem_[MAX_NUMNODES];
 
 #ifndef set_numa_mem
 static inline void set_numa_mem(int node)
 {
 	this_cpu_write(_numa_mem_, node);
+	_node_numa_mem_[numa_node_id()] = node;
+}
+#endif
+
+#ifndef node_to_mem_node
+static inline int node_to_mem_node(int node)
+{
+	return _node_numa_mem_[node];
 }
 #endif
 
@@ -146,6 +155,7 @@ static inline int cpu_to_mem(int cpu)
 static inline void set_cpu_numa_mem(int cpu, int node)
 {
 	per_cpu(_numa_mem_, cpu) = node;
+	_node_numa_mem_[cpu_to_node(cpu)] = node;
 }
 #endif
 
@@ -159,6 +169,13 @@ static inline int numa_mem_id(void)
 }
 #endif
 
+#ifndef node_to_mem_node
+static inline int node_to_mem_node(int node)
+{
+	return node;
+}
+#endif
+
 #ifndef cpu_to_mem
 static inline int cpu_to_mem(int cpu)
 {
-- 
cgit v1.2.3


From bf0dea23a9c094ae869a88bb694fbe966671bf6d Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 9 Oct 2014 15:26:27 -0700
Subject: mm/slab: use percpu allocator for cpu cache

Because of chicken and egg problem, initialization of SLAB is really
complicated.  We need to allocate cpu cache through SLAB to make the
kmem_cache work, but before initialization of kmem_cache, allocation
through SLAB is impossible.

On the other hand, SLUB does initialization in a more simple way.  It uses
percpu allocator to allocate cpu cache so there is no chicken and egg
problem.

So, this patch try to use percpu allocator in SLAB.  This simplifies the
initialization step in SLAB so that we could maintain SLAB code more
easily.

In my testing there is no performance difference.

This implementation relies on percpu allocator.  Because percpu allocator
uses vmalloc address space, vmalloc address space could be exhausted by
this change on many cpu system with *32 bit* kernel.  This implementation
can cover 1024 cpus in worst case by following calculation.

Worst: 1024 cpus * 4 bytes for pointer * 300 kmem_caches *
	120 objects per cpu_cache = 140 MB
Normal: 1024 cpus * 4 bytes for pointer * 150 kmem_caches(slab merge) *
	80 objects per cpu_cache = 46 MB

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab_def.h | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 8235dfbb3b05..b869d1662ba3 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -8,6 +8,8 @@
  */
 
 struct kmem_cache {
+	struct array_cache __percpu *cpu_cache;
+
 /* 1) Cache tunables. Protected by slab_mutex */
 	unsigned int batchcount;
 	unsigned int limit;
@@ -71,23 +73,7 @@ struct kmem_cache {
 	struct memcg_cache_params *memcg_params;
 #endif
 
-/* 6) per-cpu/per-node data, touched during every alloc/free */
-	/*
-	 * We put array[] at the end of kmem_cache, because we want to size
-	 * this array to nr_cpu_ids slots instead of NR_CPUS
-	 * (see kmem_cache_init())
-	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of cpus.
-	 *
-	 * We also need to guarantee that the list is able to accomodate a
-	 * pointer for each node since "nodelists" uses the remainder of
-	 * available pointers.
-	 */
-	struct kmem_cache_node **node;
-	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
-	/*
-	 * Do not add fields after array[]
-	 */
+	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
 #endif	/* _LINUX_SLAB_DEF_H */
-- 
cgit v1.2.3


From ed2f240094f900833ac06f533ab8bbcf0a1e8199 Mon Sep 17 00:00:00 2001
From: Zhang Zhen <zhenzhang.zhang@huawei.com>
Date: Thu, 9 Oct 2014 15:26:31 -0700
Subject: memory-hotplug: add sysfs valid_zones attribute

Currently memory-hotplug has two limits:

1. If the memory block is in ZONE_NORMAL, you can change it to
   ZONE_MOVABLE, but this memory block must be adjacent to ZONE_MOVABLE.

2. If the memory block is in ZONE_MOVABLE, you can change it to
   ZONE_NORMAL, but this memory block must be adjacent to ZONE_NORMAL.

With this patch, we can easy to know a memory block can be onlined to
which zone, and don't need to know the above two limits.

Updated the related Documentation.

[akpm@linux-foundation.org: use conventional comment layout]
[akpm@linux-foundation.org: fix build with CONFIG_MEMORY_HOTREMOVE=n]
[akpm@linux-foundation.org: remove unused local zone_prev]
Signed-off-by: Zhang Zhen <zhenzhang.zhang@huawei.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d9524c49d767..8f1a41951df9 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -84,6 +84,7 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 /* VM interface that may be used by firmware interface */
 extern int online_pages(unsigned long, unsigned long, int);
+extern int test_pages_in_a_zone(unsigned long, unsigned long);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
 typedef void (*online_page_callback_t)(struct page *page);
-- 
cgit v1.2.3


From 505e3be6c082489a32a88e042f930d047b6415bc Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Thu, 9 Oct 2014 15:26:35 -0700
Subject: lib/genalloc.c: add power aligned algorithm

One of the more common algorithms used for allocation is to align the
start address of the allocation to the order of size requested.  Add this
as an algorithm option for genalloc.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Riley <davidriley@chromium.org>
Cc: Ritesh Harjain <ritesh.harjani@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/genalloc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 1c2fdaa2ffc3..3cd0934d62ba 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -110,6 +110,10 @@ extern void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo,
 extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size,
 		unsigned long start, unsigned int nr, void *data);
 
+extern unsigned long gen_pool_first_fit_order_align(unsigned long *map,
+		unsigned long size, unsigned long start, unsigned int nr,
+		void *data);
+
 extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size,
 		unsigned long start, unsigned int nr, void *data);
 
-- 
cgit v1.2.3


From 9efb3a421d55d30b65fb0dbee05108d15c6c55f7 Mon Sep 17 00:00:00 2001
From: Laura Abbott <lauraa@codeaurora.org>
Date: Thu, 9 Oct 2014 15:26:38 -0700
Subject: lib/genalloc.c: add genpool range check function

After allocating an address from a particular genpool, there is no good
way to verify if that address actually belongs to a genpool.  Introduce
addr_in_gen_pool which will return if an address plus size falls
completely within the genpool range.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Riley <davidriley@chromium.org>
Cc: Ritesh Harjain <ritesh.harjani@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/genalloc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 3cd0934d62ba..1ccaab44abcc 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -121,6 +121,9 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev,
 		int min_alloc_order, int nid);
 extern struct gen_pool *dev_get_gen_pool(struct device *dev);
 
+bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+			size_t size);
+
 #ifdef CONFIG_OF
 extern struct gen_pool *of_get_named_gen_pool(struct device_node *np,
 	const char *propname, int index);
-- 
cgit v1.2.3


From 53853e2d2bfb748a8b5aa2fd1de15699266865e0 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 9 Oct 2014 15:27:02 -0700
Subject: mm, compaction: defer each zone individually instead of preferred
 zone

When direct sync compaction is often unsuccessful, it may become deferred
for some time to avoid further useless attempts, both sync and async.
Successful high-order allocations un-defer compaction, while further
unsuccessful compaction attempts prolong the compaction deferred period.

Currently the checking and setting deferred status is performed only on
the preferred zone of the allocation that invoked direct compaction.  But
compaction itself is attempted on all eligible zones in the zonelist, so
the behavior is suboptimal and may lead both to scenarios where 1)
compaction is attempted uselessly, or 2) where it's not attempted despite
good chances of succeeding, as shown on the examples below:

1) A direct compaction with Normal preferred zone failed and set
   deferred compaction for the Normal zone.  Another unrelated direct
   compaction with DMA32 as preferred zone will attempt to compact DMA32
   zone even though the first compaction attempt also included DMA32 zone.

   In another scenario, compaction with Normal preferred zone failed to
   compact Normal zone, but succeeded in the DMA32 zone, so it will not
   defer compaction.  In the next attempt, it will try Normal zone which
   will fail again, instead of skipping Normal zone and trying DMA32
   directly.

2) Kswapd will balance DMA32 zone and reset defer status based on
   watermarks looking good.  A direct compaction with preferred Normal
   zone will skip compaction of all zones including DMA32 because Normal
   was still deferred.  The allocation might have succeeded in DMA32, but
   won't.

This patch makes compaction deferring work on individual zone basis
instead of preferred zone.  For each zone, it checks compaction_deferred()
to decide if the zone should be skipped.  If watermarks fail after
compacting the zone, defer_compaction() is called.  The zone where
watermarks passed can still be deferred when the allocation attempt is
unsuccessful.  When allocation is successful, compaction_defer_reset() is
called for the zone containing the allocated page.  This approach should
approximate calling defer_compaction() only on zones where compaction was
attempted and did not yield allocated page.  There might be corner cases
but that is inevitable as long as the decision to stop compacting dues not
guarantee that a page will be allocated.

Due to a new COMPACT_DEFERRED return value, some functions relying
implicitly on COMPACT_SKIPPED = 0 had to be updated, with comments made
more accurate.  The did_some_progress output parameter of
__alloc_pages_direct_compact() is removed completely, as the caller
actually does not use it after compaction sets it - it is only considered
when direct reclaim sets it.

During testing on a two-node machine with a single very small Normal zone
on node 1, this patch has improved success rates in stress-highalloc
mmtests benchmark.  The success here were previously made worse by commit
3a025760fc15 ("mm: page_alloc: spill to remote nodes before waking
kswapd") as kswapd was no longer resetting often enough the deferred
compaction for the Normal zone, and DMA32 zones on both nodes were thus
not considered for compaction.  On different machine, success rates were
improved with __GFP_NO_KSWAPD allocations.

[akpm@linux-foundation.org: fix CONFIG_COMPACTION=n build]
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 01e3132820da..b2e4c92d0445 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -2,14 +2,16 @@
 #define _LINUX_COMPACTION_H
 
 /* Return values for compact_zone() and try_to_compact_pages() */
+/* compaction didn't start as it was deferred due to past failures */
+#define COMPACT_DEFERRED	0
 /* compaction didn't start as it was not possible or direct reclaim was more suitable */
-#define COMPACT_SKIPPED		0
+#define COMPACT_SKIPPED		1
 /* compaction should continue to another pageblock */
-#define COMPACT_CONTINUE	1
+#define COMPACT_CONTINUE	2
 /* direct compaction partially compacted a zone and there are suitable pages */
-#define COMPACT_PARTIAL		2
+#define COMPACT_PARTIAL		3
 /* The full zone was compacted */
-#define COMPACT_COMPLETE	3
+#define COMPACT_COMPLETE	4
 
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
@@ -22,7 +24,8 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			enum migrate_mode mode, bool *contended);
+			enum migrate_mode mode, bool *contended,
+			struct zone **candidate_zone);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
@@ -91,7 +94,8 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended)
+			enum migrate_mode mode, bool *contended,
+			struct zone **candidate_zone)
 {
 	return COMPACT_CONTINUE;
 }
-- 
cgit v1.2.3


From 1f9efdef4f3f1d2a073e524113fd0038af636f2b Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 9 Oct 2014 15:27:14 -0700
Subject: mm, compaction: khugepaged should not give up due to need_resched()

Async compaction aborts when it detects zone lock contention or
need_resched() is true.  David Rientjes has reported that in practice,
most direct async compactions for THP allocation abort due to
need_resched().  This means that a second direct compaction is never
attempted, which might be OK for a page fault, but khugepaged is intended
to attempt a sync compaction in such case and in these cases it won't.

This patch replaces "bool contended" in compact_control with an int that
distinguishes between aborting due to need_resched() and aborting due to
lock contention.  This allows propagating the abort through all compaction
functions as before, but passing the abort reason up to
__alloc_pages_slowpath() which decides when to continue with direct
reclaim and another compaction attempt.

Another problem is that try_to_compact_pages() did not act upon the
reported contention (both need_resched() or lock contention) immediately
and would proceed with another zone from the zonelist.  When
need_resched() is true, that means initializing another zone compaction,
only to check again need_resched() in isolate_migratepages() and aborting.
 For zone lock contention, the unintended consequence is that the lock
contended status reported back to the allocator is detrmined from the last
zone where compaction was attempted, which is rather arbitrary.

This patch fixes the problem in the following way:
- async compaction of a zone aborting due to need_resched() or fatal signal
  pending means that further zones should not be tried. We report
  COMPACT_CONTENDED_SCHED to the allocator.
- aborting zone compaction due to lock contention means we can still try
  another zone, since it has different set of locks. We report back
  COMPACT_CONTENDED_LOCK only if *all* zones where compaction was attempted,
  it was aborted due to lock contention.

As a result of these fixes, khugepaged will proceed with second sync
compaction as intended, when the preceding async compaction aborted due to
need_resched().  Page fault compactions aborting due to need_resched()
will spare some cycles previously wasted by initializing another zone
compaction only to abort again.  Lock contention will be reported only
when compaction in all zones aborted due to lock contention, and therefore
it's not a good idea to try again after reclaim.

In stress-highalloc from mmtests configured to use __GFP_NO_KSWAPD, this
has improved number of THP collapse allocations by 10%, which shows
positive effect on khugepaged.  The benchmark's success rates are
unchanged as it is not recognized as khugepaged.  Numbers of compact_stall
and compact_fail events have however decreased by 20%, with
compact_success still a bit improved, which is good.  With benchmark
configured not to use __GFP_NO_KSWAPD, there is 6% improvement in THP
collapse allocations, and only slight improvement in stalls and failures.

[akpm@linux-foundation.org: fix warnings]
Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index b2e4c92d0445..60bdf8dc02a3 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -13,6 +13,14 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	4
 
+/* Used to signal whether compaction detected need_sched() or lock contention */
+/* No contention detected */
+#define COMPACT_CONTENDED_NONE	0
+/* Either need_sched() was true or fatal signal pending */
+#define COMPACT_CONTENDED_SCHED	1
+/* Zone lock or lru_lock was contended in async compaction */
+#define COMPACT_CONTENDED_LOCK	2
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -24,7 +32,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
@@ -94,7 +102,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone)
 {
 	return COMPACT_CONTINUE;
-- 
cgit v1.2.3


From 43e7a34d265e884b7cf34f9b05e6f2e0c05bf120 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 9 Oct 2014 15:27:25 -0700
Subject: mm: rename allocflags_to_migratetype for clarity

The page allocator has gfp flags (like __GFP_WAIT) and alloc flags (like
ALLOC_CPUSET) that have separate semantics.

The function allocflags_to_migratetype() actually takes gfp flags, not
alloc flags, and returns a migratetype.  Rename it to
gfpflags_to_migratetype().

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5e7219dc0fae..41b30fd4d041 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -156,7 +156,7 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 
 /* Convert GFP flags to their corresponding migrate type */
-static inline int allocflags_to_migratetype(gfp_t gfp_flags)
+static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
 {
 	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
 
-- 
cgit v1.2.3


From 9c5990240e076ae564cccbd921868cd08f6daaa5 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 9 Oct 2014 15:27:29 -0700
Subject: mm: introduce check_data_rlimit helper

To eliminate code duplication lets introduce check_data_rlimit helper
which we will use in brk() and prctl() syscalls.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Julien Tinnes <jln@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 28df70774b81..4d814aa97785 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -18,6 +18,7 @@
 #include <linux/pfn.h>
 #include <linux/bit_spinlock.h>
 #include <linux/shrinker.h>
+#include <linux/resource.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1780,6 +1781,20 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	bool *need_rmap_locks);
 extern void exit_mmap(struct mm_struct *);
 
+static inline int check_data_rlimit(unsigned long rlim,
+				    unsigned long new,
+				    unsigned long start,
+				    unsigned long end_data,
+				    unsigned long start_data)
+{
+	if (rlim < RLIM_INFINITY) {
+		if (((new - start) + (end_data - start_data)) > rlim)
+			return -ENOSPC;
+	}
+
+	return 0;
+}
+
 extern int mm_take_all_locks(struct mm_struct *mm);
 extern void mm_drop_all_locks(struct mm_struct *mm);
 
-- 
cgit v1.2.3


From 1f13ae399c58af5a05b5cee61da864e1f4071de4 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 9 Oct 2014 15:27:39 -0700
Subject: mm: remove noisy remainder of the scan_unevictable interface

The deprecation warnings for the scan_unevictable interface triggers by
scripts doing `sysctl -a | grep something else'.  This is annoying and not
helpful.

The interface has been defunct since 264e56d8247e ("mm: disable user
interface to manually rescue unevictable pages"), which was in 2011, and
there haven't been any reports of usecases for it, only reports that the
deprecation warnings are annying.  It's unlikely that anybody is using
this interface specifically at this point, so remove it.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1b72060f093a..ea4f926e6b9b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,22 +354,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 extern int page_evictable(struct page *page);
 extern void check_move_unevictable_pages(struct page **, int nr_pages);
 
-extern unsigned long scan_unevictable_pages;
-extern int scan_unevictable_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-#ifdef CONFIG_NUMA
-extern int scan_unevictable_register_node(struct node *node);
-extern void scan_unevictable_unregister_node(struct node *node);
-#else
-static inline int scan_unevictable_register_node(struct node *node)
-{
-	return 0;
-}
-static inline void scan_unevictable_unregister_node(struct node *node)
-{
-}
-#endif
-
 extern int kswapd_run(int nid);
 extern void kswapd_stop(int nid);
 #ifdef CONFIG_MEMCG
-- 
cgit v1.2.3


From 6b6482bbf64ef6f6dbc8b52f7a7cf88a0498bd51 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 9 Oct 2014 15:27:48 -0700
Subject: mempolicy: remove the "task" arg of vma_policy_mof() and simplify it

1. vma_policy_mof(task) is simply not safe unless task == current,
   it can race with do_exit()->mpol_put(). Remove this arg and update
   its single caller.

2. vma can not be NULL, remove this check and simplify the code.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index f230a978e6ba..5e4bfcedd2ce 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -136,7 +136,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 
 struct mempolicy *get_vma_policy(struct task_struct *tsk,
 		struct vm_area_struct *vma, unsigned long addr);
-bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma);
+bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
-- 
cgit v1.2.3


From 74d2c3a05cc6c1eef2d7236a9919036ed85ddaaf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 9 Oct 2014 15:27:50 -0700
Subject: mempolicy: introduce __get_vma_policy(), export get_task_policy()

Extract the code which looks for vma's policy from get_vma_policy()
into the new helper, __get_vma_policy(). Export get_task_policy().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5e4bfcedd2ce..e1abe249892a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -134,6 +134,9 @@ void mpol_free_shared_policy(struct shared_policy *p);
 struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 					    unsigned long idx);
 
+struct mempolicy *get_task_policy(struct task_struct *p);
+struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
+		unsigned long addr);
 struct mempolicy *get_vma_policy(struct task_struct *tsk,
 		struct vm_area_struct *vma, unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
-- 
cgit v1.2.3


From dd6eecb917938c1b7e505a83df307b3476e7c8bd Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 9 Oct 2014 15:27:57 -0700
Subject: mempolicy: unexport get_vma_policy() and remove its "task" arg

- get_vma_policy(task) is not safe if task != current, remove this
  argument.

- get_vma_policy() no longer has callers outside of mempolicy.c,
  make it static.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index e1abe249892a..3d385c81c153 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -137,8 +137,6 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
-struct mempolicy *get_vma_policy(struct task_struct *tsk,
-		struct vm_area_struct *vma, unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
-- 
cgit v1.2.3


From 1c93923cc264105418e6ead149c76bd88302eff4 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 9 Oct 2014 15:27:59 -0700
Subject: include/linux/migrate.h: remove migrate_page #define

This is designed to avoid a few ifdefs in .c files but it's obnoxious
because it can cause unsuspecting "migrate_page" symbols to get turned into
"NULL".

Just nuke it and use the ifdefs.

Cc: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index a2901c414664..b66fd10f4b93 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -82,9 +82,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 	return -ENOSYS;
 }
 
-/* Possible settings for the migrate_page() method in address_operations */
-#define migrate_page NULL
-
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
-- 
cgit v1.2.3


From 0bf55139782db1fa96af66e37cc84afde18443ef Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 9 Oct 2014 15:28:06 -0700
Subject: mm: introduce dump_vma

Introduce a helper to dump information about a VMA, this also makes
dump_page_flags more generic and re-uses that so the output looks very
similar to dump_page:

[   61.903437] vma ffff88070f88be00 start 00007fff25970000 end 00007fff25992000
[   61.903437] next ffff88070facd600 prev ffff88070face400 mm ffff88070fade000
[   61.903437] prot 8000000000000025 anon_vma ffff88070fa1e200 vm_ops           (null)
[   61.903437] pgoff 7ffffffdd file           (null) private_data           (null)
[   61.909129] flags: 0x100173(read|write|mayread|maywrite|mayexec|growsdown|account)

[akpm@linux-foundation.org: make dump_vma() require CONFIG_DEBUG_VM]
[swarren@nvidia.com: fix dump_vma() compilation]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 2f348d02f640..dfb93333fc62 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -4,10 +4,12 @@
 #include <linux/stringify.h>
 
 struct page;
+struct vm_area_struct;
 
 extern void dump_page(struct page *page, const char *reason);
 extern void dump_page_badflags(struct page *page, const char *reason,
 			       unsigned long badflags);
+void dump_vma(const struct vm_area_struct *vma);
 
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
-- 
cgit v1.2.3


From fa3759ccd5651c4235f572302d58c8ec9ddf1c4b Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 9 Oct 2014 15:28:08 -0700
Subject: mm: introduce VM_BUG_ON_VMA

Very similar to VM_BUG_ON_PAGE but dumps VMA information instead.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index dfb93333fc62..569e4c8d0ebb 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -20,12 +20,20 @@ void dump_vma(const struct vm_area_struct *vma);
 			BUG();						\
 		}							\
 	} while (0)
+#define VM_BUG_ON_VMA(cond, vma)					\
+	do {								\
+		if (unlikely(cond)) {					\
+			dump_vma(vma);					\
+			BUG();						\
+		}							\
+	} while (0)
 #define VM_WARN_ON(cond) WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
 #define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
 #else
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+#define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
-- 
cgit v1.2.3


From 81d1b09c6be66afac7d41ee52279d9bccbce56d8 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 9 Oct 2014 15:28:10 -0700
Subject: mm: convert a few VM_BUG_ON callers to VM_BUG_ON_VMA

Trivially convert a few VM_BUG_ON calls to VM_BUG_ON_VMA to extract
more information when they trigger.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 2 +-
 include/linux/rmap.h    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 63579cb8d3dc..ad9051bab267 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -132,7 +132,7 @@ extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 		spinlock_t **ptl)
 {
-	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+	VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
 	if (pmd_trans_huge(*pmd))
 		return __pmd_trans_huge_lock(pmd, vma, ptl);
 	else
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index be574506e6a9..c0c2bce6b0b7 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -150,7 +150,7 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 static inline void anon_vma_merge(struct vm_area_struct *vma,
 				  struct vm_area_struct *next)
 {
-	VM_BUG_ON(vma->anon_vma != next->anon_vma);
+	VM_BUG_ON_VMA(vma->anon_vma != next->anon_vma, vma);
 	unlink_anon_vmas(next);
 }
 
-- 
cgit v1.2.3


From 5705465174686d007473e017b76c4b64b44aa690 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 9 Oct 2014 15:28:17 -0700
Subject: mm: clean up zone flags

Page reclaim tests zone_is_reclaim_dirty(), but the site that actually
sets this state does zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY), sending the
reader through layers indirection just to track down a simple bit.

Remove all zone flag wrappers and just use bitops against zone->flags
directly.  It's just as readable and the lines are barely any longer.

Also rename ZONE_TAIL_LRU_DIRTY to ZONE_DIRTY to match ZONE_WRITEBACK, and
remove the zone_flags_t typedef.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 51 +++-----------------------------------------------
 1 file changed, 3 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 318df7051850..48bf12ef6620 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -521,13 +521,13 @@ struct zone {
 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
-typedef enum {
+enum zone_flags {
 	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 	ZONE_OOM_LOCKED,		/* zone is in OOM killer zonelist */
 	ZONE_CONGESTED,			/* zone has many dirty pages backed by
 					 * a congested BDI
 					 */
-	ZONE_TAIL_LRU_DIRTY,		/* reclaim scanning has recently found
+	ZONE_DIRTY,			/* reclaim scanning has recently found
 					 * many dirty file pages at the tail
 					 * of the LRU.
 					 */
@@ -535,52 +535,7 @@ typedef enum {
 					 * many pages under writeback
 					 */
 	ZONE_FAIR_DEPLETED,		/* fair zone policy batch depleted */
-} zone_flags_t;
-
-static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
-{
-	set_bit(flag, &zone->flags);
-}
-
-static inline int zone_test_and_set_flag(struct zone *zone, zone_flags_t flag)
-{
-	return test_and_set_bit(flag, &zone->flags);
-}
-
-static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
-{
-	clear_bit(flag, &zone->flags);
-}
-
-static inline int zone_is_reclaim_congested(const struct zone *zone)
-{
-	return test_bit(ZONE_CONGESTED, &zone->flags);
-}
-
-static inline int zone_is_reclaim_dirty(const struct zone *zone)
-{
-	return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags);
-}
-
-static inline int zone_is_reclaim_writeback(const struct zone *zone)
-{
-	return test_bit(ZONE_WRITEBACK, &zone->flags);
-}
-
-static inline int zone_is_reclaim_locked(const struct zone *zone)
-{
-	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
-}
-
-static inline int zone_is_fair_depleted(const struct zone *zone)
-{
-	return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
-}
-
-static inline int zone_is_oom_locked(const struct zone *zone)
-{
-	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
-}
+};
 
 static inline unsigned long zone_end_pfn(const struct zone *zone)
 {
-- 
cgit v1.2.3


From 934f3072c17cc8886f4c043b47eeeb1b12f8de33 Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Thu, 9 Oct 2014 15:28:23 -0700
Subject: mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set

commit 21caf2fc1931 ("mm: teach mm by current context info to not do I/O
during memory allocation") introduces PF_MEMALLOC_NOIO flag to avoid doing
I/O inside memory allocation, __GFP_IO is cleared when this flag is set,
but __GFP_FS implies __GFP_IO, it should also be cleared.  Or it may still
run into I/O, like in superblock shrinker.  And this will make the kernel
run into the deadlock case described in that commit.

See Dave Chinner's comment about io in superblock shrinker:

Filesystem shrinkers do indeed perform IO from the superblock shrinker and
have for years.  Even clean inodes can require IO before they can be freed
- e.g.  on an orphan list, need truncation of post-eof blocks, need to
wait for ordered operations to complete before it can be freed, etc.

IOWs, Ext4, btrfs and XFS all can issue and/or block on arbitrary amounts
of IO in the superblock shrinker context.  XFS, in particular, has been
doing transactions and IO from the VFS inode cache shrinker since it was
first introduced....

Fix this by clearing __GFP_FS in memalloc_noio_flags(), this function has
masked all the gfp_mask that will be passed into fs for the processes
setting PF_MEMALLOC_NOIO in the direct reclaim path.

v1 thread at: https://lkml.org/lkml/2014/9/3/32

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: joyce.xue <xuejiufei@huawei.com>
Cc: Ming Lei <ming.lei@canonical.com>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c6353d9e63a..5e63ba59258c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1935,11 +1935,13 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
-/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags */
+/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags
+ * __GFP_FS is also cleared as it implies __GFP_IO.
+ */
 static inline gfp_t memalloc_noio_flags(gfp_t flags)
 {
 	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
-		flags &= ~__GFP_IO;
+		flags &= ~(__GFP_IO | __GFP_FS);
 	return flags;
 }
 
-- 
cgit v1.2.3


From 31c9afa6db122a5c7a7843278aaf77dd08ea6e98 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 9 Oct 2014 15:28:37 -0700
Subject: mm: introduce VM_BUG_ON_MM

Very similar to VM_BUG_ON_PAGE and VM_BUG_ON_VMA, dump struct_mm when the
bug is hit.

[akpm@linux-foundation.org: coding-style fixes]
[mhocko@suse.cz: fix build]
[mhocko@suse.cz: fix build some more]
[akpm@linux-foundation.org: do strange things to avoid doing strange things for the comma separators]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 569e4c8d0ebb..877ef226f90f 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -5,11 +5,13 @@
 
 struct page;
 struct vm_area_struct;
+struct mm_struct;
 
 extern void dump_page(struct page *page, const char *reason);
 extern void dump_page_badflags(struct page *page, const char *reason,
 			       unsigned long badflags);
 void dump_vma(const struct vm_area_struct *vma);
+void dump_mm(const struct mm_struct *mm);
 
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
@@ -27,6 +29,13 @@ void dump_vma(const struct vm_area_struct *vma);
 			BUG();						\
 		}							\
 	} while (0)
+#define VM_BUG_ON_MM(cond, mm)						\
+	do {								\
+		if (unlikely(cond)) {					\
+			dump_mm(mm);					\
+			BUG();						\
+		}							\
+	} while (0)
 #define VM_WARN_ON(cond) WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
 #define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
@@ -34,6 +43,7 @@ void dump_vma(const struct vm_area_struct *vma);
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
 #define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond)
+#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
-- 
cgit v1.2.3


From 33a690c45b202e4c6483bfd1d93ad8d0f51df2ca Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Thu, 9 Oct 2014 15:28:43 -0700
Subject: memcg: move memcg_{alloc,free}_cache_params to slab_common.c

The only reason why they live in memcontrol.c is that we get/put css
reference to the owner memory cgroup in them.  However, we can do that in
memcg_{un,}register_cache.  OTOH, there are several reasons to move them
to slab_common.c.

First, I think that the less public interface functions we have in
memcontrol.h the better.  Since the functions I move don't depend on
memcontrol, I think it's worth making them private to slab, especially
taking into account that the arrays are defined on the slab's side too.

Second, the way how per-memcg arrays are updated looks rather awkward: it
proceeds from memcontrol.c (__memcg_activate_kmem) to slab_common.c
(memcg_update_all_caches) and back to memcontrol.c again
(memcg_update_array_size).  In the following patches I move the function
relocating the arrays (memcg_update_array_size) to slab_common.c and
therefore get rid this circular call path.  I think we should have the
cache allocation stuff in the same place where we have relocation, because
it's easier to follow the code then.  So I move arrays alloc/free
functions to slab_common.c too.

The third point isn't obvious.  I'm going to make the list_lru structure
per-memcg to allow targeted kmem reclaim.  That means we will have
per-memcg arrays in list_lrus too.  It turns out that it's much easier to
update these arrays in list_lru.c rather than in memcontrol.c, because all
the stuff we need is defined there.  This patch makes memcg caches arrays
allocation path conform that of the upcoming list_lru.

So let's move these functions to slab_common.c and make them static.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e0752d204d9e..4d17242eeff7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -440,10 +440,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
 
-int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
-			     struct kmem_cache *root_cache);
-void memcg_free_cache_params(struct kmem_cache *s);
-
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
 
@@ -574,16 +570,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return -1;
 }
 
-static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg,
-		struct kmem_cache *s, struct kmem_cache *root_cache)
-{
-	return 0;
-}
-
-static inline void memcg_free_cache_params(struct kmem_cache *s)
-{
-}
-
 static inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
-- 
cgit v1.2.3


From 6f817f4cda68b09621312ec5ba84217bc5e37b3d Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Thu, 9 Oct 2014 15:28:47 -0700
Subject: memcg: move memcg_update_cache_size() to slab_common.c

`While growing per memcg caches arrays, we jump between memcontrol.c and
slab_common.c in a weird way:

  memcg_alloc_cache_id - memcontrol.c
    memcg_update_all_caches - slab_common.c
      memcg_update_cache_size - memcontrol.c

There's absolutely no reason why memcg_update_cache_size can't live on the
slab's side though.  So let's move it there and settle it comfortably amid
per-memcg cache allocation functions.

Besides, this patch cleans this function up a bit, removing all the
useless comments from it, and renames it to memcg_update_cache_params to
conform to memcg_alloc/free_cache_params, which we already have in
slab_common.c.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4d17242eeff7..19df5d857411 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -440,7 +440,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
 
-int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
 
 struct kmem_cache *
-- 
cgit v1.2.3


From b70a2a21dc9d4ad455931b53131a0cb4fc01fafe Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 9 Oct 2014 15:28:56 -0700
Subject: mm: memcontrol: fix transparent huge page allocations under pressure

In a memcg with even just moderate cache pressure, success rates for
transparent huge page allocations drop to zero, wasting a lot of effort
that the allocator puts into assembling these pages.

The reason for this is that the memcg reclaim code was never designed for
higher-order charges.  It reclaims in small batches until there is room
for at least one page.  Huge page charges only succeed when these batches
add up over a series of huge faults, which is unlikely under any
significant load involving order-0 allocations in the group.

Remove that loop on the memcg side in favor of passing the actual reclaim
goal to direct reclaim, which is already set up and optimized to meet
higher-order goals efficiently.

This brings memcg's THP policy in line with the system policy: if the
allocator painstakingly assembles a hugepage, memcg will at least make an
honest effort to charge it.  As a result, transparent hugepage allocation
rates amid cache activity are drastically improved:

                                      vanilla                 patched
pgalloc                 4717530.80 (  +0.00%)   4451376.40 (  -5.64%)
pgfault                  491370.60 (  +0.00%)    225477.40 ( -54.11%)
pgmajfault                    2.00 (  +0.00%)         1.80 (  -6.67%)
thp_fault_alloc               0.00 (  +0.00%)       531.60 (+100.00%)
thp_fault_fallback          749.00 (  +0.00%)       217.40 ( -70.88%)

[ Note: this may in turn increase memory consumption from internal
  fragmentation, which is an inherent risk of transparent hugepages.
  Some setups may have to adjust the memcg limits accordingly to
  accomodate this - or, if the machine is already packed to capacity,
  disable the transparent huge page feature. ]

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Dave Hansen <dave@sr71.net>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ea4f926e6b9b..37a585beef5c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -327,8 +327,10 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
 extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
-extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
-						  gfp_t gfp_mask, bool noswap);
+extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
+						  unsigned long nr_pages,
+						  gfp_t gfp_mask,
+						  bool may_swap);
 extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						struct zone *zone,
-- 
cgit v1.2.3


From d6d86c0a7f8ddc5b38cf089222cb1d9540762dc2 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Date: Thu, 9 Oct 2014 15:29:27 -0700
Subject: mm/balloon_compaction: redesign ballooned pages management

Sasha Levin reported KASAN splash inside isolate_migratepages_range().
Problem is in the function __is_movable_balloon_page() which tests
AS_BALLOON_MAP in page->mapping->flags.  This function has no protection
against anonymous pages.  As result it tried to check address space flags
inside struct anon_vma.

Further investigation shows more problems in current implementation:

* Special branch in __unmap_and_move() never works:
  balloon_page_movable() checks page flags and page_count.  In
  __unmap_and_move() page is locked, reference counter is elevated, thus
  balloon_page_movable() always fails.  As a result execution goes to the
  normal migration path.  virtballoon_migratepage() returns
  MIGRATEPAGE_BALLOON_SUCCESS instead of MIGRATEPAGE_SUCCESS,
  move_to_new_page() thinks this is an error code and assigns
  newpage->mapping to NULL.  Newly migrated page lose connectivity with
  balloon an all ability for further migration.

* lru_lock erroneously required in isolate_migratepages_range() for
  isolation ballooned page.  This function releases lru_lock periodically,
  this makes migration mostly impossible for some pages.

* balloon_page_dequeue have a tight race with balloon_page_isolate:
  balloon_page_isolate could be executed in parallel with dequeue between
  picking page from list and locking page_lock.  Race is rare because they
  use trylock_page() for locking.

This patch fixes all of them.

Instead of fake mapping with special flag this patch uses special state of
page->_mapcount: PAGE_BALLOON_MAPCOUNT_VALUE = -256.  Buddy allocator uses
PAGE_BUDDY_MAPCOUNT_VALUE = -128 for similar purpose.  Storing mark
directly in struct page makes everything safer and easier.

PagePrivate is used to mark pages present in page list (i.e.  not
isolated, like PageLRU for normal pages).  It replaces special rules for
reference counter and makes balloon migration similar to migration of
normal pages.  This flag is protected by page_lock together with link to
the balloon device.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Link: http://lkml.kernel.org/p/53E6CEAA.9020105@oracle.com
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: <stable@vger.kernel.org>	[3.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/balloon_compaction.h | 97 ++++++++++----------------------------
 include/linux/migrate.h            | 11 +----
 include/linux/mm.h                 | 19 ++++++++
 3 files changed, 44 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 089743ade734..38aa07d5b81c 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -27,10 +27,13 @@
  *      counter raised only while it is under our special handling;
  *
  * iii. after the lockless scan step have selected a potential balloon page for
- *      isolation, re-test the page->mapping flags and the page ref counter
+ *      isolation, re-test the PageBalloon mark and the PagePrivate flag
  *      under the proper page lock, to ensure isolating a valid balloon page
  *      (not yet isolated, nor under release procedure)
  *
+ *  iv. isolation or dequeueing procedure must clear PagePrivate flag under
+ *      page lock together with removing page from balloon device page list.
+ *
  * The functions provided by this interface are placed to help on coping with
  * the aforementioned balloon page corner case, as well as to ensure the simple
  * set of exposed rules are satisfied while we are dealing with balloon pages
@@ -71,28 +74,6 @@ static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
 	kfree(b_dev_info);
 }
 
-/*
- * balloon_page_free - release a balloon page back to the page free lists
- * @page: ballooned page to be set free
- *
- * This function must be used to properly set free an isolated/dequeued balloon
- * page at the end of a sucessful page migration, or at the balloon driver's
- * page release procedure.
- */
-static inline void balloon_page_free(struct page *page)
-{
-	/*
-	 * Balloon pages always get an extra refcount before being isolated
-	 * and before being dequeued to help on sorting out fortuite colisions
-	 * between a thread attempting to isolate and another thread attempting
-	 * to release the very same balloon page.
-	 *
-	 * Before we handle the page back to Buddy, lets drop its extra refcnt.
-	 */
-	put_page(page);
-	__free_page(page);
-}
-
 #ifdef CONFIG_BALLOON_COMPACTION
 extern bool balloon_page_isolate(struct page *page);
 extern void balloon_page_putback(struct page *page);
@@ -108,74 +89,33 @@ static inline void balloon_mapping_free(struct address_space *balloon_mapping)
 }
 
 /*
- * page_flags_cleared - helper to perform balloon @page ->flags tests.
- *
- * As balloon pages are obtained from buddy and we do not play with page->flags
- * at driver level (exception made when we get the page lock for compaction),
- * we can safely identify a ballooned page by checking if the
- * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared.  This approach also
- * helps us skip ballooned pages that are locked for compaction or release, thus
- * mitigating their racy check at balloon_page_movable()
- */
-static inline bool page_flags_cleared(struct page *page)
-{
-	return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP);
-}
-
-/*
- * __is_movable_balloon_page - helper to perform @page mapping->flags tests
+ * __is_movable_balloon_page - helper to perform @page PageBalloon tests
  */
 static inline bool __is_movable_balloon_page(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	return mapping_balloon(mapping);
+	return PageBalloon(page);
 }
 
 /*
- * balloon_page_movable - test page->mapping->flags to identify balloon pages
- *			  that can be moved by compaction/migration.
- *
- * This function is used at core compaction's page isolation scheme, therefore
- * most pages exposed to it are not enlisted as balloon pages and so, to avoid
- * undesired side effects like racing against __free_pages(), we cannot afford
- * holding the page locked while testing page->mapping->flags here.
+ * balloon_page_movable - test PageBalloon to identify balloon pages
+ *			  and PagePrivate to check that the page is not
+ *			  isolated and can be moved by compaction/migration.
  *
  * As we might return false positives in the case of a balloon page being just
- * released under us, the page->mapping->flags need to be re-tested later,
- * under the proper page lock, at the functions that will be coping with the
- * balloon page case.
+ * released under us, this need to be re-tested later, under the page lock.
  */
 static inline bool balloon_page_movable(struct page *page)
 {
-	/*
-	 * Before dereferencing and testing mapping->flags, let's make sure
-	 * this is not a page that uses ->mapping in a different way
-	 */
-	if (page_flags_cleared(page) && !page_mapped(page) &&
-	    page_count(page) == 1)
-		return __is_movable_balloon_page(page);
-
-	return false;
+	return PageBalloon(page) && PagePrivate(page);
 }
 
 /*
  * isolated_balloon_page - identify an isolated balloon page on private
  *			   compaction/migration page lists.
- *
- * After a compaction thread isolates a balloon page for migration, it raises
- * the page refcount to prevent concurrent compaction threads from re-isolating
- * the same page. For that reason putback_movable_pages(), or other routines
- * that need to identify isolated balloon pages on private pagelists, cannot
- * rely on balloon_page_movable() to accomplish the task.
  */
 static inline bool isolated_balloon_page(struct page *page)
 {
-	/* Already isolated balloon pages, by default, have a raised refcount */
-	if (page_flags_cleared(page) && !page_mapped(page) &&
-	    page_count(page) >= 2)
-		return __is_movable_balloon_page(page);
-
-	return false;
+	return PageBalloon(page);
 }
 
 /*
@@ -192,6 +132,8 @@ static inline void balloon_page_insert(struct page *page,
 				       struct address_space *mapping,
 				       struct list_head *head)
 {
+	__SetPageBalloon(page);
+	SetPagePrivate(page);
 	page->mapping = mapping;
 	list_add(&page->lru, head);
 }
@@ -206,8 +148,12 @@ static inline void balloon_page_insert(struct page *page,
  */
 static inline void balloon_page_delete(struct page *page)
 {
+	__ClearPageBalloon(page);
 	page->mapping = NULL;
-	list_del(&page->lru);
+	if (PagePrivate(page)) {
+		ClearPagePrivate(page);
+		list_del(&page->lru);
+	}
 }
 
 /*
@@ -258,6 +204,11 @@ static inline void balloon_page_delete(struct page *page)
 	list_del(&page->lru);
 }
 
+static inline bool __is_movable_balloon_page(struct page *page)
+{
+	return false;
+}
+
 static inline bool balloon_page_movable(struct page *page)
 {
 	return false;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index b66fd10f4b93..01aad3ed89ec 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -13,18 +13,9 @@ typedef void free_page_t(struct page *page, unsigned long private);
  * Return values from addresss_space_operations.migratepage():
  * - negative errno on page migration failure;
  * - zero on page migration success;
- *
- * The balloon page migration introduces this special case where a 'distinct'
- * return code is used to flag a successful page migration to unmap_and_move().
- * This approach is necessary because page migration can race against balloon
- * deflation procedure, and for such case we could introduce a nasty page leak
- * if a successfully migrated balloon page gets released concurrently with
- * migration's unmap_and_move() wrap-up steps.
  */
 #define MIGRATEPAGE_SUCCESS		0
-#define MIGRATEPAGE_BALLOON_SUCCESS	1 /* special ret code for balloon page
-					   * sucessful migration case.
-					   */
+
 enum migrate_reason {
 	MR_COMPACTION,
 	MR_MEMORY_FAILURE,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4d814aa97785..fa0d74e06428 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -554,6 +554,25 @@ static inline void __ClearPageBuddy(struct page *page)
 	atomic_set(&page->_mapcount, -1);
 }
 
+#define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
+
+static inline int PageBalloon(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageBalloon(struct page *page)
+{
+	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
+	atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageBalloon(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageBalloon(page), page);
+	atomic_set(&page->_mapcount, -1);
+}
+
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
-- 
cgit v1.2.3


From 9d1ba8056474a208ed9efb7e58cd014795d9f818 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Date: Thu, 9 Oct 2014 15:29:29 -0700
Subject: mm/balloon_compaction: remove balloon mapping and flag AS_BALLOON_MAP

Now ballooned pages are detected using PageBalloon().  Fake mapping is no
longer required.  This patch links ballooned pages to balloon device using
field page->private instead of page->mapping.  Also this patch embeds
balloon_dev_info directly into struct virtio_balloon.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/balloon_compaction.h | 72 ++++++++++----------------------------
 include/linux/pagemap.h            | 18 +---------
 2 files changed, 20 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 38aa07d5b81c..bc3d2985cc9a 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -57,21 +57,22 @@
  * balloon driver as a page book-keeper for its registered balloon devices.
  */
 struct balloon_dev_info {
-	void *balloon_device;		/* balloon device descriptor */
-	struct address_space *mapping;	/* balloon special page->mapping */
 	unsigned long isolated_pages;	/* # of isolated pages for migration */
 	spinlock_t pages_lock;		/* Protection to pages list */
 	struct list_head pages;		/* Pages enqueued & handled to Host */
+	int (*migratepage)(struct balloon_dev_info *, struct page *newpage,
+			struct page *page, enum migrate_mode mode);
 };
 
 extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info);
 extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
-extern struct balloon_dev_info *balloon_devinfo_alloc(
-						void *balloon_dev_descriptor);
 
-static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
+static inline void balloon_devinfo_init(struct balloon_dev_info *balloon)
 {
-	kfree(b_dev_info);
+	balloon->isolated_pages = 0;
+	spin_lock_init(&balloon->pages_lock);
+	INIT_LIST_HEAD(&balloon->pages);
+	balloon->migratepage = NULL;
 }
 
 #ifdef CONFIG_BALLOON_COMPACTION
@@ -79,14 +80,6 @@ extern bool balloon_page_isolate(struct page *page);
 extern void balloon_page_putback(struct page *page);
 extern int balloon_page_migrate(struct page *newpage,
 				struct page *page, enum migrate_mode mode);
-extern struct address_space
-*balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
-			const struct address_space_operations *a_ops);
-
-static inline void balloon_mapping_free(struct address_space *balloon_mapping)
-{
-	kfree(balloon_mapping);
-}
 
 /*
  * __is_movable_balloon_page - helper to perform @page PageBalloon tests
@@ -120,27 +113,25 @@ static inline bool isolated_balloon_page(struct page *page)
 
 /*
  * balloon_page_insert - insert a page into the balloon's page list and make
- *		         the page->mapping assignment accordingly.
+ *			 the page->private assignment accordingly.
+ * @balloon : pointer to balloon device
  * @page    : page to be assigned as a 'balloon page'
- * @mapping : allocated special 'balloon_mapping'
- * @head    : balloon's device page list head
  *
  * Caller must ensure the page is locked and the spin_lock protecting balloon
  * pages list is held before inserting a page into the balloon device.
  */
-static inline void balloon_page_insert(struct page *page,
-				       struct address_space *mapping,
-				       struct list_head *head)
+static inline void balloon_page_insert(struct balloon_dev_info *balloon,
+				       struct page *page)
 {
 	__SetPageBalloon(page);
 	SetPagePrivate(page);
-	page->mapping = mapping;
-	list_add(&page->lru, head);
+	set_page_private(page, (unsigned long)balloon);
+	list_add(&page->lru, &balloon->pages);
 }
 
 /*
  * balloon_page_delete - delete a page from balloon's page list and clear
- *			 the page->mapping assignement accordingly.
+ *			 the page->private assignement accordingly.
  * @page    : page to be released from balloon's page list
  *
  * Caller must ensure the page is locked and the spin_lock protecting balloon
@@ -149,7 +140,7 @@ static inline void balloon_page_insert(struct page *page,
 static inline void balloon_page_delete(struct page *page)
 {
 	__ClearPageBalloon(page);
-	page->mapping = NULL;
+	set_page_private(page, 0);
 	if (PagePrivate(page)) {
 		ClearPagePrivate(page);
 		list_del(&page->lru);
@@ -162,11 +153,7 @@ static inline void balloon_page_delete(struct page *page)
  */
 static inline struct balloon_dev_info *balloon_page_device(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	if (likely(mapping))
-		return mapping->private_data;
-
-	return NULL;
+	return (struct balloon_dev_info *)page_private(page);
 }
 
 static inline gfp_t balloon_mapping_gfp_mask(void)
@@ -174,29 +161,12 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 	return GFP_HIGHUSER_MOVABLE;
 }
 
-static inline bool balloon_compaction_check(void)
-{
-	return true;
-}
-
 #else /* !CONFIG_BALLOON_COMPACTION */
 
-static inline void *balloon_mapping_alloc(void *balloon_device,
-				const struct address_space_operations *a_ops)
+static inline void balloon_page_insert(struct balloon_dev_info *balloon,
+				       struct page *page)
 {
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void balloon_mapping_free(struct address_space *balloon_mapping)
-{
-	return;
-}
-
-static inline void balloon_page_insert(struct page *page,
-				       struct address_space *mapping,
-				       struct list_head *head)
-{
-	list_add(&page->lru, head);
+	list_add(&page->lru, &balloon->pages);
 }
 
 static inline void balloon_page_delete(struct page *page)
@@ -240,9 +210,5 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 	return GFP_HIGHUSER;
 }
 
-static inline bool balloon_compaction_check(void)
-{
-	return false;
-}
 #endif /* CONFIG_BALLOON_COMPACTION */
 #endif /* _LINUX_BALLOON_COMPACTION_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 19191d39c4f3..7ea069cd3257 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -24,8 +24,7 @@ enum mapping_flags {
 	AS_ENOSPC	= __GFP_BITS_SHIFT + 1,	/* ENOSPC on async write */
 	AS_MM_ALL_LOCKS	= __GFP_BITS_SHIFT + 2,	/* under mm_take_all_locks() */
 	AS_UNEVICTABLE	= __GFP_BITS_SHIFT + 3,	/* e.g., ramdisk, SHM_LOCK */
-	AS_BALLOON_MAP  = __GFP_BITS_SHIFT + 4, /* balloon page special map */
-	AS_EXITING	= __GFP_BITS_SHIFT + 5, /* final truncate in progress */
+	AS_EXITING	= __GFP_BITS_SHIFT + 4, /* final truncate in progress */
 };
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
@@ -55,21 +54,6 @@ static inline int mapping_unevictable(struct address_space *mapping)
 	return !!mapping;
 }
 
-static inline void mapping_set_balloon(struct address_space *mapping)
-{
-	set_bit(AS_BALLOON_MAP, &mapping->flags);
-}
-
-static inline void mapping_clear_balloon(struct address_space *mapping)
-{
-	clear_bit(AS_BALLOON_MAP, &mapping->flags);
-}
-
-static inline int mapping_balloon(struct address_space *mapping)
-{
-	return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags);
-}
-
 static inline void mapping_set_exiting(struct address_space *mapping)
 {
 	set_bit(AS_EXITING, &mapping->flags);
-- 
cgit v1.2.3


From 09316c09dde33aae14f34489d9e3d243ec0d5938 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Date: Thu, 9 Oct 2014 15:29:32 -0700
Subject: mm/balloon_compaction: add vmstat counters and kpageflags bit

Always mark pages with PageBalloon even if balloon compaction is disabled
and expose this mark in /proc/kpageflags as KPF_BALLOON.

Also this patch adds three counters into /proc/vmstat: "balloon_inflate",
"balloon_deflate" and "balloon_migrate".  They accumulate balloon
activity.  Current size of balloon is (balloon_inflate - balloon_deflate)
pages.

All generic balloon code now gathered under option CONFIG_MEMORY_BALLOON.
It should be selected by ballooning driver which wants use this feature.
Currently virtio-balloon is the only user.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/balloon_compaction.h | 2 ++
 include/linux/vm_event_item.h      | 7 +++++++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index bc3d2985cc9a..9b0a15d06a4f 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -166,11 +166,13 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 static inline void balloon_page_insert(struct balloon_dev_info *balloon,
 				       struct page *page)
 {
+	__SetPageBalloon(page);
 	list_add(&page->lru, &balloon->pages);
 }
 
 static inline void balloon_page_delete(struct page *page)
 {
+	__ClearPageBalloon(page);
 	list_del(&page->lru);
 }
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ced92345c963..730334cdf037 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -72,6 +72,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
+#ifdef CONFIG_MEMORY_BALLOON
+		BALLOON_INFLATE,
+		BALLOON_DEFLATE,
+#ifdef CONFIG_BALLOON_COMPACTION
+		BALLOON_MIGRATE,
+#endif
+#endif
 #ifdef CONFIG_DEBUG_TLBFLUSH
 #ifdef CONFIG_SMP
 		NR_TLB_REMOTE_FLUSH,	/* cpu tried to flush others' tlbs */
-- 
cgit v1.2.3


From 722cdc17232f0f684011407f7cf3c40d39457971 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 9 Oct 2014 15:29:50 -0700
Subject: zsmalloc: change return value unit of zs_get_total_size_bytes

zs_get_total_size_bytes returns a amount of memory zsmalloc consumed with
*byte unit* but zsmalloc operates *page unit* rather than byte unit so
let's change the API so benefit we could get is that reduce unnecessary
overhead (ie, change page unit with byte unit) in zsmalloc.

Since return type is pages, "zs_get_total_pages" is better than
"zs_get_total_size_bytes".

Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Dan Streetman <ddstreet@ieee.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: <juno.choi@lge.com>
Cc: <seungho1.park@lge.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjennings@variantweb.net>
Cc: David Horner <ds2horner@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zsmalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index e44d634e7fb7..05c214760977 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -46,6 +46,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 			enum zs_mapmode mm);
 void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
 
-u64 zs_get_total_size_bytes(struct zs_pool *pool);
+unsigned long zs_get_total_pages(struct zs_pool *pool);
 
 #endif
-- 
cgit v1.2.3


From 2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Thu, 9 Oct 2014 15:30:13 -0700
Subject: include/linux/kernel.h: rewrite min3, max3 and clamp using min and
 max

It appears that gcc is better at optimising a double call to min and max
rather than open coded min3 and max3.  This can be observed here:

    $ cat min-max.c
    #define min(x, y) ({				\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	(void) (&_min1 == &_min2);		\
    	_min1 < _min2 ? _min1 : _min2; })
    #define min3(x, y, z) ({			\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	typeof(z) _min3 = (z);			\
    	(void) (&_min1 == &_min2);		\
    	(void) (&_min1 == &_min3);		\
    	_min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
    		(_min2 < _min3 ? _min2 : _min3); })

    int fmin3(int x, int y, int z) { return min3(x, y, z); }
    int fmin2(int x, int y, int z) { return min(min(x, y), z); }

    $ gcc -O2 -o min-max.s -S min-max.c; cat min-max.s
    	.file	"min-max.c"
    	.text
    	.p2align 4,,15
    	.globl	fmin3
    	.type	fmin3, @function
    fmin3:
    .LFB0:
    	.cfi_startproc
    	cmpl	%esi, %edi
    	jl	.L5
    	cmpl	%esi, %edx
    	movl	%esi, %eax
    	cmovle	%edx, %eax
    	ret
    	.p2align 4,,10
    	.p2align 3
    .L5:
    	cmpl	%edi, %edx
    	movl	%edi, %eax
    	cmovle	%edx, %eax
    	ret
    	.cfi_endproc
    .LFE0:
    	.size	fmin3, .-fmin3
    	.p2align 4,,15
    	.globl	fmin2
    	.type	fmin2, @function
    fmin2:
    .LFB1:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	movl	%edx, %eax
    	cmovle	%esi, %edi
    	cmpl	%edx, %edi
    	cmovle	%edi, %eax
    	ret
    	.cfi_endproc
    .LFE1:
    	.size	fmin2, .-fmin2
    	.ident	"GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    	.section	.note.GNU-stack,"",@progbits

fmin3 function, which uses open-coded min3 macro, is compiled into total
of ten instructions including a conditional branch, whereas fmin2
function, which uses two calls to min2 macro, is compiled into six
instructions with no branches.

Similarly, open-coded clamp produces the same code as clamp using min and
max macros, but the latter is much shorter:

    $ cat clamp.c
    #define clamp(val, min, max) ({			\
    	typeof(val) __val = (val);		\
    	typeof(min) __min = (min);		\
    	typeof(max) __max = (max);		\
    	(void) (&__val == &__min);		\
    	(void) (&__val == &__max);		\
    	__val = __val < __min ? __min: __val;	\
    	__val > __max ? __max: __val; })
    #define min(x, y) ({				\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	(void) (&_min1 == &_min2);		\
    	_min1 < _min2 ? _min1 : _min2; })
    #define max(x, y) ({				\
    	typeof(x) _max1 = (x);			\
    	typeof(y) _max2 = (y);			\
    	(void) (&_max1 == &_max2);		\
    	_max1 > _max2 ? _max1 : _max2; })

    int fclamp(int v, int min, int max) { return clamp(v, min, max); }
    int fclampmm(int v, int min, int max) { return min(max(v, min), max); }

    $ gcc -O2 -o clamp.s -S clamp.c; cat clamp.s
    	.file	"clamp.c"
    	.text
    	.p2align 4,,15
    	.globl	fclamp
    	.type	fclamp, @function
    fclamp:
    .LFB0:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	movl	%edx, %eax
    	cmovge	%esi, %edi
    	cmpl	%edx, %edi
    	cmovle	%edi, %eax
    	ret
    	.cfi_endproc
    .LFE0:
    	.size	fclamp, .-fclamp
    	.p2align 4,,15
    	.globl	fclampmm
    	.type	fclampmm, @function
    fclampmm:
    .LFB1:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	cmovge	%esi, %edi
    	cmpl	%edi, %edx
    	movl	%edi, %eax
    	cmovle	%edx, %eax
    	ret
    	.cfi_endproc
    .LFE1:
    	.size	fclampmm, .-fclampmm
    	.ident	"GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    	.section	.note.GNU-stack,"",@progbits

    Linux mpn-glaptop 3.13.0-29-generic #53~precise1-Ubuntu SMP Wed Jun 4 22:06:25 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
    gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3
    Copyright (C) 2011 Free Software Foundation, Inc.
    This is free software; see the source for copying conditions.  There is NO
    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    -rwx------ 1 mpn eng 51224656 Jun 17 14:15 vmlinux.before
    -rwx------ 1 mpn eng 51224608 Jun 17 13:57 vmlinux.after

48 bytes reduction.  The do_fault_around was a few instruction shorter
and as far as I can tell saved 12 bytes on the stack, i.e.:

    $ grep -e rsp -e pop -e push do_fault_around.*
    do_fault_around.before.s:push   %rbp
    do_fault_around.before.s:mov    %rsp,%rbp
    do_fault_around.before.s:push   %r13
    do_fault_around.before.s:push   %r12
    do_fault_around.before.s:push   %rbx
    do_fault_around.before.s:sub    $0x38,%rsp
    do_fault_around.before.s:add    $0x38,%rsp
    do_fault_around.before.s:pop    %rbx
    do_fault_around.before.s:pop    %r12
    do_fault_around.before.s:pop    %r13
    do_fault_around.before.s:pop    %rbp

    do_fault_around.after.s:push   %rbp
    do_fault_around.after.s:mov    %rsp,%rbp
    do_fault_around.after.s:push   %r12
    do_fault_around.after.s:push   %rbx
    do_fault_around.after.s:sub    $0x30,%rsp
    do_fault_around.after.s:add    $0x30,%rsp
    do_fault_around.after.s:pop    %rbx
    do_fault_around.after.s:pop    %r12
    do_fault_around.after.s:pop    %rbp

or here side-by-side:

    Before                    After
    push   %rbp               push   %rbp
    mov    %rsp,%rbp          mov    %rsp,%rbp
    push   %r13
    push   %r12               push   %r12
    push   %rbx               push   %rbx
    sub    $0x38,%rsp         sub    $0x30,%rsp
    add    $0x38,%rsp         add    $0x30,%rsp
    pop    %rbx               pop    %rbx
    pop    %r12               pop    %r12
    pop    %r13
    pop    %rbp               pop    %rbp

There are also fewer branches:

    $ grep ^j do_fault_around.*
    do_fault_around.before.s:jae    ffffffff812079b7
    do_fault_around.before.s:jmp    ffffffff812079c5
    do_fault_around.before.s:jmp    ffffffff81207a14
    do_fault_around.before.s:ja     ffffffff812079f9
    do_fault_around.before.s:jb     ffffffff81207a10
    do_fault_around.before.s:jmp    ffffffff81207a63
    do_fault_around.before.s:jne    ffffffff812079df

    do_fault_around.after.s:jmp    ffffffff812079fd
    do_fault_around.after.s:ja     ffffffff812079e2
    do_fault_around.after.s:jb     ffffffff812079f9
    do_fault_around.after.s:jmp    ffffffff81207a4c
    do_fault_around.after.s:jne    ffffffff812079c8

And here's with allyesconfig on a different machine:

    $ uname -a; gcc --version; ls -l vmlinux.*
    Linux erwin 3.14.7-mn #54 SMP Sun Jun 15 11:25:08 CEST 2014 x86_64 AMD Phenom(tm) II X3 710 Processor AuthenticAMD GNU/Linux
    gcc (GCC) 4.8.3
    Copyright (C) 2013 Free Software Foundation, Inc.
    This is free software; see the source for copying conditions.  There is NO
    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    -rwx------ 1 mpn eng 437027411 Jun 20 16:04 vmlinux.before
    -rwx------ 1 mpn eng 437026881 Jun 20 15:30 vmlinux.after

530 bytes reduction.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: David Rientjes <rientjes@google.com>
Cc: "Rustad, Mark D" <mark.d.rustad@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 32 +++++---------------------------
 1 file changed, 5 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 95624bed87ef..aa2a0cb57f50 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -715,23 +715,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 	(void) (&_max1 == &_max2);		\
 	_max1 > _max2 ? _max1 : _max2; })
 
-#define min3(x, y, z) ({			\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	typeof(z) _min3 = (z);			\
-	(void) (&_min1 == &_min2);		\
-	(void) (&_min1 == &_min3);		\
-	_min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
-		(_min2 < _min3 ? _min2 : _min3); })
-
-#define max3(x, y, z) ({			\
-	typeof(x) _max1 = (x);			\
-	typeof(y) _max2 = (y);			\
-	typeof(z) _max3 = (z);			\
-	(void) (&_max1 == &_max2);		\
-	(void) (&_max1 == &_max3);		\
-	_max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
-		(_max2 > _max3 ? _max2 : _max3); })
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
 
 /**
  * min_not_zero - return the minimum that is _not_ zero, unless both are zero
@@ -746,20 +731,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 /**
  * clamp - return a value clamped to a given range with strict typechecking
  * @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
  *
  * This macro does strict typechecking of min/max to make sure they are of the
  * same type as val.  See the unnecessary pointer comparisons.
  */
-#define clamp(val, min, max) ({			\
-	typeof(val) __val = (val);		\
-	typeof(min) __min = (min);		\
-	typeof(max) __max = (max);		\
-	(void) (&__val == &__min);		\
-	(void) (&__val == &__max);		\
-	__val = __val < __min ? __min: __val;	\
-	__val > __max ? __max: __val; })
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
 
 /*
  * ..and if you can't take the strict
-- 
cgit v1.2.3


From c185b07fc9f24d52a864376ed22a6d84384b0c53 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Thu, 9 Oct 2014 15:30:15 -0700
Subject: include/linux/kernel.h: deduplicate code implementing clamp* macros

Instead of open-coding clamp_t macro min_t and max_t the way clamp macro
does and instead of open-coding clamp_val simply use clamp_t.
Furthermore, normalise argument naming in the macros to be lo and hi.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Mark Rustad <mark.d.rustad@intel.com>
Cc: "Kirsher, Jeffrey T" <jeffrey.t.kirsher@intel.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index aa2a0cb57f50..e9e420b6d931 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -734,7 +734,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @lo: lowest allowable value
  * @hi: highest allowable value
  *
- * This macro does strict typechecking of min/max to make sure they are of the
+ * This macro does strict typechecking of lo/hi to make sure they are of the
  * same type as val.  See the unnecessary pointer comparisons.
  */
 #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
@@ -759,36 +759,26 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * clamp_t - return a value clamped to a given range using a given type
  * @type: the type of variable to use
  * @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
  *
  * This macro does no typechecking and uses temporary variables of type
  * 'type' to make all the comparisons.
  */
-#define clamp_t(type, val, min, max) ({		\
-	type __val = (val);			\
-	type __min = (min);			\
-	type __max = (max);			\
-	__val = __val < __min ? __min: __val;	\
-	__val > __max ? __max: __val; })
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
 
 /**
  * clamp_val - return a value clamped to a given range using val's type
  * @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
  *
  * This macro does no typechecking and uses temporary variables of whatever
  * type the input argument 'val' is.  This is useful when val is an unsigned
  * type and min and max are literals that will otherwise be assigned a signed
  * integer type.
  */
-#define clamp_val(val, min, max) ({		\
-	typeof(val) __val = (val);		\
-	typeof(val) __min = (min);		\
-	typeof(val) __max = (max);		\
-	__val = __val < __min ? __min: __val;	\
-	__val > __max ? __max: __val; })
+#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
 
 
 /*
-- 
cgit v1.2.3


From 61a04e5b306ab9d6a30f78e86f1f140d7c888304 Mon Sep 17 00:00:00 2001
From: Michele Curti <michele.curti@gmail.com>
Date: Thu, 9 Oct 2014 15:30:17 -0700
Subject: include/linux/blkdev.h: use NULL instead of zero

Quite useless but it shuts up some warnings.

Signed-off-by: Michele Curti <michele.curti@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 518b46555b80..87be398166d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1564,7 +1564,7 @@ static inline int blk_rq_map_integrity_sg(struct request_queue *q,
 }
 static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
 {
-	return 0;
+	return NULL;
 }
 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
 {
-- 
cgit v1.2.3


From 578b25dfce2990d8bab5631f33a4283bd5b01556 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 9 Oct 2014 15:30:28 -0700
Subject: include/linux/screen_info.h: remove unused ORIG_* macros

The ORIG_* macros definitions to access struct screen_info members and all
of their users were removed 7 years ago by commit 3ea335100014785f
("Remove magic macros for screen_info structure members"), but (only) the
definitions reappeared a few days later in commit ee8e7cfe9d330d6f ("Make
asm-x86/bootparam.h includable from userspace.").

Remove them for good. Amen.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/screen_info.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 005bf3e38db5..f0f8bad54be9 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -5,12 +5,4 @@
 
 extern struct screen_info screen_info;
 
-#define ORIG_X			(screen_info.orig_x)
-#define ORIG_Y			(screen_info.orig_y)
-#define ORIG_VIDEO_MODE		(screen_info.orig_video_mode)
-#define ORIG_VIDEO_COLS 	(screen_info.orig_video_cols)
-#define ORIG_VIDEO_EGA_BX	(screen_info.orig_video_ega_bx)
-#define ORIG_VIDEO_LINES	(screen_info.orig_video_lines)
-#define ORIG_VIDEO_ISVGA	(screen_info.orig_video_isVGA)
-#define ORIG_VIDEO_POINTS       (screen_info.orig_video_points)
 #endif /* _SCREEN_INFO_H */
-- 
cgit v1.2.3