summary | refs | log | tree | commit | diff
path: root/mm/util.c
diff options
context:
space:
mode:
author: Andrey Ryabinin <aryabinin@virtuozzo.com> 2016-03-17 14:18:50 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-03-17 15:09:34 -0700
commit: 39a1aa8e194ab67983de3b9d0b204ccee12e689a (patch)
tree: 66e70a955436cd15a170b848fdba9563bf30cb37 /mm/util.c
parent: ea606cf5d8df370e7932460dfd960b21f20e7c6d (diff)
mm: deduplicate memory overcommitment code
Currently we have two copies of the same code which implements memory overcommitment logic. Let's move it into mm/util.c and hence avoid duplication. No functional changes here.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/util.c')
-rw-r--r-- mm/util.c 124
1 files changed, 124 insertions, 0 deletions
diff --git a/mm/util.c b/mm/util.c
index 4fb14ca5a419..47a57e557614 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -396,6 +396,13 @@ int __page_mapcount(struct page *page)
}
EXPORT_SYMBOL_GPL(__page_mapcount);
+int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* policy tested in __vm_enough_memory() */
+int sysctl_overcommit_ratio __read_mostly = 50;
+unsigned long sysctl_overcommit_kbytes __read_mostly;
+int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
+unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 2^17 kbytes = 128MB */
+unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 2^13 kbytes = 8MB */
+
int overcommit_ratio_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
@@ -437,6 +444,123 @@ unsigned long vm_commit_limit(void)
return allowed;
}
+/*
+ * Make sure vm_committed_as sits in a cacheline of its own, not shared with
+ * other variables. It can be updated by several CPUs frequently.
+ */
+struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
+
+/*
+ * Report the global memory commitment made in the system: the value of
+ * vm_committed_as as read by percpu_counter_read_positive(). It can be a
+ * metric used to drive ballooning decisions when Linux is hosted as a
+ * guest. On Hyper-V, the host implements a policy engine for dynamically
+ * balancing memory across competing virtual machines that are hosted;
+ * the guest reported memory commitment is one of the metrics driving it.
+ */
+unsigned long vm_memory_committed(void)
+{
+ return percpu_counter_read_positive(&vm_committed_as);
+}
+EXPORT_SYMBOL_GPL(vm_memory_committed);
+
+/*
+ * Check that a process has enough memory to allocate a new virtual
+ * mapping of @pages pages. Returns 0 if there is enough memory for the
+ * allocation to succeed and -ENOMEM if there is not. vm_acct_memory(@pages)
+ * is called up front and vm_unacct_memory(@pages) only on the failure path.
+ *
+ * We currently support three overcommit policies, which are set via the
+ * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
+ *
+ * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
+ * Additional code 2002 Jul 20 by Robert Love.
+ *
+ * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise; if
+ * 0, the admin reserve is withheld. A NULL @mm skips the per-process user
+ * reserve. This is a helper intended for LSMs which wish to use this logic.
+ */
+int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
+{
+ long free, allowed, reserve;
+
+ VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+ -(s64)vm_committed_as_batch * num_online_cpus(),
+ "memory commitment underflow");
+
+ vm_acct_memory(pages); /* paired with vm_unacct_memory() at error: */
+
+ /*
+ * OVERCOMMIT_ALWAYS: sometimes we want to use more memory than we have
+ */
+ if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
+ return 0;
+
+ if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { /* estimate what is available */
+ free = global_page_state(NR_FREE_PAGES);
+ free += global_page_state(NR_FILE_PAGES);
+
+ /*
+ * shmem pages shouldn't be counted as free in this
+ * case, they can't be purged, only swapped out, and
+ * that won't affect the overall amount of available
+ * memory in the system.
+ */
+ free -= global_page_state(NR_SHMEM);
+
+ free += get_nr_swap_pages();
+
+ /*
+ * Any slabs which are created with the
+ * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+ * which are reclaimable, under pressure. The dentry
+ * cache and most inode caches should fall into this category.
+ */
+ free += global_page_state(NR_SLAB_RECLAIMABLE);
+
+ /*
+ * Fail unless the estimate exceeds totalreserve_pages; the reserved pages are not for anonymous pages.
+ */
+ if (free <= totalreserve_pages)
+ goto error;
+ else
+ free -= totalreserve_pages;
+
+ /*
+ * Reserve some for root (the sysctl is in kbytes; the shift converts to pages)
+ */
+ if (!cap_sys_admin)
+ free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
+
+ if (free > pages)
+ return 0;
+
+ goto error;
+ }
+
+ allowed = vm_commit_limit(); /* neither ALWAYS nor GUESS: enforce the limit */
+ /*
+ * Reserve some for root (the sysctl is in kbytes; the shift converts to pages)
+ */
+ if (!cap_sys_admin)
+ allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
+
+ /*
+ * Don't let a single process grow so big that a user can't recover
+ */
+ if (mm) {
+ reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
+ allowed -= min_t(long, mm->total_vm / 32, reserve);
+ }
+
+ if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+ return 0;
+error:
+ vm_unacct_memory(pages); /* undo the vm_acct_memory() done on entry */
+
+ return -ENOMEM;
+}
+
/**
* get_cmdline() - copy the cmdline value to a buffer.
* @task: the task whose cmdline value to copy.