summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.freezer2
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/cgroup.c265
-rw-r--r--kernel/cgroup_debug.c4
-rw-r--r--kernel/cgroup_freezer.c379
-rw-r--r--kernel/configs.c9
-rw-r--r--kernel/cpuset.c17
-rw-r--r--kernel/freezer.c154
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/kthread.c5
-rw-r--r--kernel/power/process.c119
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/rcupreempt.c2
-rw-r--r--kernel/rcutorture.c2
-rw-r--r--kernel/sysctl.c10
15 files changed, 724 insertions, 250 deletions
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
new file mode 100644
index 000000000000..a3bb4cb52539
--- /dev/null
+++ b/kernel/Kconfig.freezer
@@ -0,0 +1,2 @@
+config FREEZER
+ def_bool PM_SLEEP || CGROUP_FREEZER
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e2..066550aa61c5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_sched.o = -mno-spe -pg
endif
+obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
obj-$(CONFIG_UTS_NS) += utsname.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c17e6d3..046c1609606b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link;
- write_lock(&css_set_lock);
hlist_del(&cg->hlist);
css_set_count--;
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
list_del(&link->cgrp_link_list);
kfree(link);
}
-
- write_unlock(&css_set_lock);
}
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
{
int i;
- struct css_set *cg = container_of(k, struct css_set, ref);
-
+ /*
+ * Ensure that the refcount doesn't hit zero while any readers
+ * can see it. Similar to atomic_dec_and_lock(), but for an
+ * rwlock
+ */
+ if (atomic_add_unless(&cg->refcount, -1, 1))
+ return;
+ write_lock(&css_set_lock);
+ if (!atomic_dec_and_test(&cg->refcount)) {
+ write_unlock(&css_set_lock);
+ return;
+ }
unlink_css_set(cg);
+ write_unlock(&css_set_lock);
rcu_read_lock();
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
kfree(cg);
}
-static void release_css_set(struct kref *k)
-{
- __release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
- __release_css_set(k, 1);
-}
-
/*
* refcounted get/put for css_set objects
*/
static inline void get_css_set(struct css_set *cg)
{
- kref_get(&cg->ref);
+ atomic_inc(&cg->refcount);
}
static inline void put_css_set(struct css_set *cg)
{
- kref_put(&cg->ref, release_css_set);
+ __put_css_set(cg, 0);
}
static inline void put_css_set_taskexit(struct css_set *cg)
{
- kref_put(&cg->ref, release_css_set_taskexit);
+ __put_css_set(cg, 1);
}
/*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
return NULL;
}
- kref_init(&res->ref);
+ atomic_set(&res->refcount, 1);
INIT_LIST_HEAD(&res->cg_links);
INIT_LIST_HEAD(&res->tasks);
INIT_HLIST_NODE(&res->hlist);
@@ -870,6 +868,14 @@ static struct super_operations cgroup_ops = {
.remount_fs = cgroup_remount,
};
+static void init_cgroup_housekeeping(struct cgroup *cgrp)
+{
+ INIT_LIST_HEAD(&cgrp->sibling);
+ INIT_LIST_HEAD(&cgrp->children);
+ INIT_LIST_HEAD(&cgrp->css_sets);
+ INIT_LIST_HEAD(&cgrp->release_list);
+ init_rwsem(&cgrp->pids_mutex);
+}
static void init_cgroup_root(struct cgroupfs_root *root)
{
struct cgroup *cgrp = &root->top_cgroup;
@@ -878,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
root->number_of_cgroups = 1;
cgrp->root = root;
cgrp->top_cgroup = cgrp;
- INIT_LIST_HEAD(&cgrp->sibling);
- INIT_LIST_HEAD(&cgrp->children);
- INIT_LIST_HEAD(&cgrp->css_sets);
- INIT_LIST_HEAD(&cgrp->release_list);
+ init_cgroup_housekeeping(cgrp);
}
static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1728,7 +1731,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
read_lock(&css_set_lock);
list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
- count += atomic_read(&link->cg->ref.refcount);
+ count += atomic_read(&link->cg->refcount);
}
read_unlock(&css_set_lock);
return count;
@@ -1997,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
* but we cannot guarantee that the information we produce is correct
* unless we produce it entirely atomically.
*
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here). The struct
- * ctr_struct * is stored in file->private_data. Its resources will
- * be freed by release() when the file is closed. The array is used
- * to sprintf the PIDs and then used by read().
*/
-struct ctr_struct {
- char *buf;
- int bufsz;
-};
/*
* Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2088,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
return *(pid_t *)a - *(pid_t *)b;
}
+
/*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
+ * seq_file methods for the "tasks" file. The seq_file position is the
+ * next pid to display; the seq_file iterator is a pointer to the pid
+ * in the cgroup->tasks_pids array.
*/
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+
+static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
{
- int cnt = 0;
- int i;
+ /*
+ * Initially we receive a position value that corresponds to
+ * one more than the last pid shown (or 0 on the first call or
+ * after a seek to the start). Use a binary-search to find the
+ * next pid to display, if any
+ */
+ struct cgroup *cgrp = s->private;
+ int index = 0, pid = *pos;
+ int *iter;
- for (i = 0; i < npids; i++)
- cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
- return cnt;
+ down_read(&cgrp->pids_mutex);
+ if (pid) {
+ int end = cgrp->pids_length;
+ int i;
+ while (index < end) {
+ int mid = (index + end) / 2;
+ if (cgrp->tasks_pids[mid] == pid) {
+ index = mid;
+ break;
+ } else if (cgrp->tasks_pids[mid] <= pid)
+ index = mid + 1;
+ else
+ end = mid;
+ }
+ }
+ /* If we're off the end of the array, we're done */
+ if (index >= cgrp->pids_length)
+ return NULL;
+ /* Update the abstract position to be the actual pid that we found */
+ iter = cgrp->tasks_pids + index;
+ *pos = *iter;
+ return iter;
+}
+
+static void cgroup_tasks_stop(struct seq_file *s, void *v)
+{
+ struct cgroup *cgrp = s->private;
+ up_read(&cgrp->pids_mutex);
}
+static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct cgroup *cgrp = s->private;
+ int *p = v;
+ int *end = cgrp->tasks_pids + cgrp->pids_length;
+
+ /*
+ * Advance to the next pid in the array. If this goes off the
+ * end, we're done
+ */
+ p++;
+ if (p >= end) {
+ return NULL;
+ } else {
+ *pos = *p;
+ return p;
+ }
+}
+
+static int cgroup_tasks_show(struct seq_file *s, void *v)
+{
+ return seq_printf(s, "%d\n", *(int *)v);
+}
+
+static struct seq_operations cgroup_tasks_seq_operations = {
+ .start = cgroup_tasks_start,
+ .stop = cgroup_tasks_stop,
+ .next = cgroup_tasks_next,
+ .show = cgroup_tasks_show,
+};
+
+static void release_cgroup_pid_array(struct cgroup *cgrp)
+{
+ down_write(&cgrp->pids_mutex);
+ BUG_ON(!cgrp->pids_use_count);
+ if (!--cgrp->pids_use_count) {
+ kfree(cgrp->tasks_pids);
+ cgrp->tasks_pids = NULL;
+ cgrp->pids_length = 0;
+ }
+ up_write(&cgrp->pids_mutex);
+}
+
+static int cgroup_tasks_release(struct inode *inode, struct file *file)
+{
+ struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+ if (!(file->f_mode & FMODE_READ))
+ return 0;
+
+ release_cgroup_pid_array(cgrp);
+ return seq_release(inode, file);
+}
+
+static struct file_operations cgroup_tasks_operations = {
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = cgroup_file_write,
+ .release = cgroup_tasks_release,
+};
+
/*
- * Handle an open on 'tasks' file. Prepare a buffer listing the
+ * Handle an open on 'tasks' file. Prepare an array containing the
* process id's of tasks currently attached to the cgroup being opened.
- *
- * Does not require any specific cgroup mutexes, and does not take any.
*/
+
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- struct ctr_struct *ctr;
pid_t *pidarray;
int npids;
- char c;
+ int retval;
+ /* Nothing to do for write-only files */
if (!(file->f_mode & FMODE_READ))
return 0;
- ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
- if (!ctr)
- goto err0;
-
/*
* If cgroup gets more users after we read count, we won't have
* enough space - tough. This race is indistinguishable to the
@@ -2131,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
* show up until sometime later on.
*/
npids = cgroup_task_count(cgrp);
- if (npids) {
- pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
- if (!pidarray)
- goto err1;
-
- npids = pid_array_load(pidarray, npids, cgrp);
- sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
- /* Call pid_array_to_buf() twice, first just to get bufsz */
- ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
- ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
- if (!ctr->buf)
- goto err2;
- ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
- kfree(pidarray);
- } else {
- ctr->buf = NULL;
- ctr->bufsz = 0;
- }
- file->private_data = ctr;
- return 0;
-
-err2:
- kfree(pidarray);
-err1:
- kfree(ctr);
-err0:
- return -ENOMEM;
-}
+ pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+ if (!pidarray)
+ return -ENOMEM;
+ npids = pid_array_load(pidarray, npids, cgrp);
+ sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file, char __user *buf,
- size_t nbytes, loff_t *ppos)
-{
- struct ctr_struct *ctr = file->private_data;
+ /*
+ * Store the array in the cgroup, freeing the old
+ * array if necessary
+ */
+ down_write(&cgrp->pids_mutex);
+ kfree(cgrp->tasks_pids);
+ cgrp->tasks_pids = pidarray;
+ cgrp->pids_length = npids;
+ cgrp->pids_use_count++;
+ up_write(&cgrp->pids_mutex);
- return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
+ file->f_op = &cgroup_tasks_operations;
-static int cgroup_tasks_release(struct inode *unused_inode,
- struct file *file)
-{
- struct ctr_struct *ctr;
-
- if (file->f_mode & FMODE_READ) {
- ctr = file->private_data;
- kfree(ctr->buf);
- kfree(ctr);
+ retval = seq_open(file, &cgroup_tasks_seq_operations);
+ if (retval) {
+ release_cgroup_pid_array(cgrp);
+ return retval;
}
+ ((struct seq_file *)file->private_data)->private = cgrp;
return 0;
}
@@ -2210,7 +2268,6 @@ static struct cftype files[] = {
{
.name = "tasks",
.open = cgroup_tasks_open,
- .read = cgroup_tasks_read,
.write_u64 = cgroup_tasks_write,
.release = cgroup_tasks_release,
.private = FILE_TASKLIST,
@@ -2300,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
mutex_lock(&cgroup_mutex);
- INIT_LIST_HEAD(&cgrp->sibling);
- INIT_LIST_HEAD(&cgrp->children);
- INIT_LIST_HEAD(&cgrp->css_sets);
- INIT_LIST_HEAD(&cgrp->release_list);
+ init_cgroup_housekeeping(cgrp);
cgrp->parent = parent;
cgrp->root = parent->root;
@@ -2495,8 +2549,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
int __init cgroup_init_early(void)
{
int i;
- kref_init(&init_css_set.ref);
- kref_get(&init_css_set.ref);
+ atomic_set(&init_css_set.refcount, 1);
INIT_LIST_HEAD(&init_css_set.cg_links);
INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist);
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index c3dc3aba4c02..daca6209202d 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
u64 count;
rcu_read_lock();
- count = atomic_read(&current->cgroups->ref.refcount);
+ count = atomic_read(&current->cgroups->refcount);
rcu_read_unlock();
return count;
}
@@ -90,7 +90,7 @@ static struct cftype files[] = {
{
.name = "releasable",
.read_u64 = releasable_read,
- }
+ },
};
static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
new file mode 100644
index 000000000000..e95056954498
--- /dev/null
+++ b/kernel/cgroup_freezer.c
@@ -0,0 +1,379 @@
+/*
+ * cgroup_freezer.c - control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+
+enum freezer_state {
+ CGROUP_THAWED = 0,
+ CGROUP_FREEZING,
+ CGROUP_FROZEN,
+};
+
+struct freezer {
+ struct cgroup_subsys_state css;
+ enum freezer_state state;
+ spinlock_t lock; /* protects _writes_ to state */
+};
+
+static inline struct freezer *cgroup_freezer(
+ struct cgroup *cgroup)
+{
+ return container_of(
+ cgroup_subsys_state(cgroup, freezer_subsys_id),
+ struct freezer, css);
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+ return container_of(task_subsys_state(task, freezer_subsys_id),
+ struct freezer, css);
+}
+
+int cgroup_frozen(struct task_struct *task)
+{
+ struct freezer *freezer;
+ enum freezer_state state;
+
+ task_lock(task);
+ freezer = task_freezer(task);
+ state = freezer->state;
+ task_unlock(task);
+
+ return state == CGROUP_FROZEN;
+}
+
+/*
+ * cgroups_write_string() limits the size of freezer state strings to
+ * CGROUP_LOCAL_BUFFER_SIZE
+ */
+static const char *freezer_state_strs[] = {
+ "THAWED",
+ "FREEZING",
+ "FROZEN",
+};
+
+/*
+ * State diagram
+ * Transitions are caused by userspace writes to the freezer.state file.
+ * The values in parenthesis are state labels. The rest are edge labels.
+ *
+ * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
+ * ^ ^ | |
+ * | \_______THAWED_______/ |
+ * \__________________________THAWED____________/
+ */
+
+struct cgroup_subsys freezer_subsys;
+
+/* Locks taken and their ordering
+ * ------------------------------
+ * css_set_lock
+ * cgroup_mutex (AKA cgroup_lock)
+ * task->alloc_lock (AKA task_lock)
+ * freezer->lock
+ * task->sighand->siglock
+ *
+ * cgroup code forces css_set_lock to be taken before task->alloc_lock
+ *
+ * freezer_create(), freezer_destroy():
+ * cgroup_mutex [ by cgroup core ]
+ *
+ * can_attach():
+ * cgroup_mutex
+ *
+ * cgroup_frozen():
+ * task->alloc_lock (to get task's cgroup)
+ *
+ * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
+ * task->alloc_lock (to get task's cgroup)
+ * freezer->lock
+ * sighand->siglock (if the cgroup is freezing)
+ *
+ * freezer_read():
+ * cgroup_mutex
+ * freezer->lock
+ * read_lock css_set_lock (cgroup iterator start)
+ *
+ * freezer_write() (freeze):
+ * cgroup_mutex
+ * freezer->lock
+ * read_lock css_set_lock (cgroup iterator start)
+ * sighand->siglock
+ *
+ * freezer_write() (unfreeze):
+ * cgroup_mutex
+ * freezer->lock
+ * read_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock (to prevent races with freeze_task())
+ * sighand->siglock
+ */
+static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
+ struct cgroup *cgroup)
+{
+ struct freezer *freezer;
+
+ freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+ if (!freezer)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&freezer->lock);
+ freezer->state = CGROUP_THAWED;
+ return &freezer->css;
+}
+
+static void freezer_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cgroup)
+{
+ kfree(cgroup_freezer(cgroup));
+}
+
+/* Task is frozen or will freeze immediately when next it gets woken */
+static bool is_task_frozen_enough(struct task_struct *task)
+{
+ return frozen(task) ||
+ (task_is_stopped_or_traced(task) && freezing(task));
+}
+
+/*
+ * The call to cgroup_lock() in the freezer.state write method prevents
+ * a write to that file racing against an attach, and hence the
+ * can_attach() result will remain valid until the attach completes.
+ */
+static int freezer_can_attach(struct cgroup_subsys *ss,
+ struct cgroup *new_cgroup,
+ struct task_struct *task)
+{
+ struct freezer *freezer;
+ int retval;
+
+ /* Anything frozen can't move or be moved to/from */
+
+ if (is_task_frozen_enough(task))
+ return -EBUSY;
+
+ freezer = cgroup_freezer(new_cgroup);
+ if (freezer->state == CGROUP_FROZEN)
+ return -EBUSY;
+
+ retval = 0;
+ task_lock(task);
+ freezer = task_freezer(task);
+ if (freezer->state == CGROUP_FROZEN)
+ retval = -EBUSY;
+ task_unlock(task);
+ return retval;
+}
+
+static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
+{
+ struct freezer *freezer;
+
+ task_lock(task);
+ freezer = task_freezer(task);
+ task_unlock(task);
+
+ BUG_ON(freezer->state == CGROUP_FROZEN);
+ spin_lock_irq(&freezer->lock);
+ /* Locking avoids race with FREEZING -> THAWED transitions. */
+ if (freezer->state == CGROUP_FREEZING)
+ freeze_task(task, true);
+ spin_unlock_irq(&freezer->lock);
+}
+
+/*
+ * caller must hold freezer->lock
+ */
+static void update_freezer_state(struct cgroup *cgroup,
+ struct freezer *freezer)
+{
+ struct cgroup_iter it;
+ struct task_struct *task;
+ unsigned int nfrozen = 0, ntotal = 0;
+
+ cgroup_iter_start(cgroup, &it);
+ while ((task = cgroup_iter_next(cgroup, &it))) {
+ ntotal++;
+ if (is_task_frozen_enough(task))
+ nfrozen++;
+ }
+
+ /*
+ * Transition to FROZEN when no new tasks can be added ensures
+ * that we never exist in the FROZEN state while there are unfrozen
+ * tasks.
+ */
+ if (nfrozen == ntotal)
+ freezer->state = CGROUP_FROZEN;
+ else if (nfrozen > 0)
+ freezer->state = CGROUP_FREEZING;
+ else
+ freezer->state = CGROUP_THAWED;
+ cgroup_iter_end(cgroup, &it);
+}
+
+static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
+ struct seq_file *m)
+{
+ struct freezer *freezer;
+ enum freezer_state state;
+
+ if (!cgroup_lock_live_group(cgroup))
+ return -ENODEV;
+
+ freezer = cgroup_freezer(cgroup);
+ spin_lock_irq(&freezer->lock);
+ state = freezer->state;
+ if (state == CGROUP_FREEZING) {
+ /* We change from FREEZING to FROZEN lazily if the cgroup was
+ * only partially frozen when we exitted write. */
+ update_freezer_state(cgroup, freezer);
+ state = freezer->state;
+ }
+ spin_unlock_irq(&freezer->lock);
+ cgroup_unlock();
+
+ seq_puts(m, freezer_state_strs[state]);
+ seq_putc(m, '\n');
+ return 0;
+}
+
+static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+ struct cgroup_iter it;
+ struct task_struct *task;
+ unsigned int num_cant_freeze_now = 0;
+
+ freezer->state = CGROUP_FREEZING;
+ cgroup_iter_start(cgroup, &it);
+ while ((task = cgroup_iter_next(cgroup, &it))) {
+ if (!freeze_task(task, true))
+ continue;
+ if (is_task_frozen_enough(task))
+ continue;
+ if (!freezing(task) && !freezer_should_skip(task))
+ num_cant_freeze_now++;
+ }
+ cgroup_iter_end(cgroup, &it);
+
+ return num_cant_freeze_now ? -EBUSY : 0;
+}
+
+static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+ struct cgroup_iter it;
+ struct task_struct *task;
+
+ cgroup_iter_start(cgroup, &it);
+ while ((task = cgroup_iter_next(cgroup, &it))) {
+ int do_wake;
+
+ task_lock(task);
+ do_wake = __thaw_process(task);
+ task_unlock(task);
+ if (do_wake)
+ wake_up_process(task);
+ }
+ cgroup_iter_end(cgroup, &it);
+ freezer->state = CGROUP_THAWED;
+
+ return 0;
+}
+
+static int freezer_change_state(struct cgroup *cgroup,
+ enum freezer_state goal_state)
+{
+ struct freezer *freezer;
+ int retval = 0;
+
+ freezer = cgroup_freezer(cgroup);
+ spin_lock_irq(&freezer->lock);
+ update_freezer_state(cgroup, freezer);
+ if (goal_state == freezer->state)
+ goto out;
+ switch (freezer->state) {
+ case CGROUP_THAWED:
+ retval = try_to_freeze_cgroup(cgroup, freezer);
+ break;
+ case CGROUP_FREEZING:
+ if (goal_state == CGROUP_FROZEN) {
+ /* Userspace is retrying after
+ * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
+ retval = try_to_freeze_cgroup(cgroup, freezer);
+ break;
+ }
+ /* state == FREEZING and goal_state == THAWED, so unfreeze */
+ case CGROUP_FROZEN:
+ retval = unfreeze_cgroup(cgroup, freezer);
+ break;
+ default:
+ break;
+ }
+out:
+ spin_unlock_irq(&freezer->lock);
+
+ return retval;
+}
+
+static int freezer_write(struct cgroup *cgroup,
+ struct cftype *cft,
+ const char *buffer)
+{
+ int retval;
+ enum freezer_state goal_state;
+
+ if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
+ goal_state = CGROUP_THAWED;
+ else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
+ goal_state = CGROUP_FROZEN;
+ else
+ return -EIO;
+
+ if (!cgroup_lock_live_group(cgroup))
+ return -ENODEV;
+ retval = freezer_change_state(cgroup, goal_state);
+ cgroup_unlock();
+ return retval;
+}
+
+static struct cftype files[] = {
+ {
+ .name = "state",
+ .read_seq_string = freezer_read,
+ .write_string = freezer_write,
+ },
+};
+
+static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
+{
+ return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys freezer_subsys = {
+ .name = "freezer",
+ .create = freezer_create,
+ .destroy = freezer_destroy,
+ .populate = freezer_populate,
+ .subsys_id = freezer_subsys_id,
+ .can_attach = freezer_can_attach,
+ .attach = NULL,
+ .fork = freezer_fork,
+ .exit = NULL,
+};
diff --git a/kernel/configs.c b/kernel/configs.c
index 4c345210ed8c..abaee684ecbf 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -54,9 +54,6 @@
#ifdef CONFIG_IKCONFIG_PROC
-/**************************************************/
-/* globals and useful constants */
-
static ssize_t
ikconfig_read_current(struct file *file, char __user *buf,
size_t len, loff_t * offset)
@@ -71,9 +68,6 @@ static const struct file_operations ikconfig_file_ops = {
.read = ikconfig_read_current,
};
-/***************************************************/
-/* ikconfig_init: start up everything we need to */
-
static int __init ikconfig_init(void)
{
struct proc_dir_entry *entry;
@@ -89,9 +83,6 @@ static int __init ikconfig_init(void)
return 0;
}
-/***************************************************/
-/* ikconfig_cleanup: clean up our mess */
-
static void __exit ikconfig_cleanup(void)
{
remove_proc_entry("config.gz", NULL);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index eab7bd6628e0..3e00526f52ec 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1172,7 +1172,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
{
struct cpuset trialcs;
int err;
- int cpus_nonempty, balance_flag_changed;
+ int balance_flag_changed;
trialcs = *cs;
if (turning_on)
@@ -1184,7 +1184,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
if (err < 0)
return err;
- cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
balance_flag_changed = (is_sched_load_balance(cs) !=
is_sched_load_balance(&trialcs));
@@ -1192,7 +1191,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
cs->flags = trialcs.flags;
mutex_unlock(&callback_mutex);
- if (cpus_nonempty && balance_flag_changed)
+ if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
async_rebuild_sched_domains();
return 0;
@@ -2437,19 +2436,15 @@ const struct file_operations proc_cpuset_operations = {
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t");
- m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
- task->cpus_allowed);
+ seq_cpumask(m, &task->cpus_allowed);
seq_printf(m, "\n");
seq_printf(m, "Cpus_allowed_list:\t");
- m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
- task->cpus_allowed);
+ seq_cpumask_list(m, &task->cpus_allowed);
seq_printf(m, "\n");
seq_printf(m, "Mems_allowed:\t");
- m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
- task->mems_allowed);
+ seq_nodemask(m, &task->mems_allowed);
seq_printf(m, "\n");
seq_printf(m, "Mems_allowed_list:\t");
- m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
- task->mems_allowed);
+ seq_nodemask_list(m, &task->mems_allowed);
seq_printf(m, "\n");
}
diff --git a/kernel/freezer.c b/kernel/freezer.c
new file mode 100644
index 000000000000..ba6248b323ef
--- /dev/null
+++ b/kernel/freezer.c
@@ -0,0 +1,154 @@
+/*
+ * kernel/freezer.c - Function to freeze a process
+ *
+ * Originally from kernel/power/process.c
+ */
+
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/freezer.h>
+
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+ if (!unlikely(current->flags & PF_NOFREEZE)) {
+ current->flags |= PF_FROZEN;
+ wmb();
+ }
+ clear_freeze_flag(current);
+}
+
+/* Refrigerator is place where frozen processes are stored :-). */
+void refrigerator(void)
+{
+ /* Hmm, should we be allowed to suspend when there are realtime
+ processes around? */
+ long save;
+
+ task_lock(current);
+ if (freezing(current)) {
+ frozen_process();
+ task_unlock(current);
+ } else {
+ task_unlock(current);
+ return;
+ }
+ save = current->state;
+ pr_debug("%s entered refrigerator\n", current->comm);
+
+ spin_lock_irq(&current->sighand->siglock);
+ recalc_sigpending(); /* We sent fake signal, clean it up */
+ spin_unlock_irq(&current->sighand->siglock);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!frozen(current))
+ break;
+ schedule();
+ }
+ pr_debug("%s left refrigerator\n", current->comm);
+ __set_current_state(save);
+}
+EXPORT_SYMBOL(refrigerator);
+
+static void fake_signal_wake_up(struct task_struct *p)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ signal_wake_up(p, 0);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+}
+
+/**
+ * freeze_task - send a freeze request to given task
+ * @p: task to send the request to
+ * @sig_only: if set, the request will only be sent if the task has the
+ * PF_FREEZER_NOSIG flag unset
+ * Return value: 'false', if @sig_only is set and the task has
+ * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ *
+ * The freeze request is sent by setting the tasks's TIF_FREEZE flag and
+ * either sending a fake signal to it or waking it up, depending on whether
+ * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
+ * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
+ * TIF_FREEZE flag will not be set.
+ */
+bool freeze_task(struct task_struct *p, bool sig_only)
+{
+ /*
+ * We first check if the task is freezing and next if it has already
+ * been frozen to avoid the race with frozen_process() which first marks
+ * the task as frozen and next clears its TIF_FREEZE.
+ */
+ if (!freezing(p)) {
+ rmb();
+ if (frozen(p))
+ return false;
+
+ if (!sig_only || should_send_signal(p))
+ set_freeze_flag(p);
+ else
+ return false;
+ }
+
+ if (should_send_signal(p)) {
+ if (!signal_pending(p))
+ fake_signal_wake_up(p);
+ } else if (sig_only) {
+ return false;
+ } else {
+ wake_up_state(p, TASK_INTERRUPTIBLE);
+ }
+
+ return true;
+}
+
+void cancel_freezing(struct task_struct *p)
+{
+ unsigned long flags;
+
+ if (freezing(p)) {
+ pr_debug(" clean up: %s\n", p->comm);
+ clear_freeze_flag(p);
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ recalc_sigpending_and_wake(p);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ }
+}
+
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails. Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
+int __thaw_process(struct task_struct *p)
+{
+ if (frozen(p)) {
+ p->flags &= ~PF_FROZEN;
+ return 1;
+ }
+ clear_freeze_flag(p);
+ return 0;
+}
+
+int thaw_process(struct task_struct *p)
+{
+ task_lock(p);
+ if (__thaw_process(p) == 1) {
+ task_unlock(p);
+ wake_up_process(p);
+ return 1;
+ }
+ task_unlock(p);
+ return 0;
+}
+EXPORT_SYMBOL(thaw_process);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index aef265325cd3..777ac458ac99 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1371,6 +1371,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(node_online_map);
VMCOREINFO_SYMBOL(swapper_pg_dir);
VMCOREINFO_SYMBOL(_stext);
+ VMCOREINFO_SYMBOL(vmlist);
#ifndef CONFIG_NEED_MULTIPLE_NODES
VMCOREINFO_SYMBOL(mem_map);
@@ -1406,6 +1407,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_OFFSET(free_area, free_list);
VMCOREINFO_OFFSET(list_head, next);
VMCOREINFO_OFFSET(list_head, prev);
+ VMCOREINFO_OFFSET(vm_struct, addr);
VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
VMCOREINFO_NUMBER(NR_FREE_PAGES);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710b..14ec64fe175a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -171,12 +171,11 @@ EXPORT_SYMBOL(kthread_create);
*/
void kthread_bind(struct task_struct *k, unsigned int cpu)
{
- if (k->state != TASK_UNINTERRUPTIBLE) {
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
WARN_ON(1);
return;
}
- /* Must have done schedule() in kthread() before we set_task_cpu */
- wait_task_inactive(k, 0);
set_task_cpu(k, cpu);
k->cpus_allowed = cpumask_of_cpu(cpu);
k->rt.nr_cpus_allowed = 1;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 278946aecaf0..ca634019497a 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -28,121 +28,6 @@ static inline int freezeable(struct task_struct * p)
return 1;
}
-/*
- * freezing is complete, mark current process as frozen
- */
-static inline void frozen_process(void)
-{
- if (!unlikely(current->flags & PF_NOFREEZE)) {
- current->flags |= PF_FROZEN;
- wmb();
- }
- clear_freeze_flag(current);
-}
-
-/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
-{
- /* Hmm, should we be allowed to suspend when there are realtime
- processes around? */
- long save;
-
- task_lock(current);
- if (freezing(current)) {
- frozen_process();
- task_unlock(current);
- } else {
- task_unlock(current);
- return;
- }
- save = current->state;
- pr_debug("%s entered refrigerator\n", current->comm);
-
- spin_lock_irq(&current->sighand->siglock);
- recalc_sigpending(); /* We sent fake signal, clean it up */
- spin_unlock_irq(&current->sighand->siglock);
-
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!frozen(current))
- break;
- schedule();
- }
- pr_debug("%s left refrigerator\n", current->comm);
- __set_current_state(save);
-}
-
-static void fake_signal_wake_up(struct task_struct *p)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&p->sighand->siglock, flags);
- signal_wake_up(p, 0);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
-}
-
-static inline bool should_send_signal(struct task_struct *p)
-{
- return !(p->flags & PF_FREEZER_NOSIG);
-}
-
-/**
- * freeze_task - send a freeze request to given task
- * @p: task to send the request to
- * @sig_only: if set, the request will only be sent if the task has the
- * PF_FREEZER_NOSIG flag unset
- * Return value: 'false', if @sig_only is set and the task has
- * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
- *
- * The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- * either sending a fake signal to it or waking it up, depending on whether
- * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
- * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- * TIF_FREEZE flag will not be set.
- */
-static bool freeze_task(struct task_struct *p, bool sig_only)
-{
- /*
- * We first check if the task is freezing and next if it has already
- * been frozen to avoid the race with frozen_process() which first marks
- * the task as frozen and next clears its TIF_FREEZE.
- */
- if (!freezing(p)) {
- rmb();
- if (frozen(p))
- return false;
-
- if (!sig_only || should_send_signal(p))
- set_freeze_flag(p);
- else
- return false;
- }
-
- if (should_send_signal(p)) {
- if (!signal_pending(p))
- fake_signal_wake_up(p);
- } else if (sig_only) {
- return false;
- } else {
- wake_up_state(p, TASK_INTERRUPTIBLE);
- }
-
- return true;
-}
-
-static void cancel_freezing(struct task_struct *p)
-{
- unsigned long flags;
-
- if (freezing(p)) {
- pr_debug(" clean up: %s\n", p->comm);
- clear_freeze_flag(p);
- spin_lock_irqsave(&p->sighand->siglock, flags);
- recalc_sigpending_and_wake(p);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
- }
-}
-
static int try_to_freeze_tasks(bool sig_only)
{
struct task_struct *g, *p;
@@ -250,6 +135,9 @@ static void thaw_tasks(bool nosig_only)
if (nosig_only && should_send_signal(p))
continue;
+ if (cgroup_frozen(p))
+ continue;
+
thaw_process(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
@@ -264,4 +152,3 @@ void thaw_processes(void)
printk("done.\n");
}
-EXPORT_SYMBOL(refrigerator);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 356699a96d56..1e68e4c39e2c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -45,7 +45,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
* TASK_TRACED, resume it now.
* Requires that irqs be disabled.
*/
-void ptrace_untrace(struct task_struct *child)
+static void ptrace_untrace(struct task_struct *child)
{
spin_lock(&child->sighand->siglock);
if (task_is_traced(child)) {
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ca4bbbe04aa4..59236e8b9daa 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -54,9 +54,9 @@
#include <linux/cpu.h>
#include <linux/random.h>
#include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
#include <linux/cpumask.h>
#include <linux/rcupreempt_trace.h>
+#include <asm/byteorder.h>
/*
* PREEMPT_RCU data structures.
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 90b5b123f7a1..85cb90588a55 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -42,10 +42,10 @@
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
+#include <asm/byteorder.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a0..b3cc73931d1f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "scan_unevictable_pages",
+ .data = &scan_unevictable_pages,
+ .maxlen = sizeof(scan_unevictable_pages),
+ .mode = 0644,
+ .proc_handler = &scan_unevictable_handler,
+ },
+#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt