diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 193 | ||||
-rw-r--r-- | kernel/cgroup_freezer.c | 8 | ||||
-rw-r--r-- | kernel/cpuset.c | 7 | ||||
-rw-r--r-- | kernel/fork.c | 16 | ||||
-rw-r--r-- | kernel/irq/pm.c | 6 | ||||
-rw-r--r-- | kernel/panic.c | 5 | ||||
-rw-r--r-- | kernel/power/Kconfig | 67 | ||||
-rw-r--r-- | kernel/power/Makefile | 5 | ||||
-rw-r--r-- | kernel/power/consoleearlysuspend.c | 78 | ||||
-rw-r--r-- | kernel/power/earlysuspend.c | 178 | ||||
-rw-r--r-- | kernel/power/fbearlysuspend.c | 153 | ||||
-rw-r--r-- | kernel/power/main.c | 19 | ||||
-rw-r--r-- | kernel/power/power.h | 24 | ||||
-rw-r--r-- | kernel/power/process.c | 27 | ||||
-rw-r--r-- | kernel/power/suspend.c | 3 | ||||
-rw-r--r-- | kernel/power/userwakelock.c | 219 | ||||
-rw-r--r-- | kernel/power/wakelock.c | 607 | ||||
-rw-r--r-- | kernel/printk.c | 54 | ||||
-rw-r--r-- | kernel/sched.c | 91 | ||||
-rw-r--r-- | kernel/sched_fair.c | 6 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 |
21 files changed, 1675 insertions, 99 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b24d7027b83c..d96bd1eb5627 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -57,6 +57,7 @@ #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ #include <linux/eventfd.h> #include <linux/poll.h> +#include <linux/capability.h> #include <asm/atomic.h> @@ -267,6 +268,33 @@ static void cgroup_release_agent(struct work_struct *work); static DECLARE_WORK(release_agent_work, cgroup_release_agent); static void check_for_release(struct cgroup *cgrp); +/* + * A queue for waiters to do rmdir() cgroup. A tasks will sleep when + * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some + * reference to css->refcnt. In general, this refcnt is expected to goes down + * to zero, soon. + * + * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; + */ +DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); + +static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) +{ + if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) + wake_up_all(&cgroup_rmdir_waitq); +} + +void cgroup_exclude_rmdir(struct cgroup_subsys_state *css) +{ + css_get(css); +} + +void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) +{ + cgroup_wakeup_rmdir_waiter(css->cgroup); + css_put(css); +} + /* Link structure for associating css_set objects with cgroups */ struct cg_cgroup_link { /* @@ -326,10 +354,35 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) return &css_set_table[index]; } +static void free_css_set_work(struct work_struct *work) +{ + struct css_set *cg = container_of(work, struct css_set, work); + struct cg_cgroup_link *link; + struct cg_cgroup_link *saved_link; + + write_lock(&css_set_lock); + list_for_each_entry_safe(link, saved_link, &cg->cg_links, + cg_link_list) { + struct cgroup *cgrp = link->cgrp; + list_del(&link->cg_link_list); + list_del(&link->cgrp_link_list); + if (atomic_dec_and_test(&cgrp->count)) { + check_for_release(cgrp); + cgroup_wakeup_rmdir_waiter(cgrp); + } + kfree(link); + } + write_unlock(&css_set_lock); + + kfree(cg); +} + static void free_css_set_rcu(struct rcu_head *obj) { struct css_set *cg = container_of(obj, struct css_set, rcu_head); - kfree(cg); + + INIT_WORK(&cg->work, free_css_set_work); + schedule_work(&cg->work); } /* We don't maintain the lists running through each css_set to its @@ -338,10 +391,16 @@ static void free_css_set_rcu(struct rcu_head *obj) * compiled into their kernel but not actually in use */ static int use_task_css_set_links __read_mostly; -static void __put_css_set(struct css_set *cg, int taskexit) +/* + * refcounted get/put for css_set objects + */ +static inline void get_css_set(struct css_set *cg) +{ + atomic_inc(&cg->refcount); +} + +static void put_css_set(struct css_set *cg) { - struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; /* * Ensure that the refcount doesn't hit zero while any readers * can see it. Similar to atomic_dec_and_lock(), but for an @@ -355,48 +414,14 @@ static void __put_css_set(struct css_set *cg, int taskexit) return; } - /* This css_set is dead. unlink it and release cgroup refcounts */ hlist_del(&cg->hlist); css_set_count--; - list_for_each_entry_safe(link, saved_link, &cg->cg_links, - cg_link_list) { - struct cgroup *cgrp = link->cgrp; - list_del(&link->cg_link_list); - list_del(&link->cgrp_link_list); - if (atomic_dec_and_test(&cgrp->count) && - notify_on_release(cgrp)) { - if (taskexit) - set_bit(CGRP_RELEASABLE, &cgrp->flags); - check_for_release(cgrp); - } - - kfree(link); - } - write_unlock(&css_set_lock); call_rcu(&cg->rcu_head, free_css_set_rcu); } /* - * refcounted get/put for css_set objects - */ -static inline void get_css_set(struct css_set *cg) -{ - atomic_inc(&cg->refcount); -} - -static inline void put_css_set(struct css_set *cg) -{ - __put_css_set(cg, 0); -} - -static inline void put_css_set_taskexit(struct css_set *cg) -{ - __put_css_set(cg, 1); -} - -/* * compare_css_sets - helper function for find_existing_css_set(). * @cg: candidate css_set being tested * @old_cg: existing css_set for a task @@ -725,9 +750,9 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, * cgroup_attach_task(), which overwrites one tasks cgroup pointer with * another. It does so using cgroup_mutex, however there are * several performance critical places that need to reference - * task->cgroup without the expense of grabbing a system global + * task->cgroups without the expense of grabbing a system global * mutex. Therefore except as noted below, when dereferencing or, as - * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use + * in cgroup_attach_task(), modifying a task's cgroups pointer we use * task_lock(), which acts on a spinlock (task->alloc_lock) already in * the task_struct routinely used for such matters. * @@ -924,33 +949,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry) } /* - * A queue for waiters to do rmdir() cgroup. A tasks will sleep when - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some - * reference to css->refcnt. In general, this refcnt is expected to goes down - * to zero, soon. - * - * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; - */ -DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); - -static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) -{ - if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) - wake_up_all(&cgroup_rmdir_waitq); -} - -void cgroup_exclude_rmdir(struct cgroup_subsys_state *css) -{ - css_get(css); -} - -void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) -{ - cgroup_wakeup_rmdir_waiter(css->cgroup); - css_put(css); -} - -/* * Call with cgroup_mutex held. Drops reference counts on modules, including * any duplicate ones that parse_cgroupfs_options took. If this function * returns an error, no reference counts are touched. @@ -1783,6 +1781,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) failed_ss = ss; goto out; } + } else if (!capable(CAP_SYS_ADMIN)) { + const struct cred *cred = current_cred(), *tcred; + + /* No can_attach() - check perms generically */ + tcred = __task_cred(tsk); + if (cred->euid != tcred->uid && + cred->euid != tcred->suid) { + return -EACCES; + } } } @@ -1823,8 +1830,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) if (ss->attach) ss->attach(ss, cgrp, oldcgrp, tsk, false); } - set_bit(CGRP_RELEASABLE, &oldcgrp->flags); - synchronize_rcu(); + set_bit(CGRP_RELEASABLE, &cgrp->flags); + /* put_css_set will not destroy cg until after an RCU grace period */ put_css_set(cg); /* @@ -1881,7 +1888,6 @@ EXPORT_SYMBOL_GPL(cgroup_attach_task_all); static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) { struct task_struct *tsk; - const struct cred *cred = current_cred(), *tcred; int ret; if (pid) { @@ -1891,14 +1897,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) rcu_read_unlock(); return -ESRCH; } - - tcred = __task_cred(tsk); - if (cred->euid && - cred->euid != tcred->uid && - cred->euid != tcred->suid) { - rcu_read_unlock(); - return -EACCES; - } get_task_struct(tsk); rcu_read_unlock(); } else { @@ -3452,6 +3450,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (err < 0) goto err_remove; + set_bit(CGRP_RELEASABLE, &parent->flags); + /* The cgroup directory was pre-locked for us */ BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex)); @@ -3583,6 +3583,21 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) return !failed; } +/* checks if all of the css_sets attached to a cgroup have a refcount of 0. + * Must be called with css_set_lock held */ +static int cgroup_css_sets_empty(struct cgroup *cgrp) +{ + struct cg_cgroup_link *link; + + list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { + struct css_set *cg = link->cg; + if (atomic_read(&cg->refcount) > 0) + return 0; + } + + return 1; +} + static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) { struct cgroup *cgrp = dentry->d_fsdata; @@ -3595,7 +3610,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) /* the vfs holds both inode->i_mutex already */ again: mutex_lock(&cgroup_mutex); - if (atomic_read(&cgrp->count) != 0) { + if (!cgroup_css_sets_empty(cgrp)) { mutex_unlock(&cgroup_mutex); return -EBUSY; } @@ -3628,7 +3643,7 @@ again: mutex_lock(&cgroup_mutex); parent = cgrp->parent; - if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { + if (!cgroup_css_sets_empty(cgrp) || !list_empty(&cgrp->children)) { clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); mutex_unlock(&cgroup_mutex); return -EBUSY; @@ -3668,7 +3683,6 @@ again: cgroup_d_remove_dir(d); dput(d); - set_bit(CGRP_RELEASABLE, &parent->flags); check_for_release(parent); /* @@ -4253,7 +4267,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) if (!list_empty(&tsk->cg_list)) { write_lock(&css_set_lock); if (!list_empty(&tsk->cg_list)) - list_del(&tsk->cg_list); + list_del_init(&tsk->cg_list); write_unlock(&css_set_lock); } @@ -4263,7 +4277,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) tsk->cgroups = &init_css_set; task_unlock(tsk); if (cg) - put_css_set_taskexit(cg); + put_css_set(cg); } /** @@ -4433,6 +4447,14 @@ static void check_for_release(struct cgroup *cgrp) } /* Caller must verify that the css is not for root cgroup */ +void __css_get(struct cgroup_subsys_state *css, int count) +{ + atomic_add(count, &css->refcnt); + set_bit(CGRP_RELEASABLE, &css->cgroup->flags); +} +EXPORT_SYMBOL_GPL(__css_get); + +/* Caller must verify that the css is not for root cgroup */ void __css_put(struct cgroup_subsys_state *css, int count) { struct cgroup *cgrp = css->cgroup; @@ -4440,10 +4462,7 @@ void __css_put(struct cgroup_subsys_state *css, int count) rcu_read_lock(); val = atomic_sub_return(count, &css->refcnt); if (val == 1) { - if (notify_on_release(cgrp)) { - set_bit(CGRP_RELEASABLE, &cgrp->flags); - check_for_release(cgrp); - } + check_for_release(cgrp); cgroup_wakeup_rmdir_waiter(cgrp); } rcu_read_unlock(); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e7bebb7c6c38..12a4881dffae 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -164,6 +164,14 @@ static int freezer_can_attach(struct cgroup_subsys *ss, { struct freezer *freezer; + if ((current != task) && (!capable(CAP_SYS_ADMIN))) { + const struct cred *cred = current_cred(), *tcred; + + tcred = __task_cred(task); + if (cred->euid != tcred->uid && cred->euid != tcred->suid) + return -EPERM; + } + /* * Anything frozen can't move or be moved to/from. */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e92e98189032..e210b3d30085 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1382,6 +1382,13 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, int ret; struct cpuset *cs = cgroup_cs(cont); + if ((current != task) && (!capable(CAP_SYS_ADMIN))) { + const struct cred *cred = current_cred(), *tcred; + + if (cred->euid != tcred->uid && cred->euid != tcred->suid) + return -EPERM; + } + if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) return -ENOSPC; diff --git a/kernel/fork.c b/kernel/fork.c index 25e429152ddc..515abda40843 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -149,6 +149,9 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; +/* Notifier list called when a task struct is freed */ +static ATOMIC_NOTIFIER_HEAD(task_free_notifier); + static void account_kernel_stack(struct thread_info *ti, int account) { struct zone *zone = page_zone(virt_to_page(ti)); @@ -180,6 +183,18 @@ static inline void put_signal_struct(struct signal_struct *sig) free_signal_struct(sig); } +int task_free_register(struct notifier_block *n) +{ + return atomic_notifier_chain_register(&task_free_notifier, n); +} +EXPORT_SYMBOL(task_free_register); + +int task_free_unregister(struct notifier_block *n) +{ + return atomic_notifier_chain_unregister(&task_free_notifier, n); +} +EXPORT_SYMBOL(task_free_unregister); + void __put_task_struct(struct task_struct *tsk) { WARN_ON(!tsk->exit_state); @@ -190,6 +205,7 @@ void __put_task_struct(struct task_struct *tsk) delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); + atomic_notifier_call_chain(&task_free_notifier, 0, tsk); if (!profile_handoff_task(tsk)) free_task(tsk); } diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 0d4005d85b03..1df62ef4713b 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -72,8 +72,12 @@ int check_wakeup_irqs(void) int irq; for_each_irq_desc(irq, desc) - if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING)) + if ((desc->status & IRQ_WAKEUP) && + (desc->status & IRQ_PENDING)) { + pr_info("Wakeup IRQ %d %s pending, suspend aborted\n", + irq, desc->name ? desc->name : ""); return -EBUSY; + } return 0; } diff --git a/kernel/panic.c b/kernel/panic.c index 991bb87a1704..e510d28b3e63 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -33,7 +33,10 @@ static int pause_on_oops; static int pause_on_oops_flag; static DEFINE_SPINLOCK(pause_on_oops_lock); -int panic_timeout; +#ifndef CONFIG_PANIC_TIMEOUT +#define CONFIG_PANIC_TIMEOUT 0 +#endif +int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); ATOMIC_NOTIFIER_HEAD(panic_notifier_list); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 3367a428d40c..cd4fadac096e 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -145,6 +145,73 @@ config SUSPEND_FREEZER Turning OFF this setting is NOT recommended! If in doubt, say Y. +config HAS_WAKELOCK + bool + +config HAS_EARLYSUSPEND + bool + +config WAKELOCK + bool "Wake lock" + depends on PM && RTC_CLASS + default n + select HAS_WAKELOCK + ---help--- + Enable wakelocks. When user space request a sleep state the + sleep request will be delayed until no wake locks are held. + +config WAKELOCK_STAT + bool "Wake lock stats" + depends on WAKELOCK + default y + ---help--- + Report wake lock stats in /proc/wakelocks + +config USER_WAKELOCK + bool "Userspace wake locks" + depends on WAKELOCK + default y + ---help--- + User-space wake lock api. Write "lockname" or "lockname timeout" + to /sys/power/wake_lock lock and if needed create a wake lock. + Write "lockname" to /sys/power/wake_unlock to unlock a user wake + lock. + +config EARLYSUSPEND + bool "Early suspend" + depends on WAKELOCK + default y + select HAS_EARLYSUSPEND + ---help--- + Call early suspend handlers when the user requested sleep state + changes. + +choice + prompt "User-space screen access" + default FB_EARLYSUSPEND if !FRAMEBUFFER_CONSOLE + default CONSOLE_EARLYSUSPEND + depends on HAS_EARLYSUSPEND + + config NO_USER_SPACE_SCREEN_ACCESS_CONTROL + bool "None" + + config CONSOLE_EARLYSUSPEND + bool "Console switch on early-suspend" + depends on HAS_EARLYSUSPEND && VT + ---help--- + Register early suspend handler to perform a console switch to + when user-space should stop drawing to the screen and a switch + back when it should resume. + + config FB_EARLYSUSPEND + bool "Sysfs interface" + depends on HAS_EARLYSUSPEND + ---help--- + Register early suspend handler that notifies and waits for + user-space through sysfs when user-space should stop drawing + to the screen and notifies user-space when it should resume. +endchoice + config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c350e18b53e3..d128ccea8864 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -7,5 +7,10 @@ obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o +obj-$(CONFIG_WAKELOCK) += wakelock.o +obj-$(CONFIG_USER_WAKELOCK) += userwakelock.o +obj-$(CONFIG_EARLYSUSPEND) += earlysuspend.o +obj-$(CONFIG_CONSOLE_EARLYSUSPEND) += consoleearlysuspend.o +obj-$(CONFIG_FB_EARLYSUSPEND) += fbearlysuspend.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/consoleearlysuspend.c b/kernel/power/consoleearlysuspend.c new file mode 100644 index 000000000000..a3edcb267389 --- /dev/null +++ b/kernel/power/consoleearlysuspend.c @@ -0,0 +1,78 @@ +/* kernel/power/consoleearlysuspend.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/console.h> +#include <linux/earlysuspend.h> +#include <linux/kbd_kern.h> +#include <linux/module.h> +#include <linux/vt_kern.h> +#include <linux/wait.h> + +#define EARLY_SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) + +static int orig_fgconsole; +static void console_early_suspend(struct early_suspend *h) +{ + acquire_console_sem(); + orig_fgconsole = fg_console; + if (vc_allocate(EARLY_SUSPEND_CONSOLE)) + goto err; + if (set_console(EARLY_SUSPEND_CONSOLE)) + goto err; + release_console_sem(); + + if (vt_waitactive(EARLY_SUSPEND_CONSOLE + 1)) + pr_warning("console_early_suspend: Can't switch VCs.\n"); + return; +err: + pr_warning("console_early_suspend: Can't set console\n"); + release_console_sem(); +} + +static void console_late_resume(struct early_suspend *h) +{ + int ret; + acquire_console_sem(); + ret = set_console(orig_fgconsole); + release_console_sem(); + if (ret) { + pr_warning("console_late_resume: Can't set console.\n"); + return; + } + + if (vt_waitactive(orig_fgconsole + 1)) + pr_warning("console_late_resume: Can't switch VCs.\n"); +} + +static struct early_suspend console_early_suspend_desc = { + .level = EARLY_SUSPEND_LEVEL_STOP_DRAWING, + .suspend = console_early_suspend, + .resume = console_late_resume, +}; + +static int __init console_early_suspend_init(void) +{ + register_early_suspend(&console_early_suspend_desc); + return 0; +} + +static void __exit console_early_suspend_exit(void) +{ + unregister_early_suspend(&console_early_suspend_desc); +} + +module_init(console_early_suspend_init); +module_exit(console_early_suspend_exit); + diff --git a/kernel/power/earlysuspend.c b/kernel/power/earlysuspend.c new file mode 100644 index 000000000000..84bed51dcdce --- /dev/null +++ b/kernel/power/earlysuspend.c @@ -0,0 +1,178 @@ +/* kernel/power/earlysuspend.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/earlysuspend.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/rtc.h> +#include <linux/syscalls.h> /* sys_sync */ +#include <linux/wakelock.h> +#include <linux/workqueue.h> + +#include "power.h" + +enum { + DEBUG_USER_STATE = 1U << 0, + DEBUG_SUSPEND = 1U << 2, +}; +static int debug_mask = DEBUG_USER_STATE; +module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); + +static DEFINE_MUTEX(early_suspend_lock); +static LIST_HEAD(early_suspend_handlers); +static void early_suspend(struct work_struct *work); +static void late_resume(struct work_struct *work); +static DECLARE_WORK(early_suspend_work, early_suspend); +static DECLARE_WORK(late_resume_work, late_resume); +static DEFINE_SPINLOCK(state_lock); +enum { + SUSPEND_REQUESTED = 0x1, + SUSPENDED = 0x2, + SUSPEND_REQUESTED_AND_SUSPENDED = SUSPEND_REQUESTED | SUSPENDED, +}; +static int state; + +void register_early_suspend(struct early_suspend *handler) +{ + struct list_head *pos; + + mutex_lock(&early_suspend_lock); + list_for_each(pos, &early_suspend_handlers) { + struct early_suspend *e; + e = list_entry(pos, struct early_suspend, link); + if (e->level > handler->level) + break; + } + list_add_tail(&handler->link, pos); + if ((state & SUSPENDED) && handler->suspend) + handler->suspend(handler); + mutex_unlock(&early_suspend_lock); +} +EXPORT_SYMBOL(register_early_suspend); + +void unregister_early_suspend(struct early_suspend *handler) +{ + mutex_lock(&early_suspend_lock); + list_del(&handler->link); + mutex_unlock(&early_suspend_lock); +} +EXPORT_SYMBOL(unregister_early_suspend); + +static void early_suspend(struct work_struct *work) +{ + struct early_suspend *pos; + unsigned long irqflags; + int abort = 0; + + mutex_lock(&early_suspend_lock); + spin_lock_irqsave(&state_lock, irqflags); + if (state == SUSPEND_REQUESTED) + state |= SUSPENDED; + else + abort = 1; + spin_unlock_irqrestore(&state_lock, irqflags); + + if (abort) { + if (debug_mask & DEBUG_SUSPEND) + pr_info("early_suspend: abort, state %d\n", state); + mutex_unlock(&early_suspend_lock); + goto abort; + } + + if (debug_mask & DEBUG_SUSPEND) + pr_info("early_suspend: call handlers\n"); + list_for_each_entry(pos, &early_suspend_handlers, link) { + if (pos->suspend != NULL) + pos->suspend(pos); + } + mutex_unlock(&early_suspend_lock); + + if (debug_mask & DEBUG_SUSPEND) + pr_info("early_suspend: sync\n"); + + sys_sync(); +abort: + spin_lock_irqsave(&state_lock, irqflags); + if (state == SUSPEND_REQUESTED_AND_SUSPENDED) + wake_unlock(&main_wake_lock); + spin_unlock_irqrestore(&state_lock, irqflags); +} + +static void late_resume(struct work_struct *work) +{ + struct early_suspend *pos; + unsigned long irqflags; + int abort = 0; + + mutex_lock(&early_suspend_lock); + spin_lock_irqsave(&state_lock, irqflags); + if (state == SUSPENDED) + state &= ~SUSPENDED; + else + abort = 1; + spin_unlock_irqrestore(&state_lock, irqflags); + + if (abort) { + if (debug_mask & DEBUG_SUSPEND) + pr_info("late_resume: abort, state %d\n", state); + goto abort; + } + if (debug_mask & DEBUG_SUSPEND) + pr_info("late_resume: call handlers\n"); + list_for_each_entry_reverse(pos, &early_suspend_handlers, link) + if (pos->resume != NULL) + pos->resume(pos); + if (debug_mask & DEBUG_SUSPEND) + pr_info("late_resume: done\n"); +abort: + mutex_unlock(&early_suspend_lock); +} + +void request_suspend_state(suspend_state_t new_state) +{ + unsigned long irqflags; + int old_sleep; + + spin_lock_irqsave(&state_lock, irqflags); + old_sleep = state & SUSPEND_REQUESTED; + if (debug_mask & DEBUG_USER_STATE) { + struct timespec ts; + struct rtc_time tm; + getnstimeofday(&ts); + rtc_time_to_tm(ts.tv_sec, &tm); + pr_info("request_suspend_state: %s (%d->%d) at %lld " + "(%d-%02d-%02d %02d:%02d:%02d.%09lu UTC)\n", + new_state != PM_SUSPEND_ON ? "sleep" : "wakeup", + requested_suspend_state, new_state, + ktime_to_ns(ktime_get()), + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec); + } + if (!old_sleep && new_state != PM_SUSPEND_ON) { + state |= SUSPEND_REQUESTED; + queue_work(suspend_work_queue, &early_suspend_work); + } else if (old_sleep && new_state == PM_SUSPEND_ON) { + state &= ~SUSPEND_REQUESTED; + wake_lock(&main_wake_lock); + queue_work(suspend_work_queue, &late_resume_work); + } + requested_suspend_state = new_state; + spin_unlock_irqrestore(&state_lock, irqflags); +} + +suspend_state_t get_suspend_state(void) +{ + return requested_suspend_state; +} diff --git a/kernel/power/fbearlysuspend.c b/kernel/power/fbearlysuspend.c new file mode 100644 index 000000000000..15137650149c --- /dev/null +++ b/kernel/power/fbearlysuspend.c @@ -0,0 +1,153 @@ +/* kernel/power/fbearlysuspend.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/earlysuspend.h> +#include <linux/module.h> +#include <linux/wait.h> + +#include "power.h" + +static wait_queue_head_t fb_state_wq; +static DEFINE_SPINLOCK(fb_state_lock); +static enum { + FB_STATE_STOPPED_DRAWING, + FB_STATE_REQUEST_STOP_DRAWING, + FB_STATE_DRAWING_OK, +} fb_state; + +/* tell userspace to stop drawing, wait for it to stop */ +static void stop_drawing_early_suspend(struct early_suspend *h) +{ + int ret; + unsigned long irq_flags; + + spin_lock_irqsave(&fb_state_lock, irq_flags); + fb_state = FB_STATE_REQUEST_STOP_DRAWING; + spin_unlock_irqrestore(&fb_state_lock, irq_flags); + + wake_up_all(&fb_state_wq); + ret = wait_event_timeout(fb_state_wq, + fb_state == FB_STATE_STOPPED_DRAWING, + HZ); + if (unlikely(fb_state != FB_STATE_STOPPED_DRAWING)) + pr_warning("stop_drawing_early_suspend: timeout waiting for " + "userspace to stop drawing\n"); +} + +/* tell userspace to start drawing */ +static void start_drawing_late_resume(struct early_suspend *h) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&fb_state_lock, irq_flags); + fb_state = FB_STATE_DRAWING_OK; + spin_unlock_irqrestore(&fb_state_lock, irq_flags); + wake_up(&fb_state_wq); +} + +static struct early_suspend stop_drawing_early_suspend_desc = { + .level = EARLY_SUSPEND_LEVEL_STOP_DRAWING, + .suspend = stop_drawing_early_suspend, + .resume = start_drawing_late_resume, +}; + +static ssize_t wait_for_fb_sleep_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + int ret; + + ret = wait_event_interruptible(fb_state_wq, + fb_state != FB_STATE_DRAWING_OK); + if (ret && fb_state == FB_STATE_DRAWING_OK) + return ret; + else + s += sprintf(buf, "sleeping"); + return s - buf; +} + +static ssize_t wait_for_fb_wake_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + int ret; + unsigned long irq_flags; + + spin_lock_irqsave(&fb_state_lock, irq_flags); + if (fb_state == FB_STATE_REQUEST_STOP_DRAWING) { + fb_state = FB_STATE_STOPPED_DRAWING; + wake_up(&fb_state_wq); + } + spin_unlock_irqrestore(&fb_state_lock, irq_flags); + + ret = wait_event_interruptible(fb_state_wq, + fb_state == FB_STATE_DRAWING_OK); + if (ret && fb_state != FB_STATE_DRAWING_OK) + return ret; + else + s += sprintf(buf, "awake"); + + return s - buf; +} + +#define power_ro_attr(_name) \ +static struct kobj_attribute _name##_attr = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = 0444, \ + }, \ + .show = _name##_show, \ + .store = NULL, \ +} + +power_ro_attr(wait_for_fb_sleep); +power_ro_attr(wait_for_fb_wake); + +static struct attribute *g[] = { + &wait_for_fb_sleep_attr.attr, + &wait_for_fb_wake_attr.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = g, +}; + +static int __init android_power_init(void) +{ + int ret; + + init_waitqueue_head(&fb_state_wq); + fb_state = FB_STATE_DRAWING_OK; + + ret = sysfs_create_group(power_kobj, &attr_group); + if (ret) { + pr_err("android_power_init: sysfs_create_group failed\n"); + return ret; + } + + register_early_suspend(&stop_drawing_early_suspend_desc); + return 0; +} + +static void __exit android_power_exit(void) +{ + unregister_early_suspend(&stop_drawing_early_suspend_desc); + sysfs_remove_group(power_kobj, &attr_group); +} + +module_init(android_power_init); +module_exit(android_power_exit); + diff --git a/kernel/power/main.c b/kernel/power/main.c index ca4808780e29..2c5fa0cf9b03 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -173,7 +173,11 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { #ifdef CONFIG_SUSPEND +#ifdef CONFIG_EARLYSUSPEND + suspend_state_t state = PM_SUSPEND_ON; +#else suspend_state_t state = PM_SUSPEND_STANDBY; +#endif const char * const *s; #endif char *p; @@ -195,8 +199,15 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, break; } if (state < PM_SUSPEND_MAX && *s) +#ifdef CONFIG_EARLYSUSPEND + if (state == PM_SUSPEND_ON || valid_state(state)) { + error = 0; + request_suspend_state(state); + } +#else error = enter_state(state); #endif +#endif Exit: return error ? error : n; @@ -300,6 +311,10 @@ power_attr(pm_trace_dev_match); #endif /* CONFIG_PM_TRACE */ +#ifdef CONFIG_USER_WAKELOCK +power_attr(wake_lock); +power_attr(wake_unlock); +#endif #ifdef CONFIG_SUSPEND_DEVICE_TIME_DEBUG /* * threshold of device suspend time consumption in microsecond(0.5ms), the @@ -348,6 +363,10 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif +#ifdef CONFIG_USER_WAKELOCK + &wake_lock_attr.attr, + &wake_unlock_attr.attr, +#endif #endif NULL, }; diff --git a/kernel/power/power.h b/kernel/power/power.h index 03634be55f62..3237ad456e77 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -241,3 +241,27 @@ static inline void suspend_thaw_processes(void) { } #endif + +#ifdef CONFIG_WAKELOCK +/* kernel/power/wakelock.c */ +extern struct workqueue_struct *suspend_work_queue; +extern struct wake_lock main_wake_lock; +extern suspend_state_t requested_suspend_state; +#endif + +#ifdef CONFIG_USER_WAKELOCK +ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf); +ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n); +ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf); +ssize_t wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n); +#endif + +#ifdef CONFIG_EARLYSUSPEND +/* kernel/power/earlysuspend.c */ +void request_suspend_state(suspend_state_t state); +suspend_state_t get_suspend_state(void); +#endif diff --git a/kernel/power/process.c b/kernel/power/process.c index 0cf3a27a6c9d..31338cdeafc4 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -16,6 +16,7 @@ #include <linux/freezer.h> #include <linux/delay.h> #include <linux/workqueue.h> +#include <linux/wakelock.h> /* * Timeout for stopping processes @@ -82,6 +83,10 @@ static int try_to_freeze_tasks(bool sig_only) todo += wq_busy; } + if (todo && has_wake_lock(WAKE_LOCK_SUSPEND)) { + wakeup = 1; + break; + } if (!todo || time_after(jiffies, end_time)) break; @@ -108,19 +113,25 @@ static int try_to_freeze_tasks(bool sig_only) * and caller must call thaw_processes() if something fails), * but it cleans up leftover PF_FREEZE requests. */ - printk("\n"); - printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " - "(%d tasks refusing to freeze, wq_busy=%d):\n", - wakeup ? "aborted" : "failed", - elapsed_csecs / 100, elapsed_csecs % 100, - todo - wq_busy, wq_busy); - + if(wakeup) { + printk("\n"); + printk(KERN_ERR "Freezing of %s aborted\n", + sig_only ? "user space " : "tasks "); + } + else { + printk("\n"); + printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " + "(%d tasks refusing to freeze, wq_busy=%d):\n", + elapsed_csecs / 100, elapsed_csecs % 100, + todo - wq_busy, wq_busy); + } thaw_workqueues(); read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); - if (!wakeup && freezing(p) && !freezer_should_skip(p)) + if (freezing(p) && !freezer_should_skip(p) && + elapsed_csecs > 100) sched_show_task(p); cancel_freezing(p); task_unlock(p); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index de6f86bfa303..7c4cd6a64841 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -27,6 +27,9 @@ #include "power.h" const char *const pm_states[PM_SUSPEND_MAX] = { +#ifdef CONFIG_EARLYSUSPEND + [PM_SUSPEND_ON] = "on", +#endif [PM_SUSPEND_STANDBY] = "standby", [PM_SUSPEND_MEM] = "mem", }; diff --git a/kernel/power/userwakelock.c b/kernel/power/userwakelock.c new file mode 100644 index 000000000000..a28a8db41468 --- /dev/null +++ b/kernel/power/userwakelock.c @@ -0,0 +1,219 @@ +/* kernel/power/userwakelock.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/ctype.h> +#include <linux/module.h> +#include <linux/wakelock.h> +#include <linux/slab.h> + +#include "power.h" + +enum { + DEBUG_FAILURE = BIT(0), + DEBUG_ERROR = BIT(1), + DEBUG_NEW = BIT(2), + DEBUG_ACCESS = BIT(3), + DEBUG_LOOKUP = BIT(4), +}; +static int debug_mask = DEBUG_FAILURE; +module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); + +static DEFINE_MUTEX(tree_lock); + +struct user_wake_lock { + struct rb_node node; + struct wake_lock wake_lock; + char name[0]; +}; +struct rb_root user_wake_locks; + +static struct user_wake_lock *lookup_wake_lock_name( + const char *buf, int allocate, long *timeoutptr) +{ + struct rb_node **p = &user_wake_locks.rb_node; + struct rb_node *parent = NULL; + struct user_wake_lock *l; + int diff; + u64 timeout; + int name_len; + const char *arg; + + /* Find length of lock name and start of optional timeout string */ + arg = buf; + while (*arg && !isspace(*arg)) + arg++; + name_len = arg - buf; + if (!name_len) + goto bad_arg; + while (isspace(*arg)) + arg++; + + /* Process timeout string */ + if (timeoutptr && *arg) { + timeout = simple_strtoull(arg, (char **)&arg, 0); + while (isspace(*arg)) + arg++; + if (*arg) + goto bad_arg; + /* convert timeout from nanoseconds to jiffies > 0 */ + timeout += (NSEC_PER_SEC / HZ) - 1; + do_div(timeout, (NSEC_PER_SEC / HZ)); + if (timeout <= 0) + timeout = 1; + *timeoutptr = timeout; + } else if (*arg) + goto bad_arg; + else if (timeoutptr) + *timeoutptr = 0; + + /* Lookup wake lock in rbtree */ + while (*p) { + parent = *p; + l = rb_entry(parent, struct user_wake_lock, node); + diff = strncmp(buf, l->name, name_len); + if (!diff && l->name[name_len]) + diff = -1; + if (debug_mask & DEBUG_ERROR) + pr_info("lookup_wake_lock_name: compare %.*s %s %d\n", + name_len, buf, l->name, diff); + + if (diff < 0) + p = &(*p)->rb_left; + else if (diff > 0) + p = &(*p)->rb_right; + else + return l; + } + + /* Allocate and add new wakelock to rbtree */ + if (!allocate) { + if (debug_mask & DEBUG_ERROR) + pr_info("lookup_wake_lock_name: %.*s not found\n", + name_len, buf); + return ERR_PTR(-EINVAL); + } + l = kzalloc(sizeof(*l) + name_len + 1, GFP_KERNEL); + if (l == NULL) { + if (debug_mask & DEBUG_FAILURE) + pr_err("lookup_wake_lock_name: failed to allocate " + "memory for %.*s\n", name_len, buf); + return ERR_PTR(-ENOMEM); + } + memcpy(l->name, buf, name_len); + if (debug_mask & DEBUG_NEW) + pr_info("lookup_wake_lock_name: new wake lock %s\n", l->name); + wake_lock_init(&l->wake_lock, WAKE_LOCK_SUSPEND, l->name); + rb_link_node(&l->node, parent, p); + rb_insert_color(&l->node, &user_wake_locks); + return l; + +bad_arg: + if (debug_mask & DEBUG_ERROR) + pr_info("lookup_wake_lock_name: wake lock, %.*s, bad arg, %s\n", + name_len, buf, arg); + return ERR_PTR(-EINVAL); +} + +ssize_t wake_lock_show( + struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + char *end = buf + PAGE_SIZE; + struct rb_node *n; + struct user_wake_lock *l; + + mutex_lock(&tree_lock); + + for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) { + l = rb_entry(n, struct user_wake_lock, node); + if (wake_lock_active(&l->wake_lock)) + s += scnprintf(s, end - s, "%s ", l->name); + } + s += scnprintf(s, end - s, "\n"); + + mutex_unlock(&tree_lock); + return (s - buf); +} + +ssize_t wake_lock_store( + struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + long timeout; + struct user_wake_lock *l; + + mutex_lock(&tree_lock); + l = lookup_wake_lock_name(buf, 1, &timeout); + if (IS_ERR(l)) { + n = PTR_ERR(l); + goto bad_name; + } + + if (debug_mask & DEBUG_ACCESS) + pr_info("wake_lock_store: %s, timeout %ld\n", l->name, timeout); + + if (timeout) + wake_lock_timeout(&l->wake_lock, timeout); + else + wake_lock(&l->wake_lock); +bad_name: + mutex_unlock(&tree_lock); + return n; +} + + +ssize_t wake_unlock_show( + struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + char *end = buf + PAGE_SIZE; + struct rb_node *n; + struct user_wake_lock *l; + + mutex_lock(&tree_lock); + + for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) { + l = rb_entry(n, struct user_wake_lock, node); + if (!wake_lock_active(&l->wake_lock)) + s += scnprintf(s, end - s, "%s ", l->name); + } + s += scnprintf(s, end - s, "\n"); + + mutex_unlock(&tree_lock); + return (s - buf); +} + +ssize_t wake_unlock_store( + struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + struct user_wake_lock *l; + + mutex_lock(&tree_lock); + l = lookup_wake_lock_name(buf, 0, NULL); + if (IS_ERR(l)) { + n = PTR_ERR(l); + goto not_found; + } + + if (debug_mask & DEBUG_ACCESS) + pr_info("wake_unlock_store: %s\n", l->name); + + wake_unlock(&l->wake_lock); +not_found: + mutex_unlock(&tree_lock); + return n; +} + diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c new file mode 100644 index 000000000000..ee9781c5adb2 --- /dev/null +++ b/kernel/power/wakelock.c @@ -0,0 +1,607 @@ +/* kernel/power/wakelock.c + * + * Copyright (C) 2005-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/rtc.h> +#include <linux/suspend.h> +#include <linux/syscalls.h> /* sys_sync */ +#include <linux/wakelock.h> +#ifdef CONFIG_WAKELOCK_STAT +#include <linux/proc_fs.h> +#endif +#include "power.h" + +enum { + DEBUG_EXIT_SUSPEND = 1U << 0, + DEBUG_WAKEUP = 1U << 1, + DEBUG_SUSPEND = 1U << 2, + DEBUG_EXPIRE = 1U << 3, + DEBUG_WAKE_LOCK = 1U << 4, +}; +static int debug_mask = DEBUG_EXIT_SUSPEND | DEBUG_WAKEUP; +module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); + +#define WAKE_LOCK_TYPE_MASK (0x0f) +#define WAKE_LOCK_INITIALIZED (1U << 8) +#define WAKE_LOCK_ACTIVE (1U << 9) +#define WAKE_LOCK_AUTO_EXPIRE (1U << 10) +#define WAKE_LOCK_PREVENTING_SUSPEND (1U << 11) + +static DEFINE_SPINLOCK(list_lock); +static LIST_HEAD(inactive_locks); +static struct list_head active_wake_locks[WAKE_LOCK_TYPE_COUNT]; +static int current_event_num; +struct workqueue_struct *suspend_work_queue; +struct wake_lock main_wake_lock; +suspend_state_t requested_suspend_state = PM_SUSPEND_MEM; +static struct wake_lock unknown_wakeup; + +#ifdef CONFIG_WAKELOCK_STAT +static struct wake_lock deleted_wake_locks; +static ktime_t last_sleep_time_update; +static int wait_for_wakeup; + +int get_expired_time(struct wake_lock *lock, ktime_t *expire_time) +{ + struct timespec ts; + struct timespec kt; + struct timespec tomono; + struct timespec delta; + unsigned long seq; + long timeout; + + if (!(lock->flags & WAKE_LOCK_AUTO_EXPIRE)) + return 0; + do { + seq = read_seqbegin(&xtime_lock); + timeout = lock->expires - jiffies; + if (timeout > 0) + return 0; + kt = current_kernel_time(); + tomono = __get_wall_to_monotonic(); + } while (read_seqretry(&xtime_lock, seq)); + jiffies_to_timespec(-timeout, &delta); + set_normalized_timespec(&ts, kt.tv_sec + tomono.tv_sec - delta.tv_sec, + kt.tv_nsec + tomono.tv_nsec - delta.tv_nsec); + *expire_time = timespec_to_ktime(ts); + return 1; +} + + +static int print_lock_stat(struct seq_file *m, struct wake_lock *lock) +{ + int lock_count = lock->stat.count; + int expire_count = lock->stat.expire_count; + ktime_t active_time = ktime_set(0, 0); + ktime_t total_time = lock->stat.total_time; + ktime_t max_time = lock->stat.max_time; + + ktime_t prevent_suspend_time = lock->stat.prevent_suspend_time; + if (lock->flags & WAKE_LOCK_ACTIVE) { + ktime_t now, add_time; + int expired = get_expired_time(lock, &now); + if (!expired) + now = ktime_get(); + add_time = ktime_sub(now, lock->stat.last_time); + lock_count++; + if (!expired) + active_time = add_time; + else + expire_count++; + total_time = ktime_add(total_time, add_time); + if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND) + prevent_suspend_time = ktime_add(prevent_suspend_time, + ktime_sub(now, last_sleep_time_update)); + if (add_time.tv64 > max_time.tv64) + max_time = add_time; + } + + return seq_printf(m, + "\"%s\"\t%d\t%d\t%d\t%lld\t%lld\t%lld\t%lld\t%lld\n", + lock->name, lock_count, expire_count, + lock->stat.wakeup_count, ktime_to_ns(active_time), + ktime_to_ns(total_time), + ktime_to_ns(prevent_suspend_time), ktime_to_ns(max_time), + ktime_to_ns(lock->stat.last_time)); +} + +static int wakelock_stats_show(struct seq_file *m, void *unused) +{ + unsigned long irqflags; + struct wake_lock *lock; + int ret; + int type; + + spin_lock_irqsave(&list_lock, irqflags); + + ret = seq_puts(m, "name\tcount\texpire_count\twake_count\tactive_since" + "\ttotal_time\tsleep_time\tmax_time\tlast_change\n"); + list_for_each_entry(lock, &inactive_locks, link) + ret = print_lock_stat(m, lock); + for (type = 0; type < WAKE_LOCK_TYPE_COUNT; type++) { + list_for_each_entry(lock, &active_wake_locks[type], link) + ret = print_lock_stat(m, lock); + } + spin_unlock_irqrestore(&list_lock, irqflags); + return 0; +} + +static void wake_unlock_stat_locked(struct wake_lock *lock, int expired) +{ + ktime_t duration; + ktime_t now; + if (!(lock->flags & WAKE_LOCK_ACTIVE)) + return; + if (get_expired_time(lock, &now)) + expired = 1; + else + now = ktime_get(); + lock->stat.count++; + if (expired) + lock->stat.expire_count++; + duration = ktime_sub(now, lock->stat.last_time); + lock->stat.total_time = ktime_add(lock->stat.total_time, duration); + if (ktime_to_ns(duration) > ktime_to_ns(lock->stat.max_time)) + lock->stat.max_time = duration; + lock->stat.last_time = ktime_get(); + if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND) { + duration = ktime_sub(now, last_sleep_time_update); + lock->stat.prevent_suspend_time = ktime_add( + lock->stat.prevent_suspend_time, duration); + lock->flags &= ~WAKE_LOCK_PREVENTING_SUSPEND; + } +} + +static void update_sleep_wait_stats_locked(int done) +{ + struct wake_lock *lock; + ktime_t now, etime, elapsed, add; + int expired; + + now = ktime_get(); + elapsed = ktime_sub(now, last_sleep_time_update); + list_for_each_entry(lock, &active_wake_locks[WAKE_LOCK_SUSPEND], link) { + expired = get_expired_time(lock, &etime); + if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND) { + if (expired) + add = ktime_sub(etime, last_sleep_time_update); + else + add = elapsed; + lock->stat.prevent_suspend_time = ktime_add( + lock->stat.prevent_suspend_time, add); + } + if (done || expired) + lock->flags &= ~WAKE_LOCK_PREVENTING_SUSPEND; + else + lock->flags |= WAKE_LOCK_PREVENTING_SUSPEND; + } + last_sleep_time_update = now; +} +#endif + + +static void expire_wake_lock(struct wake_lock *lock) +{ +#ifdef CONFIG_WAKELOCK_STAT + wake_unlock_stat_locked(lock, 1); +#endif + lock->flags &= ~(WAKE_LOCK_ACTIVE | WAKE_LOCK_AUTO_EXPIRE); + list_del(&lock->link); + list_add(&lock->link, &inactive_locks); + if (debug_mask & (DEBUG_WAKE_LOCK | DEBUG_EXPIRE)) + pr_info("expired wake lock %s\n", lock->name); +} + +/* Caller must acquire the list_lock spinlock */ +static void print_active_locks(int type) +{ + struct wake_lock *lock; + bool print_expired = true; + + BUG_ON(type >= WAKE_LOCK_TYPE_COUNT); + list_for_each_entry(lock, &active_wake_locks[type], link) { + if (lock->flags & WAKE_LOCK_AUTO_EXPIRE) { + long timeout = lock->expires - jiffies; + if (timeout > 0) + pr_info("active wake lock %s, time left %ld\n", + lock->name, timeout); + else if (print_expired) + pr_info("wake lock %s, expired\n", lock->name); + } else { + pr_info("active wake lock %s\n", lock->name); + if (!(debug_mask & DEBUG_EXPIRE)) + print_expired = false; + } + } +} + +static long has_wake_lock_locked(int type) +{ + struct wake_lock *lock, *n; + long max_timeout = 0; + + BUG_ON(type >= WAKE_LOCK_TYPE_COUNT); + list_for_each_entry_safe(lock, n, &active_wake_locks[type], link) { + if (lock->flags & WAKE_LOCK_AUTO_EXPIRE) { + long timeout = lock->expires - jiffies; + if (timeout <= 0) + expire_wake_lock(lock); + else if (timeout > max_timeout) + max_timeout = timeout; + } else + return -1; + } + return max_timeout; +} + +long has_wake_lock(int type) +{ + long ret; + unsigned long irqflags; + spin_lock_irqsave(&list_lock, irqflags); + ret = has_wake_lock_locked(type); + if (ret && (debug_mask & DEBUG_SUSPEND) && type == WAKE_LOCK_SUSPEND) + print_active_locks(type); + spin_unlock_irqrestore(&list_lock, irqflags); + return ret; +} + +static void suspend(struct work_struct *work) +{ + int ret; + int entry_event_num; + + if (has_wake_lock(WAKE_LOCK_SUSPEND)) { + if (debug_mask & DEBUG_SUSPEND) + pr_info("suspend: abort suspend\n"); + return; + } + + entry_event_num = current_event_num; + sys_sync(); + if (debug_mask & DEBUG_SUSPEND) + pr_info("suspend: enter suspend\n"); + ret = pm_suspend(requested_suspend_state); + if (debug_mask & DEBUG_EXIT_SUSPEND) { + struct timespec ts; + struct rtc_time tm; + getnstimeofday(&ts); + rtc_time_to_tm(ts.tv_sec, &tm); + pr_info("suspend: exit suspend, ret = %d " + "(%d-%02d-%02d %02d:%02d:%02d.%09lu UTC)\n", ret, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec); + } + if (current_event_num == entry_event_num) { + if (debug_mask & DEBUG_SUSPEND) + pr_info("suspend: pm_suspend returned with no event\n"); + wake_lock_timeout(&unknown_wakeup, HZ / 2); + } +} +static DECLARE_WORK(suspend_work, suspend); + +static void expire_wake_locks(unsigned long data) +{ + long has_lock; + unsigned long irqflags; + if (debug_mask & DEBUG_EXPIRE) + pr_info("expire_wake_locks: start\n"); + spin_lock_irqsave(&list_lock, irqflags); + if (debug_mask & DEBUG_SUSPEND) + print_active_locks(WAKE_LOCK_SUSPEND); + has_lock = has_wake_lock_locked(WAKE_LOCK_SUSPEND); + if (debug_mask & DEBUG_EXPIRE) + pr_info("expire_wake_locks: done, has_lock %ld\n", has_lock); + if (has_lock == 0) + queue_work(suspend_work_queue, &suspend_work); + spin_unlock_irqrestore(&list_lock, irqflags); +} +static DEFINE_TIMER(expire_timer, expire_wake_locks, 0, 0); + +static int power_suspend_late(struct device *dev) +{ + int ret = has_wake_lock(WAKE_LOCK_SUSPEND) ? -EAGAIN : 0; +#ifdef CONFIG_WAKELOCK_STAT + wait_for_wakeup = 1; +#endif + if (debug_mask & DEBUG_SUSPEND) + pr_info("power_suspend_late return %d\n", ret); + return ret; +} + +static struct dev_pm_ops power_driver_pm_ops = { + .suspend_noirq = power_suspend_late, +}; + +static struct platform_driver power_driver = { + .driver.name = "power", + .driver.pm = &power_driver_pm_ops, +}; +static struct platform_device power_device = { + .name = "power", +}; + +void wake_lock_init(struct wake_lock *lock, int type, const char *name) +{ + unsigned long irqflags = 0; + + if (name) + lock->name = name; + BUG_ON(!lock->name); + + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_lock_init name=%s\n", lock->name); +#ifdef CONFIG_WAKELOCK_STAT + lock->stat.count = 0; + lock->stat.expire_count = 0; + lock->stat.wakeup_count = 0; + lock->stat.total_time = ktime_set(0, 0); + lock->stat.prevent_suspend_time = ktime_set(0, 0); + lock->stat.max_time = ktime_set(0, 0); + lock->stat.last_time = ktime_set(0, 0); +#endif + lock->flags = (type & WAKE_LOCK_TYPE_MASK) | WAKE_LOCK_INITIALIZED; + + INIT_LIST_HEAD(&lock->link); + spin_lock_irqsave(&list_lock, irqflags); + list_add(&lock->link, &inactive_locks); + spin_unlock_irqrestore(&list_lock, irqflags); +} +EXPORT_SYMBOL(wake_lock_init); + +void wake_lock_destroy(struct wake_lock *lock) +{ + unsigned long irqflags; + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_lock_destroy name=%s\n", lock->name); + spin_lock_irqsave(&list_lock, irqflags); + lock->flags &= ~WAKE_LOCK_INITIALIZED; +#ifdef CONFIG_WAKELOCK_STAT + if (lock->stat.count) { + deleted_wake_locks.stat.count += lock->stat.count; + deleted_wake_locks.stat.expire_count += lock->stat.expire_count; + deleted_wake_locks.stat.total_time = + ktime_add(deleted_wake_locks.stat.total_time, + lock->stat.total_time); + deleted_wake_locks.stat.prevent_suspend_time = + ktime_add(deleted_wake_locks.stat.prevent_suspend_time, + lock->stat.prevent_suspend_time); + deleted_wake_locks.stat.max_time = + ktime_add(deleted_wake_locks.stat.max_time, + lock->stat.max_time); + } +#endif + list_del(&lock->link); + spin_unlock_irqrestore(&list_lock, irqflags); +} +EXPORT_SYMBOL(wake_lock_destroy); + +static void wake_lock_internal( + struct wake_lock *lock, long timeout, int has_timeout) +{ + int type; + unsigned long irqflags; + long expire_in; + + spin_lock_irqsave(&list_lock, irqflags); + type = lock->flags & WAKE_LOCK_TYPE_MASK; + BUG_ON(type >= WAKE_LOCK_TYPE_COUNT); + BUG_ON(!(lock->flags & WAKE_LOCK_INITIALIZED)); +#ifdef CONFIG_WAKELOCK_STAT + if (type == WAKE_LOCK_SUSPEND && wait_for_wakeup) { + if (debug_mask & DEBUG_WAKEUP) + pr_info("wakeup wake lock: %s\n", lock->name); + wait_for_wakeup = 0; + lock->stat.wakeup_count++; + } + if ((lock->flags & WAKE_LOCK_AUTO_EXPIRE) && + (long)(lock->expires - jiffies) <= 0) { + wake_unlock_stat_locked(lock, 0); + lock->stat.last_time = ktime_get(); + } +#endif + if (!(lock->flags & WAKE_LOCK_ACTIVE)) { + lock->flags |= WAKE_LOCK_ACTIVE; +#ifdef CONFIG_WAKELOCK_STAT + lock->stat.last_time = ktime_get(); +#endif + } + list_del(&lock->link); + if (has_timeout) { + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_lock: %s, type %d, timeout %ld.%03lu\n", + lock->name, type, timeout / HZ, + (timeout % HZ) * MSEC_PER_SEC / HZ); + lock->expires = jiffies + timeout; + lock->flags |= WAKE_LOCK_AUTO_EXPIRE; + list_add_tail(&lock->link, &active_wake_locks[type]); + } else { + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_lock: %s, type %d\n", lock->name, type); + lock->expires = LONG_MAX; + lock->flags &= ~WAKE_LOCK_AUTO_EXPIRE; + list_add(&lock->link, &active_wake_locks[type]); + } + if (type == WAKE_LOCK_SUSPEND) { + current_event_num++; +#ifdef CONFIG_WAKELOCK_STAT + if (lock == &main_wake_lock) + update_sleep_wait_stats_locked(1); + else if (!wake_lock_active(&main_wake_lock)) + update_sleep_wait_stats_locked(0); +#endif + if (has_timeout) + expire_in = has_wake_lock_locked(type); + else + expire_in = -1; + if (expire_in > 0) { + if (debug_mask & DEBUG_EXPIRE) + pr_info("wake_lock: %s, start expire timer, " + "%ld\n", lock->name, expire_in); + mod_timer(&expire_timer, jiffies + expire_in); + } else { + if (del_timer(&expire_timer)) + if (debug_mask & DEBUG_EXPIRE) + pr_info("wake_lock: %s, stop expire timer\n", + lock->name); + if (expire_in == 0) + queue_work(suspend_work_queue, &suspend_work); + } + } + spin_unlock_irqrestore(&list_lock, irqflags); +} + +void wake_lock(struct wake_lock *lock) +{ + wake_lock_internal(lock, 0, 0); +} +EXPORT_SYMBOL(wake_lock); + +void wake_lock_timeout(struct wake_lock *lock, long timeout) +{ + wake_lock_internal(lock, timeout, 1); +} +EXPORT_SYMBOL(wake_lock_timeout); + +void wake_unlock(struct wake_lock *lock) +{ + int type; + unsigned long irqflags; + spin_lock_irqsave(&list_lock, irqflags); + type = lock->flags & WAKE_LOCK_TYPE_MASK; +#ifdef CONFIG_WAKELOCK_STAT + wake_unlock_stat_locked(lock, 0); +#endif + if (debug_mask & DEBUG_WAKE_LOCK) + pr_info("wake_unlock: %s\n", lock->name); + lock->flags &= ~(WAKE_LOCK_ACTIVE | WAKE_LOCK_AUTO_EXPIRE); + list_del(&lock->link); + list_add(&lock->link, &inactive_locks); + if (type == WAKE_LOCK_SUSPEND) { + long has_lock = has_wake_lock_locked(type); + if (has_lock > 0) { + if (debug_mask & DEBUG_EXPIRE) + pr_info("wake_unlock: %s, start expire timer, " + "%ld\n", lock->name, has_lock); + mod_timer(&expire_timer, jiffies + has_lock); + } else { + if (del_timer(&expire_timer)) + if (debug_mask & DEBUG_EXPIRE) + pr_info("wake_unlock: %s, stop expire " + "timer\n", lock->name); + if (has_lock == 0) + queue_work(suspend_work_queue, &suspend_work); + } + if (lock == &main_wake_lock) { + if (debug_mask & DEBUG_SUSPEND) + print_active_locks(WAKE_LOCK_SUSPEND); +#ifdef CONFIG_WAKELOCK_STAT + update_sleep_wait_stats_locked(0); +#endif + } + } + spin_unlock_irqrestore(&list_lock, irqflags); +} +EXPORT_SYMBOL(wake_unlock); + +int wake_lock_active(struct wake_lock *lock) +{ + return !!(lock->flags & WAKE_LOCK_ACTIVE); +} +EXPORT_SYMBOL(wake_lock_active); + +static int wakelock_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, wakelock_stats_show, NULL); +} + +static const struct file_operations wakelock_stats_fops = { + .owner = THIS_MODULE, + .open = wakelock_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init wakelocks_init(void) +{ + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(active_wake_locks); i++) + INIT_LIST_HEAD(&active_wake_locks[i]); + +#ifdef CONFIG_WAKELOCK_STAT + wake_lock_init(&deleted_wake_locks, WAKE_LOCK_SUSPEND, + "deleted_wake_locks"); +#endif + wake_lock_init(&main_wake_lock, WAKE_LOCK_SUSPEND, "main"); + wake_lock(&main_wake_lock); + wake_lock_init(&unknown_wakeup, WAKE_LOCK_SUSPEND, "unknown_wakeups"); + + ret = platform_device_register(&power_device); + if (ret) { + pr_err("wakelocks_init: platform_device_register failed\n"); + goto err_platform_device_register; + } + ret = platform_driver_register(&power_driver); + if (ret) { + pr_err("wakelocks_init: platform_driver_register failed\n"); + goto err_platform_driver_register; + } + + suspend_work_queue = create_singlethread_workqueue("suspend"); + if (suspend_work_queue == NULL) { + ret = -ENOMEM; + goto err_suspend_work_queue; + } + +#ifdef CONFIG_WAKELOCK_STAT + proc_create("wakelocks", S_IRUGO, NULL, &wakelock_stats_fops); +#endif + + return 0; + +err_suspend_work_queue: + platform_driver_unregister(&power_driver); +err_platform_driver_register: + platform_device_unregister(&power_device); +err_platform_device_register: + wake_lock_destroy(&unknown_wakeup); + wake_lock_destroy(&main_wake_lock); +#ifdef CONFIG_WAKELOCK_STAT + wake_lock_destroy(&deleted_wake_locks); +#endif + return ret; +} + +static void __exit wakelocks_exit(void) +{ +#ifdef CONFIG_WAKELOCK_STAT + remove_proc_entry("wakelocks", NULL); +#endif + destroy_workqueue(suspend_work_queue); + platform_driver_unregister(&power_driver); + platform_device_unregister(&power_device); + wake_lock_destroy(&unknown_wakeup); + wake_lock_destroy(&main_wake_lock); +#ifdef CONFIG_WAKELOCK_STAT + wake_lock_destroy(&deleted_wake_locks); +#endif +} + +core_initcall(wakelocks_init); +module_exit(wakelocks_exit); diff --git a/kernel/printk.c b/kernel/printk.c index 36231525e22f..f629e802fdc8 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -52,6 +52,10 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) +#ifdef CONFIG_DEBUG_LL +extern void printascii(char *); +#endif + /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ @@ -256,6 +260,53 @@ static inline void boot_delay_msec(void) } #endif +/* + * Return the number of unread characters in the log buffer. + */ +static int log_buf_get_len(void) +{ + return logged_chars; +} + +/* + * Clears the ring-buffer + */ +void log_buf_clear(void) +{ + logged_chars = 0; +} + +/* + * Copy a range of characters from the log buffer. + */ +int log_buf_copy(char *dest, int idx, int len) +{ + int ret, max; + bool took_lock = false; + + if (!oops_in_progress) { + spin_lock_irq(&logbuf_lock); + took_lock = true; + } + + max = log_buf_get_len(); + if (idx < 0 || idx >= max) { + ret = -1; + } else { + if (len > max - idx) + len = max - idx; + ret = len; + idx += (log_end - max); + while (len-- > 0) + dest[len] = LOG_BUF(idx + len); + } + + if (took_lock) + spin_unlock_irq(&logbuf_lock); + + return ret; +} + #ifdef CONFIG_SECURITY_DMESG_RESTRICT int dmesg_restrict = 1; #else @@ -773,6 +824,9 @@ asmlinkage int vprintk(const char *fmt, va_list args) printed_len += vscnprintf(printk_buf + printed_len, sizeof(printk_buf) - printed_len, fmt, args); +#ifdef CONFIG_DEBUG_LL + printascii(printk_buf); +#endif p = printk_buf; diff --git a/kernel/sched.c b/kernel/sched.c index 42eab5a8437d..b294a1882ffc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -72,6 +72,7 @@ #include <linux/ctype.h> #include <linux/ftrace.h> #include <linux/slab.h> +#include <linux/cpuacct.h> #include <asm/tlb.h> #include <asm/irq_regs.h> @@ -8078,13 +8079,24 @@ static inline int preempt_count_equals(int preempt_offset) return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); } +static int __might_sleep_init_called; +int __init __might_sleep_init(void) +{ + __might_sleep_init_called = 1; + return 0; +} +early_initcall(__might_sleep_init); + void __might_sleep(const char *file, int line, int preempt_offset) { #ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || - system_state != SYSTEM_RUNNING || oops_in_progress) + oops_in_progress) + return; + if (system_state != SYSTEM_RUNNING && + (!__might_sleep_init_called || system_state != SYSTEM_BOOTING)) return; if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) return; @@ -8835,6 +8847,15 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) static int cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { + if ((current != tsk) && (!capable(CAP_SYS_NICE))) { + const struct cred *cred = current_cred(), *tcred; + + tcred = __task_cred(tsk); + + if (cred->euid != tcred->uid && cred->euid != tcred->suid) + return -EPERM; + } + #ifdef CONFIG_RT_GROUP_SCHED if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) return -EINVAL; @@ -8994,8 +9015,30 @@ struct cpuacct { u64 __percpu *cpuusage; struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; struct cpuacct *parent; + struct cpuacct_charge_calls *cpufreq_fn; + void *cpuacct_data; }; +static struct cpuacct *cpuacct_root; + +/* Default calls for cpufreq accounting */ +static struct cpuacct_charge_calls *cpuacct_cpufreq; +int cpuacct_register_cpufreq(struct cpuacct_charge_calls *fn) +{ + cpuacct_cpufreq = fn; + + /* + * Root node is created before platform can register callbacks, + * initalize here. + */ + if (cpuacct_root && fn) { + cpuacct_root->cpufreq_fn = fn; + if (fn->init) + fn->init(&cpuacct_root->cpuacct_data); + } + return 0; +} + struct cgroup_subsys cpuacct_subsys; /* return cpu accounting group corresponding to this container */ @@ -9030,8 +9073,16 @@ static struct cgroup_subsys_state *cpuacct_create( if (percpu_counter_init(&ca->cpustat[i], 0)) goto out_free_counters; + ca->cpufreq_fn = cpuacct_cpufreq; + + /* If available, have platform code initalize cpu frequency table */ + if (ca->cpufreq_fn && ca->cpufreq_fn->init) + ca->cpufreq_fn->init(&ca->cpuacct_data); + if (cgrp->parent) ca->parent = cgroup_ca(cgrp->parent); + else + cpuacct_root = ca; return &ca->css; @@ -9159,6 +9210,32 @@ static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, return 0; } +static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft, + struct cgroup_map_cb *cb) +{ + struct cpuacct *ca = cgroup_ca(cgrp); + if (ca->cpufreq_fn && ca->cpufreq_fn->cpufreq_show) + ca->cpufreq_fn->cpufreq_show(ca->cpuacct_data, cb); + + return 0; +} + +/* return total cpu power usage (milliWatt second) of a group */ +static u64 cpuacct_powerusage_read(struct cgroup *cgrp, struct cftype *cft) +{ + int i; + struct cpuacct *ca = cgroup_ca(cgrp); + u64 totalpower = 0; + + if (ca->cpufreq_fn && ca->cpufreq_fn->power_usage) + for_each_present_cpu(i) { + totalpower += ca->cpufreq_fn->power_usage( + ca->cpuacct_data); + } + + return totalpower; +} + static struct cftype files[] = { { .name = "usage", @@ -9173,6 +9250,14 @@ static struct cftype files[] = { .name = "stat", .read_map = cpuacct_stats_show, }, + { + .name = "cpufreq", + .read_map = cpuacct_cpufreq_show, + }, + { + .name = "power", + .read_u64 = cpuacct_powerusage_read + }, }; static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) @@ -9202,6 +9287,10 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) for (; ca; ca = ca->parent) { u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); *cpuusage += cputime; + + /* Call back into platform code to account for CPU speeds */ + if (ca->cpufreq_fn && ca->cpufreq_fn->charge) + ca->cpufreq_fn->charge(ca->cpuacct_data, cputime, cpu); } rcu_read_unlock(); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0c26e2df450e..8faa29ac949f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1014,6 +1014,8 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { + u64 min_vruntime; + /* * Update run-time statistics of the 'current'. */ @@ -1040,6 +1042,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->on_rq = 0; update_cfs_load(cfs_rq, 0); account_entity_dequeue(cfs_rq, se); + + min_vruntime = cfs_rq->min_vruntime; update_min_vruntime(cfs_rq); update_cfs_shares(cfs_rq, 0); @@ -1049,7 +1053,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * movement in our normalized position. */ if (!(flags & DEQUEUE_SLEEP)) - se->vruntime -= cfs_rq->min_vruntime; + se->vruntime -= min_vruntime; } /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4eed0af5d144..a26c37df1b19 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -95,6 +95,7 @@ extern char core_pattern[]; extern unsigned int core_pipe_limit; extern int pid_max; extern int min_free_kbytes; +extern int min_free_order_shift; extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; @@ -1166,6 +1167,13 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, }, { + .procname = "min_free_order_shift", + .data = &min_free_order_shift, + .maxlen = sizeof(min_free_order_shift), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .procname = "percpu_pagelist_fraction", .data = &percpu_pagelist_fraction, .maxlen = sizeof(percpu_pagelist_fraction), |