summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/cpu_pm.c233
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/irq/irqdomain.c6
-rw-r--r--kernel/posix-cpu-timers.c5
-rw-r--r--kernel/power/Kconfig4
-rw-r--r--kernel/ptrace.c23
-rw-r--r--kernel/resource.c7
-rw-r--r--kernel/sched.c26
-rw-r--r--kernel/sched_rt.c4
-rw-r--r--kernel/taskstats.c1
-rw-r--r--kernel/tsacct.c15
-rw-r--r--kernel/workqueue.c7
14 files changed, 283 insertions, 55 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index eca595e2fd52..988cb3da7031 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -101,6 +101,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
+obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
new file mode 100644
index 000000000000..249152e15308
--- /dev/null
+++ b/kernel/cpu_pm.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * Author:
+ * Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpu_pm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/syscore_ops.h>
+
+static DEFINE_RWLOCK(cpu_pm_notifier_lock);
+static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+
+static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
+{
+ int ret;
+
+ ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
+ nr_to_call, nr_calls);
+
+ return notifier_to_errno(ret);
+}
+
+/**
+ * cpu_pm_register_notifier - register a driver with cpu_pm
+ * @nb: notifier block to register
+ *
+ * Add a driver to a list of drivers that are notified about
+ * CPU and CPU cluster low power entry and exit.
+ *
+ * This function may sleep, and has the same return conditions as
+ * raw_notifier_chain_register.
+ */
+int cpu_pm_register_notifier(struct notifier_block *nb)
+{
+ unsigned long flags;
+ int ret;
+
+ write_lock_irqsave(&cpu_pm_notifier_lock, flags);
+ ret = raw_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+ write_unlock_irqrestore(&cpu_pm_notifier_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
+
+/**
+ * cpu_pm_unregister_notifier - unregister a driver with cpu_pm
+ * @nb: notifier block to be unregistered
+ *
+ * Remove a driver from the CPU PM notifier list.
+ *
+ * This function may sleep, and has the same return conditions as
+ * raw_notifier_chain_unregister.
+ */
+int cpu_pm_unregister_notifier(struct notifier_block *nb)
+{
+ unsigned long flags;
+ int ret;
+
+ write_lock_irqsave(&cpu_pm_notifier_lock, flags);
+ ret = raw_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+ write_unlock_irqrestore(&cpu_pm_notifier_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
+
+/**
+ * cpm_pm_enter - CPU low power entry notifier
+ *
+ * Notifies listeners that a single CPU is entering a low power state that may
+ * cause some blocks in the same power domain as the cpu to reset.
+ *
+ * Must be called on the affected CPU with interrupts disabled. Platform is
+ * responsible for ensuring that cpu_pm_enter is not called twice on the same
+ * CPU before cpu_pm_exit is called. Notified drivers can include VFP
+ * co-processor, interrupt controller and it's PM extensions, local CPU
+ * timers context save/restore which shouldn't be interrupted. Hence it
+ * must be called with interrupts disabled.
+ *
+ * Return conditions are same as __raw_notifier_call_chain.
+ */
+int cpu_pm_enter(void)
+{
+ int nr_calls;
+ int ret = 0;
+
+ read_lock(&cpu_pm_notifier_lock);
+ ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls);
+ if (ret)
+ /*
+ * Inform listeners (nr_calls - 1) about failure of CPU PM
+ * PM entry who are notified earlier to prepare for it.
+ */
+ cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
+ read_unlock(&cpu_pm_notifier_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_pm_enter);
+
+/**
+ * cpm_pm_exit - CPU low power exit notifier
+ *
+ * Notifies listeners that a single CPU is exiting a low power state that may
+ * have caused some blocks in the same power domain as the cpu to reset.
+ *
+ * Notified drivers can include VFP co-processor, interrupt controller
+ * and it's PM extensions, local CPU timers context save/restore which
+ * shouldn't be interrupted. Hence it must be called with interrupts disabled.
+ *
+ * Return conditions are same as __raw_notifier_call_chain.
+ */
+int cpu_pm_exit(void)
+{
+ int ret;
+
+ read_lock(&cpu_pm_notifier_lock);
+ ret = cpu_pm_notify(CPU_PM_EXIT, -1, NULL);
+ read_unlock(&cpu_pm_notifier_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_pm_exit);
+
+/**
+ * cpm_cluster_pm_enter - CPU cluster low power entry notifier
+ *
+ * Notifies listeners that all cpus in a power domain are entering a low power
+ * state that may cause some blocks in the same power domain to reset.
+ *
+ * Must be called after cpu_pm_enter has been called on all cpus in the power
+ * domain, and before cpu_pm_exit has been called on any cpu in the power
+ * domain. Notified drivers can include VFP co-processor, interrupt controller
+ * and it's PM extensions, local CPU timers context save/restore which
+ * shouldn't be interrupted. Hence it must be called with interrupts disabled.
+ *
+ * Must be called with interrupts disabled.
+ *
+ * Return conditions are same as __raw_notifier_call_chain.
+ */
+int cpu_cluster_pm_enter(void)
+{
+ int nr_calls;
+ int ret = 0;
+
+ read_lock(&cpu_pm_notifier_lock);
+ ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls);
+ if (ret)
+ /*
+ * Inform listeners (nr_calls - 1) about failure of CPU cluster
+ * PM entry who are notified earlier to prepare for it.
+ */
+ cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL);
+ read_unlock(&cpu_pm_notifier_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
+
+/**
+ * cpm_cluster_pm_exit - CPU cluster low power exit notifier
+ *
+ * Notifies listeners that all cpus in a power domain are exiting form a
+ * low power state that may have caused some blocks in the same power domain
+ * to reset.
+ *
+ * Must be called after cpu_pm_exit has been called on all cpus in the power
+ * domain, and before cpu_pm_exit has been called on any cpu in the power
+ * domain. Notified drivers can include VFP co-processor, interrupt controller
+ * and it's PM extensions, local CPU timers context save/restore which
+ * shouldn't be interrupted. Hence it must be called with interrupts disabled.
+ *
+ * Return conditions are same as __raw_notifier_call_chain.
+ */
+int cpu_cluster_pm_exit(void)
+{
+ int ret;
+
+ read_lock(&cpu_pm_notifier_lock);
+ ret = cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL);
+ read_unlock(&cpu_pm_notifier_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit);
+
+#ifdef CONFIG_PM
+static int cpu_pm_suspend(void)
+{
+ int ret;
+
+ ret = cpu_pm_enter();
+ if (ret)
+ return ret;
+
+ ret = cpu_cluster_pm_enter();
+ return ret;
+}
+
+static void cpu_pm_resume(void)
+{
+ cpu_cluster_pm_exit();
+ cpu_pm_exit();
+}
+
+static struct syscore_ops cpu_pm_syscore_ops = {
+ .suspend = cpu_pm_suspend,
+ .resume = cpu_pm_resume,
+};
+
+static int cpu_pm_init(void)
+{
+ register_syscore_ops(&cpu_pm_syscore_ops);
+ return 0;
+}
+core_initcall(cpu_pm_init);
+#endif
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0f857782d06f..fbe38f2e8edb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5758,6 +5758,7 @@ struct pmu *perf_init_event(struct perf_event *event)
pmu = idr_find(&pmu_idr, event->attr.type);
rcu_read_unlock();
if (pmu) {
+ event->pmu = pmu;
ret = pmu->event_init(event);
if (ret)
pmu = ERR_PTR(ret);
@@ -5765,6 +5766,7 @@ struct pmu *perf_init_event(struct perf_event *event)
}
list_for_each_entry_rcu(pmu, &pmus, entry) {
+ event->pmu = pmu;
ret = pmu->event_init(event);
if (!ret)
goto unlock;
@@ -5891,8 +5893,6 @@ done:
return ERR_PTR(err);
}
- event->pmu = pmu;
-
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
jump_label_inc(&perf_sched_events);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d5a3009da71a..dc5114b4c16c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc)
desc->depth = 1;
if (desc->irq_data.chip->irq_shutdown)
desc->irq_data.chip->irq_shutdown(&desc->irq_data);
- if (desc->irq_data.chip->irq_disable)
+ else if (desc->irq_data.chip->irq_disable)
desc->irq_data.chip->irq_disable(&desc->irq_data);
else
desc->irq_data.chip->irq_mask(&desc->irq_data);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index d5828da3fd38..b57a3776de44 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -29,7 +29,11 @@ void irq_domain_add(struct irq_domain *domain)
*/
for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) {
d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq));
- if (d || d->domain) {
+ if (!d) {
+ WARN(1, "error: assigning domain to non existant irq_desc");
+ return;
+ }
+ if (d->domain) {
/* things are broken; just report, don't clean up */
WARN(1, "error: irq_desc already assigned to a domain");
return;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 58f405b581e7..c8008dd58ef2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -250,7 +250,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
do {
times->utime = cputime_add(times->utime, t->utime);
times->stime = cputime_add(times->stime, t->stime);
- times->sum_exec_runtime += t->se.sum_exec_runtime;
+ times->sum_exec_runtime += task_sched_runtime(t);
} while_each_thread(tsk, t);
out:
rcu_read_unlock();
@@ -312,7 +312,8 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
- cpu->sched = thread_group_sched_runtime(p);
+ thread_group_cputime(p, &cputime);
+ cpu->sched = cputime.sum_exec_runtime;
break;
}
return 0;
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 3744c594b19b..80a85971cf64 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -235,3 +235,7 @@ config PM_GENERIC_DOMAINS
config PM_GENERIC_DOMAINS_RUNTIME
def_bool y
depends on PM_RUNTIME && PM_GENERIC_DOMAINS
+
+config CPU_PM
+ bool
+ depends on SUSPEND || CPU_IDLE
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 9de3ecfd20f9..a70d2a5d8c7b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -744,20 +744,17 @@ int ptrace_request(struct task_struct *child, long request,
break;
si = child->last_siginfo;
- if (unlikely(!si || si->si_code >> 8 != PTRACE_EVENT_STOP))
- break;
-
- child->jobctl |= JOBCTL_LISTENING;
-
- /*
- * If NOTIFY is set, it means event happened between start
- * of this trap and now. Trigger re-trap immediately.
- */
- if (child->jobctl & JOBCTL_TRAP_NOTIFY)
- signal_wake_up(child, true);
-
+ if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) {
+ child->jobctl |= JOBCTL_LISTENING;
+ /*
+ * If NOTIFY is set, it means event happened between
+ * start of this trap and now. Trigger re-trap.
+ */
+ if (child->jobctl & JOBCTL_TRAP_NOTIFY)
+ signal_wake_up(child, true);
+ ret = 0;
+ }
unlock_task_sighand(child, &flags);
- ret = 0;
break;
case PTRACE_DETACH: /* detach a process that was attached. */
diff --git a/kernel/resource.c b/kernel/resource.c
index 3b3cedc52592..c8dc249da5ce 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -419,6 +419,9 @@ static int __find_resource(struct resource *root, struct resource *old,
else
tmp.end = root->end;
+ if (tmp.end < tmp.start)
+ goto next;
+
resource_clip(&tmp, constraint->min, constraint->max);
arch_remove_reservations(&tmp);
@@ -436,8 +439,10 @@ static int __find_resource(struct resource *root, struct resource *old,
return 0;
}
}
- if (!this)
+
+next: if (!this || this->end == root->end)
break;
+
if (this != old)
tmp.start = this->end + 1;
this = this->sibling;
diff --git a/kernel/sched.c b/kernel/sched.c
index ec5f472bc5b9..b50b0f0c9aa9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3725,30 +3725,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
}
/*
- * Return sum_exec_runtime for the thread group.
- * In case the task is currently running, return the sum plus current's
- * pending runtime that have not been accounted yet.
- *
- * Note that the thread group might have other running tasks as well,
- * so the return value not includes other pending runtime that other
- * running tasks might have.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
- struct task_cputime totals;
- unsigned long flags;
- struct rq *rq;
- u64 ns;
-
- rq = task_rq_lock(p, &flags);
- thread_group_cputime(p, &totals);
- ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
- task_rq_unlock(rq, p, &flags);
-
- return ns;
-}
-
-/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
@@ -4372,7 +4348,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
blk_schedule_flush_plug(tsk);
}
-asmlinkage void schedule(void)
+asmlinkage void __sched schedule(void)
{
struct task_struct *tsk = current;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 97540f0c9e47..af1177858be3 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1050,7 +1050,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
*/
if (curr && unlikely(rt_task(curr)) &&
(curr->rt.nr_cpus_allowed < 2 ||
- curr->prio < p->prio) &&
+ curr->prio <= p->prio) &&
(p->rt.nr_cpus_allowed > 1)) {
int target = find_lowest_rq(p);
@@ -1581,7 +1581,7 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
p->rt.nr_cpus_allowed > 1 &&
rt_task(rq->curr) &&
(rq->curr->rt.nr_cpus_allowed < 2 ||
- rq->curr->prio < p->prio))
+ rq->curr->prio <= p->prio))
push_rt_tasks(rq);
}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index e19ce1454ee1..e66046456f4f 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -655,6 +655,7 @@ static struct genl_ops taskstats_ops = {
.cmd = TASKSTATS_CMD_GET,
.doit = taskstats_user_cmd,
.policy = taskstats_cmd_get_policy,
+ .flags = GENL_ADMIN_PERM,
};
static struct genl_ops cgroupstats_ops = {
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 24dc60d9fa1f..5bbfac85866e 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
#define KB 1024
#define MB (1024*KB)
+#define KB_MASK (~(KB-1))
/*
* fill in extended accounting fields
*/
@@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB;
mmput(mm);
}
- stats->read_char = p->ioac.rchar;
- stats->write_char = p->ioac.wchar;
- stats->read_syscalls = p->ioac.syscr;
- stats->write_syscalls = p->ioac.syscw;
+ stats->read_char = p->ioac.rchar & KB_MASK;
+ stats->write_char = p->ioac.wchar & KB_MASK;
+ stats->read_syscalls = p->ioac.syscr & KB_MASK;
+ stats->write_syscalls = p->ioac.syscw & KB_MASK;
#ifdef CONFIG_TASK_IO_ACCOUNTING
- stats->read_bytes = p->ioac.read_bytes;
- stats->write_bytes = p->ioac.write_bytes;
- stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes;
+ stats->read_bytes = p->ioac.read_bytes & KB_MASK;
+ stats->write_bytes = p->ioac.write_bytes & KB_MASK;
+ stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK;
#else
stats->read_bytes = 0;
stats->write_bytes = 0;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 25fb1b0e53fa..1783aabc6128 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2412,8 +2412,13 @@ reflush:
for_each_cwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+ bool drained;
- if (!cwq->nr_active && list_empty(&cwq->delayed_works))
+ spin_lock_irq(&cwq->gcwq->lock);
+ drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
+ spin_unlock_irq(&cwq->gcwq->lock);
+
+ if (drained)
continue;
if (++flush_cnt == 10 ||