summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.locks4
-rw-r--r--kernel/Kconfig.preempt1
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/compat.c68
-rw-r--r--kernel/cpuset.c21
-rw-r--r--kernel/debug/debug_core.c1
-rw-r--r--kernel/debug/kdb/kdb_bt.c1
-rw-r--r--kernel/dma.c1
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/futex.c38
-rw-r--r--kernel/futex_compat.c38
-rw-r--r--kernel/irq/Kconfig15
-rw-r--r--kernel/irq/handle.c16
-rw-r--r--kernel/irq/irqdomain.c8
-rw-r--r--kernel/irq/manage.c19
-rw-r--r--kernel/irq/migration.c10
-rw-r--r--kernel/kexec.c7
-rw-r--r--kernel/module.c37
-rw-r--r--kernel/params.c39
-rw-r--r--kernel/pid_namespace.c33
-rw-r--r--kernel/rwsem.c1
-rw-r--r--kernel/sched/core.c63
-rw-r--r--kernel/sched/fair.c16
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/signal.c1
-rw-r--r--kernel/smp.c90
-rw-r--r--kernel/spinlock.c2
-rw-r--r--kernel/sys.c9
-rw-r--r--kernel/sysctl.c12
-rw-r--r--kernel/time.c6
-rw-r--r--kernel/time/alarmtimer.c8
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/time/ntp.c134
-rw-r--r--kernel/time/timekeeping.c51
34 files changed, 472 insertions, 286 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 5068e2a4e75f..2251882daf53 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -124,8 +124,8 @@ config INLINE_SPIN_LOCK_IRQSAVE
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_SPIN_LOCK_IRQSAVE
-config INLINE_SPIN_UNLOCK
- def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK)
+config UNINLINE_SPIN_UNLOCK
+ bool
config INLINE_SPIN_UNLOCK_BH
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 24e7cb0ba26a..3f9c97419f02 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY
config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
select PREEMPT_COUNT
+ select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
This option reduces the latency of the kernel by making
all kernel code (that is not executing in a critical section)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f4ea4b6f3cf1..ed64ccac67c9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1883,7 +1883,7 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
*/
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
- int retval;
+ int retval = 0;
struct cgroup_subsys *ss, *failed_ss = NULL;
struct cgroup *oldcgrp;
struct cgroupfs_root *root = cgrp->root;
diff --git a/kernel/compat.c b/kernel/compat.c
index f346cedfe24d..74ff8498809a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -31,11 +31,10 @@
#include <asm/uaccess.h>
/*
- * Note that the native side is already converted to a timespec, because
- * that's what we want anyway.
+ * Get/set struct timeval with struct timespec on the native side
*/
-static int compat_get_timeval(struct timespec *o,
- struct compat_timeval __user *i)
+static int compat_get_timeval_convert(struct timespec *o,
+ struct compat_timeval __user *i)
{
long usec;
@@ -46,8 +45,8 @@ static int compat_get_timeval(struct timespec *o,
return 0;
}
-static int compat_put_timeval(struct compat_timeval __user *o,
- struct timeval *i)
+static int compat_put_timeval_convert(struct compat_timeval __user *o,
+ struct timeval *i)
{
return (put_user(i->tv_sec, &o->tv_sec) ||
put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
@@ -117,7 +116,7 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
if (tv) {
struct timeval ktv;
do_gettimeofday(&ktv);
- if (compat_put_timeval(tv, &ktv))
+ if (compat_put_timeval_convert(tv, &ktv))
return -EFAULT;
}
if (tz) {
@@ -135,7 +134,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
struct timezone ktz;
if (tv) {
- if (compat_get_timeval(&kts, tv))
+ if (compat_get_timeval_convert(&kts, tv))
return -EFAULT;
}
if (tz) {
@@ -146,12 +145,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
}
+int get_compat_timeval(struct timeval *tv, const struct compat_timeval __user *ctv)
+{
+ return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) ||
+ __get_user(tv->tv_sec, &ctv->tv_sec) ||
+ __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(get_compat_timeval);
+
+int put_compat_timeval(const struct timeval *tv, struct compat_timeval __user *ctv)
+{
+ return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) ||
+ __put_user(tv->tv_sec, &ctv->tv_sec) ||
+ __put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(put_compat_timeval);
+
int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
{
return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
__get_user(ts->tv_sec, &cts->tv_sec) ||
__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}
+EXPORT_SYMBOL_GPL(get_compat_timespec);
int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts)
{
@@ -161,6 +177,42 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user
}
EXPORT_SYMBOL_GPL(put_compat_timespec);
+int compat_get_timeval(struct timeval *tv, const void __user *utv)
+{
+ if (COMPAT_USE_64BIT_TIME)
+ return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0;
+ else
+ return get_compat_timeval(tv, utv);
+}
+EXPORT_SYMBOL_GPL(compat_get_timeval);
+
+int compat_put_timeval(const struct timeval *tv, void __user *utv)
+{
+ if (COMPAT_USE_64BIT_TIME)
+ return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0;
+ else
+ return put_compat_timeval(tv, utv);
+}
+EXPORT_SYMBOL_GPL(compat_put_timeval);
+
+int compat_get_timespec(struct timespec *ts, const void __user *uts)
+{
+ if (COMPAT_USE_64BIT_TIME)
+ return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0;
+ else
+ return get_compat_timespec(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_get_timespec);
+
+int compat_put_timespec(const struct timespec *ts, void __user *uts)
+{
+ if (COMPAT_USE_64BIT_TIME)
+ return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0;
+ else
+ return put_compat_timespec(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_put_timespec);
+
static long compat_nanosleep_restart(struct restart_block *restart)
{
struct compat_timespec __user *rmtp;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1010cc61931f..b96ad75b7e64 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2162,10 +2162,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
mutex_unlock(&callback_mutex);
}
-int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
const struct cpuset *cs;
- int cpu;
rcu_read_lock();
cs = task_cs(tsk);
@@ -2186,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
* changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
* set any mask even if it is not right from task_cs() pov,
* the pending set_cpus_allowed_ptr() will fix things.
+ *
+ * select_fallback_rq() will fix things ups and set cpu_possible_mask
+ * if required.
*/
-
- cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
- if (cpu >= nr_cpu_ids) {
- /*
- * Either tsk->cpus_allowed is wrong (see above) or it
- * is actually empty. The latter case is only possible
- * if we are racing with remove_tasks_in_empty_cpuset().
- * Like above we can temporary set any mask and rely on
- * set_cpus_allowed_ptr() as synchronization point.
- */
- do_set_cpus_allowed(tsk, cpu_possible_mask);
- cpu = cpumask_any(cpu_active_mask);
- }
-
- return cpu;
}
void cpuset_init_current_mems_allowed(void)
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 3f88a45e6f0a..1dc53bae56e1 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -53,7 +53,6 @@
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
#include <linux/atomic.h>
-#include <asm/system.h>
#include "debug_core.h"
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 7179eac7b41c..07c9bbb94a0b 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -15,7 +15,6 @@
#include <linux/sched.h>
#include <linux/kdb.h>
#include <linux/nmi.h>
-#include <asm/system.h>
#include "kdb_private.h"
diff --git a/kernel/dma.c b/kernel/dma.c
index 68a2306522c8..6c6262f86c17 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -18,7 +18,6 @@
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <asm/dma.h>
-#include <asm/system.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index 3db1909faed9..d8bd3b425fa7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -474,7 +474,7 @@ static void close_files(struct files_struct * files)
i = j * __NFDBITS;
if (i >= fdt->max_fds)
break;
- set = fdt->open_fds->fds_bits[j++];
+ set = fdt->open_fds[j++];
while (set) {
if (set & 1) {
struct file * file = xchg(&fdt->fd[i], NULL);
diff --git a/kernel/futex.c b/kernel/futex.c
index 72efa1e4359a..e2b0fb9a0b3b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -59,6 +59,7 @@
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
+#include <linux/ptrace.h>
#include <asm/futex.h>
@@ -2443,40 +2444,31 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
{
struct robust_list_head __user *head;
unsigned long ret;
- const struct cred *cred = current_cred(), *pcred;
+ struct task_struct *p;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
+ WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
if (!pid)
- head = current->robust_list;
+ p = current;
else {
- struct task_struct *p;
-
- ret = -ESRCH;
- rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
- ret = -EPERM;
- pcred = __task_cred(p);
- /* If victim is in different user_ns, then uids are not
- comparable, so we must have CAP_SYS_PTRACE */
- if (cred->user->user_ns != pcred->user->user_ns) {
- if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
- goto ok;
- }
- /* If victim is in same user_ns, then uids are comparable */
- if (cred->euid != pcred->euid &&
- cred->euid != pcred->uid &&
- !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
-ok:
- head = p->robust_list;
- rcu_read_unlock();
}
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ goto err_unlock;
+
+ head = p->robust_list;
+ rcu_read_unlock();
+
if (put_user(sizeof(*head), len_ptr))
return -EFAULT;
return put_user(head, head_ptr);
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 5f9e689dc8f0..83e368b005fc 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -10,6 +10,7 @@
#include <linux/compat.h>
#include <linux/nsproxy.h>
#include <linux/futex.h>
+#include <linux/ptrace.h>
#include <asm/uaccess.h>
@@ -136,40 +137,31 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
{
struct compat_robust_list_head __user *head;
unsigned long ret;
- const struct cred *cred = current_cred(), *pcred;
+ struct task_struct *p;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
+ WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
if (!pid)
- head = current->compat_robust_list;
+ p = current;
else {
- struct task_struct *p;
-
- ret = -ESRCH;
- rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
- ret = -EPERM;
- pcred = __task_cred(p);
- /* If victim is in different user_ns, then uids are not
- comparable, so we must have CAP_SYS_PTRACE */
- if (cred->user->user_ns != pcred->user->user_ns) {
- if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
- goto ok;
- }
- /* If victim is in same user_ns, then uids are comparable */
- if (cred->euid != pcred->euid &&
- cred->euid != pcred->uid &&
- !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
-ok:
- head = p->compat_robust_list;
- rcu_read_unlock();
}
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ goto err_unlock;
+
+ head = p->compat_robust_list;
+ rcu_read_unlock();
+
if (put_user(sizeof(*head), len_ptr))
return -EFAULT;
return put_user(ptr_to_compat(head), head_ptr);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 5a38bf4de641..cf1a4a68ce44 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -13,7 +13,7 @@ config GENERIC_HARDIRQS
# Options selectable by the architecture code
# Make sparse irq Kconfig switch below available
-config HAVE_SPARSE_IRQ
+config MAY_HAVE_SPARSE_IRQ
bool
# Enable the generic irq autoprobe mechanism
@@ -56,13 +56,22 @@ config GENERIC_IRQ_CHIP
config IRQ_DOMAIN
bool
+config IRQ_DOMAIN_DEBUG
+ bool "Expose hardware/virtual IRQ mapping via debugfs"
+ depends on IRQ_DOMAIN && DEBUG_FS
+ help
+ This option will show the mapping relationship between hardware irq
+ numbers and Linux irq numbers. The mapping is exposed via debugfs
+ in the file "virq_mapping".
+
+ If you don't know what this means you don't need it.
+
# Support forced irq threading
config IRQ_FORCED_THREADING
bool
config SPARSE_IRQ
- bool "Support sparse irq numbering"
- depends on HAVE_SPARSE_IRQ
+ bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ
---help---
Sparse irq numbering is useful for distro kernels that want
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6ff84e6a954c..bdb180325551 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -54,14 +54,18 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
{
/*
- * Wake up the handler thread for this action. In case the
- * thread crashed and was killed we just pretend that we
- * handled the interrupt. The hardirq handler has disabled the
- * device interrupt, so no irq storm is lurking. If the
+ * In case the thread crashed and was killed we just pretend that
+ * we handled the interrupt. The hardirq handler has disabled the
+ * device interrupt, so no irq storm is lurking.
+ */
+ if (action->thread->flags & PF_EXITING)
+ return;
+
+ /*
+ * Wake up the handler thread for this action. If the
* RUNTHREAD bit is already set, nothing to do.
*/
- if ((action->thread->flags & PF_EXITING) ||
- test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
return;
/*
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index af48e59bc2ff..3601f3fbf67c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -632,7 +632,7 @@ unsigned int irq_linear_revmap(struct irq_domain *domain,
return revmap[hwirq];
}
-#ifdef CONFIG_VIRQ_DEBUG
+#ifdef CONFIG_IRQ_DOMAIN_DEBUG
static int virq_debug_show(struct seq_file *m, void *private)
{
unsigned long flags;
@@ -668,7 +668,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
data = irq_desc_get_chip_data(desc);
seq_printf(m, "0x%16p ", data);
- if (desc->irq_data.domain->of_node)
+ if (desc->irq_data.domain && desc->irq_data.domain->of_node)
p = desc->irq_data.domain->of_node->full_name;
else
p = none;
@@ -695,14 +695,14 @@ static const struct file_operations virq_debug_fops = {
static int __init irq_debugfs_init(void)
{
- if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root,
+ if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL,
NULL, &virq_debug_fops) == NULL)
return -ENOMEM;
return 0;
}
__initcall(irq_debugfs_init);
-#endif /* CONFIG_VIRQ_DEBUG */
+#endif /* CONFIG_IRQ_DOMAIN_DEBUG */
int irq_domain_simple_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hwirq)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index b0ccd1ac2d6a..89a3ea82569b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -282,7 +282,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct cpumask *set = irq_default_affinity;
- int ret;
+ int ret, node = desc->irq_data.node;
/* Excludes PER_CPU and NO_BALANCE interrupts */
if (!irq_can_set_affinity(irq))
@@ -301,6 +301,13 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
}
cpumask_and(mask, cpu_online_mask, set);
+ if (node != NUMA_NO_NODE) {
+ const struct cpumask *nodemask = cpumask_of_node(node);
+
+ /* make sure at least one of the cpus in nodemask is online */
+ if (cpumask_intersects(mask, nodemask))
+ cpumask_and(mask, mask, nodemask);
+ }
ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
switch (ret) {
case IRQ_SET_MASK_OK:
@@ -645,7 +652,7 @@ static int irq_wait_for_interrupt(struct irqaction *action)
* is marked MASKED.
*/
static void irq_finalize_oneshot(struct irq_desc *desc,
- struct irqaction *action, bool force)
+ struct irqaction *action)
{
if (!(desc->istate & IRQS_ONESHOT))
return;
@@ -679,7 +686,7 @@ again:
* we would clear the threads_oneshot bit of this thread which
* was just set.
*/
- if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
goto out_unlock;
desc->threads_oneshot &= ~action->thread_mask;
@@ -739,7 +746,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
local_bh_disable();
ret = action->thread_fn(action->irq, action->dev_id);
- irq_finalize_oneshot(desc, action, false);
+ irq_finalize_oneshot(desc, action);
local_bh_enable();
return ret;
}
@@ -755,7 +762,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
irqreturn_t ret;
ret = action->thread_fn(action->irq, action->dev_id);
- irq_finalize_oneshot(desc, action, false);
+ irq_finalize_oneshot(desc, action);
return ret;
}
@@ -844,7 +851,7 @@ void exit_irq_thread(void)
wake_threads_waitq(desc);
/* Prevent a stale desc->threads_oneshot */
- irq_finalize_oneshot(desc, action, true);
+ irq_finalize_oneshot(desc, action);
}
static void irq_setup_forced_threading(struct irqaction *new)
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 47420908fba0..c3c89751b327 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -43,12 +43,16 @@ void irq_move_masked_irq(struct irq_data *idata)
* masking the irqs.
*/
if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
- < nr_cpu_ids))
- if (!chip->irq_set_affinity(&desc->irq_data,
- desc->pending_mask, false)) {
+ < nr_cpu_ids)) {
+ int ret = chip->irq_set_affinity(&desc->irq_data,
+ desc->pending_mask, false);
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
cpumask_copy(desc->irq_data.affinity, desc->pending_mask);
+ case IRQ_SET_MASK_OK_NOCOPY:
irq_set_thread_affinity(desc);
}
+ }
cpumask_clear(desc->pending_mask);
}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index a6a675cb9818..4e2e472f6aeb 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -37,7 +37,6 @@
#include <asm/page.h>
#include <asm/uaccess.h>
#include <asm/io.h>
-#include <asm/system.h>
#include <asm/sections.h>
/* Per cpu memory for storing cpu states in case of system crash. */
@@ -1359,6 +1358,10 @@ static int __init parse_crashkernel_simple(char *cmdline,
if (*cur == '@')
*crash_base = memparse(cur+1, &cur);
+ else if (*cur != ' ' && *cur != '\0') {
+ pr_warning("crashkernel: unrecognized char\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -1462,7 +1465,9 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(init_uts_ns);
VMCOREINFO_SYMBOL(node_online_map);
+#ifdef CONFIG_MMU
VMCOREINFO_SYMBOL(swapper_pg_dir);
+#endif
VMCOREINFO_SYMBOL(_stext);
VMCOREINFO_SYMBOL(vmlist);
diff --git a/kernel/module.c b/kernel/module.c
index 2c932760fd33..78ac6ec1e425 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -105,6 +105,7 @@ struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
/* Block module loading/unloading? */
int modules_disabled = 0;
+core_param(nomodule, modules_disabled, bint, 0);
/* Waiting for a module to finish initializing? */
static DECLARE_WAIT_QUEUE_HEAD(module_wq);
@@ -903,6 +904,36 @@ static ssize_t show_refcnt(struct module_attribute *mattr,
static struct module_attribute modinfo_refcnt =
__ATTR(refcnt, 0444, show_refcnt, NULL);
+void __module_get(struct module *module)
+{
+ if (module) {
+ preempt_disable();
+ __this_cpu_inc(module->refptr->incs);
+ trace_module_get(module, _RET_IP_);
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL(__module_get);
+
+bool try_module_get(struct module *module)
+{
+ bool ret = true;
+
+ if (module) {
+ preempt_disable();
+
+ if (likely(module_is_live(module))) {
+ __this_cpu_inc(module->refptr->incs);
+ trace_module_get(module, _RET_IP_);
+ } else
+ ret = false;
+
+ preempt_enable();
+ }
+ return ret;
+}
+EXPORT_SYMBOL(try_module_get);
+
void module_put(struct module *module)
{
if (module) {
@@ -2380,8 +2411,7 @@ static int copy_and_check(struct load_info *info,
return -ENOEXEC;
/* Suck in entire file: we'll want most of it. */
- /* vmalloc barfs on "unusual" numbers. Check here */
- if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
+ if ((hdr = vmalloc(len)) == NULL)
return -ENOMEM;
if (copy_from_user(hdr, umod, len) != 0) {
@@ -2922,7 +2952,8 @@ static struct module *load_module(void __user *umod,
mutex_unlock(&module_mutex);
/* Module is ready to execute: parsing args may do that. */
- err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
+ err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
+ -32768, 32767, NULL);
if (err < 0)
goto unlink;
diff --git a/kernel/params.c b/kernel/params.c
index 47f5bf12434a..f37d82631347 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -87,6 +87,8 @@ static int parse_one(char *param,
char *val,
const struct kernel_param *params,
unsigned num_params,
+ s16 min_level,
+ s16 max_level,
int (*handle_unknown)(char *param, char *val))
{
unsigned int i;
@@ -95,6 +97,9 @@ static int parse_one(char *param,
/* Find parameter */
for (i = 0; i < num_params; i++) {
if (parameq(param, params[i].name)) {
+ if (params[i].level < min_level
+ || params[i].level > max_level)
+ return 0;
/* No one handled NULL, so do it here. */
if (!val && params[i].ops->set != param_set_bool
&& params[i].ops->set != param_set_bint)
@@ -174,6 +179,8 @@ int parse_args(const char *name,
char *args,
const struct kernel_param *params,
unsigned num,
+ s16 min_level,
+ s16 max_level,
int (*unknown)(char *param, char *val))
{
char *param, *val;
@@ -189,7 +196,8 @@ int parse_args(const char *name,
args = next_arg(args, &param, &val);
irq_was_disabled = irqs_disabled();
- ret = parse_one(param, val, params, num, unknown);
+ ret = parse_one(param, val, params, num,
+ min_level, max_level, unknown);
if (irq_was_disabled && !irqs_disabled()) {
printk(KERN_WARNING "parse_args(): option '%s' enabled "
"irq's!\n", param);
@@ -297,35 +305,18 @@ EXPORT_SYMBOL(param_ops_charp);
/* Actually could be a bool or an int, for historical reasons. */
int param_set_bool(const char *val, const struct kernel_param *kp)
{
- bool v;
- int ret;
-
/* No equals means "set"... */
if (!val) val = "1";
/* One of =[yYnN01] */
- ret = strtobool(val, &v);
- if (ret)
- return ret;
-
- if (kp->flags & KPARAM_ISBOOL)
- *(bool *)kp->arg = v;
- else
- *(int *)kp->arg = v;
- return 0;
+ return strtobool(val, kp->arg);
}
EXPORT_SYMBOL(param_set_bool);
int param_get_bool(char *buffer, const struct kernel_param *kp)
{
- bool val;
- if (kp->flags & KPARAM_ISBOOL)
- val = *(bool *)kp->arg;
- else
- val = *(int *)kp->arg;
-
/* Y and N chosen as being relatively non-coder friendly */
- return sprintf(buffer, "%c", val ? 'Y' : 'N');
+ return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N');
}
EXPORT_SYMBOL(param_get_bool);
@@ -343,7 +334,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp)
struct kernel_param dummy;
dummy.arg = &boolval;
- dummy.flags = KPARAM_ISBOOL;
ret = param_set_bool(val, &dummy);
if (ret == 0)
*(bool *)kp->arg = !boolval;
@@ -372,7 +362,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
/* Match bool exactly, by re-using it. */
boolkp = *kp;
boolkp.arg = &v;
- boolkp.flags |= KPARAM_ISBOOL;
ret = param_set_bool(val, &boolkp);
if (ret == 0)
@@ -393,7 +382,7 @@ static int param_array(const char *name,
unsigned int min, unsigned int max,
void *elem, int elemsize,
int (*set)(const char *, const struct kernel_param *kp),
- u16 flags,
+ s16 level,
unsigned int *num)
{
int ret;
@@ -403,7 +392,7 @@ static int param_array(const char *name,
/* Get the name right for errors. */
kp.name = name;
kp.arg = elem;
- kp.flags = flags;
+ kp.level = level;
*num = 0;
/* We expect a comma-separated list of values. */
@@ -444,7 +433,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp)
unsigned int temp_num;
return param_array(kp->name, val, 1, arr->max, arr->elem,
- arr->elemsize, arr->ops->set, kp->flags,
+ arr->elemsize, arr->ops->set, kp->level,
arr->num ?: &temp_num);
}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 17b232869a04..57bc1fd35b3c 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -15,6 +15,7 @@
#include <linux/acct.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
+#include <linux/reboot.h>
#define BITS_PER_PAGE (PAGE_SIZE*8)
@@ -183,6 +184,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
+ if (pid_ns->reboot)
+ current->signal->group_exit_code = pid_ns->reboot;
+
acct_exit_ns(pid_ns);
return;
}
@@ -217,6 +221,35 @@ static struct ctl_table pid_ns_ctl_table[] = {
static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
+{
+ if (pid_ns == &init_pid_ns)
+ return 0;
+
+ switch (cmd) {
+ case LINUX_REBOOT_CMD_RESTART2:
+ case LINUX_REBOOT_CMD_RESTART:
+ pid_ns->reboot = SIGHUP;
+ break;
+
+ case LINUX_REBOOT_CMD_POWER_OFF:
+ case LINUX_REBOOT_CMD_HALT:
+ pid_ns->reboot = SIGINT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ read_lock(&tasklist_lock);
+ force_sig(SIGKILL, pid_ns->child_reaper);
+ read_unlock(&tasklist_lock);
+
+ do_exit(0);
+
+ /* Not reached */
+ return 0;
+}
+
static __init int pid_namespaces_init(void)
{
pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index b152f74f02de..6850f53e02d8 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -10,7 +10,6 @@
#include <linux/export.h>
#include <linux/rwsem.h>
-#include <asm/system.h>
#include <linux/atomic.h>
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 503d6426126d..e3ed0ecee7c7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -73,6 +73,7 @@
#include <linux/init_task.h>
#include <linux/binfmts.h>
+#include <asm/switch_to.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
#include <asm/mutex.h>
@@ -1264,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process);
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
{
- int dest_cpu;
const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+ enum { cpuset, possible, fail } state = cpuset;
+ int dest_cpu;
/* Look for allowed, online CPU in same node. */
- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+ for_each_cpu_mask(dest_cpu, *nodemask) {
+ if (!cpu_online(dest_cpu))
+ continue;
+ if (!cpu_active(dest_cpu))
+ continue;
if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
return dest_cpu;
+ }
- /* Any allowed, online CPU? */
- dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
- if (dest_cpu < nr_cpu_ids)
- return dest_cpu;
+ for (;;) {
+ /* Any allowed, online CPU? */
+ for_each_cpu_mask(dest_cpu, *tsk_cpus_allowed(p)) {
+ if (!cpu_online(dest_cpu))
+ continue;
+ if (!cpu_active(dest_cpu))
+ continue;
+ goto out;
+ }
- /* No more Mr. Nice Guy. */
- dest_cpu = cpuset_cpus_allowed_fallback(p);
- /*
- * Don't tell them about moving exiting tasks or
- * kernel threads (both mm NULL), since they never
- * leave kernel.
- */
- if (p->mm && printk_ratelimit()) {
- printk_sched("process %d (%s) no longer affine to cpu%d\n",
- task_pid_nr(p), p->comm, cpu);
+ switch (state) {
+ case cpuset:
+ /* No more Mr. Nice Guy. */
+ cpuset_cpus_allowed_fallback(p);
+ state = possible;
+ break;
+
+ case possible:
+ do_set_cpus_allowed(p, cpu_possible_mask);
+ state = fail;
+ break;
+
+ case fail:
+ BUG();
+ break;
+ }
+ }
+
+out:
+ if (state != cpuset) {
+ /*
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
+ * leave kernel.
+ */
+ if (p->mm && printk_ratelimit()) {
+ printk_sched("process %d (%s) no longer affine to cpu%d\n",
+ task_pid_nr(p), p->comm, cpu);
+ }
}
return dest_cpu;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 94340c7544a9..0d97ebdc58f0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
#endif /* CONFIG_FAIR_GROUP_SCHED */
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
- unsigned long delta_exec);
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
/**************************************************************
* Scheduling class tree data structure manipulation methods:
@@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
__clear_buddies_skip(se);
}
-static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
resched_task(rq_of(cfs_rq)->curr);
}
-static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
- unsigned long delta_exec)
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec)
{
if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
return;
@@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq)
}
#else /* CONFIG_CFS_BANDWIDTH */
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
- unsigned long delta_exec) {}
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {}
static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
-static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b60dad720173..44af55e6d5d0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
next_idx:
if (idx >= MAX_RT_PRIO)
continue;
- if (next && next->prio < idx)
+ if (next && next->prio <= idx)
continue;
list_for_each_entry(rt_se, array->queue + idx, run_list) {
struct task_struct *p;
diff --git a/kernel/signal.c b/kernel/signal.c
index d523da02dd14..17afcaf582d0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -36,6 +36,7 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/siginfo.h>
+#include <asm/cacheflush.h>
#include "audit.h" /* audit_signal_info() */
/*
diff --git a/kernel/smp.c b/kernel/smp.c
index db197d60489b..2f8b10ecf759 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -701,3 +701,93 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
return ret;
}
EXPORT_SYMBOL(on_each_cpu);
+
+/**
+ * on_each_cpu_mask(): Run a function on processors specified by
+ * cpumask, which may include the local processor.
+ * @mask: The set of cpus to run on (only runs on online subset).
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed
+ * on other CPUs.
+ *
+ * If @wait is true, then returns once @func has returned.
+ *
+ * You must not call this function with disabled interrupts or
+ * from a hardware interrupt handler or from a bottom half handler.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
+ void *info, bool wait)
+{
+ int cpu = get_cpu();
+
+ smp_call_function_many(mask, func, info, wait);
+ if (cpumask_test_cpu(cpu, mask)) {
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ }
+ put_cpu();
+}
+EXPORT_SYMBOL(on_each_cpu_mask);
+
+/*
+ * on_each_cpu_cond(): Call a function on each processor for which
+ * the supplied function cond_func returns true, optionally waiting
+ * for all the required CPUs to finish. This may include the local
+ * processor.
+ * @cond_func: A callback function that is passed a cpu id and
+ * the the info parameter. The function is called
+ * with preemption disabled. The function should
+ * return a blooean value indicating whether to IPI
+ * the specified CPU.
+ * @func: The function to run on all applicable CPUs.
+ * This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to both functions.
+ * @wait: If true, wait (atomically) until function has
+ * completed on other CPUs.
+ * @gfp_flags: GFP flags to use when allocating the cpumask
+ * used internally by the function.
+ *
+ * The function might sleep if the GFP flags indicates a non
+ * atomic allocation is allowed.
+ *
+ * Preemption is disabled to protect against CPUs going offline but not online.
+ * CPUs going online during the call will not be seen or sent an IPI.
+ *
+ * You must not call this function with disabled interrupts or
+ * from a hardware interrupt handler or from a bottom half handler.
+ */
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+ smp_call_func_t func, void *info, bool wait,
+ gfp_t gfp_flags)
+{
+ cpumask_var_t cpus;
+ int cpu, ret;
+
+ might_sleep_if(gfp_flags & __GFP_WAIT);
+
+ if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
+ preempt_disable();
+ for_each_online_cpu(cpu)
+ if (cond_func(cpu, info))
+ cpumask_set_cpu(cpu, cpus);
+ on_each_cpu_mask(cpus, func, info, wait);
+ preempt_enable();
+ free_cpumask_var(cpus);
+ } else {
+ /*
+ * No free cpumask, bother. No matter, we'll
+ * just have to IPI them one by one.
+ */
+ preempt_disable();
+ for_each_online_cpu(cpu)
+ if (cond_func(cpu, info)) {
+ ret = smp_call_function_single(cpu, func,
+ info, wait);
+ WARN_ON_ONCE(!ret);
+ }
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL(on_each_cpu_cond);
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 84c7d96918bf..5cdd8065a3ce 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -163,7 +163,7 @@ void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
EXPORT_SYMBOL(_raw_spin_lock_bh);
#endif
-#ifndef CONFIG_INLINE_SPIN_UNLOCK
+#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
{
__raw_spin_unlock(lock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 9eb7fcab8df6..e7006eb6c1e4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -444,6 +444,15 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
magic2 != LINUX_REBOOT_MAGIC2C))
return -EINVAL;
+ /*
+ * If pid namespaces are enabled and the current task is in a child
+ * pid_namespace, the command is handled by reboot_pid_ns() which will
+ * call do_exit().
+ */
+ ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
+ if (ret)
+ return ret;
+
/* Instead of trying to make the power_off code look like
* halt when pm_power_off is not set do it the easy way.
*/
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d48ff4fd44c3..52b3a06a02f8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -23,6 +23,7 @@
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/bitmap.h>
#include <linux/signal.h>
#include <linux/printk.h>
#include <linux/proc_fs.h>
@@ -68,6 +69,9 @@
#include <asm/stacktrace.h>
#include <asm/io.h>
#endif
+#ifdef CONFIG_SPARC
+#include <asm/setup.h>
+#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif
@@ -142,7 +146,6 @@ static const int cap_last_cap = CAP_LAST_CAP;
#include <linux/inotify.h>
#endif
#ifdef CONFIG_SPARC
-#include <asm/system.h>
#endif
#ifdef CONFIG_SPARC64
@@ -2393,9 +2396,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
}
}
- while (val_a <= val_b)
- set_bit(val_a++, tmp_bitmap);
-
+ bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
first = 0;
proc_skip_char(&kbuf, &left, '\n');
}
@@ -2438,8 +2439,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
if (*ppos)
bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
else
- memcpy(bitmap, tmp_bitmap,
- BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
+ bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
}
kfree(tmp_bitmap);
*lenp -= left;
diff --git a/kernel/time.c b/kernel/time.c
index 73e416db0a1e..ba744cf80696 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -163,7 +163,6 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
return error;
if (tz) {
- /* SMP safe, global irq locking makes it work. */
sys_tz = *tz;
update_vsyscall_tz();
if (firsttime) {
@@ -173,12 +172,7 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
}
}
if (tv)
- {
- /* SMP safe, again the code in arch/foo/time.c should
- * globally block out interrupts when it runs.
- */
return do_settimeofday(tv);
- }
return 0;
}
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 8a46f5d64504..8a538c55fc7b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -96,6 +96,11 @@ static int alarmtimer_rtc_add_device(struct device *dev,
return 0;
}
+static inline void alarmtimer_rtc_timer_init(void)
+{
+ rtc_timer_init(&rtctimer, NULL, NULL);
+}
+
static struct class_interface alarmtimer_rtc_interface = {
.add_dev = &alarmtimer_rtc_add_device,
};
@@ -117,6 +122,7 @@ static inline struct rtc_device *alarmtimer_get_rtcdev(void)
#define rtcdev (NULL)
static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
static inline void alarmtimer_rtc_interface_remove(void) { }
+static inline void alarmtimer_rtc_timer_init(void) { }
#endif
/**
@@ -783,6 +789,8 @@ static int __init alarmtimer_init(void)
.nsleep = alarm_timer_nsleep,
};
+ alarmtimer_rtc_timer_init();
+
posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index a45ca167ab24..c9583382141a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -500,7 +500,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
{
u64 ret;
/*
- * We won't try to correct for more then 11% adjustments (110,000 ppm),
+ * We won't try to correct for more than 11% adjustments (110,000 ppm),
*/
ret = (u64)cs->mult * 11;
do_div(ret,100);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 6e039b144daf..f03fd83b170b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -34,8 +34,6 @@ unsigned long tick_nsec;
static u64 tick_length;
static u64 tick_length_base;
-static struct hrtimer leap_timer;
-
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void)
/*
- * Leap second processing. If in leap-insert state at the end of the
- * day, the system clock is set back one second; if in leap-delete
- * state, the system clock is set ahead one second.
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ * Also handles leap second processing, and returns leap offset
*/
-static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
+int second_overflow(unsigned long secs)
{
- enum hrtimer_restart res = HRTIMER_NORESTART;
- unsigned long flags;
+ s64 delta;
int leap = 0;
+ unsigned long flags;
spin_lock_irqsave(&ntp_lock, flags);
+
+ /*
+ * Leap second processing. If in leap-insert state at the end of the
+ * day, the system clock is set back one second; if in leap-delete
+ * state, the system clock is set ahead one second.
+ */
switch (time_state) {
case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
break;
case TIME_INS:
- leap = -1;
- time_state = TIME_OOP;
- printk(KERN_NOTICE
- "Clock: inserting leap second 23:59:60 UTC\n");
- hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
- res = HRTIMER_RESTART;
+ if (secs % 86400 == 0) {
+ leap = -1;
+ time_state = TIME_OOP;
+ printk(KERN_NOTICE
+ "Clock: inserting leap second 23:59:60 UTC\n");
+ }
break;
case TIME_DEL:
- leap = 1;
- time_tai--;
- time_state = TIME_WAIT;
- printk(KERN_NOTICE
- "Clock: deleting leap second 23:59:59 UTC\n");
+ if ((secs + 1) % 86400 == 0) {
+ leap = 1;
+ time_tai--;
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE
+ "Clock: deleting leap second 23:59:59 UTC\n");
+ }
break;
case TIME_OOP:
time_tai++;
time_state = TIME_WAIT;
- /* fall through */
+ break;
+
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
}
- spin_unlock_irqrestore(&ntp_lock, flags);
- /*
- * We have to call this outside of the ntp_lock to keep
- * the proper locking hierarchy
- */
- if (leap)
- timekeeping_leap_insert(leap);
-
- return res;
-}
-
-/*
- * this routine handles the overflow of the microsecond field
- *
- * The tricky bits of code to handle the accurate clock support
- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
- * They were originally developed for SUN and DEC kernels.
- * All the kudos should go to Dave for this stuff.
- */
-void second_overflow(void)
-{
- s64 delta;
- unsigned long flags;
-
- spin_lock_irqsave(&ntp_lock, flags);
/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -481,15 +472,17 @@ void second_overflow(void)
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
<< NTP_SCALE_SHIFT;
time_adjust = 0;
+
+
+
out:
spin_unlock_irqrestore(&ntp_lock, flags);
+
+ return leap;
}
#ifdef CONFIG_GENERIC_CMOS_UPDATE
-/* Disable the cmos update - used by virtualization and embedded */
-int no_sync_cmos_clock __read_mostly;
-
static void sync_cmos_clock(struct work_struct *work);
static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -536,35 +529,13 @@ static void sync_cmos_clock(struct work_struct *work)
static void notify_cmos_timer(void)
{
- if (!no_sync_cmos_clock)
- schedule_delayed_work(&sync_cmos_work, 0);
+ schedule_delayed_work(&sync_cmos_work, 0);
}
#else
static inline void notify_cmos_timer(void) { }
#endif
-/*
- * Start the leap seconds timer:
- */
-static inline void ntp_start_leap_timer(struct timespec *ts)
-{
- long now = ts->tv_sec;
-
- if (time_status & STA_INS) {
- time_state = TIME_INS;
- now += 86400 - now % 86400;
- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
-
- return;
- }
-
- if (time_status & STA_DEL) {
- time_state = TIME_DEL;
- now += 86400 - (now + 1) % 86400;
- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
- }
-}
/*
* Propagate a new txc->status value into the NTP state:
@@ -589,22 +560,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;
- switch (time_state) {
- case TIME_OK:
- ntp_start_leap_timer(ts);
- break;
- case TIME_INS:
- case TIME_DEL:
- time_state = TIME_OK;
- ntp_start_leap_timer(ts);
- case TIME_WAIT:
- if (!(time_status & (STA_INS | STA_DEL)))
- time_state = TIME_OK;
- break;
- case TIME_OOP:
- hrtimer_restart(&leap_timer);
- break;
- }
}
/*
* Called with the xtime lock held, so we can access and modify
@@ -686,9 +641,6 @@ int do_adjtimex(struct timex *txc)
(txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ))
return -EINVAL;
-
- if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
- hrtimer_cancel(&leap_timer);
}
if (txc->modes & ADJ_SETOFFSET) {
@@ -1010,6 +962,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
ntp_clear();
- hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- leap_timer.function = ntp_leap_second;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 15be32e19c6e..d66b21308f7c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
}
-void timekeeping_leap_insert(int leapsecond)
-{
- unsigned long flags;
-
- write_seqlock_irqsave(&timekeeper.lock, flags);
- timekeeper.xtime.tv_sec += leapsecond;
- timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
- timekeeping_update(false);
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
-
-}
-
/**
* timekeeping_forward_now - update clock to the current time
*
@@ -448,9 +436,12 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
static int change_clocksource(void *data)
{
struct clocksource *new, *old;
+ unsigned long flags;
new = (struct clocksource *) data;
+ write_seqlock_irqsave(&timekeeper.lock, flags);
+
timekeeping_forward_now();
if (!new->enable || new->enable(new) == 0) {
old = timekeeper.clock;
@@ -458,6 +449,10 @@ static int change_clocksource(void *data)
if (old->disable)
old->disable(old);
}
+ timekeeping_update(true);
+
+ write_sequnlock_irqrestore(&timekeeper.lock, flags);
+
return 0;
}
@@ -827,7 +822,7 @@ static void timekeeping_adjust(s64 offset)
int adj;
/*
- * The point of this is to check if the error is greater then half
+ * The point of this is to check if the error is greater than half
* an interval.
*
* First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
@@ -835,7 +830,7 @@ static void timekeeping_adjust(s64 offset)
* Note we subtract one in the shift, so that error is really error*2.
* This "saves" dividing(shifting) interval twice, but keeps the
* (error > interval) comparison as still measuring if error is
- * larger then half an interval.
+ * larger than half an interval.
*
* Note: It does not "save" on aggravation when reading the code.
*/
@@ -843,7 +838,7 @@ static void timekeeping_adjust(s64 offset)
if (error > interval) {
/*
* We now divide error by 4(via shift), which checks if
- * the error is greater then twice the interval.
+ * the error is greater than twice the interval.
* If it is greater, we need a bigadjust, if its smaller,
* we can adjust by 1.
*/
@@ -874,13 +869,15 @@ static void timekeeping_adjust(s64 offset)
} else /* No adjustment needed */
return;
- WARN_ONCE(timekeeper.clock->maxadj &&
- (timekeeper.mult + adj > timekeeper.clock->mult +
- timekeeper.clock->maxadj),
- "Adjusting %s more then 11%% (%ld vs %ld)\n",
+ if (unlikely(timekeeper.clock->maxadj &&
+ (timekeeper.mult + adj >
+ timekeeper.clock->mult + timekeeper.clock->maxadj))) {
+ printk_once(KERN_WARNING
+ "Adjusting %s more than 11%% (%ld vs %ld)\n",
timekeeper.clock->name, (long)timekeeper.mult + adj,
(long)timekeeper.clock->mult +
timekeeper.clock->maxadj);
+ }
/*
* So the following can be confusing.
*
@@ -952,7 +949,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
u64 raw_nsecs;
- /* If the offset is smaller then a shifted interval, do nothing */
+ /* If the offset is smaller than a shifted interval, do nothing */
if (offset < timekeeper.cycle_interval<<shift)
return offset;
@@ -962,9 +959,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
while (timekeeper.xtime_nsec >= nsecps) {
+ int leap;
timekeeper.xtime_nsec -= nsecps;
timekeeper.xtime.tv_sec++;
- second_overflow();
+ leap = second_overflow(timekeeper.xtime.tv_sec);
+ timekeeper.xtime.tv_sec += leap;
}
/* Accumulate raw time */
@@ -1018,13 +1017,13 @@ static void update_wall_time(void)
* With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently,
* we calculate the largest doubling multiple of cycle_intervals
- * that is smaller then the offset. We then accumulate that
+ * that is smaller than the offset. We then accumulate that
* chunk in one go, and then try to consume the next smaller
* doubled multiple.
*/
shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
shift = max(0, shift);
- /* Bound shift to one less then what overflows tick_length */
+ /* Bound shift to one less than what overflows tick_length */
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
shift = min(shift, maxshift);
while (offset >= timekeeper.cycle_interval) {
@@ -1072,12 +1071,14 @@ static void update_wall_time(void)
/*
* Finally, make sure that after the rounding
- * xtime.tv_nsec isn't larger then NSEC_PER_SEC
+ * xtime.tv_nsec isn't larger than NSEC_PER_SEC
*/
if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
+ int leap;
timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
timekeeper.xtime.tv_sec++;
- second_overflow();
+ leap = second_overflow(timekeeper.xtime.tv_sec);
+ timekeeper.xtime.tv_sec += leap;
}
timekeeping_update(false);