Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile             |   2
-rw-r--r--  kernel/audit.c              |   3
-rw-r--r--  kernel/auditfilter.c        |   9
-rw-r--r--  kernel/auditsc.c            |  28
-rw-r--r--  kernel/cpu.c                |  24
-rw-r--r--  kernel/cpuset.c             |   2
-rw-r--r--  kernel/fork.c               |   4
-rw-r--r--  kernel/futex.c              |  15
-rw-r--r--  kernel/futex_compat.c       |  12
-rw-r--r--  kernel/irq/chip.c           | 140
-rw-r--r--  kernel/irq/handle.c         |  19
-rw-r--r--  kernel/irq/manage.c         |   4
-rw-r--r--  kernel/irq/migration.c      |  34
-rw-r--r--  kernel/irq/proc.c           |   2
-rw-r--r--  kernel/irq/resend.c         |   2
-rw-r--r--  kernel/irq/spurious.c       |  10
-rw-r--r--  kernel/lockdep.c            |  23
-rw-r--r--  kernel/module.c             |  94
-rw-r--r--  kernel/mutex-debug.c        |   2
-rw-r--r--  kernel/nsproxy.c            |   6
-rw-r--r--  kernel/posix-cpu-timers.c   |  27
-rw-r--r--  kernel/power/disk.c         |   8
-rw-r--r--  kernel/power/poweroff.c     |   3
-rw-r--r--  kernel/power/swap.c         |   3
-rw-r--r--  kernel/power/user.c         |  10
-rw-r--r--  kernel/printk.c             |  11
-rw-r--r--  kernel/profile.c            |   7
-rw-r--r--  kernel/rcupdate.c           |  11
-rw-r--r--  kernel/rcutorture.c         | 317
-rw-r--r--  kernel/relay.c              |  41
-rw-r--r--  kernel/rtmutex-debug.c      |   1
-rw-r--r--  kernel/rtmutex-tester.c     |   1
-rw-r--r--  kernel/sched.c              |  24
-rw-r--r--  kernel/srcu.c               | 258
-rw-r--r--  kernel/sys.c                | 125
-rw-r--r--  kernel/sys_ni.c             |   1
-rw-r--r--  kernel/sysctl.c             |   4
-rw-r--r--  kernel/time/jiffies.c       |   2
-rw-r--r--  kernel/workqueue.c          |   7
39 files changed, 1038 insertions, 258 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d948ca12acf0..5e3f3b75563a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o rwsem.o latency.o nsproxy.o
+ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
diff --git a/kernel/audit.c b/kernel/audit.c
index f9889ee77825..98106f6078b0 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -340,7 +340,7 @@ static int kauditd_thread(void *dummy)
{
struct sk_buff *skb;
- while (1) {
+ while (!kthread_should_stop()) {
skb = skb_dequeue(&audit_skb_queue);
wake_up(&audit_backlog_wait);
if (skb) {
@@ -369,6 +369,7 @@ static int kauditd_thread(void *dummy)
remove_wait_queue(&kauditd_wait, &wait);
}
}
+ return 0;
}
int audit_send_list(void *_dest)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 1a58a81fb09d..4f40d923af8e 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -411,7 +411,6 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
case AUDIT_FSGID:
case AUDIT_LOGINUID:
case AUDIT_PERS:
- case AUDIT_ARCH:
case AUDIT_MSGTYPE:
case AUDIT_PPID:
case AUDIT_DEVMAJOR:
@@ -423,6 +422,14 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
case AUDIT_ARG2:
case AUDIT_ARG3:
break;
+ /* arch is only allowed to be = or != */
+ case AUDIT_ARCH:
+ if ((f->op != AUDIT_NOT_EQUAL) && (f->op != AUDIT_EQUAL)
+ && (f->op != AUDIT_NEGATE) && (f->op)) {
+ err = -EINVAL;
+ goto exit_free;
+ }
+ break;
case AUDIT_PERM:
if (f->val & ~15)
goto exit_free;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 105147631753..42f2f1179711 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -278,8 +278,11 @@ static int audit_filter_rules(struct task_struct *tsk,
result = audit_comparator(tsk->pid, f->op, f->val);
break;
case AUDIT_PPID:
- if (ctx)
+ if (ctx) {
+ if (!ctx->ppid)
+ ctx->ppid = sys_getppid();
result = audit_comparator(ctx->ppid, f->op, f->val);
+ }
break;
case AUDIT_UID:
result = audit_comparator(tsk->uid, f->op, f->val);
@@ -795,7 +798,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
/* tsk == current */
context->pid = tsk->pid;
- context->ppid = sys_getppid(); /* sic. tsk == current in all cases */
+ if (!context->ppid)
+ context->ppid = sys_getppid();
context->uid = tsk->uid;
context->gid = tsk->gid;
context->euid = tsk->euid;
@@ -1137,6 +1141,7 @@ void audit_syscall_entry(int arch, int major,
context->ctime = CURRENT_TIME;
context->in_syscall = 1;
context->auditable = !!(state == AUDIT_RECORD_CONTEXT);
+ context->ppid = 0;
}
/**
@@ -1352,7 +1357,13 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
}
update_context:
- idx = context->name_count++;
+ idx = context->name_count;
+ if (context->name_count == AUDIT_NAMES) {
+ printk(KERN_DEBUG "name_count maxed and losing %s\n",
+ found_name ?: "(null)");
+ return;
+ }
+ context->name_count++;
#if AUDIT_DEBUG
context->ino_count++;
#endif
@@ -1370,7 +1381,16 @@ update_context:
/* A parent was not found in audit_names, so copy the inode data for the
* provided parent. */
if (!found_name) {
- idx = context->name_count++;
+ idx = context->name_count;
+ if (context->name_count == AUDIT_NAMES) {
+ printk(KERN_DEBUG
+ "name_count maxed and losing parent inode data: dev=%02x:%02x, inode=%lu",
+ MAJOR(parent->i_sb->s_dev),
+ MINOR(parent->i_sb->s_dev),
+ parent->i_ino);
+ return;
+ }
+ context->name_count++;
#if AUDIT_DEBUG
context->ino_count++;
#endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 32c96628463e..27dd3ee47099 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -19,7 +19,7 @@
static DEFINE_MUTEX(cpu_add_remove_lock);
static DEFINE_MUTEX(cpu_bitmask_lock);
-static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
+static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
@@ -68,7 +68,11 @@ EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
/* Need to know about CPUs going up/down? */
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
{
- return blocking_notifier_chain_register(&cpu_chain, nb);
+ int ret;
+ mutex_lock(&cpu_add_remove_lock);
+ ret = raw_notifier_chain_register(&cpu_chain, nb);
+ mutex_unlock(&cpu_add_remove_lock);
+ return ret;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -77,7 +81,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
void unregister_cpu_notifier(struct notifier_block *nb)
{
- blocking_notifier_chain_unregister(&cpu_chain, nb);
+ mutex_lock(&cpu_add_remove_lock);
+ raw_notifier_chain_unregister(&cpu_chain, nb);
+ mutex_unlock(&cpu_add_remove_lock);
}
EXPORT_SYMBOL(unregister_cpu_notifier);
@@ -126,7 +132,7 @@ static int _cpu_down(unsigned int cpu)
if (!cpu_online(cpu))
return -EINVAL;
- err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
+ err = raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
(void *)(long)cpu);
if (err == NOTIFY_BAD) {
printk("%s: attempt to take down CPU %u failed\n",
@@ -146,7 +152,7 @@ static int _cpu_down(unsigned int cpu)
if (IS_ERR(p)) {
/* CPU didn't die: tell everyone. Can't complain. */
- if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
+ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
(void *)(long)cpu) == NOTIFY_BAD)
BUG();
@@ -169,7 +175,7 @@ static int _cpu_down(unsigned int cpu)
put_cpu();
/* CPU is completely dead: tell everyone. Too late to complain. */
- if (blocking_notifier_call_chain(&cpu_chain, CPU_DEAD,
+ if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD,
(void *)(long)cpu) == NOTIFY_BAD)
BUG();
@@ -206,7 +212,7 @@ static int __devinit _cpu_up(unsigned int cpu)
if (cpu_online(cpu) || !cpu_present(cpu))
return -EINVAL;
- ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
+ ret = raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
if (ret == NOTIFY_BAD) {
printk("%s: attempt to bring up CPU %u failed\n",
__FUNCTION__, cpu);
@@ -223,11 +229,11 @@ static int __devinit _cpu_up(unsigned int cpu)
BUG_ON(!cpu_online(cpu));
/* Now call notifier in preparation. */
- blocking_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
+ raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
out_notify:
if (ret != 0)
- blocking_notifier_call_chain(&cpu_chain,
+ raw_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED, hcpu);
return ret;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9d850ae13b1b..6313c38c930e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2137,7 +2137,7 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
* See also the previous routine cpuset_handle_cpuhp().
*/
-void cpuset_track_online_nodes()
+void cpuset_track_online_nodes(void)
{
common_cpu_mem_hotplug_unplug();
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 7dc6140baac6..29ebb30850ed 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -984,6 +984,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (!p)
goto fork_out;
+ rt_mutex_init_task(p);
+
#ifdef CONFIG_TRACE_IRQFLAGS
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
@@ -1088,8 +1090,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->lockdep_recursion = 0;
#endif
- rt_mutex_init_task(p);
-
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index 4aaf91951a43..b364e0026191 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1612,10 +1612,10 @@ sys_set_robust_list(struct robust_list_head __user *head,
* @len_ptr: pointer to a length field, the kernel fills in the header size
*/
asmlinkage long
-sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
size_t __user *len_ptr)
{
- struct robust_list_head *head;
+ struct robust_list_head __user *head;
unsigned long ret;
if (!pid)
@@ -1694,14 +1694,15 @@ retry:
* Fetch a robust-list pointer. Bit 0 signals PI futexes:
*/
static inline int fetch_robust_entry(struct robust_list __user **entry,
- struct robust_list __user **head, int *pi)
+ struct robust_list __user * __user *head,
+ int *pi)
{
unsigned long uentry;
- if (get_user(uentry, (unsigned long *)head))
+ if (get_user(uentry, (unsigned long __user *)head))
return -EFAULT;
- *entry = (void *)(uentry & ~1UL);
+ *entry = (void __user *)(uentry & ~1UL);
*pi = uentry & 1;
return 0;
@@ -1739,7 +1740,7 @@ void exit_robust_list(struct task_struct *curr)
return;
if (pending)
- handle_futex_death((void *)pending + futex_offset, curr, pip);
+ handle_futex_death((void __user *)pending + futex_offset, curr, pip);
while (entry != &head->list) {
/*
@@ -1747,7 +1748,7 @@ void exit_robust_list(struct task_struct *curr)
* don't process it twice:
*/
if (entry != pending)
- if (handle_futex_death((void *)entry + futex_offset,
+ if (handle_futex_death((void __user *)entry + futex_offset,
curr, pi))
return;
/*
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index c5cca3f65cb7..50f24eea6cd0 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -18,7 +18,7 @@
*/
static inline int
fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
- compat_uptr_t *head, int *pi)
+ compat_uptr_t __user *head, int *pi)
{
if (get_user(*uentry, head))
return -EFAULT;
@@ -62,7 +62,7 @@ void compat_exit_robust_list(struct task_struct *curr)
&head->list_op_pending, &pip))
return;
if (upending)
- handle_futex_death((void *)pending + futex_offset, curr, pip);
+ handle_futex_death((void __user *)pending + futex_offset, curr, pip);
while (compat_ptr(uentry) != &head->list) {
/*
@@ -70,7 +70,7 @@ void compat_exit_robust_list(struct task_struct *curr)
* dont process it twice:
*/
if (entry != pending)
- if (handle_futex_death((void *)entry + futex_offset,
+ if (handle_futex_death((void __user *)entry + futex_offset,
curr, pi))
return;
@@ -78,7 +78,7 @@ void compat_exit_robust_list(struct task_struct *curr)
* Fetch the next entry in the list:
*/
if (fetch_robust_entry(&uentry, &entry,
- (compat_uptr_t *)&entry->next, &pi))
+ (compat_uptr_t __user *)&entry->next, &pi))
return;
/*
* Avoid excessively long or circular lists:
@@ -103,10 +103,10 @@ compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
}
asmlinkage long
-compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr,
+compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
compat_size_t __user *len_ptr)
{
- struct compat_robust_list_head *head;
+ struct compat_robust_list_head __user *head;
unsigned long ret;
if (!pid)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 736cb0bd498f..2d0dc3efe813 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -18,6 +18,69 @@
#include "internals.h"
/**
+ * dynamic_irq_init - initialize a dynamically allocated irq
+ * @irq: irq number to initialize
+ */
+void dynamic_irq_init(unsigned int irq)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS) {
+ printk(KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
+ WARN_ON(1);
+ return;
+ }
+
+ /* Ensure we don't have left over values from a previous use of this irq */
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock, flags);
+ desc->status = IRQ_DISABLED;
+ desc->chip = &no_irq_chip;
+ desc->handle_irq = handle_bad_irq;
+ desc->depth = 1;
+ desc->handler_data = NULL;
+ desc->chip_data = NULL;
+ desc->action = NULL;
+ desc->irq_count = 0;
+ desc->irqs_unhandled = 0;
+#ifdef CONFIG_SMP
+ desc->affinity = CPU_MASK_ALL;
+#endif
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ * dynamic_irq_cleanup - cleanup a dynamically allocated irq
+ * @irq: irq number to initialize
+ */
+void dynamic_irq_cleanup(unsigned int irq)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS) {
+ printk(KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
+ WARN_ON(1);
+ return;
+ }
+
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock, flags);
+ if (desc->action) {
+ spin_unlock_irqrestore(&desc->lock, flags);
+ printk(KERN_ERR "Destroying IRQ%d without calling free_irq\n",
+ irq);
+ WARN_ON(1);
+ return;
+ }
+ desc->handle_irq = handle_bad_irq;
+ desc->chip = &no_irq_chip;
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+
+/**
* set_irq_chip - set the irq chip for an irq
* @irq: irq number
* @chip: pointer to irq chip description structure
@@ -186,7 +249,6 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
* handle_simple_irq - Simple and software-decoded IRQs.
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
- * @regs: pointer to a register structure
*
* Simple interrupts are either sent from a demultiplexing interrupt
* handler or come from hardware, where no interrupt hardware control
@@ -196,7 +258,7 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
* unmask issues if necessary.
*/
void fastcall
-handle_simple_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+handle_simple_irq(unsigned int irq, struct irq_desc *desc)
{
struct irqaction *action;
irqreturn_t action_ret;
@@ -216,9 +278,9 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
desc->status |= IRQ_INPROGRESS;
spin_unlock(&desc->lock);
- action_ret = handle_IRQ_event(irq, regs, action);
+ action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret, regs);
+ note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -230,7 +292,6 @@ out_unlock:
* handle_level_irq - Level type irq handler
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
- * @regs: pointer to a register structure
*
* Level type interrupts are active as long as the hardware line has
* the active level. This may require to mask the interrupt and unmask
@@ -238,7 +299,7 @@ out_unlock:
* interrupt line is back to inactive.
*/
void fastcall
-handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+handle_level_irq(unsigned int irq, struct irq_desc *desc)
{
unsigned int cpu = smp_processor_id();
struct irqaction *action;
@@ -266,9 +327,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
desc->status &= ~IRQ_PENDING;
spin_unlock(&desc->lock);
- action_ret = handle_IRQ_event(irq, regs, action);
+ action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret, regs);
+ note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -282,7 +343,6 @@ out_unlock:
* handle_fasteoi_irq - irq handler for transparent controllers
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
- * @regs: pointer to a register structure
*
* Only a single callback will be issued to the chip: an ->eoi()
* call when the interrupt has been serviced. This enables support
@@ -290,8 +350,7 @@ out_unlock:
* details in hardware, transparently.
*/
void fastcall
-handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc,
- struct pt_regs *regs)
+handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
{
unsigned int cpu = smp_processor_id();
struct irqaction *action;
@@ -319,9 +378,9 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc,
desc->status &= ~IRQ_PENDING;
spin_unlock(&desc->lock);
- action_ret = handle_IRQ_event(irq, regs, action);
+ action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret, regs);
+ note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -335,7 +394,6 @@ out:
* handle_edge_irq - edge type IRQ handler
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
- * @regs: pointer to a register structure
*
* Interrupt occures on the falling and/or rising edge of a hardware
* signal. The occurence is latched into the irq controller hardware
@@ -349,7 +407,7 @@ out:
* loop is left.
*/
void fastcall
-handle_edge_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+handle_edge_irq(unsigned int irq, struct irq_desc *desc)
{
const unsigned int cpu = smp_processor_id();
@@ -400,9 +458,9 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
desc->status &= ~IRQ_PENDING;
spin_unlock(&desc->lock);
- action_ret = handle_IRQ_event(irq, regs, action);
+ action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret, regs);
+ note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
@@ -417,12 +475,11 @@ out_unlock:
* handle_percpu_IRQ - Per CPU local irq handler
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
- * @regs: pointer to a register structure
*
* Per CPU interrupts on SMP machines without locking requirements
*/
void fastcall
-handle_percpu_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
{
irqreturn_t action_ret;
@@ -431,9 +488,9 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
if (desc->chip->ack)
desc->chip->ack(irq);
- action_ret = handle_IRQ_event(irq, regs, desc->action);
+ action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret, regs);
+ note_interrupt(irq, desc, action_ret);
if (desc->chip->eoi)
desc->chip->eoi(irq);
@@ -442,10 +499,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
#endif /* CONFIG_SMP */
void
-__set_irq_handler(unsigned int irq,
- void fastcall (*handle)(unsigned int, irq_desc_t *,
- struct pt_regs *),
- int is_chained)
+__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+ const char *name)
{
struct irq_desc *desc;
unsigned long flags;
@@ -486,6 +541,7 @@ __set_irq_handler(unsigned int irq,
desc->depth = 1;
}
desc->handle_irq = handle;
+ desc->name = name;
if (handle != handle_bad_irq && is_chained) {
desc->status &= ~IRQ_DISABLED;
@@ -498,36 +554,16 @@ __set_irq_handler(unsigned int irq,
void
set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
- void fastcall (*handle)(unsigned int,
- struct irq_desc *,
- struct pt_regs *))
+ irq_flow_handler_t handle)
{
set_irq_chip(irq, chip);
- __set_irq_handler(irq, handle, 0);
+ __set_irq_handler(irq, handle, 0, NULL);
}
-/*
- * Get a descriptive string for the highlevel handler, for
- * /proc/interrupts output:
- */
-const char *
-handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
- struct pt_regs *))
+void
+set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle, const char *name)
{
- if (handle == handle_level_irq)
- return "level ";
- if (handle == handle_fasteoi_irq)
- return "fasteoi";
- if (handle == handle_edge_irq)
- return "edge ";
- if (handle == handle_simple_irq)
- return "simple ";
-#ifdef CONFIG_SMP
- if (handle == handle_percpu_irq)
- return "percpu ";
-#endif
- if (handle == handle_bad_irq)
- return "bad ";
-
- return NULL;
+ set_irq_chip(irq, chip);
+ __set_irq_handler(irq, handle, 0, name);
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 4c6cdbaed661..42aa6f1a3f0f 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -27,7 +27,7 @@
* Handles spurious and unhandled IRQ's. It also prints a debugmessage.
*/
void fastcall
-handle_bad_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+handle_bad_irq(unsigned int irq, struct irq_desc *desc)
{
print_irq_desc(irq, desc);
kstat_this_cpu.irqs[irq]++;
@@ -115,7 +115,7 @@ struct irq_chip dummy_irq_chip = {
/*
* Special, empty irq handler:
*/
-irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
+irqreturn_t no_action(int cpl, void *dev_id)
{
return IRQ_NONE;
}
@@ -123,13 +123,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
/**
* handle_IRQ_event - irq action chain handler
* @irq: the interrupt number
- * @regs: pointer to a register structure
* @action: the interrupt action chain for this irq
*
* Handles the action chain of an irq event
*/
-irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
- struct irqaction *action)
+irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
{
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
@@ -140,7 +138,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
local_irq_enable_in_hardirq();
do {
- ret = action->handler(irq, action->dev_id, regs);
+ ret = action->handler(irq, action->dev_id);
if (ret == IRQ_HANDLED)
status |= action->flags;
retval |= ret;
@@ -158,7 +156,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
/**
* __do_IRQ - original all in one highlevel IRQ handler
* @irq: the interrupt number
- * @regs: pointer to a register structure
*
* __do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
@@ -167,7 +164,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
* This is the original x86 implementation which is used for every
* interrupt type.
*/
-fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
+fastcall unsigned int __do_IRQ(unsigned int irq)
{
struct irq_desc *desc = irq_desc + irq;
struct irqaction *action;
@@ -182,7 +179,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
*/
if (desc->chip->ack)
desc->chip->ack(irq);
- action_ret = handle_IRQ_event(irq, regs, desc->action);
+ action_ret = handle_IRQ_event(irq, desc->action);
desc->chip->end(irq);
return 1;
}
@@ -233,11 +230,11 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
spin_unlock(&desc->lock);
- action_ret = handle_IRQ_event(irq, regs, action);
+ action_ret = handle_IRQ_event(irq, action);
spin_lock(&desc->lock);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret, regs);
+ note_interrupt(irq, desc, action_ret);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 92be519eff26..6879202afe9a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -427,8 +427,7 @@ EXPORT_SYMBOL(free_irq);
* IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
*
*/
-int request_irq(unsigned int irq,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+int request_irq(unsigned int irq, irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
{
struct irqaction *action;
@@ -475,4 +474,3 @@ int request_irq(unsigned int irq,
return retval;
}
EXPORT_SYMBOL(request_irq);
-
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index a57ebe9fa6f6..4baa3bbcd25a 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -7,17 +7,17 @@ void set_pending_irq(unsigned int irq, cpumask_t mask)
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
- desc->move_irq = 1;
+ desc->status |= IRQ_MOVE_PENDING;
irq_desc[irq].pending_mask = mask;
spin_unlock_irqrestore(&desc->lock, flags);
}
-void move_native_irq(int irq)
+void move_masked_irq(int irq)
{
struct irq_desc *desc = irq_desc + irq;
cpumask_t tmp;
- if (likely(!desc->move_irq))
+ if (likely(!(desc->status & IRQ_MOVE_PENDING)))
return;
/*
@@ -28,7 +28,7 @@ void move_native_irq(int irq)
return;
}
- desc->move_irq = 0;
+ desc->status &= ~IRQ_MOVE_PENDING;
if (unlikely(cpus_empty(irq_desc[irq].pending_mask)))
return;
@@ -48,15 +48,29 @@ void move_native_irq(int irq)
* when an active trigger is comming in. This could
* cause some ioapics to mal-function.
* Being paranoid i guess!
+ *
+ * For correct operation this depends on the caller
+ * masking the irqs.
*/
if (likely(!cpus_empty(tmp))) {
- if (likely(!(desc->status & IRQ_DISABLED)))
- desc->chip->disable(irq);
-
desc->chip->set_affinity(irq,tmp);
-
- if (likely(!(desc->status & IRQ_DISABLED)))
- desc->chip->enable(irq);
}
cpus_clear(irq_desc[irq].pending_mask);
}
+
+void move_native_irq(int irq)
+{
+ struct irq_desc *desc = irq_desc + irq;
+
+ if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+ return;
+
+ if (likely(!(desc->status & IRQ_DISABLED)))
+ desc->chip->disable(irq);
+
+ move_masked_irq(irq);
+
+ if (likely(!(desc->status & IRQ_DISABLED)))
+ desc->chip->enable(irq);
+}
+
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 607c7809ad01..9a352667007c 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -57,7 +57,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
if (!irq_desc[irq].chip->set_affinity || no_irq_affinity)
return -EIO;
- err = cpumask_parse(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, new_value);
if (err)
return err;
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 35f10f7ff94a..5bfeaed7e487 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg)
clear_bit(irq, irqs_resend);
desc = irq_desc + irq;
local_irq_disable();
- desc->handle_irq(irq, desc, NULL);
+ desc->handle_irq(irq, desc);
local_irq_enable();
}
}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 417e98092cf2..543ea2e5ad93 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -16,7 +16,7 @@ static int irqfixup __read_mostly;
/*
* Recovery handler for misrouted interrupts.
*/
-static int misrouted_irq(int irq, struct pt_regs *regs)
+static int misrouted_irq(int irq)
{
int i;
int ok = 0;
@@ -49,7 +49,7 @@ static int misrouted_irq(int irq, struct pt_regs *regs)
while (action) {
/* Only shared IRQ handlers are safe to call */
if (action->flags & IRQF_SHARED) {
- if (action->handler(i, action->dev_id, regs) ==
+ if (action->handler(i, action->dev_id) ==
IRQ_HANDLED)
ok = 1;
}
@@ -70,7 +70,7 @@ static int misrouted_irq(int irq, struct pt_regs *regs)
*/
work = 1;
spin_unlock(&desc->lock);
- handle_IRQ_event(i, regs, action);
+ handle_IRQ_event(i, action);
spin_lock(&desc->lock);
desc->status &= ~IRQ_PENDING;
}
@@ -136,7 +136,7 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
}
void note_interrupt(unsigned int irq, struct irq_desc *desc,
- irqreturn_t action_ret, struct pt_regs *regs)
+ irqreturn_t action_ret)
{
if (unlikely(action_ret != IRQ_HANDLED)) {
desc->irqs_unhandled++;
@@ -147,7 +147,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
if (unlikely(irqfixup)) {
/* Don't punish working computers */
if ((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE) {
- int ok = misrouted_irq(irq, regs);
+ int ok = misrouted_irq(irq);
if (action_ret == IRQ_NONE)
desc->irqs_unhandled -= ok;
}
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 4c0553461000..b739be2a6dc9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -575,6 +575,8 @@ static noinline int print_circular_bug_tail(void)
return 0;
}
+#define RECURSION_LIMIT 40
+
static int noinline print_infinite_recursion_bug(void)
{
__raw_spin_unlock(&hash_lock);
@@ -595,7 +597,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
debug_atomic_inc(&nr_cyclic_check_recursions);
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
/*
* Check this lock's dependency list:
@@ -645,7 +647,7 @@ find_usage_forwards(struct lock_class *source, unsigned int depth)
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
debug_atomic_inc(&nr_find_usage_forwards_checks);
@@ -684,7 +686,7 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
debug_atomic_inc(&nr_find_usage_backwards_checks);
@@ -1114,8 +1116,6 @@ static int count_matching_names(struct lock_class *new_class)
return count + 1;
}
-extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
-
/*
* Register a lock's class in the hash-table, if the class is not present
* yet. Otherwise we look it up. We cache the result in the lock object
@@ -1153,8 +1153,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
* (or spin_lock_init()) call - which acts as the key. For static
* locks we use the lock object itself as the key.
*/
- if (sizeof(struct lock_class_key) > sizeof(struct lock_class))
- __error_too_big_MAX_LOCKDEP_SUBCLASSES();
+ BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class));
key = lock->key->subkeys + subclass;
@@ -1177,7 +1176,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
* itself, so actual lookup of the hash should be once per lock object.
*/
static inline struct lock_class *
-register_lock_class(struct lockdep_map *lock, unsigned int subclass)
+register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
{
struct lockdep_subclass_key *key;
struct list_head *hash_head;
@@ -1249,7 +1248,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass)
out_unlock_set:
__raw_spin_unlock(&hash_lock);
- if (!subclass)
+ if (!subclass || force)
lock->class_cache = class;
DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
@@ -1937,7 +1936,7 @@ void trace_softirqs_off(unsigned long ip)
* Initialize a lock instance's lock-class mapping info:
*/
void lockdep_init_map(struct lockdep_map *lock, const char *name,
- struct lock_class_key *key)
+ struct lock_class_key *key, int subclass)
{
if (unlikely(!debug_locks))
return;
@@ -1957,6 +1956,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
lock->name = name;
lock->key = key;
lock->class_cache = NULL;
+ if (subclass)
+ register_lock_class(lock, subclass, 1);
}
EXPORT_SYMBOL_GPL(lockdep_init_map);
@@ -1995,7 +1996,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
* Not cached yet or subclass?
*/
if (unlikely(!class)) {
- class = register_lock_class(lock, subclass);
+ class = register_lock_class(lock, subclass, 0);
if (!class)
return 0;
}
diff --git a/kernel/module.c b/kernel/module.c
index 7f60e782de1e..67009bd56c52 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -87,6 +87,12 @@ static inline int strong_try_module_get(struct module *mod)
return try_module_get(mod);
}
+static inline void add_taint_module(struct module *mod, unsigned flag)
+{
+ add_taint(flag);
+ mod->taints |= flag;
+}
+
/* A thread that wants to hold a reference to a module only while it
* is running can call ths to safely exit.
* nfsd and lockd use this.
@@ -847,12 +853,10 @@ static int check_version(Elf_Shdr *sechdrs,
return 0;
}
/* Not in module's version table. OK, but that taints the kernel. */
- if (!(tainted & TAINT_FORCED_MODULE)) {
+ if (!(tainted & TAINT_FORCED_MODULE))
printk("%s: no version for \"%s\" found: kernel tainted.\n",
mod->name, symname);
- add_taint(TAINT_FORCED_MODULE);
- mod->taints |= TAINT_FORCED_MODULE;
- }
+ add_taint_module(mod, TAINT_FORCED_MODULE);
return 1;
}
@@ -910,7 +914,8 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
unsigned long ret;
const unsigned long *crc;
- ret = __find_symbol(name, &owner, &crc, mod->license_gplok);
+ ret = __find_symbol(name, &owner, &crc,
+ !(mod->taints & TAINT_PROPRIETARY_MODULE));
if (ret) {
/* use_module can fail due to OOM, or module unloading */
if (!check_version(sechdrs, versindex, name, mod, crc) ||
@@ -1335,12 +1340,11 @@ static void set_license(struct module *mod, const char *license)
if (!license)
license = "unspecified";
- mod->license_gplok = license_is_gpl_compatible(license);
- if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) {
- printk(KERN_WARNING "%s: module license '%s' taints kernel.\n",
- mod->name, license);
- add_taint(TAINT_PROPRIETARY_MODULE);
- mod->taints |= TAINT_PROPRIETARY_MODULE;
+ if (!license_is_gpl_compatible(license)) {
+ if (!(tainted & TAINT_PROPRIETARY_MODULE))
+ printk(KERN_WARNING "%s: module license '%s' taints"
+ "kernel.\n", mod->name, license);
+ add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
}
}
@@ -1619,8 +1623,7 @@ static struct module *load_module(void __user *umod,
modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
/* This is allowed: modprobe --force will invalidate it. */
if (!modmagic) {
- add_taint(TAINT_FORCED_MODULE);
- mod->taints |= TAINT_FORCED_MODULE;
+ add_taint_module(mod, TAINT_FORCED_MODULE);
printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
mod->name);
} else if (!same_magic(modmagic, vermagic)) {
@@ -1714,14 +1717,10 @@ static struct module *load_module(void __user *umod,
/* Set up license info based on the info section */
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
- if (strcmp(mod->name, "ndiswrapper") == 0) {
- add_taint(TAINT_PROPRIETARY_MODULE);
- mod->taints |= TAINT_PROPRIETARY_MODULE;
- }
- if (strcmp(mod->name, "driverloader") == 0) {
- add_taint(TAINT_PROPRIETARY_MODULE);
- mod->taints |= TAINT_PROPRIETARY_MODULE;
- }
+ if (strcmp(mod->name, "ndiswrapper") == 0)
+ add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+ if (strcmp(mod->name, "driverloader") == 0)
+ add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
/* Set up MODINFO_ATTR fields */
setup_modinfo(mod, sechdrs, infoindex);
@@ -1766,8 +1765,7 @@ static struct module *load_module(void __user *umod,
(mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
printk(KERN_WARNING "%s: No versions for exported symbols."
" Tainting kernel.\n", mod->name);
- add_taint(TAINT_FORCED_MODULE);
- mod->taints |= TAINT_FORCED_MODULE;
+ add_taint_module(mod, TAINT_FORCED_MODULE);
}
#endif
@@ -2132,9 +2130,33 @@ static void m_stop(struct seq_file *m, void *p)
mutex_unlock(&module_mutex);
}
+static char *taint_flags(unsigned int taints, char *buf)
+{
+ int bx = 0;
+
+ if (taints) {
+ buf[bx++] = '(';
+ if (taints & TAINT_PROPRIETARY_MODULE)
+ buf[bx++] = 'P';
+ if (taints & TAINT_FORCED_MODULE)
+ buf[bx++] = 'F';
+ /*
+ * TAINT_FORCED_RMMOD: could be added.
+ * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
+ * apply to modules.
+ */
+ buf[bx++] = ')';
+ }
+ buf[bx] = '\0';
+
+ return buf;
+}
+
static int m_show(struct seq_file *m, void *p)
{
struct module *mod = list_entry(p, struct module, list);
+ char buf[8];
+
seq_printf(m, "%s %lu",
mod->name, mod->init_size + mod->core_size);
print_unload_info(m, mod);
@@ -2147,6 +2169,10 @@ static int m_show(struct seq_file *m, void *p)
/* Used by oprofile and other similar tools. */
seq_printf(m, " 0x%p", mod->module_core);
+ /* Taints info */
+ if (mod->taints)
+ seq_printf(m, " %s", taint_flags(mod->taints, buf));
+
seq_printf(m, "\n");
return 0;
}
@@ -2235,28 +2261,6 @@ struct module *module_text_address(unsigned long addr)
return mod;
}
-static char *taint_flags(unsigned int taints, char *buf)
-{
- *buf = '\0';
- if (taints) {
- int bx;
-
- buf[0] = '(';
- bx = 1;
- if (taints & TAINT_PROPRIETARY_MODULE)
- buf[bx++] = 'P';
- if (taints & TAINT_FORCED_MODULE)
- buf[bx++] = 'F';
- /*
- * TAINT_FORCED_RMMOD: could be added.
- * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
- * apply to modules.
- */
- buf[bx] = ')';
- }
- return buf;
-}
-
/* Don't grab lock, we're oopsing. */
void print_modules(void)
{
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index e3203c654dda..18651641a7b5 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -91,7 +91,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
- lockdep_init_map(&lock->dep_map, name, key);
+ lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
lock->owner = NULL;
lock->magic = lock;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 6ebdb82a0ce4..674aceb7335a 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -44,11 +44,9 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
{
struct nsproxy *ns;
- ns = kmalloc(sizeof(struct nsproxy), GFP_KERNEL);
- if (ns) {
- memcpy(ns, orig, sizeof(struct nsproxy));
+ ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
+ if (ns)
atomic_set(&ns->count, 1);
- }
return ns;
}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 479b16b44f79..7c3e1e6dfb5b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -88,6 +88,19 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
}
/*
+ * Divide and limit the result to res >= 1
+ *
+ * This is necessary to prevent signal delivery starvation, when the result of
+ * the division would be rounded down to 0.
+ */
+static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
+{
+ cputime_t res = cputime_div(time, div);
+
+ return max_t(cputime_t, res, 1);
+}
+
+/*
* Update expiry time from increment, and increase overrun count,
* given the current clock sample.
*/
@@ -483,8 +496,8 @@ static void process_timer_rebalance(struct task_struct *p,
BUG();
break;
case CPUCLOCK_PROF:
- left = cputime_div(cputime_sub(expires.cpu, val.cpu),
- nthreads);
+ left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
+ nthreads);
do {
if (likely(!(t->flags & PF_EXITING))) {
ticks = cputime_add(prof_ticks(t), left);
@@ -498,8 +511,8 @@ static void process_timer_rebalance(struct task_struct *p,
} while (t != p);
break;
case CPUCLOCK_VIRT:
- left = cputime_div(cputime_sub(expires.cpu, val.cpu),
- nthreads);
+ left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
+ nthreads);
do {
if (likely(!(t->flags & PF_EXITING))) {
ticks = cputime_add(virt_ticks(t), left);
@@ -515,6 +528,7 @@ static void process_timer_rebalance(struct task_struct *p,
case CPUCLOCK_SCHED:
nsleft = expires.sched - val.sched;
do_div(nsleft, nthreads);
+ nsleft = max_t(unsigned long long, nsleft, 1);
do {
if (likely(!(t->flags & PF_EXITING))) {
ns = t->sched_time + nsleft;
@@ -1159,12 +1173,13 @@ static void check_process_timers(struct task_struct *tsk,
prof_left = cputime_sub(prof_expires, utime);
prof_left = cputime_sub(prof_left, stime);
- prof_left = cputime_div(prof_left, nthreads);
+ prof_left = cputime_div_non_zero(prof_left, nthreads);
virt_left = cputime_sub(virt_expires, utime);
- virt_left = cputime_div(virt_left, nthreads);
+ virt_left = cputime_div_non_zero(virt_left, nthreads);
if (sched_expires) {
sched_left = sched_expires - sched_time;
do_div(sched_left, nthreads);
+ sched_left = max_t(unsigned long long, sched_left, 1);
} else {
sched_left = 0;
}
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index d72234942798..d3a158a60312 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pm.h>
+#include <linux/console.h>
#include <linux/cpu.h>
#include "power.h"
@@ -119,8 +120,10 @@ int pm_suspend_disk(void)
if (error)
return error;
+ suspend_console();
error = device_suspend(PMSG_FREEZE);
if (error) {
+ resume_console();
printk("Some devices failed to suspend\n");
unprepare_processes();
return error;
@@ -133,6 +136,7 @@ int pm_suspend_disk(void)
if (in_suspend) {
device_resume();
+ resume_console();
pr_debug("PM: writing image.\n");
error = swsusp_write();
if (!error)
@@ -148,6 +152,7 @@ int pm_suspend_disk(void)
swsusp_free();
Done:
device_resume();
+ resume_console();
unprepare_processes();
return error;
}
@@ -212,7 +217,9 @@ static int software_resume(void)
pr_debug("PM: Preparing devices for restore.\n");
+ suspend_console();
if ((error = device_suspend(PMSG_PRETHAW))) {
+ resume_console();
printk("Some devices failed to suspend\n");
swsusp_free();
goto Thaw;
@@ -224,6 +231,7 @@ static int software_resume(void)
swsusp_resume();
pr_debug("PM: Restore failed, recovering.n");
device_resume();
+ resume_console();
Thaw:
unprepare_processes();
Done:
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 7a4144ba3afd..f1f900ac3164 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -23,8 +23,7 @@ static void do_poweroff(void *dummy)
static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
-static void handle_poweroff(int key, struct pt_regs *pt_regs,
- struct tty_struct *tty)
+static void handle_poweroff(int key, struct tty_struct *tty)
{
schedule_work(&poweroff_work);
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 9b2ee5344dee..1a3b0dd2c3fc 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -425,7 +425,8 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
bio_set_pages_dirty(bio);
bio_put(bio);
} else {
- get_page(page);
+ if (rw == READ)
+ get_page(page); /* These pages are freed later */
bio->bi_private = *bio_chain;
*bio_chain = bio;
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 72825c853cd7..d991d3b0e5a4 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -19,6 +19,7 @@
#include <linux/swapops.h>
#include <linux/pm.h>
#include <linux/fs.h>
+#include <linux/console.h>
#include <linux/cpu.h>
#include <asm/uaccess.h>
@@ -145,10 +146,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = freeze_processes();
if (error) {
thaw_processes();
+ enable_nonboot_cpus();
error = -EBUSY;
}
}
- enable_nonboot_cpus();
up(&pm_sem);
if (!error)
data->frozen = 1;
@@ -173,12 +174,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
if (!error) {
+ suspend_console();
error = device_suspend(PMSG_FREEZE);
if (!error) {
in_suspend = 1;
error = swsusp_suspend();
device_resume();
}
+ resume_console();
}
up(&pm_sem);
if (!error)
@@ -196,11 +199,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
snapshot_free_unused_memory(&data->handle);
down(&pm_sem);
pm_prepare_console();
+ suspend_console();
error = device_suspend(PMSG_PRETHAW);
if (!error) {
error = swsusp_resume();
device_resume();
}
+ resume_console();
pm_restore_console();
up(&pm_sem);
break;
@@ -289,6 +294,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
}
/* Put devices to sleep */
+ suspend_console();
error = device_suspend(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "Failed to suspend some devices.\n");
@@ -299,7 +305,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
/* Wake up devices */
device_resume();
}
-
+ resume_console();
if (pm_ops->finish)
pm_ops->finish(PM_SUSPEND_MEM);
diff --git a/kernel/printk.c b/kernel/printk.c
index 771f5e861bcd..f7d427ef5038 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -820,15 +820,8 @@ void release_console_sem(void)
console_locked = 0;
up(&console_sem);
spin_unlock_irqrestore(&logbuf_lock, flags);
- if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
- /*
- * If we printk from within the lock dependency code,
- * from within the scheduler code, then do not lock
- * up due to self-recursion:
- */
- if (!lockdep_internal())
- wake_up_interruptible(&log_wait);
- }
+ if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
+ wake_up_interruptible(&log_wait);
}
EXPORT_SYMBOL(release_console_sem);
diff --git a/kernel/profile.c b/kernel/profile.c
index fb660c7d35ba..f940b462eec9 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -25,6 +25,7 @@
#include <linux/mutex.h>
#include <asm/sections.h>
#include <asm/semaphore.h>
+#include <asm/irq_regs.h>
struct profile_hit {
u32 pc, hits;
@@ -366,8 +367,10 @@ void profile_hit(int type, void *__pc)
}
#endif /* !CONFIG_SMP */
-void profile_tick(int type, struct pt_regs *regs)
+void profile_tick(int type)
{
+ struct pt_regs *regs = get_irq_regs();
+
if (type == CPU_PROFILING && timer_hook)
timer_hook(regs);
if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask))
@@ -396,7 +399,7 @@ static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffe
unsigned long full_count = count, err;
cpumask_t new_value;
- err = cpumask_parse(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, new_value);
if (err)
return err;
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 523e46483b99..26bb5ffe1ef1 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -71,9 +71,6 @@ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
static int blimit = 10;
static int qhimark = 10000;
static int qlowmark = 100;
-#ifdef CONFIG_SMP
-static int rsinterval = 1000;
-#endif
static atomic_t rcu_barrier_cpu_count;
static DEFINE_MUTEX(rcu_barrier_mutex);
@@ -86,8 +83,8 @@ static void force_quiescent_state(struct rcu_data *rdp,
int cpu;
cpumask_t cpumask;
set_need_resched();
- if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
- rdp->last_rs_qlen = rdp->qlen;
+ if (unlikely(!rcp->signaled)) {
+ rcp->signaled = 1;
/*
* Don't send IPI to itself. With irqs disabled,
* rdp->cpu is the current cpu.
@@ -301,6 +298,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
smp_mb();
cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+ rcp->signaled = 0;
}
}
@@ -628,9 +626,6 @@ void synchronize_rcu(void)
module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);
-#ifdef CONFIG_SMP
-module_param(rsinterval, int, 0);
-#endif
EXPORT_SYMBOL_GPL(rcu_batches_completed);
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
EXPORT_SYMBOL_GPL(call_rcu);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 23446e91cded..e2bda18f6f42 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -15,9 +15,10 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
- * Copyright (C) IBM Corporation, 2005
+ * Copyright (C) IBM Corporation, 2005, 2006
*
* Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ * Josh Triplett <josh@freedesktop.org>
*
* See also: Documentation/RCU/torture.txt
*/
@@ -44,19 +45,25 @@
#include <linux/delay.h>
#include <linux/byteorder/swabb.h>
#include <linux/stat.h>
+#include <linux/srcu.h>
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
+ "Josh Triplett <josh@freedesktop.org>");
static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */
+static int nfakewriters = 4; /* # fake writer threads */
static int stat_interval; /* Interval between stats, in seconds. */
/* Defaults to "only at end of test". */
static int verbose; /* Print more debug info. */
static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
-static char *torture_type = "rcu"; /* What to torture. */
+static char *torture_type = "rcu"; /* What RCU implementation to torture. */
module_param(nreaders, int, 0);
MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
+module_param(nfakewriters, int, 0);
+MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
module_param(stat_interval, int, 0);
MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
module_param(verbose, bool, 0);
@@ -66,7 +73,7 @@ MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
module_param(shuffle_interval, int, 0);
MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
module_param(torture_type, charp, 0);
-MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh)");
+MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
#define TORTURE_FLAG "-torture:"
#define PRINTK_STRING(s) \
@@ -80,6 +87,7 @@ static char printk_buf[4096];
static int nrealreaders;
static struct task_struct *writer_task;
+static struct task_struct **fakewriter_tasks;
static struct task_struct **reader_tasks;
static struct task_struct *stats_task;
static struct task_struct *shuffler_task;
@@ -104,11 +112,12 @@ static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) =
{ 0 };
static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
-atomic_t n_rcu_torture_alloc;
-atomic_t n_rcu_torture_alloc_fail;
-atomic_t n_rcu_torture_free;
-atomic_t n_rcu_torture_mberror;
-atomic_t n_rcu_torture_error;
+static atomic_t n_rcu_torture_alloc;
+static atomic_t n_rcu_torture_alloc_fail;
+static atomic_t n_rcu_torture_free;
+static atomic_t n_rcu_torture_mberror;
+static atomic_t n_rcu_torture_error;
+static struct list_head rcu_torture_removed;
/*
* Allocate an element from the rcu_tortures pool.
@@ -145,7 +154,7 @@ rcu_torture_free(struct rcu_torture *p)
struct rcu_random_state {
unsigned long rrs_state;
- unsigned long rrs_count;
+ long rrs_count;
};
#define RCU_RANDOM_MULT 39916801 /* prime */
@@ -158,7 +167,7 @@ struct rcu_random_state {
* Crude but fast random-number generator. Uses a linear congruential
* generator, with occasional help from get_random_bytes().
*/
-static long
+static unsigned long
rcu_random(struct rcu_random_state *rrsp)
{
long refresh;
@@ -180,9 +189,11 @@ struct rcu_torture_ops {
void (*init)(void);
void (*cleanup)(void);
int (*readlock)(void);
+ void (*readdelay)(struct rcu_random_state *rrsp);
void (*readunlock)(int idx);
int (*completed)(void);
void (*deferredfree)(struct rcu_torture *p);
+ void (*sync)(void);
int (*stats)(char *page);
char *name;
};
@@ -198,6 +209,18 @@ static int rcu_torture_read_lock(void) __acquires(RCU)
return 0;
}
+static void rcu_read_delay(struct rcu_random_state *rrsp)
+{
+ long delay;
+ const long longdelay = 200;
+
+ /* We want there to be long-running readers, but not all the time. */
+
+ delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay);
+ if (!delay)
+ udelay(longdelay);
+}
+
static void rcu_torture_read_unlock(int idx) __releases(RCU)
{
rcu_read_unlock();
@@ -239,13 +262,54 @@ static struct rcu_torture_ops rcu_ops = {
.init = NULL,
.cleanup = NULL,
.readlock = rcu_torture_read_lock,
+ .readdelay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
.completed = rcu_torture_completed,
.deferredfree = rcu_torture_deferred_free,
+ .sync = synchronize_rcu,
.stats = NULL,
.name = "rcu"
};
+static void rcu_sync_torture_deferred_free(struct rcu_torture *p)
+{
+ int i;
+ struct rcu_torture *rp;
+ struct rcu_torture *rp1;
+
+ cur_ops->sync();
+ list_add(&p->rtort_free, &rcu_torture_removed);
+ list_for_each_entry_safe(rp, rp1, &rcu_torture_removed, rtort_free) {
+ i = rp->rtort_pipe_count;
+ if (i > RCU_TORTURE_PIPE_LEN)
+ i = RCU_TORTURE_PIPE_LEN;
+ atomic_inc(&rcu_torture_wcount[i]);
+ if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
+ rp->rtort_mbtest = 0;
+ list_del(&rp->rtort_free);
+ rcu_torture_free(rp);
+ }
+ }
+}
+
+static void rcu_sync_torture_init(void)
+{
+ INIT_LIST_HEAD(&rcu_torture_removed);
+}
+
+static struct rcu_torture_ops rcu_sync_ops = {
+ .init = rcu_sync_torture_init,
+ .cleanup = NULL,
+ .readlock = rcu_torture_read_lock,
+ .readdelay = rcu_read_delay,
+ .readunlock = rcu_torture_read_unlock,
+ .completed = rcu_torture_completed,
+ .deferredfree = rcu_sync_torture_deferred_free,
+ .sync = synchronize_rcu,
+ .stats = NULL,
+ .name = "rcu_sync"
+};
+
/*
* Definitions for rcu_bh torture testing.
*/
@@ -271,19 +335,176 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
}
+struct rcu_bh_torture_synchronize {
+ struct rcu_head head;
+ struct completion completion;
+};
+
+static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head)
+{
+ struct rcu_bh_torture_synchronize *rcu;
+
+ rcu = container_of(head, struct rcu_bh_torture_synchronize, head);
+ complete(&rcu->completion);
+}
+
+static void rcu_bh_torture_synchronize(void)
+{
+ struct rcu_bh_torture_synchronize rcu;
+
+ init_completion(&rcu.completion);
+ call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
+ wait_for_completion(&rcu.completion);
+}
+
static struct rcu_torture_ops rcu_bh_ops = {
.init = NULL,
.cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
+ .readdelay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
.completed = rcu_bh_torture_completed,
.deferredfree = rcu_bh_torture_deferred_free,
+ .sync = rcu_bh_torture_synchronize,
.stats = NULL,
.name = "rcu_bh"
};
+static struct rcu_torture_ops rcu_bh_sync_ops = {
+ .init = rcu_sync_torture_init,
+ .cleanup = NULL,
+ .readlock = rcu_bh_torture_read_lock,
+ .readdelay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = rcu_bh_torture_read_unlock,
+ .completed = rcu_bh_torture_completed,
+ .deferredfree = rcu_sync_torture_deferred_free,
+ .sync = rcu_bh_torture_synchronize,
+ .stats = NULL,
+ .name = "rcu_bh_sync"
+};
+
+/*
+ * Definitions for srcu torture testing.
+ */
+
+static struct srcu_struct srcu_ctl;
+
+static void srcu_torture_init(void)
+{
+ init_srcu_struct(&srcu_ctl);
+ rcu_sync_torture_init();
+}
+
+static void srcu_torture_cleanup(void)
+{
+ synchronize_srcu(&srcu_ctl);
+ cleanup_srcu_struct(&srcu_ctl);
+}
+
+static int srcu_torture_read_lock(void)
+{
+ return srcu_read_lock(&srcu_ctl);
+}
+
+static void srcu_read_delay(struct rcu_random_state *rrsp)
+{
+ long delay;
+ const long uspertick = 1000000 / HZ;
+ const long longdelay = 10;
+
+ /* We want there to be long-running readers, but not all the time. */
+
+ delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick);
+ if (!delay)
+ schedule_timeout_interruptible(longdelay);
+}
+
+static void srcu_torture_read_unlock(int idx)
+{
+ srcu_read_unlock(&srcu_ctl, idx);
+}
+
+static int srcu_torture_completed(void)
+{
+ return srcu_batches_completed(&srcu_ctl);
+}
+
+static void srcu_torture_synchronize(void)
+{
+ synchronize_srcu(&srcu_ctl);
+}
+
+static int srcu_torture_stats(char *page)
+{
+ int cnt = 0;
+ int cpu;
+ int idx = srcu_ctl.completed & 0x1;
+
+ cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
+ torture_type, TORTURE_FLAG, idx);
+ for_each_possible_cpu(cpu) {
+ cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu,
+ per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
+ per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
+ }
+ cnt += sprintf(&page[cnt], "\n");
+ return cnt;
+}
+
+static struct rcu_torture_ops srcu_ops = {
+ .init = srcu_torture_init,
+ .cleanup = srcu_torture_cleanup,
+ .readlock = srcu_torture_read_lock,
+ .readdelay = srcu_read_delay,
+ .readunlock = srcu_torture_read_unlock,
+ .completed = srcu_torture_completed,
+ .deferredfree = rcu_sync_torture_deferred_free,
+ .sync = srcu_torture_synchronize,
+ .stats = srcu_torture_stats,
+ .name = "srcu"
+};
+
+/*
+ * Definitions for sched torture testing.
+ */
+
+static int sched_torture_read_lock(void)
+{
+ preempt_disable();
+ return 0;
+}
+
+static void sched_torture_read_unlock(int idx)
+{
+ preempt_enable();
+}
+
+static int sched_torture_completed(void)
+{
+ return 0;
+}
+
+static void sched_torture_synchronize(void)
+{
+ synchronize_sched();
+}
+
+static struct rcu_torture_ops sched_ops = {
+ .init = rcu_sync_torture_init,
+ .cleanup = NULL,
+ .readlock = sched_torture_read_lock,
+ .readdelay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = sched_torture_read_unlock,
+ .completed = sched_torture_completed,
+ .deferredfree = rcu_sync_torture_deferred_free,
+ .sync = sched_torture_synchronize,
+ .stats = NULL,
+ .name = "sched"
+};
+
static struct rcu_torture_ops *torture_ops[] =
- { &rcu_ops, &rcu_bh_ops, NULL };
+ { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &srcu_ops,
+ &sched_ops, NULL };
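The NULL-terminated torture_ops[] table is what lets a single string module parameter select the flavor under test; selection amounts to matching torture_type against each entry's ->name, which is presumably what rcu_torture_init() does. A minimal sketch of such a lookup (the helper name is hypothetical):

	static struct rcu_torture_ops *find_ops(const char *name)
	{
		int i;

		/* Walk the NULL-terminated table, matching on ->name. */
		for (i = 0; torture_ops[i] != NULL; i++)
			if (strcmp(torture_ops[i]->name, name) == 0)
				return torture_ops[i];
		return NULL;	/* unknown torture_type */
	}

With the entries added here, torture_type values such as "srcu", "rcu_bh_sync" or "sched" would resolve to the new ops.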
/*
* RCU torture writer kthread. Repeatedly substitutes a new structure
@@ -330,6 +551,30 @@ rcu_torture_writer(void *arg)
}
/*
+ * RCU torture fake writer kthread. Repeatedly calls sync, with a random
+ * delay between calls.
+ */
+static int
+rcu_torture_fakewriter(void *arg)
+{
+ DEFINE_RCU_RANDOM(rand);
+
+ VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
+ set_user_nice(current, 19);
+
+ do {
+ schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
+ udelay(rcu_random(&rand) & 0x3ff);
+ cur_ops->sync();
+ } while (!kthread_should_stop() && !fullstop);
+
+ VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
+ while (!kthread_should_stop())
+ schedule_timeout_uninterruptible(1);
+ return 0;
+}
+
+/*
* RCU torture reader kthread. Repeatedly dereferences rcu_torture_current,
* incrementing the corresponding element of the pipeline array. The
* counter in the element should never be greater than 1, otherwise, the
@@ -359,7 +604,7 @@ rcu_torture_reader(void *arg)
}
if (p->rtort_mbtest == 0)
atomic_inc(&n_rcu_torture_mberror);
- udelay(rcu_random(&rand) & 0x7f);
+ cur_ops->readdelay(&rand);
preempt_disable();
pipe_count = p->rtort_pipe_count;
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
@@ -483,7 +728,7 @@ static int rcu_idle_cpu; /* Force all torture tasks off this CPU */
/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case
* is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs.
*/
-void rcu_torture_shuffle_tasks(void)
+static void rcu_torture_shuffle_tasks(void)
{
cpumask_t tmp_mask = CPU_MASK_ALL;
int i;
@@ -507,6 +752,12 @@ void rcu_torture_shuffle_tasks(void)
set_cpus_allowed(reader_tasks[i], tmp_mask);
}
+ if (fakewriter_tasks != NULL) {
+ for (i = 0; i < nfakewriters; i++)
+ if (fakewriter_tasks[i])
+ set_cpus_allowed(fakewriter_tasks[i], tmp_mask);
+ }
+
if (writer_task)
set_cpus_allowed(writer_task, tmp_mask);
@@ -540,11 +791,12 @@ rcu_torture_shuffle(void *arg)
static inline void
rcu_torture_print_module_parms(char *tag)
{
- printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d "
+ printk(KERN_ALERT "%s" TORTURE_FLAG
+ "--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
"shuffle_interval = %d\n",
- torture_type, tag, nrealreaders, stat_interval, verbose,
- test_no_idle_hz, shuffle_interval);
+ torture_type, tag, nrealreaders, nfakewriters,
+ stat_interval, verbose, test_no_idle_hz, shuffle_interval);
}
static void
@@ -579,6 +831,19 @@ rcu_torture_cleanup(void)
}
rcu_torture_current = NULL;
+ if (fakewriter_tasks != NULL) {
+ for (i = 0; i < nfakewriters; i++) {
+ if (fakewriter_tasks[i] != NULL) {
+ VERBOSE_PRINTK_STRING(
+ "Stopping rcu_torture_fakewriter task");
+ kthread_stop(fakewriter_tasks[i]);
+ }
+ fakewriter_tasks[i] = NULL;
+ }
+ kfree(fakewriter_tasks);
+ fakewriter_tasks = NULL;
+ }
+
if (stats_task != NULL) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
kthread_stop(stats_task);
@@ -666,7 +931,25 @@ rcu_torture_init(void)
writer_task = NULL;
goto unwind;
}
- reader_tasks = kmalloc(nrealreaders * sizeof(reader_tasks[0]),
+ fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
+ GFP_KERNEL);
+ if (fakewriter_tasks == NULL) {
+ VERBOSE_PRINTK_ERRSTRING("out of memory");
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+ for (i = 0; i < nfakewriters; i++) {
+ VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task");
+ fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL,
+ "rcu_torture_fakewriter");
+ if (IS_ERR(fakewriter_tasks[i])) {
+ firsterr = PTR_ERR(fakewriter_tasks[i]);
+ VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter");
+ fakewriter_tasks[i] = NULL;
+ goto unwind;
+ }
+ }
+ reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_PRINTK_ERRSTRING("out of memory");
diff --git a/kernel/relay.c b/kernel/relay.c
index 1d63ecddfa70..f04bbdb56ac2 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -887,7 +887,7 @@ static int subbuf_read_actor(size_t read_start,
from = buf->start + read_start;
ret = avail;
- if (copy_to_user(desc->arg.data, from, avail)) {
+ if (copy_to_user(desc->arg.buf, from, avail)) {
desc->error = -EFAULT;
ret = 0;
}
@@ -946,24 +946,17 @@ typedef int (*subbuf_actor_t) (size_t read_start,
*/
static inline ssize_t relay_file_read_subbufs(struct file *filp,
loff_t *ppos,
- size_t count,
subbuf_actor_t subbuf_actor,
read_actor_t actor,
- void *target)
+ read_descriptor_t *desc)
{
struct rchan_buf *buf = filp->private_data;
size_t read_start, avail;
- read_descriptor_t desc;
int ret;
- if (!count)
+ if (!desc->count)
return 0;
- desc.written = 0;
- desc.count = count;
- desc.arg.data = target;
- desc.error = 0;
-
mutex_lock(&filp->f_dentry->d_inode->i_mutex);
do {
if (!relay_file_read_avail(buf, *ppos))
@@ -974,19 +967,19 @@ static inline ssize_t relay_file_read_subbufs(struct file *filp,
if (!avail)
break;
- avail = min(desc.count, avail);
- ret = subbuf_actor(read_start, buf, avail, &desc, actor);
- if (desc.error < 0)
+ avail = min(desc->count, avail);
+ ret = subbuf_actor(read_start, buf, avail, desc, actor);
+ if (desc->error < 0)
break;
if (ret) {
relay_file_read_consume(buf, read_start, ret);
*ppos = relay_file_read_end_pos(buf, read_start, ret);
}
- } while (desc.count && ret);
+ } while (desc->count && ret);
mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
- return desc.written;
+ return desc->written;
}
static ssize_t relay_file_read(struct file *filp,
@@ -994,8 +987,13 @@ static ssize_t relay_file_read(struct file *filp,
size_t count,
loff_t *ppos)
{
- return relay_file_read_subbufs(filp, ppos, count, subbuf_read_actor,
- NULL, buffer);
+ read_descriptor_t desc;
+ desc.written = 0;
+ desc.count = count;
+ desc.arg.buf = buffer;
+ desc.error = 0;
+ return relay_file_read_subbufs(filp, ppos, subbuf_read_actor,
+ NULL, &desc);
}
static ssize_t relay_file_sendfile(struct file *filp,
@@ -1004,8 +1002,13 @@ static ssize_t relay_file_sendfile(struct file *filp,
read_actor_t actor,
void *target)
{
- return relay_file_read_subbufs(filp, ppos, count, subbuf_send_actor,
- actor, target);
+ read_descriptor_t desc;
+ desc.written = 0;
+ desc.count = count;
+ desc.arg.data = target;
+ desc.error = 0;
+ return relay_file_read_subbufs(filp, ppos, subbuf_send_actor,
+ actor, &desc);
}
struct file_operations relay_file_operations = {
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 0c1faa950af7..da8d6bf46457 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -16,7 +16,6 @@
*
* See rt.c in preempt-rt for proper credits and further information
*/
-#include <linux/config.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/module.h>
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 948bd8f643e2..6dcea9dd8c94 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -6,7 +6,6 @@
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
*/
-#include <linux/config.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
diff --git a/kernel/sched.c b/kernel/sched.c
index 53608a59d6e3..3399701c680e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -160,15 +160,6 @@
#define TASK_PREEMPTS_CURR(p, rq) \
((p)->prio < (rq)->curr->prio)
-/*
- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
- * to time slice values: [800ms ... 100ms ... 5ms]
- *
- * The higher a thread's priority, the bigger timeslices
- * it gets during one round of execution. But even the lowest
- * priority thread gets MIN_TIMESLICE worth of execution time.
- */
-
#define SCALE_PRIO(x, prio) \
max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
@@ -180,6 +171,15 @@ static unsigned int static_prio_timeslice(int static_prio)
return SCALE_PRIO(DEF_TIMESLICE, static_prio);
}
+/*
+ * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
+ * to time slice values: [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_TIMESLICE worth of execution time.
+ */
+
static inline unsigned int task_timeslice(struct task_struct *p)
{
return static_prio_timeslice(p->static_prio);
@@ -1822,14 +1822,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm = next->mm;
struct mm_struct *oldmm = prev->active_mm;
- if (unlikely(!mm)) {
+ if (!mm) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
+ if (!prev->mm) {
prev->active_mm = NULL;
WARN_ON(rq->prev_mm);
rq->prev_mm = oldmm;
@@ -3491,7 +3491,7 @@ asmlinkage void __sched preempt_schedule(void)
* If there is a non-zero preempt_count or interrupts are disabled,
* we do not want to preempt the current task. Just return..
*/
- if (unlikely(ti->preempt_count || irqs_disabled()))
+ if (likely(ti->preempt_count || irqs_disabled()))
return;
need_resched:
diff --git a/kernel/srcu.c b/kernel/srcu.c
new file mode 100644
index 000000000000..3507cabe963b
--- /dev/null
+++ b/kernel/srcu.c
@@ -0,0 +1,258 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * Documentation/RCU/ *.txt
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/srcu.h>
+
+/**
+ * init_srcu_struct - initialize a sleep-RCU structure
+ * @sp: structure to initialize.
+ *
+ * Must invoke this on a given srcu_struct before passing that srcu_struct
+ * to any other function. Each srcu_struct represents a separate domain
+ * of SRCU protection.
+ */
+int init_srcu_struct(struct srcu_struct *sp)
+{
+ sp->completed = 0;
+ mutex_init(&sp->mutex);
+ sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+ return (sp->per_cpu_ref ? 0 : -ENOMEM);
+}
+
+/*
+ * srcu_readers_active_idx -- returns approximate number of readers
+ * active on the specified rank of per-CPU counters.
+ */
+
+static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
+{
+ int cpu;
+ int sum;
+
+ sum = 0;
+ for_each_possible_cpu(cpu)
+ sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
+ return sum;
+}
+
+/**
+ * srcu_readers_active - returns approximate number of readers.
+ * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
+ *
+ * Note that this is not an atomic primitive, and can therefore suffer
+ * severe errors when invoked on an active srcu_struct. That said, it
+ * can be useful as an error check at cleanup time.
+ */
+int srcu_readers_active(struct srcu_struct *sp)
+{
+ return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1);
+}
+
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+ int sum;
+
+ sum = srcu_readers_active(sp);
+ WARN_ON(sum); /* Leakage unless caller handles error. */
+ if (sum != 0)
+ return;
+ free_percpu(sp->per_cpu_ref);
+ sp->per_cpu_ref = NULL;
+}
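A minimal sketch of the intended lifecycle for a caller-owned srcu_struct (module and variable names below are illustrative):

	static struct srcu_struct my_srcu;

	static int __init my_init(void)
	{
		/* Allocates the per-CPU counter pairs; only ENOMEM can fail. */
		return init_srcu_struct(&my_srcu);
	}

	static void __exit my_exit(void)
	{
		/* All readers and updaters must be finished by this point. */
		cleanup_srcu_struct(&my_srcu);
	}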
+
+/**
+ * srcu_read_lock - register a new reader for an SRCU-protected structure.
+ * @sp: srcu_struct in which to register the new reader.
+ *
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct. Must be called from process context.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int srcu_read_lock(struct srcu_struct *sp)
+{
+ int idx;
+
+ preempt_disable();
+ idx = sp->completed & 0x1;
+ barrier(); /* ensure compiler looks -once- at sp->completed. */
+ per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++;
+ srcu_barrier(); /* ensure compiler won't misorder critical section. */
+ preempt_enable();
+ return idx;
+}
+
+/**
+ * srcu_read_unlock - unregister an old reader from an SRCU-protected structure.
+ * @sp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct. Note that this may well be a different
+ * CPU from the one on which the corresponding srcu_read_lock()
+ * incremented the counter.
+ * Must be called from process context.
+ */
+void srcu_read_unlock(struct srcu_struct *sp, int idx)
+{
+ preempt_disable();
+ srcu_barrier(); /* ensure compiler won't misorder critical section. */
+ per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
+ preempt_enable();
+}
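Taken together, a read-side critical section brackets its accesses with the index handed back by srcu_read_lock(); unlike classic RCU, the section may sleep. A sketch, assuming an SRCU-protected pointer gp, a srcu_struct my_srcu, and a process() consumer (all names illustrative):

	int idx;
	struct foo *p;

	idx = srcu_read_lock(&my_srcu);
	p = rcu_dereference(gp);	/* fetch the protected pointer */
	if (p != NULL)
		process(p);		/* may block inside the SRCU section */
	srcu_read_unlock(&my_srcu, idx);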
+
+/**
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Flip the completed counter, and wait for the old count to drain to zero.
+ * As with classic RCU, the updater must use some separate means of
+ * synchronizing concurrent updates. Can block; must be called from
+ * process context.
+ *
+ * Note that it is illegal to call synchronize_srcu() from the corresponding
+ * SRCU read-side critical section; doing so will result in deadlock.
+ * However, it is perfectly legal to call synchronize_srcu() on one
+ * srcu_struct from some other srcu_struct's read-side critical section.
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+ int idx;
+
+ idx = sp->completed;
+ mutex_lock(&sp->mutex);
+
+ /*
+ * Check to see if someone else did the work for us while we were
+ * waiting to acquire the lock. We need -two- advances of
+ * the counter, not just one. If there was but one, we might have
+ * shown up -after- our helper's first synchronize_sched(), thus
+ * having failed to prevent CPU-reordering races with concurrent
+ * srcu_read_unlock()s on other CPUs (see comment below). So we
+ * either (1) wait for two or (2) supply the second ourselves.
+ */
+
+ if ((sp->completed - idx) >= 2) {
+ mutex_unlock(&sp->mutex);
+ return;
+ }
+
+ synchronize_sched(); /* Force memory barrier on all CPUs. */
+
+ /*
+ * The preceding synchronize_sched() ensures that any CPU that
+ * sees the new value of sp->completed will also see any preceding
+ * changes to data structures made by this CPU. This prevents
+ * some other CPU from reordering the accesses in its SRCU
+ * read-side critical section to precede the corresponding
+ * srcu_read_lock() -- ensuring that such references will in
+ * fact be protected.
+ *
+ * So it is now safe to do the flip.
+ */
+
+ idx = sp->completed & 0x1;
+ sp->completed++;
+
+ synchronize_sched(); /* Force memory barrier on all CPUs. */
+
+ /*
+ * At this point, because of the preceding synchronize_sched(),
+ * all srcu_read_lock() calls using the old counters have completed.
+ * Their corresponding critical sections might well be still
+ * executing, but the srcu_read_lock() primitives themselves
+ * will have finished executing.
+ */
+
+ while (srcu_readers_active_idx(sp, idx))
+ schedule_timeout_interruptible(1);
+
+ synchronize_sched(); /* Force memory barrier on all CPUs. */
+
+ /*
+ * The preceding synchronize_sched() forces all srcu_read_unlock()
+ * primitives that were executing concurrently with the preceding
+ * for_each_possible_cpu() loop to have completed by this point.
+ * More importantly, it also forces the corresponding SRCU read-side
+ * critical sections to have also completed, and the corresponding
+ * references to SRCU-protected data items to be dropped.
+ *
+ * Note:
+ *
+ * Despite what you might think at first glance, the
+ * preceding synchronize_sched() -must- be within the
+ * critical section ended by the following mutex_unlock().
+ * Otherwise, a task taking the early exit can race
+ * with a srcu_read_unlock(), which might have executed
+ * just before the preceding srcu_readers_active() check,
+ * and whose CPU might have reordered the srcu_read_unlock()
+ * with the preceding critical section. In this case, there
+ * is nothing preventing the synchronize_sched() task that is
+ * taking the early exit from freeing a data structure that
+ * is still being referenced (out of order) by the task
+ * doing the srcu_read_unlock().
+ *
+ * Alternatively, the comparison with "2" on the early exit
+ * could be changed to "3", but this increases synchronize_srcu()
+ * latency for bulk loads. So the current code is preferred.
+ */
+
+ mutex_unlock(&sp->mutex);
+}
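On the update side this supports the usual publish-then-wait pattern; a sketch using the same illustrative names as above, remembering that concurrent updaters still need their own serialization:

	struct foo *old = gp;

	rcu_assign_pointer(gp, new);	/* publish the replacement */
	synchronize_srcu(&my_srcu);	/* wait out all pre-existing readers */
	kfree(old);			/* no reader can still hold a reference */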
+
+/**
+ * srcu_batches_completed - return batches completed.
+ * @sp: srcu_struct on which to report batch completion.
+ *
+ * Report the number of batches, correlated with, but not necessarily
+ * precisely the same as, the number of grace periods that have elapsed.
+ */
+
+long srcu_batches_completed(struct srcu_struct *sp)
+{
+ return sp->completed;
+}
+
+EXPORT_SYMBOL_GPL(init_srcu_struct);
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(srcu_read_lock);
+EXPORT_SYMBOL_GPL(srcu_read_unlock);
+EXPORT_SYMBOL_GPL(synchronize_srcu);
+EXPORT_SYMBOL_GPL(srcu_batches_completed);
+EXPORT_SYMBOL_GPL(srcu_readers_active);
diff --git a/kernel/sys.c b/kernel/sys.c
index 2314867ae34f..98489d82801b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -153,7 +153,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
/*
* Atomic notifier chain routines. Registration and unregistration
- * use a mutex, and call_chain is synchronized by RCU (no locks).
+ * use a spinlock, and call_chain is synchronized by RCU (no locks).
*/
/**
@@ -401,6 +401,129 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh,
EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
+/*
+ * SRCU notifier chain routines. Registration and unregistration
+ * use a mutex, and call_chain is synchronized by SRCU (no locks).
+ */
+
+/**
+ * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
+ * @nh: Pointer to head of the SRCU notifier chain
+ * @n: New entry in notifier chain
+ *
+ * Adds a notifier to an SRCU notifier chain.
+ * Must be called in process context.
+ *
+ * Currently always returns zero.
+ */
+
+int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
+ struct notifier_block *n)
+{
+ int ret;
+
+ /*
+ * This code gets used during boot-up, when task switching is
+ * not yet working and interrupts must remain disabled. At
+ * such times we must not call mutex_lock().
+ */
+ if (unlikely(system_state == SYSTEM_BOOTING))
+ return notifier_chain_register(&nh->head, n);
+
+ mutex_lock(&nh->mutex);
+ ret = notifier_chain_register(&nh->head, n);
+ mutex_unlock(&nh->mutex);
+ return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
+
+/**
+ * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
+ * @nh: Pointer to head of the SRCU notifier chain
+ * @n: Entry to remove from notifier chain
+ *
+ * Removes a notifier from an SRCU notifier chain.
+ * Must be called from process context.
+ *
+ * Returns zero on success or %-ENOENT on failure.
+ */
+int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
+ struct notifier_block *n)
+{
+ int ret;
+
+ /*
+ * This code gets used during boot-up, when task switching is
+ * not yet working and interrupts must remain disabled. At
+ * such times we must not call mutex_lock().
+ */
+ if (unlikely(system_state == SYSTEM_BOOTING))
+ return notifier_chain_unregister(&nh->head, n);
+
+ mutex_lock(&nh->mutex);
+ ret = notifier_chain_unregister(&nh->head, n);
+ mutex_unlock(&nh->mutex);
+ synchronize_srcu(&nh->srcu);
+ return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
+
+/**
+ * srcu_notifier_call_chain - Call functions in an SRCU notifier chain
+ * @nh: Pointer to head of the SRCU notifier chain
+ * @val: Value passed unmodified to notifier function
+ * @v: Pointer passed unmodified to notifier function
+ *
+ * Calls each function in a notifier chain in turn. The functions
+ * run in a process context, so they are allowed to block.
+ *
+ * If the return value of the notifier can be and'ed
+ * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain
+ * will return immediately, with the return value of
+ * the notifier function which halted execution.
+ * Otherwise the return value is the return value
+ * of the last notifier function called.
+ */
+
+int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+ unsigned long val, void *v)
+{
+ int ret;
+ int idx;
+
+ idx = srcu_read_lock(&nh->srcu);
+ ret = notifier_call_chain(&nh->head, val, v);
+ srcu_read_unlock(&nh->srcu, idx);
+ return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
+
+/**
+ * srcu_init_notifier_head - Initialize an SRCU notifier head
+ * @nh: Pointer to head of the srcu notifier chain
+ *
+ * Unlike other sorts of notifier heads, SRCU notifier heads require
+ * dynamic initialization. Be sure to call this routine before
+ * calling any of the other SRCU notifier routines for this head.
+ *
+ * If an SRCU notifier head is deallocated, it must first be cleaned
+ * up by calling srcu_cleanup_notifier_head(). Otherwise the head's
+ * per-cpu data (used by the SRCU mechanism) will leak.
+ */
+
+void srcu_init_notifier_head(struct srcu_notifier_head *nh)
+{
+ mutex_init(&nh->mutex);
+ if (init_srcu_struct(&nh->srcu) < 0)
+ BUG();
+ nh->head = NULL;
+}
+
+EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
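Putting the notifier routines together, a subsystem wanting a chain whose callbacks may block but whose call path takes no locks would use them roughly as follows (all names below are illustrative, and assume the srcu_notifier_head type that the matching <linux/notifier.h> change introduces):

	static struct srcu_notifier_head my_chain;

	static int my_callback(struct notifier_block *nb,
			       unsigned long event, void *data)
	{
		/* Runs in process context under srcu_read_lock(); may block. */
		return NOTIFY_OK;
	}

	static struct notifier_block my_nb = {
		.notifier_call = my_callback,
	};

	static void my_subsys_setup(void)
	{
		srcu_init_notifier_head(&my_chain);	/* dynamic init is mandatory */
		srcu_notifier_chain_register(&my_chain, &my_nb);
	}

	static void my_subsys_event(unsigned long event, void *data)
	{
		srcu_notifier_call_chain(&my_chain, event, data);
	}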
+
/**
* register_reboot_notifier - Register function to be called at reboot time
* @nb: Info about notifier function to be called
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7a3b2e75f040..0e53314b14de 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -49,6 +49,7 @@ cond_syscall(compat_sys_get_robust_list);
cond_syscall(sys_epoll_create);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
+cond_syscall(sys_epoll_pwait);
cond_syscall(sys_semget);
cond_syscall(sys_semop);
cond_syscall(sys_semtimedop);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8020fb273c4f..8bff2c18fb5a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,8 +136,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *,
static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
+#ifdef CONFIG_PROC_SYSCTL
static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
static ctl_table root_table[];
static struct ctl_table_header root_table_header =
@@ -542,6 +544,7 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_PROC_SYSCTL
{
.ctl_name = KERN_CADPID,
.procname = "cad_pid",
@@ -550,6 +553,7 @@ static ctl_table kern_table[] = {
.mode = 0600,
.proc_handler = &proc_do_cad_pid,
},
+#endif
{
.ctl_name = KERN_MAX_THREADS,
.procname = "threads-max",
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 126bb30c4afe..a99b2a6e6a07 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -57,7 +57,7 @@ static cycle_t jiffies_read(void)
struct clocksource clocksource_jiffies = {
.name = "jiffies",
- .rating = 0, /* lowest rating*/
+ .rating = 1, /* lowest valid rating */
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cfc737bffe6d..3df9bfc7ff78 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -28,6 +28,7 @@
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
+#include <linux/mempolicy.h>
/*
* The per-CPU workqueue (if single thread, we always use the first
@@ -245,6 +246,12 @@ static int worker_thread(void *__cwq)
sigprocmask(SIG_BLOCK, &blocked, NULL);
flush_signals(current);
+ /*
+ * We inherited MPOL_INTERLEAVE from the booting kernel.
+ * Set MPOL_DEFAULT to ensure node-local allocations.
+ */
+ numa_default_policy();
+
/* SIG_IGN makes children autoreap: see do_notify_parent(). */
sa.sa.sa_handler = SIG_IGN;
sa.sa.sa_flags = 0;