From b3e55c727ff7349c5db722fbdb8d99a151e8e0bf Mon Sep 17 00:00:00 2001 From: "Mao, Bibo" Date: Mon, 12 Dec 2005 00:37:00 -0800 Subject: [PATCH] Kprobes: Reference count the modules when probed on it When a Kprobes are inserted/removed on a modules, the modules must be ref counted so as not to allow to unload while probes are registered on that module. Without this patch, the probed module is free to unload, and when the probing module unregister the probe, the kpobes code while trying to replace the original instruction might crash. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Mao Bibo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5beda378cc75..fde5a16a2913 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -462,9 +462,16 @@ int __kprobes register_kprobe(struct kprobe *p) int ret = 0; unsigned long flags = 0; struct kprobe *old_p; + struct module *mod; + + if ((!kernel_text_address((unsigned long) p->addr)) || + in_kprobes_functions((unsigned long) p->addr)) + return -EINVAL; + + if ((mod = module_text_address((unsigned long) p->addr)) && + (unlikely(!try_module_get(mod)))) + return -EINVAL; - if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0) - return ret; if ((ret = arch_prepare_kprobe(p)) != 0) goto rm_kprobe; @@ -488,6 +495,8 @@ out: rm_kprobe: if (ret == -EEXIST) arch_remove_kprobe(p); + if (ret && mod) + module_put(mod); return ret; } @@ -495,6 +504,7 @@ void __kprobes unregister_kprobe(struct kprobe *p) { unsigned long flags; struct kprobe *old_p; + struct module *mod; spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); @@ -506,6 +516,10 @@ void __kprobes unregister_kprobe(struct kprobe *p) cleanup_kprobe(p, flags); synchronize_sched(); + + if ((mod = module_text_address((unsigned long)p->addr))) + module_put(mod); + if (old_p->pre_handler == aggr_pre_handler && list_empty(&old_p->list)) kfree(old_p); -- cgit v1.2.3 From ab4720ec76b756e1f8705e207a7b392b0453afd6 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Mon, 12 Dec 2005 00:37:05 -0800 Subject: [PATCH] add rcu_barrier() synchronization point This introduces a new interface - rcu_barrier() which waits until all the RCUs queued until this call have been completed. Reiser4 needs this, because we do more than just freeing memory object in our RCU callback: we also remove it from the list hanging off super-block. This means, that before freeing reiser4-specific portion of super-block (during umount) we have to wait until all pending RCU callbacks are executed. The only change of reiser4 made to the original patch, is exporting of rcu_barrier(). Cc: Hans Reiser Cc: Vladimir V. Saveliev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c4d159a21e04..f45b91723dc6 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -116,6 +116,10 @@ void fastcall call_rcu(struct rcu_head *head, local_irq_restore(flags); } +static atomic_t rcu_barrier_cpu_count; +static struct semaphore rcu_barrier_sema; +static struct completion rcu_barrier_completion; + /** * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -162,6 +166,42 @@ long rcu_batches_completed(void) return rcu_ctrlblk.completed; } +static void rcu_barrier_callback(struct rcu_head *notused) +{ + if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + complete(&rcu_barrier_completion); +} + +/* + * Called with preemption disabled, and from cross-cpu IRQ context. + */ +static void rcu_barrier_func(void *notused) +{ + int cpu = smp_processor_id(); + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_head *head; + + head = &rdp->barrier; + atomic_inc(&rcu_barrier_cpu_count); + call_rcu(head, rcu_barrier_callback); +} + +/** + * rcu_barrier - Wait until all the in-flight RCUs are complete. + */ +void rcu_barrier(void) +{ + BUG_ON(in_interrupt()); + /* Take cpucontrol semaphore to protect against CPU hotplug */ + down(&rcu_barrier_sema); + init_completion(&rcu_barrier_completion); + atomic_set(&rcu_barrier_cpu_count, 0); + on_each_cpu(rcu_barrier_func, NULL, 0, 1); + wait_for_completion(&rcu_barrier_completion); + up(&rcu_barrier_sema); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + /* * Invoke the completed RCU callbacks. They are expected to be in * a per-cpu list. @@ -457,6 +497,7 @@ static struct notifier_block __devinitdata rcu_nb = { */ void __init rcu_init(void) { + sema_init(&rcu_barrier_sema, 1); rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)smp_processor_id()); /* Register notifier for non-boot CPUs */ -- cgit v1.2.3 From 89d46b8778f65223f732d82c0166e0abba20fb1e Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 12 Dec 2005 00:37:06 -0800 Subject: [PATCH] Fix bug in RCU torture test While doing some test of RCU torture module, I hit a OOPS in rcu_do_batch, which was trying to processes callback of a module that was just removed. This is because we weren't waiting long enough for all callbacks to fire. Signed-off-by: Srivatsa Vaddagiri Cc: Dipankar Sarma Acked-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcutorture.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 88c28d476550..49fbbeff201c 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -409,9 +409,8 @@ rcu_torture_cleanup(void) stats_task = NULL; /* Wait for all RCU callbacks to fire. */ + rcu_barrier(); - for (i = 0; i < RCU_TORTURE_PIPE_LEN; i++) - synchronize_rcu(); rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ printk(KERN_ALERT TORTURE_FLAG "--- End of test: %s\n", -- cgit v1.2.3 From c3f5902325d3053986e7359f706581d8f032e72f Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 12 Dec 2005 00:37:07 -0800 Subject: [PATCH] Fix RCU race in access of nohz_cpu_mask Accessing nohz_cpu_mask before incrementing rcp->cur is racy. It can cause tickless idle CPUs to be included in rsp->cpumask, which will extend graceperiods unnecessarily. Fix this race. It has been tested using extensions to RCU torture module that forces various CPUs to become idle. Signed-off-by: Srivatsa Vaddagiri Cc: Dipankar Sarma Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f45b91723dc6..48d3bce465b8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -257,15 +257,23 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp, if (rcp->next_pending && rcp->completed == rcp->cur) { - /* Can't change, since spin lock held. */ - cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask); - rcp->next_pending = 0; - /* next_pending == 0 must be visible in __rcu_process_callbacks() - * before it can see new value of cur. + /* + * next_pending == 0 must be visible in + * __rcu_process_callbacks() before it can see new value of cur. */ smp_wmb(); rcp->cur++; + + /* + * Accessing nohz_cpu_mask before incrementing rcp->cur needs a + * Barrier Otherwise it can cause tickless idle CPUs to be + * included in rsp->cpumask, which will extend graceperiods + * unnecessarily. + */ + smp_mb(); + cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask); + } } -- cgit v1.2.3 From 64123fd42c7a1e4ebf6acd2399c98caddc7e0c26 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Mon, 12 Dec 2005 00:37:09 -0800 Subject: [PATCH] Add getnstimestamp function There are several functions that might seem appropriate for a timestamp: get_cycles() current_kernel_time() do_gettimeofday() Each has problems with combinations of SMP-safety, low resolution, and monotonicity. This patch adds a new function that returns a monotonic SMP-safe timestamp with nanosecond resolution where available. Changes: Split timestamp into separate patch Moved to kernel/time.c Renamed to getnstimestamp Fixed unintended-pointer-arithmetic bug Signed-off-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index 245d595a13cb..b94bfa8c03e0 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -561,6 +561,28 @@ void getnstimeofday(struct timespec *tv) EXPORT_SYMBOL_GPL(getnstimeofday); #endif +void getnstimestamp(struct timespec *ts) +{ + unsigned int seq; + struct timespec wall2mono; + + /* synchronize with settimeofday() changes */ + do { + seq = read_seqbegin(&xtime_lock); + getnstimeofday(ts); + wall2mono = wall_to_monotonic; + } while(unlikely(read_seqretry(&xtime_lock, seq))); + + /* adjust to monotonicaly-increasing values */ + ts->tv_sec += wall2mono.tv_sec; + ts->tv_nsec += wall2mono.tv_nsec; + while (unlikely(ts->tv_nsec >= NSEC_PER_SEC)) { + ts->tv_nsec -= NSEC_PER_SEC; + ts->tv_sec++; + } +} +EXPORT_SYMBOL_GPL(getnstimestamp); + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { -- cgit v1.2.3 From adad0f331f9c693129e81e233c5461e2e7c3e443 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 12 Dec 2005 00:37:12 -0800 Subject: [PATCH] kprobes: fix race in aggregate kprobe registration When registering multiple kprobes at the same address, we leave a small window where the kprobe hlist will not contain a reference to the registered kprobe, leading to potentially, a system crash if the breakpoint is hit on another processor. Patch below now automically relpace the old kprobe with the new kprobe from the hash list. Signed-off-by: Anil S Keshavamurthy Acked-by: Ananth N Mavinakayanahalli Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index fde5a16a2913..e4f0fc62bd3e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -399,10 +399,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) INIT_LIST_HEAD(&ap->list); list_add_rcu(&p->list, &ap->list); - INIT_HLIST_NODE(&ap->hlist); - hlist_del_rcu(&p->hlist); - hlist_add_head_rcu(&ap->hlist, - &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); + hlist_replace_rcu(&p->hlist, &ap->hlist); } /* -- cgit v1.2.3 From 7a4ae749a478f8bca73d4b5b8c1b8cbb178b2db5 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Mon, 12 Dec 2005 00:37:22 -0800 Subject: [PATCH] Add try_to_freeze to kauditd kauditd was causing suspends to fail because it refused to freeze. Adding a try_to_freeze() to its sleep loop solves the issue. Signed-off-by: Pierre Ossman Acked-by: Pavel Machek Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/audit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 0c56320d38dc..32fa03ad1984 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -291,8 +291,10 @@ int kauditd_thread(void *dummy) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&kauditd_wait, &wait); - if (!skb_queue_len(&audit_skb_queue)) + if (!skb_queue_len(&audit_skb_queue)) { + try_to_freeze(); schedule(); + } __set_current_state(TASK_RUNNING); remove_wait_queue(&kauditd_wait, &wait); -- cgit v1.2.3 From 00d7c05ab168c10f9b520e07400923267bc04419 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 12 Dec 2005 00:37:33 -0800 Subject: [PATCH] kprobes: no probes on critical path For Kprobes critical path is the path from debug break exception handler till the control reaches kprobes exception code. No probes can be supported in this path as we will end up in recursion. This patch prevents this by moving the below function to safe __kprobes section onto which no probes can be inserted. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index bce933ebb29f..eecf84526afe 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -168,7 +169,7 @@ EXPORT_SYMBOL(notifier_chain_unregister); * of the last notifier function called. */ -int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) +int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) { int ret=NOTIFY_DONE; struct notifier_block *nb = *n; -- cgit v1.2.3 From bf8d5c52c3b6b27061e3b7d779057fd9a6cac164 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 12 Dec 2005 00:37:34 -0800 Subject: [PATCH] kprobes: increment kprobe missed count for multiprobes When multiple probes are registered at the same address and if due to some recursion (probe getting triggered within a probe handler), we skip calling pre_handlers and just increment nmissed field. The below patch make sure it walks the list for multiple probes case. Without the below patch we get incorrect results of nmissed count for multiple probe case. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e4f0fc62bd3e..3bb71e63a37e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -246,6 +246,19 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) return ret; } +/* Walks the list and increments nmissed count for multiprobe case */ +void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) +{ + struct kprobe *kp; + if (p->pre_handler != aggr_pre_handler) { + p->nmissed++; + } else { + list_for_each_entry_rcu(kp, &p->list, list) + kp->nmissed++; + } + return; +} + /* Called with kretprobe_lock held */ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) { -- cgit v1.2.3