From 22f470f8daea64bc03be1fe30c8c5df382295386 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 11 Jun 2009 09:29:58 -0400 Subject: ring-buffer: use BUF_PAGE_HDR_SIZE in calculating index The index of the event is found by masking PAGE_MASK to it and subtracting the header size. Currently the header size is calculate by PAGE_SIZE - BUF_PAGE_SIZE, when we already have a macro BUF_PAGE_HDR_SIZE to define it. If we want to change BUF_PAGE_SIZE to something less than filling the rest of the page (this is done for debugging), then we break the algorithm to find the index. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ed3559944fcf..6b17a11e42a2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1013,7 +1013,7 @@ rb_event_index(struct ring_buffer_event *event) { unsigned long addr = (unsigned long)event; - return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); + return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; } static inline int -- cgit v1.2.3 From c6a9d7b55e2df63de012a9a285bf2a0bee8e4d59 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 11 Jun 2009 09:49:15 -0400 Subject: ring-buffer: remove useless warn on check A check if "write > BUF_PAGE_SIZE" is done right after a if (write > BUF_PAGE_SIZE) return ...; Thus the check is actually testing the compiler and not the kernel. This is useless, remove it. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6b17a11e42a2..6cf340e1a4a3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1334,9 +1334,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* We reserved something on the buffer */ - if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE)) - return NULL; - event = __rb_page_index(tail_page, tail); rb_update_event(event, type, length); -- cgit v1.2.3 From 0dcd4d6c3e5b17ccf88d41cb354bb4d57cb18cbf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 17 Jun 2009 14:03:44 -0400 Subject: ring-buffer: remove useless compile check for buffer_page size The original version of the ring buffer had a hack to map the page struct that held the pages of the buffer to also be the structure that the ring buffer would keep the pages in a link list. This overlap of the page struct was very dangerous and that hack was removed a while ago. But there was a check to make sure the buffer_page never became bigger than the page struct, and would fail the compile if it did. The check was only meaningful when we had the hack. Now that we have separate allocated descriptors for the buffer pages, we can remove this check. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6cf340e1a4a3..162da2305cbc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -620,12 +620,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) kfree(cpu_buffer); } -/* - * Causes compile errors if the struct buffer_page gets bigger - * than the struct page. - */ -extern int ring_buffer_page_too_big(void); - #ifdef CONFIG_HOTPLUG_CPU static int rb_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); @@ -648,11 +642,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, int bsize; int cpu; - /* Paranoid! Optimizes out when all is well */ - if (sizeof(struct buffer_page) > sizeof(struct page)) - ring_buffer_page_too_big(); - - /* keep it in its own cache line */ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), GFP_KERNEL); -- cgit v1.2.3 From 5f78abeebbf0a80975d719e11374535ca15396cb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 17 Jun 2009 14:11:10 -0400 Subject: ring-buffer: check for less than two in size allocation The ring buffer must have at least two pages allocated for the reader page swap to work. The page count check will miss the case of a zero size passed in. Even though a zero size ring buffer would probably fail an allocation, making the min size check for less than two instead of equal to one makes the code a bit more robust. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 162da2305cbc..2e99dba6dc48 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -657,8 +657,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, buffer->reader_lock_key = key; /* need at least two pages */ - if (buffer->pages == 1) - buffer->pages++; + if (buffer->pages < 2) + buffer->pages = 2; /* * In case of non-hotplug cpu, if the ring-buffer is allocated -- cgit v1.2.3 From d47882078f05c2cb46b85f1e12a58ed9315b9d63 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 17 Jun 2009 00:39:43 -0400 Subject: ring-buffer: add locks around rb_per_cpu_empty The checking of whether the buffer is empty or not needs to be serialized among the readers. Add the reader spin lock around it. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2e99dba6dc48..969f7cbe8e93 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2756,12 +2756,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); int ring_buffer_empty(struct ring_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; + unsigned long flags; int cpu; + int ret; /* yes this is racy, but if you don't like the race, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - if (!rb_per_cpu_empty(cpu_buffer)) + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + ret = rb_per_cpu_empty(cpu_buffer); + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (!ret) return 0; } @@ -2777,14 +2782,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; + unsigned long flags; int ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 1; cpu_buffer = buffer->buffers[cpu]; + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); ret = rb_per_cpu_empty(cpu_buffer); - + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return ret; } -- cgit v1.2.3 From 8d707e8eb4de4b930573155ab4df4b3270ee25dd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 16 Jun 2009 21:22:48 -0400 Subject: ring-buffer: do not grab locks in nmi If ftrace_dump_on_oops is set, and an NMI detects a lockup, then it will need to read from the ring buffer. But the read side of the ring buffer still takes locks. This patch adds a check on the read side that if it is in an NMI, then it will disable the ring buffer and not take any locks. Reads can still happen on a disabled ring buffer. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 59 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 969f7cbe8e93..589b3eedfa67 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2466,6 +2466,21 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) } EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); +static inline int rb_ok_to_lock(void) +{ + /* + * If an NMI die dumps out the content of the ring buffer + * do not grab locks. We also permanently disable the ring + * buffer too. A one time deal is all you get from reading + * the ring buffer from an NMI. + */ + if (likely(!in_nmi() && !oops_in_progress)) + return 1; + + tracing_off_permanent(); + return 0; +} + /** * ring_buffer_peek - peek at the next event to be read * @buffer: The ring buffer to read @@ -2481,14 +2496,20 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; unsigned long flags; + int dolock; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; + dolock = rb_ok_to_lock(); again: - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + local_irq_save(flags); + if (dolock) + spin_lock(&cpu_buffer->reader_lock); event = rb_buffer_peek(buffer, cpu, ts); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (dolock) + spin_unlock(&cpu_buffer->reader_lock); + local_irq_restore(flags); if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); @@ -2540,6 +2561,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event = NULL; unsigned long flags; + int dolock; + + dolock = rb_ok_to_lock(); again: /* might be called in atomic */ @@ -2549,7 +2573,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) goto out; cpu_buffer = buffer->buffers[cpu]; - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + local_irq_save(flags); + if (dolock) + spin_lock(&cpu_buffer->reader_lock); event = rb_buffer_peek(buffer, cpu, ts); if (!event) @@ -2558,7 +2584,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) rb_advance_reader(cpu_buffer); out_unlock: - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (dolock) + spin_unlock(&cpu_buffer->reader_lock); + local_irq_restore(flags); out: preempt_enable(); @@ -2757,15 +2785,23 @@ int ring_buffer_empty(struct ring_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; + int dolock; int cpu; int ret; + dolock = rb_ok_to_lock(); + /* yes this is racy, but if you don't like the race, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + local_irq_save(flags); + if (dolock) + spin_lock(&cpu_buffer->reader_lock); ret = rb_per_cpu_empty(cpu_buffer); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (dolock) + spin_unlock(&cpu_buffer->reader_lock); + local_irq_restore(flags); + if (!ret) return 0; } @@ -2783,15 +2819,22 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; + int dolock; int ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 1; + dolock = rb_ok_to_lock(); + cpu_buffer = buffer->buffers[cpu]; - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + local_irq_save(flags); + if (dolock) + spin_lock(&cpu_buffer->reader_lock); ret = rb_per_cpu_empty(cpu_buffer); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + if (dolock) + spin_unlock(&cpu_buffer->reader_lock); + local_irq_restore(flags); return ret; } -- cgit v1.2.3 From 4b221f0313f0f7f1f7aa0a1fd16ad400840def26 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 17 Jun 2009 17:01:09 -0400 Subject: ring-buffer: have benchmark test print to trace buffer Currently the output of the ring buffer benchmark/test prints to the console. This test runs for ten seconds every ten seconds and ouputs the result after every iteration. This needlessly fills up the logs. This patch makes the ring buffer benchmark/test print to the ftrace buffer using trace_printk. To view the test results, you must examine the debug/tracing/trace file. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer_benchmark.c | 37 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index cf6b0f50134e..573d3cc762c3 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -203,7 +203,7 @@ static void ring_buffer_producer(void) * Hammer the buffer for 10 secs (this may * make the system stall) */ - pr_info("Starting ring buffer hammer\n"); + trace_printk("Starting ring buffer hammer\n"); do_gettimeofday(&start_tv); do { struct ring_buffer_event *event; @@ -239,7 +239,7 @@ static void ring_buffer_producer(void) #endif } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); - pr_info("End ring buffer hammer\n"); + trace_printk("End ring buffer hammer\n"); if (consumer) { /* Init both completions here to avoid races */ @@ -262,49 +262,50 @@ static void ring_buffer_producer(void) overruns = ring_buffer_overruns(buffer); if (kill_test) - pr_info("ERROR!\n"); - pr_info("Time: %lld (usecs)\n", time); - pr_info("Overruns: %lld\n", overruns); + trace_printk("ERROR!\n"); + trace_printk("Time: %lld (usecs)\n", time); + trace_printk("Overruns: %lld\n", overruns); if (disable_reader) - pr_info("Read: (reader disabled)\n"); + trace_printk("Read: (reader disabled)\n"); else - pr_info("Read: %ld (by %s)\n", read, + trace_printk("Read: %ld (by %s)\n", read, read_events ? "events" : "pages"); - pr_info("Entries: %lld\n", entries); - pr_info("Total: %lld\n", entries + overruns + read); - pr_info("Missed: %ld\n", missed); - pr_info("Hit: %ld\n", hit); + trace_printk("Entries: %lld\n", entries); + trace_printk("Total: %lld\n", entries + overruns + read); + trace_printk("Missed: %ld\n", missed); + trace_printk("Hit: %ld\n", hit); /* Convert time from usecs to millisecs */ do_div(time, USEC_PER_MSEC); if (time) hit /= (long)time; else - pr_info("TIME IS ZERO??\n"); + trace_printk("TIME IS ZERO??\n"); - pr_info("Entries per millisec: %ld\n", hit); + trace_printk("Entries per millisec: %ld\n", hit); if (hit) { /* Calculate the average time in nanosecs */ avg = NSEC_PER_MSEC / hit; - pr_info("%ld ns per entry\n", avg); + trace_printk("%ld ns per entry\n", avg); } if (missed) { if (time) missed /= (long)time; - pr_info("Total iterations per millisec: %ld\n", hit + missed); + trace_printk("Total iterations per millisec: %ld\n", + hit + missed); /* it is possible that hit + missed will overflow and be zero */ if (!(hit + missed)) { - pr_info("hit + missed overflowed and totalled zero!\n"); + trace_printk("hit + missed overflowed and totalled zero!\n"); hit--; /* make it non zero */ } /* Caculate the average time in nanosecs */ avg = NSEC_PER_MSEC / (hit + missed); - pr_info("%ld ns per entry\n", avg); + trace_printk("%ld ns per entry\n", avg); } } @@ -355,7 +356,7 @@ static int ring_buffer_producer_thread(void *arg) ring_buffer_producer(); - pr_info("Sleeping for 10 secs\n"); + trace_printk("Sleeping for 10 secs\n"); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ * SLEEP_TIME); __set_current_state(TASK_RUNNING); -- cgit v1.2.3