summaryrefslogtreecommitdiff
path: root/kernel/perf_counter.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--kernel/perf_counter.c516
1 files changed, 311 insertions, 205 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a641eb753b8c..b0b20a07f394 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1;
static atomic_t nr_counters __read_mostly;
static atomic_t nr_mmap_counters __read_mostly;
static atomic_t nr_comm_counters __read_mostly;
+static atomic_t nr_task_counters __read_mostly;
/*
* perf counter paranoia level:
@@ -146,6 +147,28 @@ static void put_ctx(struct perf_counter_context *ctx)
}
}
+static void unclone_ctx(struct perf_counter_context *ctx)
+{
+ if (ctx->parent_ctx) {
+ put_ctx(ctx->parent_ctx);
+ ctx->parent_ctx = NULL;
+ }
+}
+
+/*
+ * If we inherit counters we want to return the parent counter id
+ * to userspace.
+ */
+static u64 primary_counter_id(struct perf_counter *counter)
+{
+ u64 id = counter->id;
+
+ if (counter->parent)
+ id = counter->parent->id;
+
+ return id;
+}
+
/*
* Get the perf_counter_context for a task and lock it.
* This has to cope with with the fact that until it is locked,
@@ -1081,7 +1104,7 @@ static void perf_counter_sync_stat(struct perf_counter_context *ctx,
__perf_counter_sync_stat(counter, next_counter);
counter = list_next_entry(counter, event_entry);
- next_counter = list_next_entry(counter, event_entry);
+ next_counter = list_next_entry(next_counter, event_entry);
}
}
@@ -1288,7 +1311,6 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
#define MAX_INTERRUPTS (~0ULL)
static void perf_log_throttle(struct perf_counter *counter, int enable);
-static void perf_log_period(struct perf_counter *counter, u64 period);
static void perf_adjust_period(struct perf_counter *counter, u64 events)
{
@@ -1307,8 +1329,6 @@ static void perf_adjust_period(struct perf_counter *counter, u64 events)
if (!sample_period)
sample_period = 1;
- perf_log_period(counter, sample_period);
-
hwc->sample_period = sample_period;
}
@@ -1463,10 +1483,8 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
/*
* Unclone this context if we enabled any counter.
*/
- if (enabled && ctx->parent_ctx) {
- put_ctx(ctx->parent_ctx);
- ctx->parent_ctx = NULL;
- }
+ if (enabled)
+ unclone_ctx(ctx);
spin_unlock(&ctx->lock);
@@ -1526,7 +1544,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
{
- struct perf_counter_context *parent_ctx;
struct perf_counter_context *ctx;
struct perf_cpu_context *cpuctx;
struct task_struct *task;
@@ -1586,11 +1603,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
retry:
ctx = perf_lock_task_context(task, &flags);
if (ctx) {
- parent_ctx = ctx->parent_ctx;
- if (parent_ctx) {
- put_ctx(parent_ctx);
- ctx->parent_ctx = NULL; /* no longer a clone */
- }
+ unclone_ctx(ctx);
spin_unlock_irqrestore(&ctx->lock, flags);
}
@@ -1642,6 +1655,8 @@ static void free_counter(struct perf_counter *counter)
atomic_dec(&nr_mmap_counters);
if (counter->attr.comm)
atomic_dec(&nr_comm_counters);
+ if (counter->attr.task)
+ atomic_dec(&nr_task_counters);
}
if (counter->destroy)
@@ -1676,6 +1691,18 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
+static u64 perf_counter_read_tree(struct perf_counter *counter)
+{
+ struct perf_counter *child;
+ u64 total = 0;
+
+ total += perf_counter_read(counter);
+ list_for_each_entry(child, &counter->child_list, child_list)
+ total += perf_counter_read(child);
+
+ return total;
+}
+
/*
* Read the performance counter - simple non blocking version for now
*/
@@ -1695,7 +1722,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
WARN_ON_ONCE(counter->ctx->parent_ctx);
mutex_lock(&counter->child_mutex);
- values[0] = perf_counter_read(counter);
+ values[0] = perf_counter_read_tree(counter);
n = 1;
if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
values[n++] = counter->total_time_enabled +
@@ -1704,7 +1731,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
values[n++] = counter->total_time_running +
atomic64_read(&counter->child_total_time_running);
if (counter->attr.read_format & PERF_FORMAT_ID)
- values[n++] = counter->id;
+ values[n++] = primary_counter_id(counter);
mutex_unlock(&counter->child_mutex);
if (count < n * sizeof(u64))
@@ -1811,8 +1838,6 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
counter->attr.sample_freq = value;
} else {
- perf_log_period(counter, value);
-
counter->attr.sample_period = value;
counter->hw.sample_period = value;
}
@@ -2661,10 +2686,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_ID)
header.size += sizeof(u64);
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ header.size += sizeof(u64);
+
if (sample_type & PERF_SAMPLE_CPU) {
header.size += sizeof(cpu_entry);
cpu_entry.cpu = raw_smp_processor_id();
+ cpu_entry.reserved = 0;
}
if (sample_type & PERF_SAMPLE_PERIOD)
@@ -2685,6 +2714,18 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
header.size += sizeof(u64);
}
+ if (sample_type & PERF_SAMPLE_RAW) {
+ int size = sizeof(u32);
+
+ if (data->raw)
+ size += data->raw->size;
+ else
+ size += sizeof(u32);
+
+ WARN_ON_ONCE(size & (sizeof(u64)-1));
+ header.size += size;
+ }
+
ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
if (ret)
return;
@@ -2703,7 +2744,13 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_ADDR)
perf_output_put(&handle, data->addr);
- if (sample_type & PERF_SAMPLE_ID)
+ if (sample_type & PERF_SAMPLE_ID) {
+ u64 id = primary_counter_id(counter);
+
+ perf_output_put(&handle, id);
+ }
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
perf_output_put(&handle, counter->id);
if (sample_type & PERF_SAMPLE_CPU)
@@ -2726,7 +2773,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sub != counter)
sub->pmu->read(sub);
- group_entry.id = sub->id;
+ group_entry.id = primary_counter_id(sub);
group_entry.counter = atomic64_read(&sub->count);
perf_output_put(&handle, group_entry);
@@ -2742,6 +2789,22 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
}
}
+ if (sample_type & PERF_SAMPLE_RAW) {
+ if (data->raw) {
+ perf_output_put(&handle, data->raw->size);
+ perf_output_copy(&handle, data->raw->data, data->raw->size);
+ } else {
+ struct {
+ u32 size;
+ u32 data;
+ } raw = {
+ .size = sizeof(u32),
+ .data = 0,
+ };
+ perf_output_put(&handle, raw);
+ }
+ }
+
perf_output_end(&handle);
}
@@ -2786,15 +2849,8 @@ perf_counter_read_event(struct perf_counter *counter,
}
if (counter->attr.read_format & PERF_FORMAT_ID) {
- u64 id;
-
event.header.size += sizeof(u64);
- if (counter->parent)
- id = counter->parent->id;
- else
- id = counter->id;
-
- event.format[i++] = id;
+ event.format[i++] = primary_counter_id(counter);
}
ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
@@ -2806,48 +2862,56 @@ perf_counter_read_event(struct perf_counter *counter,
}
/*
- * fork tracking
+ * task tracking -- fork/exit
+ *
+ * enabled by: attr.comm | attr.mmap | attr.task
*/
-struct perf_fork_event {
- struct task_struct *task;
+struct perf_task_event {
+ struct task_struct *task;
+ struct perf_counter_context *task_ctx;
struct {
struct perf_event_header header;
u32 pid;
u32 ppid;
+ u32 tid;
+ u32 ptid;
} event;
};
-static void perf_counter_fork_output(struct perf_counter *counter,
- struct perf_fork_event *fork_event)
+static void perf_counter_task_output(struct perf_counter *counter,
+ struct perf_task_event *task_event)
{
struct perf_output_handle handle;
- int size = fork_event->event.header.size;
- struct task_struct *task = fork_event->task;
+ int size = task_event->event.header.size;
+ struct task_struct *task = task_event->task;
int ret = perf_output_begin(&handle, counter, size, 0, 0);
if (ret)
return;
- fork_event->event.pid = perf_counter_pid(counter, task);
- fork_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+ task_event->event.pid = perf_counter_pid(counter, task);
+ task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
- perf_output_put(&handle, fork_event->event);
+ task_event->event.tid = perf_counter_tid(counter, task);
+ task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
+
+ perf_output_put(&handle, task_event->event);
perf_output_end(&handle);
}
-static int perf_counter_fork_match(struct perf_counter *counter)
+static int perf_counter_task_match(struct perf_counter *counter)
{
- if (counter->attr.comm || counter->attr.mmap)
+ if (counter->attr.comm || counter->attr.mmap || counter->attr.task)
return 1;
return 0;
}
-static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
- struct perf_fork_event *fork_event)
+static void perf_counter_task_ctx(struct perf_counter_context *ctx,
+ struct perf_task_event *task_event)
{
struct perf_counter *counter;
@@ -2856,51 +2920,62 @@ static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
rcu_read_lock();
list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
- if (perf_counter_fork_match(counter))
- perf_counter_fork_output(counter, fork_event);
+ if (perf_counter_task_match(counter))
+ perf_counter_task_output(counter, task_event);
}
rcu_read_unlock();
}
-static void perf_counter_fork_event(struct perf_fork_event *fork_event)
+static void perf_counter_task_event(struct perf_task_event *task_event)
{
struct perf_cpu_context *cpuctx;
- struct perf_counter_context *ctx;
+ struct perf_counter_context *ctx = task_event->task_ctx;
cpuctx = &get_cpu_var(perf_cpu_context);
- perf_counter_fork_ctx(&cpuctx->ctx, fork_event);
+ perf_counter_task_ctx(&cpuctx->ctx, task_event);
put_cpu_var(perf_cpu_context);
rcu_read_lock();
- /*
- * doesn't really matter which of the child contexts the
- * events ends up in.
- */
- ctx = rcu_dereference(current->perf_counter_ctxp);
+ if (!ctx)
+ ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
if (ctx)
- perf_counter_fork_ctx(ctx, fork_event);
+ perf_counter_task_ctx(ctx, task_event);
rcu_read_unlock();
}
-void perf_counter_fork(struct task_struct *task)
+static void perf_counter_task(struct task_struct *task,
+ struct perf_counter_context *task_ctx,
+ int new)
{
- struct perf_fork_event fork_event;
+ struct perf_task_event task_event;
if (!atomic_read(&nr_comm_counters) &&
- !atomic_read(&nr_mmap_counters))
+ !atomic_read(&nr_mmap_counters) &&
+ !atomic_read(&nr_task_counters))
return;
- fork_event = (struct perf_fork_event){
- .task = task,
- .event = {
+ task_event = (struct perf_task_event){
+ .task = task,
+ .task_ctx = task_ctx,
+ .event = {
.header = {
- .type = PERF_EVENT_FORK,
- .size = sizeof(fork_event.event),
+ .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
+ .misc = 0,
+ .size = sizeof(task_event.event),
},
+ /* .pid */
+ /* .ppid */
+ /* .tid */
+ /* .ptid */
},
};
- perf_counter_fork_event(&fork_event);
+ perf_counter_task_event(&task_event);
+}
+
+void perf_counter_fork(struct task_struct *task)
+{
+ perf_counter_task(task, NULL, 1);
}
/*
@@ -2968,8 +3043,10 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
struct perf_cpu_context *cpuctx;
struct perf_counter_context *ctx;
unsigned int size;
- char *comm = comm_event->task->comm;
+ char comm[TASK_COMM_LEN];
+ memset(comm, 0, sizeof(comm));
+ strncpy(comm, comm_event->task->comm, sizeof(comm));
size = ALIGN(strlen(comm)+1, sizeof(u64));
comm_event->comm = comm;
@@ -3004,8 +3081,16 @@ void perf_counter_comm(struct task_struct *task)
comm_event = (struct perf_comm_event){
.task = task,
+ /* .comm */
+ /* .comm_size */
.event = {
- .header = { .type = PERF_EVENT_COMM, },
+ .header = {
+ .type = PERF_EVENT_COMM,
+ .misc = 0,
+ /* .size */
+ },
+ /* .pid */
+ /* .tid */
},
};
@@ -3088,8 +3173,15 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
char *buf = NULL;
const char *name;
+ memset(tmp, 0, sizeof(tmp));
+
if (file) {
- buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ /*
+ * d_path works from the end of the buffer backwards, so we
+ * need to add enough zero bytes after the string to handle
+ * the 64bit alignment we do later.
+ */
+ buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
if (!buf) {
name = strncpy(tmp, "//enomem", sizeof(tmp));
goto got_name;
@@ -3100,9 +3192,11 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
goto got_name;
}
} else {
- name = arch_vma_name(mmap_event->vma);
- if (name)
+ if (arch_vma_name(mmap_event->vma)) {
+ name = strncpy(tmp, arch_vma_name(mmap_event->vma),
+ sizeof(tmp));
goto got_name;
+ }
if (!vma->vm_mm) {
name = strncpy(tmp, "[vdso]", sizeof(tmp));
@@ -3147,8 +3241,16 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
mmap_event = (struct perf_mmap_event){
.vma = vma,
+ /* .file_name */
+ /* .file_size */
.event = {
- .header = { .type = PERF_EVENT_MMAP, },
+ .header = {
+ .type = PERF_EVENT_MMAP,
+ .misc = 0,
+ /* .size */
+ },
+ /* .pid */
+ /* .tid */
.start = vma->vm_start,
.len = vma->vm_end - vma->vm_start,
.pgoff = vma->vm_pgoff,
@@ -3159,49 +3261,6 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
}
/*
- * Log sample_period changes so that analyzing tools can re-normalize the
- * event flow.
- */
-
-struct freq_event {
- struct perf_event_header header;
- u64 time;
- u64 id;
- u64 period;
-};
-
-static void perf_log_period(struct perf_counter *counter, u64 period)
-{
- struct perf_output_handle handle;
- struct freq_event event;
- int ret;
-
- if (counter->hw.sample_period == period)
- return;
-
- if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
- return;
-
- event = (struct freq_event) {
- .header = {
- .type = PERF_EVENT_PERIOD,
- .misc = 0,
- .size = sizeof(event),
- },
- .time = sched_clock(),
- .id = counter->id,
- .period = period,
- };
-
- ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
- if (ret)
- return;
-
- perf_output_put(&handle, event);
- perf_output_end(&handle);
-}
-
-/*
* IRQ throttle logging
*/
@@ -3214,16 +3273,21 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
struct perf_event_header header;
u64 time;
u64 id;
+ u64 stream_id;
} throttle_event = {
.header = {
- .type = PERF_EVENT_THROTTLE + 1,
+ .type = PERF_EVENT_THROTTLE,
.misc = 0,
.size = sizeof(throttle_event),
},
- .time = sched_clock(),
- .id = counter->id,
+ .time = sched_clock(),
+ .id = primary_counter_id(counter),
+ .stream_id = counter->id,
};
+ if (enable)
+ throttle_event.header.type = PERF_EVENT_UNTHROTTLE;
+
ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
if (ret)
return;
@@ -3300,87 +3364,81 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
* Generic software counter infrastructure
*/
-static void perf_swcounter_update(struct perf_counter *counter)
+/*
+ * We directly increment counter->count and keep a second value in
+ * counter->hw.period_left to count intervals. This period counter
+ * is kept in the range [-sample_period, 0] so that we can use the
+ * sign as trigger.
+ */
+
+static u64 perf_swcounter_set_period(struct perf_counter *counter)
{
struct hw_perf_counter *hwc = &counter->hw;
- u64 prev, now;
- s64 delta;
+ u64 period = hwc->last_period;
+ u64 nr, offset;
+ s64 old, val;
+
+ hwc->last_period = hwc->sample_period;
again:
- prev = atomic64_read(&hwc->prev_count);
- now = atomic64_read(&hwc->count);
- if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
- goto again;
+ old = val = atomic64_read(&hwc->period_left);
+ if (val < 0)
+ return 0;
- delta = now - prev;
+ nr = div64_u64(period + val, period);
+ offset = nr * period;
+ val -= offset;
+ if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+ goto again;
- atomic64_add(delta, &counter->count);
- atomic64_sub(delta, &hwc->period_left);
+ return nr;
}
-static void perf_swcounter_set_period(struct perf_counter *counter)
+static void perf_swcounter_overflow(struct perf_counter *counter,
+ int nmi, struct perf_sample_data *data)
{
struct hw_perf_counter *hwc = &counter->hw;
- s64 left = atomic64_read(&hwc->period_left);
- s64 period = hwc->sample_period;
+ u64 overflow;
- if (unlikely(left <= -period)) {
- left = period;
- atomic64_set(&hwc->period_left, left);
- hwc->last_period = period;
- }
+ data->period = counter->hw.last_period;
+ overflow = perf_swcounter_set_period(counter);
- if (unlikely(left <= 0)) {
- left += period;
- atomic64_add(period, &hwc->period_left);
- hwc->last_period = period;
- }
+ if (hwc->interrupts == MAX_INTERRUPTS)
+ return;
- atomic64_set(&hwc->prev_count, -left);
- atomic64_set(&hwc->count, -left);
+ for (; overflow; overflow--) {
+ if (perf_counter_overflow(counter, nmi, data)) {
+ /*
+ * We inhibit the overflow from happening when
+ * hwc->interrupts == MAX_INTERRUPTS.
+ */
+ break;
+ }
+ }
}
-static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+static void perf_swcounter_unthrottle(struct perf_counter *counter)
{
- enum hrtimer_restart ret = HRTIMER_RESTART;
- struct perf_sample_data data;
- struct perf_counter *counter;
- u64 period;
-
- counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
- counter->pmu->read(counter);
-
- data.addr = 0;
- data.regs = get_irq_regs();
/*
- * In case we exclude kernel IPs or are somehow not in interrupt
- * context, provide the next best thing, the user IP.
+ * Nothing to do, we already reset hwc->interrupts.
*/
- if ((counter->attr.exclude_kernel || !data.regs) &&
- !counter->attr.exclude_user)
- data.regs = task_pt_regs(current);
+}
- if (data.regs) {
- if (perf_counter_overflow(counter, 0, &data))
- ret = HRTIMER_NORESTART;
- }
+static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
+ int nmi, struct perf_sample_data *data)
+{
+ struct hw_perf_counter *hwc = &counter->hw;
- period = max_t(u64, 10000, counter->hw.sample_period);
- hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+ atomic64_add(nr, &counter->count);
- return ret;
-}
+ if (!hwc->sample_period)
+ return;
-static void perf_swcounter_overflow(struct perf_counter *counter,
- int nmi, struct perf_sample_data *data)
-{
- data->period = counter->hw.last_period;
+ if (!data->regs)
+ return;
- perf_swcounter_update(counter);
- perf_swcounter_set_period(counter);
- if (perf_counter_overflow(counter, nmi, data))
- /* soft-disable the counter */
- ;
+ if (!atomic64_add_negative(nr, &hwc->period_left))
+ perf_swcounter_overflow(counter, nmi, data);
}
static int perf_swcounter_is_counting(struct perf_counter *counter)
@@ -3444,15 +3502,6 @@ static int perf_swcounter_match(struct perf_counter *counter,
return 1;
}
-static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
- int nmi, struct perf_sample_data *data)
-{
- int neg = atomic64_add_negative(nr, &counter->hw.count);
-
- if (counter->hw.sample_period && !neg && data->regs)
- perf_swcounter_overflow(counter, nmi, data);
-}
-
static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
enum perf_type_id type,
u32 event, u64 nr, int nmi,
@@ -3531,27 +3580,66 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
static void perf_swcounter_read(struct perf_counter *counter)
{
- perf_swcounter_update(counter);
}
static int perf_swcounter_enable(struct perf_counter *counter)
{
- perf_swcounter_set_period(counter);
+ struct hw_perf_counter *hwc = &counter->hw;
+
+ if (hwc->sample_period) {
+ hwc->last_period = hwc->sample_period;
+ perf_swcounter_set_period(counter);
+ }
return 0;
}
static void perf_swcounter_disable(struct perf_counter *counter)
{
- perf_swcounter_update(counter);
}
static const struct pmu perf_ops_generic = {
.enable = perf_swcounter_enable,
.disable = perf_swcounter_disable,
.read = perf_swcounter_read,
+ .unthrottle = perf_swcounter_unthrottle,
};
/*
+ * hrtimer based swcounter callback
+ */
+
+static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+{
+ enum hrtimer_restart ret = HRTIMER_RESTART;
+ struct perf_sample_data data;
+ struct perf_counter *counter;
+ u64 period;
+
+ counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
+ counter->pmu->read(counter);
+
+ data.addr = 0;
+ data.regs = get_irq_regs();
+ /*
+ * In case we exclude kernel IPs or are somehow not in interrupt
+ * context, provide the next best thing, the user IP.
+ */
+ if ((counter->attr.exclude_kernel || !data.regs) &&
+ !counter->attr.exclude_user)
+ data.regs = task_pt_regs(current);
+
+ if (data.regs) {
+ if (perf_counter_overflow(counter, 0, &data))
+ ret = HRTIMER_NORESTART;
+ }
+
+ period = max_t(u64, 10000, counter->hw.sample_period);
+ hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+ return ret;
+}
+
+/*
* Software counter: cpu wall time clock
*/
@@ -3668,17 +3756,24 @@ static const struct pmu perf_ops_task_clock = {
};
#ifdef CONFIG_EVENT_PROFILE
-void perf_tpcounter_event(int event_id)
+void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
+ int entry_size)
{
+ struct perf_raw_record raw = {
+ .size = entry_size,
+ .data = record,
+ };
+
struct perf_sample_data data = {
- .regs = get_irq_regs();
- .addr = 0,
+ .regs = get_irq_regs(),
+ .addr = addr,
+ .raw = &raw,
};
if (!data.regs)
data.regs = task_pt_regs(current);
- do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data);
+ do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
}
EXPORT_SYMBOL_GPL(perf_tpcounter_event);
@@ -3687,16 +3782,20 @@ extern void ftrace_profile_disable(int);
static void tp_perf_counter_destroy(struct perf_counter *counter)
{
- ftrace_profile_disable(perf_event_id(&counter->attr));
+ ftrace_profile_disable(counter->attr.config);
}
static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
{
- int event_id = perf_event_id(&counter->attr);
- int ret;
+ /*
+ * Raw tracepoint data is a severe data leak, only allow root to
+ * have these.
+ */
+ if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
- ret = ftrace_profile_enable(event_id);
- if (ret)
+ if (ftrace_profile_enable(counter->attr.config))
return NULL;
counter->destroy = tp_perf_counter_destroy;
@@ -3874,6 +3973,8 @@ done:
atomic_inc(&nr_mmap_counters);
if (counter->attr.comm)
atomic_inc(&nr_comm_counters);
+ if (counter->attr.task)
+ atomic_inc(&nr_task_counters);
}
return counter;
@@ -4235,8 +4336,10 @@ void perf_counter_exit_task(struct task_struct *child)
struct perf_counter_context *child_ctx;
unsigned long flags;
- if (likely(!child->perf_counter_ctxp))
+ if (likely(!child->perf_counter_ctxp)) {
+ perf_counter_task(child, NULL, 0);
return;
+ }
local_irq_save(flags);
/*
@@ -4255,17 +4358,20 @@ void perf_counter_exit_task(struct task_struct *child)
*/
spin_lock(&child_ctx->lock);
child->perf_counter_ctxp = NULL;
- if (child_ctx->parent_ctx) {
- /*
- * This context is a clone; unclone it so it can't get
- * swapped to another process while we're removing all
- * the counters from it.
- */
- put_ctx(child_ctx->parent_ctx);
- child_ctx->parent_ctx = NULL;
- }
- spin_unlock(&child_ctx->lock);
- local_irq_restore(flags);
+ /*
+ * If this context is a clone; unclone it so it can't get
+ * swapped to another process while we're removing all
+ * the counters from it.
+ */
+ unclone_ctx(child_ctx);
+ spin_unlock_irqrestore(&child_ctx->lock, flags);
+
+ /*
+ * Report the task dead after unscheduling the counters so that we
+ * won't get any samples after PERF_EVENT_EXIT. We can however still
+ * get a few PERF_EVENT_READ events.
+ */
+ perf_counter_task(child, child_ctx, 0);
/*
* We can recurse on the same lock type through: