diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/core.c | 64 | ||||
-rw-r--r-- | kernel/events/hw_breakpoint.c | 11 | ||||
-rw-r--r-- | kernel/workqueue.c | 110 |
3 files changed, 133 insertions, 52 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index b7935fcec7d9..7fee567153f0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1253,7 +1253,7 @@ retry: /* * Cross CPU call to disable a performance event */ -static int __perf_event_disable(void *info) +int __perf_event_disable(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; @@ -2935,12 +2935,12 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); /* * Called when the last reference to the file is gone. */ -static int perf_release(struct inode *inode, struct file *file) +static void put_event(struct perf_event *event) { - struct perf_event *event = file->private_data; struct task_struct *owner; - file->private_data = NULL; + if (!atomic_long_dec_and_test(&event->refcount)) + return; rcu_read_lock(); owner = ACCESS_ONCE(event->owner); @@ -2975,7 +2975,13 @@ static int perf_release(struct inode *inode, struct file *file) put_task_struct(owner); } - return perf_event_release_kernel(event); + perf_event_release_kernel(event); +} + +static int perf_release(struct inode *inode, struct file *file) +{ + put_event(file->private_data); + return 0; } u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) @@ -3227,7 +3233,7 @@ unlock: static const struct file_operations perf_fops; -static struct perf_event *perf_fget_light(int fd, int *fput_needed) +static struct file *perf_fget_light(int fd, int *fput_needed) { struct file *file; @@ -3241,7 +3247,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed) return ERR_PTR(-EBADF); } - return file->private_data; + return file; } static int perf_event_set_output(struct perf_event *event, @@ -3273,19 +3279,21 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_EVENT_IOC_SET_OUTPUT: { + struct file *output_file = NULL; struct perf_event *output_event = NULL; int fput_needed = 0; int ret; if (arg != -1) { - output_event = perf_fget_light(arg, &fput_needed); - if (IS_ERR(output_event)) - return PTR_ERR(output_event); + output_file = perf_fget_light(arg, &fput_needed); + if (IS_ERR(output_file)) + return PTR_ERR(output_file); + output_event = output_file->private_data; } ret = perf_event_set_output(event, output_event); if (output_event) - fput_light(output_event->filp, fput_needed); + fput_light(output_file, fput_needed); return ret; } @@ -5950,6 +5958,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, mutex_init(&event->mmap_mutex); + atomic_long_set(&event->refcount, 1); event->cpu = cpu; event->attr = *attr; event->group_leader = group_leader; @@ -6260,12 +6269,12 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; if (group_fd != -1) { - group_leader = perf_fget_light(group_fd, &fput_needed); - if (IS_ERR(group_leader)) { - err = PTR_ERR(group_leader); + group_file = perf_fget_light(group_fd, &fput_needed); + if (IS_ERR(group_file)) { + err = PTR_ERR(group_file); goto err_fd; } - group_file = group_leader->filp; + group_leader = group_file->private_data; if (flags & PERF_FLAG_FD_OUTPUT) output_event = group_leader; if (flags & PERF_FLAG_FD_NO_GROUP) @@ -6402,7 +6411,6 @@ SYSCALL_DEFINE5(perf_event_open, put_ctx(gctx); } - event->filp = event_file; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); @@ -6496,7 +6504,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, goto err_free; } - event->filp = NULL; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); perf_install_in_context(ctx, event, cpu); @@ -6578,7 +6585,7 @@ static void sync_child_event(struct perf_event *child_event, * Release the parent event, if this was the last * reference to it. */ - fput(parent_event->filp); + put_event(parent_event); } static void @@ -6654,9 +6661,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) * * __perf_event_exit_task() * sync_child_event() - * fput(parent_event->filp) - * perf_release() - * mutex_lock(&ctx->mutex) + * put_event() + * mutex_lock(&ctx->mutex) * * But since its the parent context it won't be the same instance. */ @@ -6724,7 +6730,7 @@ static void perf_free_event(struct perf_event *event, list_del_init(&event->child_list); mutex_unlock(&parent->child_mutex); - fput(parent->filp); + put_event(parent); perf_group_detach(event); list_del_event(event, ctx); @@ -6804,6 +6810,12 @@ inherit_event(struct perf_event *parent_event, NULL, NULL); if (IS_ERR(child_event)) return child_event; + + if (!atomic_long_inc_not_zero(&parent_event->refcount)) { + free_event(child_event); + return NULL; + } + get_ctx(child_ctx); /* @@ -6845,14 +6857,6 @@ inherit_event(struct perf_event *parent_event, raw_spin_unlock_irqrestore(&child_ctx->lock, flags); /* - * Get a reference to the parent filp - we will fput it - * when the child event exits. This is safe to do because - * we are in the parent and we know that the filp still - * exists and has a nonzero count: - */ - atomic_long_inc(&parent_event->filp->f_count); - - /* * Link this into the parent event's child list */ WARN_ON_ONCE(parent_event->ctx->parent_ctx); diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index bb38c4d3ee12..9a7b487c6fe2 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -453,7 +453,16 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att int old_type = bp->attr.bp_type; int err = 0; - perf_event_disable(bp); + /* + * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it + * will not be possible to raise IPIs that invoke __perf_event_disable. + * So call the function directly after making sure we are targeting the + * current task. + */ + if (irqs_disabled() && bp->ctx && bp->ctx->task == current) + __perf_event_disable(bp); + else + perf_event_disable(bp); bp->attr.bp_addr = attr->bp_addr; bp->attr.bp_type = attr->bp_type; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 692d97628a10..1e1373bcb3e3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -66,6 +66,7 @@ enum { /* pool flags */ POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ + POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ /* worker flags */ WORKER_STARTED = 1 << 0, /* started */ @@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool) /* Do we have too many workers and should some go away? */ static bool too_many_workers(struct worker_pool *pool) { - bool managing = mutex_is_locked(&pool->manager_mutex); + bool managing = pool->flags & POOL_MANAGING_WORKERS; int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_busy = pool->nr_workers - nr_idle; @@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker) /* we did our part, wait for rebind_workers() to finish up */ wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); + + /* + * rebind_workers() shouldn't finish until all workers passed the + * above WORKER_REBIND wait. Tell it when done. + */ + spin_lock_irq(&worker->pool->gcwq->lock); + if (!--worker->idle_rebind->cnt) + complete(&worker->idle_rebind->done); + spin_unlock_irq(&worker->pool->gcwq->lock); } /* @@ -1396,12 +1406,15 @@ retry: /* set REBIND and kick idle ones, we'll wait for these later */ for_each_worker_pool(pool, gcwq) { list_for_each_entry(worker, &pool->idle_list, entry) { + unsigned long worker_flags = worker->flags; + if (worker->flags & WORKER_REBIND) continue; - /* morph UNBOUND to REBIND */ - worker->flags &= ~WORKER_UNBOUND; - worker->flags |= WORKER_REBIND; + /* morph UNBOUND to REBIND atomically */ + worker_flags &= ~WORKER_UNBOUND; + worker_flags |= WORKER_REBIND; + ACCESS_ONCE(worker->flags) = worker_flags; idle_rebind.cnt++; worker->idle_rebind = &idle_rebind; @@ -1419,25 +1432,15 @@ retry: goto retry; } - /* - * All idle workers are rebound and waiting for %WORKER_REBIND to - * be cleared inside idle_worker_rebind(). Clear and release. - * Clearing %WORKER_REBIND from this foreign context is safe - * because these workers are still guaranteed to be idle. - */ - for_each_worker_pool(pool, gcwq) - list_for_each_entry(worker, &pool->idle_list, entry) - worker->flags &= ~WORKER_REBIND; - - wake_up_all(&gcwq->rebind_hold); - - /* rebind busy workers */ + /* all idle workers are rebound, rebind busy workers */ for_each_busy_worker(worker, i, pos, gcwq) { struct work_struct *rebind_work = &worker->rebind_work; + unsigned long worker_flags = worker->flags; - /* morph UNBOUND to REBIND */ - worker->flags &= ~WORKER_UNBOUND; - worker->flags |= WORKER_REBIND; + /* morph UNBOUND to REBIND atomically */ + worker_flags &= ~WORKER_UNBOUND; + worker_flags |= WORKER_REBIND; + ACCESS_ONCE(worker->flags) = worker_flags; if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(rebind_work))) @@ -1449,6 +1452,34 @@ retry: worker->scheduled.next, work_color_to_flags(WORK_NO_COLOR)); } + + /* + * All idle workers are rebound and waiting for %WORKER_REBIND to + * be cleared inside idle_worker_rebind(). Clear and release. + * Clearing %WORKER_REBIND from this foreign context is safe + * because these workers are still guaranteed to be idle. + * + * We need to make sure all idle workers passed WORKER_REBIND wait + * in idle_worker_rebind() before returning; otherwise, workers can + * get stuck at the wait if hotplug cycle repeats. + */ + idle_rebind.cnt = 1; + INIT_COMPLETION(idle_rebind.done); + + for_each_worker_pool(pool, gcwq) { + list_for_each_entry(worker, &pool->idle_list, entry) { + worker->flags &= ~WORKER_REBIND; + idle_rebind.cnt++; + } + } + + wake_up_all(&gcwq->rebind_hold); + + if (--idle_rebind.cnt) { + spin_unlock_irq(&gcwq->lock); + wait_for_completion(&idle_rebind.done); + spin_lock_irq(&gcwq->lock); + } } static struct worker *alloc_worker(void) @@ -1794,9 +1825,45 @@ static bool manage_workers(struct worker *worker) struct worker_pool *pool = worker->pool; bool ret = false; - if (!mutex_trylock(&pool->manager_mutex)) + if (pool->flags & POOL_MANAGING_WORKERS) return ret; + pool->flags |= POOL_MANAGING_WORKERS; + + /* + * To simplify both worker management and CPU hotplug, hold off + * management while hotplug is in progress. CPU hotplug path can't + * grab %POOL_MANAGING_WORKERS to achieve this because that can + * lead to idle worker depletion (all become busy thinking someone + * else is managing) which in turn can result in deadlock under + * extreme circumstances. Use @pool->manager_mutex to synchronize + * manager against CPU hotplug. + * + * manager_mutex would always be free unless CPU hotplug is in + * progress. trylock first without dropping @gcwq->lock. + */ + if (unlikely(!mutex_trylock(&pool->manager_mutex))) { + spin_unlock_irq(&pool->gcwq->lock); + mutex_lock(&pool->manager_mutex); + /* + * CPU hotplug could have happened while we were waiting + * for manager_mutex. Hotplug itself can't handle us + * because manager isn't either on idle or busy list, and + * @gcwq's state and ours could have deviated. + * + * As hotplug is now excluded via manager_mutex, we can + * simply try to bind. It will succeed or fail depending + * on @gcwq's current state. Try it and adjust + * %WORKER_UNBOUND accordingly. + */ + if (worker_maybe_bind_and_lock(worker)) + worker->flags &= ~WORKER_UNBOUND; + else + worker->flags |= WORKER_UNBOUND; + + ret = true; + } + pool->flags &= ~POOL_MANAGE_WORKERS; /* @@ -1806,6 +1873,7 @@ static bool manage_workers(struct worker *worker) ret |= maybe_destroy_workers(pool); ret |= maybe_create_worker(pool); + pool->flags &= ~POOL_MANAGING_WORKERS; mutex_unlock(&pool->manager_mutex); return ret; } |