Diffstat (limited to 'security/landlock/tsync.c')
-rw-r--r--  security/landlock/tsync.c  204
1 file changed, 131 insertions(+), 73 deletions(-)
diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c
index de01aa899751..c5730bbd9ed3 100644
--- a/security/landlock/tsync.c
+++ b/security/landlock/tsync.c
@@ -85,12 +85,14 @@ static void restrict_one_thread(struct tsync_shared_context *ctx)
/*
* Switch out old_cred with new_cred, if possible.
*
- * In the common case, where all threads initially point to the same
- * struct cred, this optimization avoids creating separate redundant
- * credentials objects for each, which would all have the same contents.
+ * In the common case, where all threads initially point to the
+ * same struct cred, this optimization avoids creating separate
+ * redundant credentials objects for each, which would all have
+ * the same contents.
*
- * Note: We are intentionally dropping the const qualifier here, because
- * it is required by commit_creds() and abort_creds().
+ * Note: We are intentionally dropping the const qualifier
+ * here, because it is required by commit_creds() and
+ * abort_creds().
*/
cred = (struct cred *)get_cred(ctx->new_cred);
} else {
@@ -101,8 +103,8 @@ static void restrict_one_thread(struct tsync_shared_context *ctx)
atomic_set(&ctx->preparation_error, -ENOMEM);
/*
- * Even on error, we need to adhere to the protocol and coordinate
- * with concurrently running invocations.
+ * Even on error, we need to adhere to the protocol and
+ * coordinate with concurrently running invocations.
*/
if (atomic_dec_return(&ctx->num_preparing) == 0)
complete_all(&ctx->all_prepared);
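Both branches above funnel into the same countdown-barrier idiom: every participating thread decrements num_preparing exactly once, whether its preparation succeeded or hit -ENOMEM, and whichever thread takes the counter to zero releases everyone blocked on the completion. Here is a minimal sketch of the generic idiom using the kernel's atomic and completion APIs; the struct below is an illustration, not the patch's tsync_shared_context, and the waiters here block on all_prepared directly rather than on a separate ready_to_commit step:

#include <linux/atomic.h>
#include <linux/completion.h>

/* Illustrative stand-in for the relevant tsync_shared_context fields. */
struct prepare_barrier {
        atomic_t num_preparing;         /* threads still in their prepare step */
        struct completion all_prepared; /* fires once the counter reaches 0 */
};

/* Called by every participant, on both the success and the error path. */
static void prepare_barrier_arrive_and_wait(struct prepare_barrier *b)
{
        /* The thread that decrements the counter to 0 wakes all waiters. */
        if (atomic_dec_return(&b->num_preparing) == 0)
                complete_all(&b->all_prepared);

        wait_for_completion(&b->all_prepared);
}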
@@ -135,9 +137,9 @@ static void restrict_one_thread(struct tsync_shared_context *ctx)
}
/*
- * Make sure that all sibling tasks fulfill the no_new_privs prerequisite.
- * (This is in line with Seccomp's SECCOMP_FILTER_FLAG_TSYNC logic in
- * kernel/seccomp.c)
+ * Make sure that all sibling tasks fulfill the no_new_privs
+ * prerequisite. (This is in line with Seccomp's
+ * SECCOMP_FILTER_FLAG_TSYNC logic in kernel/seccomp.c)
*/
if (ctx->set_no_new_privs)
task_set_no_new_privs(current);
@@ -183,10 +185,8 @@ struct tsync_works {
* capacity. This can legitimately happen if new threads get started after we
* grew the capacity.
*
- * Returns:
- * A pointer to the preallocated context struct, with task filled in.
- *
- * NULL, if we ran out of preallocated context structs.
+ * Return: A pointer to the preallocated context struct with task filled in, or
+ * NULL if preallocated context structs ran out.
*/
static struct tsync_work *tsync_works_provide(struct tsync_works *s,
struct task_struct *task)
@@ -203,17 +203,49 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s,
return ctx;
}
+/**
+ * tsync_works_trim - Put the last tsync_work element
+ *
+ * @s: TSYNC works to trim.
+ *
+ * Put the last task and decrement the size of @s.
+ *
+ * This helper does not cancel a running task_work, but just resets the last
+ * element to zero.
+ */
+static void tsync_works_trim(struct tsync_works *s)
+{
+ struct tsync_work *ctx;
+
+ if (WARN_ON_ONCE(s->size <= 0))
+ return;
+
+ ctx = s->works[s->size - 1];
+
+ /*
+ * For consistency, remove the task from ctx so that it does not look
+ * like we handed it a task_work.
+ */
+ put_task_struct(ctx->task);
+ *ctx = (typeof(*ctx)){};
+
+ /*
+ * Cancel the tsync_works_provide() change to recycle the reserved
+ * memory for the next thread, if any. This also ensures that
+ * cancel_tsync_works() and tsync_works_release() do not see any NULL
+ * task pointers.
+ */
+ s->size--;
+}
+
/*
* tsync_works_grow_by - preallocates space for n more contexts in s
*
* On a successful return, the subsequent n calls to tsync_works_provide() are
* guaranteed to succeed. (size + n <= capacity)
*
- * Returns:
- * -ENOMEM if the (re)allocation fails
-
- * 0 if the allocation succeeds, partially succeeds, or no reallocation
- * was needed
+ * Return: 0 if sufficient space for n more elements could be provided, -ENOMEM
+ * on allocation errors, -EOVERFLOW in case of integer overflow.
*/
static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
{
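The hunk above shows only the documentation change to tsync_works_grow_by(). Taken together, grow_by(), provide() and trim() form a reserve-then-hand-out pool: capacity is grown up front, where allocation may fail; later tsync_works_provide() calls cannot fail until the reservation is exhausted; and tsync_works_trim() puts the most recent element back so its slot can be recycled. A minimal sketch of that life cycle on a simplified pool of integers (all names and the layout below are illustrative, not the patch's):

#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/overflow.h>
#include <linux/slab.h>

struct demo_pool {
        int *slots;
        size_t size;            /* slots handed out so far */
        size_t capacity;        /* slots allocated */
};

/* Reserve room for n more elements; mirrors the documented return codes. */
static int demo_pool_grow_by(struct demo_pool *p, size_t n, gfp_t flags)
{
        size_t cap;
        int *slots;

        if (check_add_overflow(p->size, n, &cap))
                return -EOVERFLOW;
        if (cap <= p->capacity)
                return 0;       /* Enough room already; nothing to do. */

        slots = krealloc_array(p->slots, cap, sizeof(*slots), flags);
        if (!slots)
                return -ENOMEM;
        p->slots = slots;
        p->capacity = cap;
        return 0;
}

/* Cannot fail while reserved capacity remains; returns NULL otherwise. */
static int *demo_pool_provide(struct demo_pool *p)
{
        if (p->size >= p->capacity)
                return NULL;
        return &p->slots[p->size++];
}

/* Undo the last provide() so the slot can be recycled. */
static void demo_pool_trim(struct demo_pool *p)
{
        if (WARN_ON_ONCE(p->size == 0))
                return;
        p->slots[--p->size] = 0;
}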
@@ -256,13 +288,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
* tsync_works_contains_task - checks for presence of task in s
*/
static bool tsync_works_contains_task(const struct tsync_works *s,
- struct task_struct *task)
+ const struct task_struct *task)
{
size_t i;
for (i = 0; i < s->size; i++)
if (s->works[i]->task == task)
return true;
+
return false;
}
@@ -276,7 +309,7 @@ static void tsync_works_release(struct tsync_works *s)
size_t i;
for (i = 0; i < s->size; i++) {
- if (!s->works[i]->task)
+ if (WARN_ON_ONCE(!s->works[i]->task))
continue;
put_task_struct(s->works[i]->task);
@@ -284,6 +317,7 @@ static void tsync_works_release(struct tsync_works *s)
for (i = 0; i < s->capacity; i++)
kfree(s->works[i]);
+
kfree(s->works);
s->works = NULL;
s->size = 0;
@@ -295,7 +329,7 @@ static void tsync_works_release(struct tsync_works *s)
*/
static size_t count_additional_threads(const struct tsync_works *works)
{
- struct task_struct *thread, *caller;
+ const struct task_struct *caller, *thread;
size_t n = 0;
caller = current;
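The diff elides the body of count_additional_threads(). Under the RCU read lock it walks the caller's thread group and counts the siblings that are not yet tracked in works. A sketch of that walk; the skip conditions and loop shape below are inferred, not shown by the hunk, and struct tsync_works plus tsync_works_contains_task() are the patch's own definitions:

#include <linux/rcupdate.h>
#include <linux/sched/signal.h> /* for_each_thread() */

static size_t demo_count_additional_threads(const struct tsync_works *works)
{
        const struct task_struct *caller = current;
        const struct task_struct *thread;
        size_t n = 0;

        rcu_read_lock();
        for_each_thread(caller, thread) {
                /* The caller restricts itself directly, without task_work. */
                if (thread == caller)
                        continue;
                /* Threads already scheduled in an earlier round don't count. */
                if (tsync_works_contains_task(works, thread))
                        continue;
                n++;
        }
        rcu_read_unlock();
        return n;
}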
@@ -327,14 +361,15 @@ static size_t count_additional_threads(const struct tsync_works *works)
* For each added task_work, atomically increments shared_ctx->num_preparing and
* shared_ctx->num_unfinished.
*
- * Returns:
- * true, if at least one eligible sibling thread was found
+ * Return: True if at least one eligible sibling thread was found, false
+ * otherwise.
*/
static bool schedule_task_work(struct tsync_works *works,
struct tsync_shared_context *shared_ctx)
{
int err;
- struct task_struct *thread, *caller;
+ const struct task_struct *caller;
+ struct task_struct *thread;
struct tsync_work *ctx;
bool found_more_threads = false;
@@ -356,17 +391,17 @@ static bool schedule_task_work(struct tsync_works *works,
continue;
/*
- * We found a sibling thread that is not doing its task_work yet, and
- * which might spawn new threads before our task work runs, so we need
- * at least one more round in the outer loop.
+ * We found a sibling thread that is not doing its task_work
+ * yet, and which might spawn new threads before our task work
+ * runs, so we need at least one more round in the outer loop.
*/
found_more_threads = true;
ctx = tsync_works_provide(works, thread);
if (!ctx) {
/*
- * We ran out of preallocated contexts -- we need to try again with
- * this thread at a later time!
+ * We ran out of preallocated contexts -- we need to
+ * try again with this thread at a later time!
* found_more_threads is already true at this point.
*/
break;
@@ -379,16 +414,14 @@ static bool schedule_task_work(struct tsync_works *works,
init_task_work(&ctx->work, restrict_one_thread_callback);
err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
- if (err) {
+ if (unlikely(err)) {
/*
- * task_work_add() only fails if the task is about to exit. We
- * checked that earlier, but it can happen as a race. Resume
- * without setting an error, as the task is probably gone in the
- * next loop iteration. For consistency, remove the task from ctx
- * so that it does not look like we handed it a task_work.
+ * task_work_add() only fails if the task is about to
+ * exit. We checked that earlier, but it can happen as
+ * a race. Resume without setting an error, as the
+ * task is probably gone in the next loop iteration.
*/
- put_task_struct(ctx->task);
- ctx->task = NULL;
+ tsync_works_trim(works);
atomic_dec(&shared_ctx->num_preparing);
atomic_dec(&shared_ctx->num_unfinished);
@@ -406,12 +439,15 @@ static bool schedule_task_work(struct tsync_works *works,
* shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two
* completions if needed, as if the task was never scheduled.
*/
-static void cancel_tsync_works(struct tsync_works *works,
+static void cancel_tsync_works(const struct tsync_works *works,
struct tsync_shared_context *shared_ctx)
{
- int i;
+ size_t i;
for (i = 0; i < works->size; i++) {
+ if (WARN_ON_ONCE(!works->works[i]->task))
+ continue;
+
if (!task_work_cancel(works->works[i]->task,
&works->works[i]->work))
continue;
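The rule encoded above deserves a spelled-out form: task_work_cancel() succeeds only for a work item that is still queued, and only then may the scheduling-time accounting be rolled back; a work that task_work_run() has already dequeued will run to completion and report through the barriers itself. A sketch of that pattern, assuming the bool-returning task_work_cancel() variant the patch calls; the helper and its parameter list are illustrative:

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/task_work.h>

static void demo_cancel_one(struct task_struct *task,
                            struct callback_head *work,
                            atomic_t *num_preparing,
                            struct completion *all_prepared,
                            atomic_t *num_unfinished,
                            struct completion *all_finished)
{
        if (!task_work_cancel(task, work))
                return; /* Already dequeued; the work reports itself. */

        /* Undo the two increments done when the work was scheduled. */
        if (atomic_dec_return(num_preparing) == 0)
                complete_all(all_prepared);
        if (atomic_dec_return(num_unfinished) == 0)
                complete_all(all_finished);
}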
@@ -448,6 +484,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
shared_ctx.set_no_new_privs = task_no_new_privs(current);
/*
+ * Serialize concurrent TSYNC operations to prevent deadlocks when
+ * multiple threads call landlock_restrict_self() simultaneously.
+ * If the lock is already held, we gracefully yield by restarting the
+ * syscall. This allows the current thread to process pending
+ * task_works before retrying.
+ */
+ if (!down_write_trylock(&current->signal->exec_update_lock))
+ return restart_syscall();
+
+ /*
* We schedule a pseudo-signal task_work for each of the calling task's
* sibling threads. In the task work, each thread:
*
@@ -464,24 +510,25 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
* After this barrier is reached, it's safe to read
* shared_ctx.preparation_error.
*
- * 4) reads shared_ctx.preparation_error and then either does commit_creds()
- * or abort_creds().
+ * 4) reads shared_ctx.preparation_error and then either does
+ * commit_creds() or abort_creds().
*
* 5) signals that it's done altogether (barrier synchronization
* "all_finished")
*
- * Unlike seccomp, which modifies sibling tasks directly, we do not need to
- * acquire the cred_guard_mutex and sighand->siglock:
+ * Unlike seccomp, which modifies sibling tasks directly, we do not
+ * need to acquire the cred_guard_mutex and sighand->siglock:
*
- * - As in our case, all threads are themselves exchanging their own struct
- * cred through the credentials API, no locks are needed for that.
+ * - Since, in our case, all threads are themselves exchanging their
+ * own struct cred through the credentials API, no locks are needed
+ * for that.
* - Our for_each_thread() loops are protected by RCU.
- * - We do not acquire a lock to keep the list of sibling threads stable
- * between our for_each_thread loops. If the list of available sibling
- * threads changes between these for_each_thread loops, we make up for
- * that by continuing to look for threads until they are all discovered
- * and have entered their task_work, where they are unable to spawn new
- * threads.
+ * - We do not acquire a lock to keep the list of sibling threads
+ * stable between our for_each_thread loops. If the list of
+ * available sibling threads changes between these for_each_thread
+ * loops, we make up for that by continuing to look for threads until
+ * they are all discovered and have entered their task_work, where
+ * they are unable to spawn new threads.
*/
do {
/* In RCU read-lock, count the threads we need. */
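For orientation before the main loop's changes, the five protocol steps listed in the comment above can be sketched as one task_work body. This is an illustration, not the patch's actual restrict_one_thread(): the struct below lists only the fields visible in the diff, and the credential handling is reduced to a bare prepare/commit/abort skeleton:

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/cred.h>

struct tsync_shared_context {   /* illustrative subset of the patch's fields */
        atomic_t preparation_error;
        atomic_t num_preparing;
        atomic_t num_unfinished;
        struct completion all_prepared;
        struct completion ready_to_commit;
        struct completion all_finished;
};

/* Runs in each sibling thread's task_work context. */
static void demo_restrict_one_thread(struct tsync_shared_context *ctx)
{
        struct cred *cred = prepare_creds();    /* step 1: prepare */

        if (!cred)      /* Report the failure to all participants. */
                atomic_set(&ctx->preparation_error, -ENOMEM);

        /* Step 2: barrier synchronization "all_prepared". */
        if (atomic_dec_return(&ctx->num_preparing) == 0)
                complete_all(&ctx->all_prepared);

        /* Step 3: wait until the caller saw every thread prepared. */
        wait_for_completion(&ctx->ready_to_commit);

        /* Step 4: commit or abort, depending on the shared error state. */
        if (cred) {
                if (atomic_read(&ctx->preparation_error))
                        abort_creds(cred);
                else
                        commit_creds(cred);
        }

        /* Step 5: barrier synchronization "all_finished". */
        if (atomic_dec_return(&ctx->num_unfinished) == 0)
                complete_all(&ctx->all_finished);
}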
@@ -498,64 +545,75 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
}
/*
- * The "all_prepared" barrier is used locally to the loop body, this use
- * of for_each_thread(). We can reset it on each loop iteration because
- * all previous loop iterations are done with it already.
+ * The "all_prepared" barrier is used locally to the loop body,
+ * this use of for_each_thread(). We can reset it on each loop
+ * iteration because all previous loop iterations are done with
+ * it already.
*
- * num_preparing is initialized to 1 so that the counter can not go to 0
- * and mark the completion as done before all task works are registered.
- * We decrement it at the end of the loop body.
+ * num_preparing is initialized to 1 so that the counter cannot
+ * go to 0 and mark the completion as done before all task works
+ * are registered. We decrement it at the end of the loop body.
*/
atomic_set(&shared_ctx.num_preparing, 1);
reinit_completion(&shared_ctx.all_prepared);
/*
- * In RCU read-lock, schedule task work on newly discovered sibling
- * tasks.
+ * In RCU read-lock, schedule task work on newly discovered
+ * sibling tasks.
*/
found_more_threads = schedule_task_work(&works, &shared_ctx);
/*
- * Decrement num_preparing for current, to undo that we initialized it
- * to 1 a few lines above.
+ * Decrement num_preparing for current, to undo that we
+ * initialized it to 1 a few lines above.
*/
if (atomic_dec_return(&shared_ctx.num_preparing) > 0) {
if (wait_for_completion_interruptible(
&shared_ctx.all_prepared)) {
- /* In case of interruption, we need to retry the system call. */
+ /*
+ * In case of interruption, we need to retry
+ * the system call.
+ */
atomic_set(&shared_ctx.preparation_error,
-ERESTARTNOINTR);
/*
- * Cancel task works for tasks that did not start running yet,
- * and decrement all_prepared and num_unfinished accordingly.
+ * Opportunistic improvement: try to cancel task
+ * works for tasks that did not start running
+ * yet. We do not have a guarantee that it
+ * cancels any of the enqueued task works
+ * because task_work_run() might already have
+ * dequeued them.
*/
cancel_tsync_works(&works, &shared_ctx);
/*
- * The remaining task works have started running, so waiting for
- * their completion will finish.
+ * Break the loop with error. The cleanup code
+ * after the loop unblocks the remaining
+ * task_works.
*/
- wait_for_completion(&shared_ctx.all_prepared);
+ break;
}
}
} while (found_more_threads &&
!atomic_read(&shared_ctx.preparation_error));
/*
- * We now have all sibling threads blocking and in "prepared" state in the
- * task work. Ask all threads to commit.
+ * We now have either (a) all sibling threads blocking and in "prepared"
+ * state in the task work, or (b) the preparation error is set. Ask all
+ * threads to commit (or abort).
*/
complete_all(&shared_ctx.ready_to_commit);
/*
- * Decrement num_unfinished for current, to undo that we initialized it to 1
- * at the beginning.
+ * Decrement num_unfinished for current, to undo that we initialized it
+ * to 1 at the beginning.
*/
if (atomic_dec_return(&shared_ctx.num_unfinished) > 0)
wait_for_completion(&shared_ctx.all_finished);
tsync_works_release(&works);
-
+ up_write(&current->signal->exec_update_lock);
return atomic_read(&shared_ctx.preparation_error);
}