summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorVignesh Raghavendra <vigneshr@ti.com>2023-01-28 13:58:28 +0530
committerVignesh Raghavendra <vigneshr@ti.com>2023-01-28 13:58:37 +0530
commit0e60f588dbe43d7b2353e75f943e75e0cc5bb59c (patch)
treeab54e92e77ec08fba74cb45ab838e1d55a99e2cc /include
parentd4b479823b2fc569ff0219ea72c940079df404b5 (diff)
parent0fe4548663f7dc2c3b549ef54ce9bb6d40a235be (diff)
Merge tag 'v5.10.162' of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux into ti-linux-5.10.y-cicd
This is the 5.10.162 stable release * tag 'v5.10.162' of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux: (198 commits) Linux 5.10.162 io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups eventfd: provide a eventfd_signal_mask() helper eventpoll: add EPOLL_URING_WAKE poll wakeup flag Revert "proc: don't allow async path resolution of /proc/self components" Revert "proc: don't allow async path resolution of /proc/thread-self components" net: remove cmsg restriction from io_uring based send/recvmsg calls task_work: unconditionally run task_work from get_signal() signal: kill JOBCTL_TASK_WORK io_uring: import 5.15-stable io_uring task_work: add helper for more targeted task_work canceling kernel: don't call do_exit() for PF_IO_WORKER threads kernel: stop masking signals in create_io_thread() x86/process: setup io_threads more like normal user space threads arch: ensure parisc/powerpc handle PF_IO_WORKER in copy_thread() arch: setup PF_IO_WORKER threads like PF_KTHREAD entry/kvm: Exit to user mode when TIF_NOTIFY_SIGNAL is set kernel: allow fork with TIF_NOTIFY_SIGNAL pending coredump: Limit what can interrupt coredumps kernel: remove checking for TIF_NOTIFY_SIGNAL ... Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/tlb.h4
-rw-r--r--include/linux/can/platform/sja1000.h2
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/entry-common.h7
-rw-r--r--include/linux/entry-kvm.h4
-rw-r--r--include/linux/eventfd.h7
-rw-r--r--include/linux/fcntl.h2
-rw-r--r--include/linux/fdtable.h2
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/hugetlb.h8
-rw-r--r--include/linux/io_uring.h46
-rw-r--r--include/linux/mc146818rtc.h6
-rw-r--r--include/linux/namei.h1
-rw-r--r--include/linux/net.h3
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/sched/jobctl.h4
-rw-r--r--include/linux/sched/signal.h18
-rw-r--r--include/linux/sched/task.h2
-rw-r--r--include/linux/socket.h4
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/task_work.h2
-rw-r--r--include/linux/tracehook.h23
-rw-r--r--include/linux/uio.h15
-rw-r--r--include/trace/events/io_uring.h121
-rw-r--r--include/uapi/linux/eventpoll.h6
-rw-r--r--include/uapi/linux/io_uring.h115
-rw-r--r--include/uapi/linux/openat2.h4
27 files changed, 302 insertions, 118 deletions
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index a0c4b99d2899..f40c9534f20b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -205,12 +205,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#define tlb_needs_table_invalidate() (true)
#endif
+void tlb_remove_table_sync_one(void);
+
#else
#ifdef tlb_needs_table_invalidate
#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
#endif
+static inline void tlb_remove_table_sync_one(void) { }
+
#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
index 5755ae5a4712..6a869682c120 100644
--- a/include/linux/can/platform/sja1000.h
+++ b/include/linux/can/platform/sja1000.h
@@ -14,7 +14,7 @@
#define OCR_MODE_TEST 0x01
#define OCR_MODE_NORMAL 0x02
#define OCR_MODE_CLOCK 0x03
-#define OCR_MODE_MASK 0x07
+#define OCR_MODE_MASK 0x03
#define OCR_TX0_INVERT 0x04
#define OCR_TX0_PULLDOWN 0x08
#define OCR_TX0_PULLUP 0x10
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 618838c48313..959b370733f0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -68,6 +68,7 @@ struct css_task_iter {
struct list_head iters_node; /* css_set->task_iters */
};
+extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 7dff07713a07..46c42479f950 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -69,7 +69,7 @@
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \
+ _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
@@ -259,12 +259,13 @@ static __always_inline void arch_exit_to_user_mode(void) { }
#endif
/**
- * arch_do_signal - Architecture specific signal delivery function
+ * arch_do_signal_or_restart - Architecture specific signal delivery function
* @regs: Pointer to currents pt_regs
+ * @has_signal: actual signal to handle
*
* Invoked from exit_to_user_mode_loop().
*/
-void arch_do_signal(struct pt_regs *regs);
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal);
/**
* arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit()
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 0cef17afb41a..9b93f8584ff7 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -11,8 +11,8 @@
# define ARCH_XFER_TO_GUEST_MODE_WORK (0)
#endif
-#define XFER_TO_GUEST_MODE_WORK \
- (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define XFER_TO_GUEST_MODE_WORK \
+ (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
_TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
struct kvm_vcpu;
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index dc4fd8a6644d..ce1cf42740bf 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -39,6 +39,7 @@ struct file *eventfd_fget(int fd);
struct eventfd_ctx *eventfd_ctx_fdget(int fd);
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
+__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask);
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt);
@@ -66,6 +67,12 @@ static inline int eventfd_signal(struct eventfd_ctx *ctx, int n)
return -ENOSYS;
}
+static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n,
+ unsigned mask)
+{
+ return -ENOSYS;
+}
+
static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
{
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 921e750843e6..766fcd973beb 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -19,7 +19,7 @@
/* List of all valid flags for the how->resolve argument: */
#define VALID_RESOLVE_FLAGS \
(RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \
- RESOLVE_BENEATH | RESOLVE_IN_ROOT)
+ RESOLVE_BENEATH | RESOLVE_IN_ROOT | RESOLVE_CACHED)
/* List of all open_how "versions". */
#define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index a32bf47c593e..f1a99d3e5570 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -123,7 +123,7 @@ extern void __fd_install(struct files_struct *files,
extern int __close_fd(struct files_struct *files,
unsigned int fd);
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
-extern int __close_fd_get_file(unsigned int fd, struct file **res);
+extern int close_fd_get_file(unsigned int fd, struct file **res);
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
struct files_struct **new_fdp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index df54acdd3554..ebfc0b2b4969 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1817,6 +1817,14 @@ struct dir_context {
*/
#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
+/*
+ * These flags control the behavior of vfs_copy_file_range().
+ * They are not available to the user via syscall.
+ *
+ * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
+ */
+#define COPY_FILE_SPLICE (1 << 0)
+
struct iov_iter;
struct file_operations {
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b9fbb6d4150e..955b19dc28a8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -174,8 +174,8 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
struct page *follow_huge_pd(struct vm_area_struct *vma,
unsigned long address, hugepd_t hpd,
int flags, int pdshift);
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int flags);
+struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address,
+ int flags);
struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int flags);
struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
@@ -261,8 +261,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma,
return NULL;
}
-static inline struct page *follow_huge_pmd(struct mm_struct *mm,
- unsigned long address, pmd_t *pmd, int flags)
+static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma,
+ unsigned long address, int flags)
{
return NULL;
}
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 35b2d845704d..649a4d7c241b 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -5,50 +5,20 @@
#include <linux/sched.h>
#include <linux/xarray.h>
-struct io_identity {
- struct files_struct *files;
- struct mm_struct *mm;
-#ifdef CONFIG_BLK_CGROUP
- struct cgroup_subsys_state *blkcg_css;
-#endif
- const struct cred *creds;
- struct nsproxy *nsproxy;
- struct fs_struct *fs;
- unsigned long fsize;
-#ifdef CONFIG_AUDIT
- kuid_t loginuid;
- unsigned int sessionid;
-#endif
- refcount_t count;
-};
-
-struct io_uring_task {
- /* submission side */
- struct xarray xa;
- struct wait_queue_head wait;
- struct file *last;
- struct percpu_counter inflight;
- struct io_identity __identity;
- struct io_identity *identity;
- atomic_t in_idle;
- bool sqpoll;
-};
-
#if defined(CONFIG_IO_URING)
struct sock *io_uring_get_socket(struct file *file);
-void __io_uring_task_cancel(void);
-void __io_uring_files_cancel(struct files_struct *files);
+void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
-static inline void io_uring_task_cancel(void)
+static inline void io_uring_files_cancel(void)
{
- if (current->io_uring && !xa_empty(&current->io_uring->xa))
- __io_uring_task_cancel();
+ if (current->io_uring)
+ __io_uring_cancel(false);
}
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_task_cancel(void)
{
- if (current->io_uring && !xa_empty(&current->io_uring->xa))
- __io_uring_files_cancel(files);
+ if (current->io_uring)
+ __io_uring_cancel(true);
}
static inline void io_uring_free(struct task_struct *tsk)
{
@@ -63,7 +33,7 @@ static inline struct sock *io_uring_get_socket(struct file *file)
static inline void io_uring_task_cancel(void)
{
}
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_files_cancel(void)
{
}
static inline void io_uring_free(struct task_struct *tsk)
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index 1e0205811394..b0da04fe087b 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -125,7 +125,11 @@ struct cmos_rtc_board_info {
#define RTC_IO_EXTENT_USED RTC_IO_EXTENT
#endif /* ARCH_RTC_LOCATION */
-unsigned int mc146818_get_time(struct rtc_time *time);
+bool mc146818_does_rtc_work(void);
+int mc146818_get_time(struct rtc_time *time);
int mc146818_set_time(struct rtc_time *time);
+bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+ void *param);
+
#endif /* _MC146818RTC_H */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index a4bb992623c4..b9605b2b46e7 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -46,6 +46,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
#define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */
#define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */
#define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */
+#define LOOKUP_CACHED 0x200000 /* Only do cached lookup */
/* LOOKUP_* flags which do scope-related checks based on the dirfd. */
#define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT)
diff --git a/include/linux/net.h b/include/linux/net.h
index 0dcd51feef02..ae713c851342 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -42,8 +42,6 @@ struct net;
#define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4
-#define PROTO_CMSG_DATA_ONLY 0x0001
-
#ifndef ARCH_HAS_SOCKET_TYPES
/**
* enum sock_type - Socket types
@@ -138,7 +136,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
struct proto_ops {
int family;
- unsigned int flags;
struct module *owner;
int (*release) (struct socket *sock);
int (*bind) (struct socket *sock,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b055c217eb0b..5da4b3c89f63 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -885,6 +885,9 @@ struct task_struct {
/* CLONE_CHILD_CLEARTID: */
int __user *clear_child_tid;
+ /* PF_IO_WORKER */
+ void *pf_io_worker;
+
u64 utime;
u64 stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index d2b4204ba4d3..fa067de9f1a9 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -19,7 +19,6 @@ struct task_struct;
#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */
-#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */
#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
@@ -29,10 +28,9 @@ struct task_struct;
#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT)
-#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT)
#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
-#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK)
+#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
extern void task_clear_jobctl_trapping(struct task_struct *task);
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 657640015b33..ae60f838ebb9 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -354,11 +354,23 @@ static inline int restart_syscall(void)
return -ERESTARTNOINTR;
}
-static inline int signal_pending(struct task_struct *p)
+static inline int task_sigpending(struct task_struct *p)
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
+static inline int signal_pending(struct task_struct *p)
+{
+ /*
+ * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same
+ * behavior in terms of ensuring that we break out of wait loops
+ * so that notify signal callbacks can be processed.
+ */
+ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL)))
+ return 1;
+ return task_sigpending(p);
+}
+
static inline int __fatal_signal_pending(struct task_struct *p)
{
return unlikely(sigismember(&p->pending.signal, SIGKILL));
@@ -366,7 +378,7 @@ static inline int __fatal_signal_pending(struct task_struct *p)
static inline int fatal_signal_pending(struct task_struct *p)
{
- return signal_pending(p) && __fatal_signal_pending(p);
+ return task_sigpending(p) && __fatal_signal_pending(p);
}
static inline int signal_pending_state(long state, struct task_struct *p)
@@ -503,7 +515,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize);
static inline void restore_saved_sigmask_unless(bool interrupted)
{
if (interrupted)
- WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+ WARN_ON(!signal_pending(current));
else
restore_saved_sigmask();
}
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index eeacb4a16fe3..4ce511437a8a 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -31,6 +31,7 @@ struct kernel_clone_args {
/* Number of elements in *set_tid */
size_t set_tid_size;
int cgroup;
+ int io_thread;
struct cgroup *cgrp;
struct css_set *cset;
};
@@ -85,6 +86,7 @@ extern void exit_files(struct task_struct *);
extern void exit_itimers(struct task_struct *);
extern pid_t kernel_clone(struct kernel_clone_args *kargs);
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
struct task_struct *fork_idle(int);
struct mm_struct *copy_init_mm(void);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 9a6dd92e60c0..72794d115364 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -424,6 +424,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags,
unsigned long nofile);
+extern struct file *do_accept(struct file *file, unsigned file_flags,
+ struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags);
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
@@ -439,5 +442,6 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
int __user *usockaddr_len);
extern int __sys_socketpair(int family, int type, int protocol,
int __user *usockvec);
+extern int __sys_shutdown_sock(struct socket *sock, int how);
extern int __sys_shutdown(int fd, int how);
#endif /* _LINUX_SOCKET_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index aea0ce9f3b74..a058c96cf213 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -341,7 +341,7 @@ asmlinkage long sys_io_uring_setup(u32 entries,
struct io_uring_params __user *p);
asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
u32 min_complete, u32 flags,
- const sigset_t __user *sig, size_t sigsz);
+ const void __user *argp, size_t argsz);
asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
void __user *arg, unsigned int nr_args);
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 0d848a1e9e62..5b8a93f288bb 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -22,6 +22,8 @@ enum task_work_notify_mode {
int task_work_add(struct task_struct *task, struct callback_head *twork,
enum task_work_notify_mode mode);
+struct callback_head *task_work_cancel_match(struct task_struct *task,
+ bool (*match)(struct callback_head *, void *data), void *data);
struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
void task_work_run(void);
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b480e1a07ed8..ee9ab7dbc8c3 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -198,4 +198,27 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
blkcg_maybe_throttle_current();
}
+/*
+ * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This
+ * is currently used by TWA_SIGNAL based task_work, which requires breaking
+ * wait loops to ensure that task_work is noticed and run.
+ */
+static inline void tracehook_notify_signal(void)
+{
+ clear_thread_flag(TIF_NOTIFY_SIGNAL);
+ smp_mb__after_atomic();
+ if (current->task_works)
+ task_work_run();
+}
+
+/*
+ * Called when we have work to process from exit_to_user_mode_loop()
+ */
+static inline void set_notify_signal(struct task_struct *task)
+{
+ if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+ !wake_up_state(task, TASK_INTERRUPTIBLE))
+ kick_process(task);
+}
+
#endif /* <linux/tracehook.h> */
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 27ff8eb786dc..cedb68e49e4f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -26,6 +26,12 @@ enum iter_type {
ITER_DISCARD = 64,
};
+struct iov_iter_state {
+ size_t iov_offset;
+ size_t count;
+ unsigned long nr_segs;
+};
+
struct iov_iter {
/*
* Bit 0 is the read/write bit, set if we're writing.
@@ -55,6 +61,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
return i->type & ~(READ | WRITE);
}
+static inline void iov_iter_save_state(struct iov_iter *iter,
+ struct iov_iter_state *state)
+{
+ state->iov_offset = iter->iov_offset;
+ state->count = iter->count;
+ state->nr_segs = iter->nr_segs;
+}
+
static inline bool iter_is_iovec(const struct iov_iter *i)
{
return iov_iter_type(i) == ITER_IOVEC;
@@ -226,6 +240,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 9f0d3b7d56b0..0dd30de00e5b 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -12,11 +12,11 @@ struct io_wq_work;
/**
* io_uring_create - called after a new io_uring context was prepared
*
- * @fd: corresponding file descriptor
- * @ctx: pointer to a ring context structure
+ * @fd: corresponding file descriptor
+ * @ctx: pointer to a ring context structure
* @sq_entries: actual SQ size
* @cq_entries: actual CQ size
- * @flags: SQ ring flags, provided to io_uring_setup(2)
+ * @flags: SQ ring flags, provided to io_uring_setup(2)
*
* Allows to trace io_uring creation and provide pointer to a context, that can
* be used later to find correlated events.
@@ -49,15 +49,15 @@ TRACE_EVENT(io_uring_create,
);
/**
- * io_uring_register - called after a buffer/file/eventfd was succesfully
+ * io_uring_register - called after a buffer/file/eventfd was successfully
* registered for a ring
*
- * @ctx: pointer to a ring context structure
- * @opcode: describes which operation to perform
+ * @ctx: pointer to a ring context structure
+ * @opcode: describes which operation to perform
* @nr_user_files: number of registered files
* @nr_user_bufs: number of registered buffers
* @cq_ev_fd: whether eventfs registered or not
- * @ret: return code
+ * @ret: return code
*
* Allows to trace fixed files/buffers/eventfds, that could be registered to
* avoid an overhead of getting references to them for every operation. This
@@ -142,16 +142,16 @@ TRACE_EVENT(io_uring_queue_async_work,
TP_ARGS(ctx, rw, req, work, flags),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( int, rw )
- __field( void *, req )
+ __field( void *, ctx )
+ __field( int, rw )
+ __field( void *, req )
__field( struct io_wq_work *, work )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->ctx = ctx;
- __entry->rw = rw;
+ __entry->rw = rw;
__entry->req = req;
__entry->work = work;
__entry->flags = flags;
@@ -196,10 +196,10 @@ TRACE_EVENT(io_uring_defer,
/**
* io_uring_link - called before the io_uring request added into link_list of
- * another request
+ * another request
*
- * @ctx: pointer to a ring context structure
- * @req: pointer to a linked request
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to a linked request
* @target_req: pointer to a previous request, that would contain @req
*
* Allows to track linked requests, to understand dependencies between requests
@@ -212,8 +212,8 @@ TRACE_EVENT(io_uring_link,
TP_ARGS(ctx, req, target_req),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( void *, req )
+ __field( void *, ctx )
+ __field( void *, req )
__field( void *, target_req )
),
@@ -244,7 +244,7 @@ TRACE_EVENT(io_uring_cqring_wait,
TP_ARGS(ctx, min_events),
TP_STRUCT__entry (
- __field( void *, ctx )
+ __field( void *, ctx )
__field( int, min_events )
),
@@ -272,7 +272,7 @@ TRACE_EVENT(io_uring_fail_link,
TP_ARGS(req, link),
TP_STRUCT__entry (
- __field( void *, req )
+ __field( void *, req )
__field( void *, link )
),
@@ -290,38 +290,42 @@ TRACE_EVENT(io_uring_fail_link,
* @ctx: pointer to a ring context structure
* @user_data: user data associated with the request
* @res: result of the request
+ * @cflags: completion flags
*
*/
TRACE_EVENT(io_uring_complete,
- TP_PROTO(void *ctx, u64 user_data, long res),
+ TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
- TP_ARGS(ctx, user_data, res),
+ TP_ARGS(ctx, user_data, res, cflags),
TP_STRUCT__entry (
__field( void *, ctx )
__field( u64, user_data )
- __field( long, res )
+ __field( int, res )
+ __field( unsigned, cflags )
),
TP_fast_assign(
__entry->ctx = ctx;
__entry->user_data = user_data;
__entry->res = res;
+ __entry->cflags = cflags;
),
- TP_printk("ring %p, user_data 0x%llx, result %ld",
+ TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
__entry->ctx, (unsigned long long)__entry->user_data,
- __entry->res)
+ __entry->res, __entry->cflags)
);
-
/**
* io_uring_submit_sqe - called before submitting one SQE
*
* @ctx: pointer to a ring context structure
+ * @req: pointer to a submitted request
* @opcode: opcode of request
* @user_data: user data associated with the request
+ * @flags request flags
* @force_nonblock: whether a context blocking or not
* @sq_thread: true if sq_thread has submitted this SQE
*
@@ -330,41 +334,60 @@ TRACE_EVENT(io_uring_complete,
*/
TRACE_EVENT(io_uring_submit_sqe,
- TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock,
- bool sq_thread),
+ TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
+ bool force_nonblock, bool sq_thread),
- TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread),
+ TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),
TP_STRUCT__entry (
__field( void *, ctx )
+ __field( void *, req )
__field( u8, opcode )
__field( u64, user_data )
+ __field( u32, flags )
__field( bool, force_nonblock )
__field( bool, sq_thread )
),
TP_fast_assign(
__entry->ctx = ctx;
+ __entry->req = req;
__entry->opcode = opcode;
__entry->user_data = user_data;
+ __entry->flags = flags;
__entry->force_nonblock = force_nonblock;
__entry->sq_thread = sq_thread;
),
- TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d",
- __entry->ctx, __entry->opcode,
- (unsigned long long) __entry->user_data,
- __entry->force_nonblock, __entry->sq_thread)
+ TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
+ "non block %d, sq_thread %d", __entry->ctx, __entry->req,
+ __entry->opcode, (unsigned long long)__entry->user_data,
+ __entry->flags, __entry->force_nonblock, __entry->sq_thread)
);
+/*
+ * io_uring_poll_arm - called after arming a poll wait if successful
+ *
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to the armed request
+ * @opcode: opcode of request
+ * @user_data: user data associated with the request
+ * @mask: request poll events mask
+ * @events: registered events of interest
+ *
+ * Allows to track which fds are waiting for and what are the events of
+ * interest.
+ */
TRACE_EVENT(io_uring_poll_arm,
- TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
+ TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
+ int mask, int events),
- TP_ARGS(ctx, opcode, user_data, mask, events),
+ TP_ARGS(ctx, req, opcode, user_data, mask, events),
TP_STRUCT__entry (
__field( void *, ctx )
+ __field( void *, req )
__field( u8, opcode )
__field( u64, user_data )
__field( int, mask )
@@ -373,16 +396,17 @@ TRACE_EVENT(io_uring_poll_arm,
TP_fast_assign(
__entry->ctx = ctx;
+ __entry->req = req;
__entry->opcode = opcode;
__entry->user_data = user_data;
__entry->mask = mask;
__entry->events = events;
),
- TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
- __entry->ctx, __entry->opcode,
- (unsigned long long) __entry->user_data,
- __entry->mask, __entry->events)
+ TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
+ __entry->ctx, __entry->req, __entry->opcode,
+ (unsigned long long) __entry->user_data,
+ __entry->mask, __entry->events)
);
TRACE_EVENT(io_uring_poll_wake,
@@ -437,27 +461,40 @@ TRACE_EVENT(io_uring_task_add,
__entry->mask)
);
+/*
+ * io_uring_task_run - called when task_work_run() executes the poll events
+ * notification callbacks
+ *
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to the armed request
+ * @opcode: opcode of request
+ * @user_data: user data associated with the request
+ *
+ * Allows to track when notified poll events are processed
+ */
TRACE_EVENT(io_uring_task_run,
- TP_PROTO(void *ctx, u8 opcode, u64 user_data),
+ TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
- TP_ARGS(ctx, opcode, user_data),
+ TP_ARGS(ctx, req, opcode, user_data),
TP_STRUCT__entry (
__field( void *, ctx )
+ __field( void *, req )
__field( u8, opcode )
__field( u64, user_data )
),
TP_fast_assign(
__entry->ctx = ctx;
+ __entry->req = req;
__entry->opcode = opcode;
__entry->user_data = user_data;
),
- TP_printk("ring %p, op %d, data 0x%llx",
- __entry->ctx, __entry->opcode,
- (unsigned long long) __entry->user_data)
+ TP_printk("ring %p, req %p, op %d, data 0x%llx",
+ __entry->ctx, __entry->req, __entry->opcode,
+ (unsigned long long) __entry->user_data)
);
#endif /* _TRACE_IO_URING_H */
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 8a3432d0f0dc..e687658843b1 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -41,6 +41,12 @@
#define EPOLLMSG (__force __poll_t)0x00000400
#define EPOLLRDHUP (__force __poll_t)0x00002000
+/*
+ * Internal flag - wakeup generated by io_uring, used to detect recursion back
+ * into the io_uring poll handler.
+ */
+#define EPOLL_URING_WAKE ((__force __poll_t)(1U << 27))
+
/* Set exclusive wakeup mode for the target file descriptor */
#define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28))
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 98d8e06dea22..6481db937002 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -42,23 +42,25 @@ struct io_uring_sqe {
__u32 statx_flags;
__u32 fadvise_advice;
__u32 splice_flags;
+ __u32 rename_flags;
+ __u32 unlink_flags;
+ __u32 hardlink_flags;
};
__u64 user_data; /* data to be passed back at completion time */
+ /* pack this to avoid bogus arm OABI complaints */
union {
- struct {
- /* pack this to avoid bogus arm OABI complaints */
- union {
- /* index into fixed buffers, if used */
- __u16 buf_index;
- /* for grouped buffer selection */
- __u16 buf_group;
- } __attribute__((packed));
- /* personality to use, if used */
- __u16 personality;
- __s32 splice_fd_in;
- };
- __u64 __pad2[3];
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
+ /* personality to use, if used */
+ __u16 personality;
+ union {
+ __s32 splice_fd_in;
+ __u32 file_index;
};
+ __u64 __pad2[2];
};
enum {
@@ -132,6 +134,9 @@ enum {
IORING_OP_PROVIDE_BUFFERS,
IORING_OP_REMOVE_BUFFERS,
IORING_OP_TEE,
+ IORING_OP_SHUTDOWN,
+ IORING_OP_RENAMEAT,
+ IORING_OP_UNLINKAT,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -145,8 +150,13 @@ enum {
/*
* sqe->timeout_flags
*/
-#define IORING_TIMEOUT_ABS (1U << 0)
-
+#define IORING_TIMEOUT_ABS (1U << 0)
+#define IORING_TIMEOUT_UPDATE (1U << 1)
+#define IORING_TIMEOUT_BOOTTIME (1U << 2)
+#define IORING_TIMEOUT_REALTIME (1U << 3)
+#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
+#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
+#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
* sqe->splice_flags
* extends splice(2) flags
@@ -154,6 +164,21 @@ enum {
#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
/*
+ * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
+ * command flags for POLL_ADD are stored in sqe->len.
+ *
+ * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if
+ * the poll handler will continue to report
+ * CQEs on behalf of the same SQE.
+ *
+ * IORING_POLL_UPDATE Update existing poll request, matching
+ * sqe->addr as the old user_data field.
+ */
+#define IORING_POLL_ADD_MULTI (1U << 0)
+#define IORING_POLL_UPDATE_EVENTS (1U << 1)
+#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
+
+/*
* IO completion data structure (Completion Queue Entry)
*/
struct io_uring_cqe {
@@ -166,8 +191,10 @@ struct io_uring_cqe {
* cqe->flags
*
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+ * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
*/
#define IORING_CQE_F_BUFFER (1U << 0)
+#define IORING_CQE_F_MORE (1U << 1)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
@@ -226,6 +253,7 @@ struct io_cqring_offsets {
#define IORING_ENTER_GETEVENTS (1U << 0)
#define IORING_ENTER_SQ_WAKEUP (1U << 1)
#define IORING_ENTER_SQ_WAIT (1U << 2)
+#define IORING_ENTER_EXT_ARG (1U << 3)
/*
* Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -253,6 +281,10 @@ struct io_uring_params {
#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
#define IORING_FEAT_FAST_POLL (1U << 5)
#define IORING_FEAT_POLL_32BITS (1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
+#define IORING_FEAT_EXT_ARG (1U << 8)
+#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
+#define IORING_FEAT_RSRC_TAGS (1U << 10)
/*
* io_uring_register(2) opcodes and arguments
@@ -272,16 +304,62 @@ enum {
IORING_REGISTER_RESTRICTIONS = 11,
IORING_REGISTER_ENABLE_RINGS = 12,
+ /* extended with tagging */
+ IORING_REGISTER_FILES2 = 13,
+ IORING_REGISTER_FILES_UPDATE2 = 14,
+ IORING_REGISTER_BUFFERS2 = 15,
+ IORING_REGISTER_BUFFERS_UPDATE = 16,
+
+ /* set/clear io-wq thread affinities */
+ IORING_REGISTER_IOWQ_AFF = 17,
+ IORING_UNREGISTER_IOWQ_AFF = 18,
+
+ /* set/get max number of io-wq workers */
+ IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
+
/* this goes last */
IORING_REGISTER_LAST
};
+/* io-wq worker categories */
+enum {
+ IO_WQ_BOUND,
+ IO_WQ_UNBOUND,
+};
+
+/* deprecated, see struct io_uring_rsrc_update */
struct io_uring_files_update {
__u32 offset;
__u32 resv;
__aligned_u64 /* __s32 * */ fds;
};
+struct io_uring_rsrc_register {
+ __u32 nr;
+ __u32 resv;
+ __u64 resv2;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+};
+
+struct io_uring_rsrc_update {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 data;
+};
+
+struct io_uring_rsrc_update2 {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+ __u32 nr;
+ __u32 resv2;
+};
+
+/* Skip updating fd indexes set to this value in the fd table */
+#define IORING_REGISTER_FILES_SKIP (-2)
+
#define IO_URING_OP_SUPPORTED (1U << 0)
struct io_uring_probe_op {
@@ -329,4 +407,11 @@ enum {
IORING_RESTRICTION_LAST
};
+struct io_uring_getevents_arg {
+ __u64 sigmask;
+ __u32 sigmask_sz;
+ __u32 pad;
+ __u64 ts;
+};
+
#endif
diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h
index 58b1eb711360..a5feb7604948 100644
--- a/include/uapi/linux/openat2.h
+++ b/include/uapi/linux/openat2.h
@@ -35,5 +35,9 @@ struct open_how {
#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
be scoped inside the dirfd
(similar to chroot(2)). */
+#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
+ completed through cached lookup. May
+ return -EAGAIN if that's not
+ possible. */
#endif /* _UAPI_LINUX_OPENAT2_H */