summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/blk-mq.h32
-rw-r--r--include/linux/blkdev.h8
-rw-r--r--include/linux/call_once.h47
-rw-r--r--include/linux/cgroup-defs.h6
-rw-r--r--include/linux/cleanup.h2
-rw-r--r--include/linux/compaction.h5
-rw-r--r--include/linux/compiler.h34
-rw-r--r--include/linux/cred.h10
-rw-r--r--include/linux/damon.h5
-rw-r--r--include/linux/dcache.h39
-rw-r--r--include/linux/device/faux.h69
-rw-r--r--include/linux/efi.h31
-rw-r--r--include/linux/eventpoll.h4
-rw-r--r--include/linux/fanotify.h12
-rw-r--r--include/linux/fs.h61
-rw-r--r--include/linux/fs_context.h2
-rw-r--r--include/linux/fsnotify.h45
-rw-r--r--include/linux/fsnotify_backend.h42
-rw-r--r--include/linux/hrtimer_defs.h1
-rw-r--r--include/linux/hugetlb.h9
-rw-r--r--include/linux/i2c.h10
-rw-r--r--include/linux/iomap.h116
-rw-r--r--include/linux/jiffies.h2
-rw-r--r--include/linux/key.h1
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/libata.h2
-rw-r--r--include/linux/lockref.h7
-rw-r--r--include/linux/log2.h2
-rw-r--r--include/linux/mm.h11
-rw-r--r--include/linux/mm_types.h7
-rw-r--r--include/linux/mnt_idmapping.h5
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/namei.h45
-rw-r--r--include/linux/netdevice.h11
-rw-r--r--include/linux/netfs.h2
-rw-r--r--include/linux/nfs4.h1
-rw-r--r--include/linux/nfs_xdr.h2
-rw-r--r--include/linux/nvme-tcp.h2
-rw-r--r--include/linux/nvme.h40
-rw-r--r--include/linux/pagemap.h20
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/pidfs.h1
-rw-r--r--include/linux/pipe_fs_i.h90
-rw-r--r--include/linux/platform_profile.h3
-rw-r--r--include/linux/proc_fs.h7
-rw-r--r--include/linux/psp-sev.h9
-rw-r--r--include/linux/rcuref.h9
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/sched/task.h1
-rw-r--r--include/linux/skmsg.h2
-rw-r--r--include/linux/socket.h2
-rw-r--r--include/linux/string.h12
-rw-r--r--include/linux/sunrpc/sched.h3
-rw-r--r--include/linux/swap_cgroup.h4
-rw-r--r--include/linux/syscalls.h8
-rw-r--r--include/linux/sysv_fs.h214
-rw-r--r--include/linux/uidgid.h6
-rw-r--r--include/linux/vfsdebug.h45
-rw-r--r--include/linux/wait.h3
59 files changed, 739 insertions, 439 deletions
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 9ebb53f031cd..aba9c24486aa 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -28,7 +28,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t);
typedef __u32 __bitwise req_flags_t;
/* Keep rqf_name[] in sync with the definitions below */
-enum {
+enum rqf_flags {
/* drive already may have started this one */
__RQF_STARTED,
/* request for flush sequence */
@@ -852,21 +852,39 @@ static inline bool blk_mq_is_reserved_rq(struct request *rq)
return rq->rq_flags & RQF_RESV;
}
-/*
+/**
+ * blk_mq_add_to_batch() - add a request to the completion batch
+ * @req: The request to add to batch
+ * @iob: The batch to add the request
+ * @is_error: Specify true if the request failed with an error
+ * @complete: The completaion handler for the request
+ *
* Batched completions only work when there is no I/O error and no special
* ->end_io handler.
+ *
+ * Return: true when the request was added to the batch, otherwise false
*/
static inline bool blk_mq_add_to_batch(struct request *req,
- struct io_comp_batch *iob, int ioerror,
+ struct io_comp_batch *iob, bool is_error,
void (*complete)(struct io_comp_batch *))
{
/*
- * blk_mq_end_request_batch() can't end request allocated from
- * sched tags
+ * Check various conditions that exclude batch processing:
+ * 1) No batch container
+ * 2) Has scheduler data attached
+ * 3) Not a passthrough request and end_io set
+ * 4) Not a passthrough request and failed with an error
*/
- if (!iob || (req->rq_flags & RQF_SCHED_TAGS) || ioerror ||
- (req->end_io && !blk_rq_is_passthrough(req)))
+ if (!iob)
+ return false;
+ if (req->rq_flags & RQF_SCHED_TAGS)
return false;
+ if (!blk_rq_is_passthrough(req)) {
+ if (req->end_io)
+ return false;
+ if (is_error)
+ return false;
+ }
if (!iob->complete)
iob->complete = complete;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a97428e8bbbe..1c0cf6af392c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -196,10 +196,11 @@ struct gendisk {
unsigned int zone_capacity;
unsigned int last_zone_capacity;
unsigned long __rcu *conv_zones_bitmap;
- unsigned int zone_wplugs_hash_bits;
- spinlock_t zone_wplugs_lock;
+ unsigned int zone_wplugs_hash_bits;
+ atomic_t nr_zone_wplugs;
+ spinlock_t zone_wplugs_lock;
struct mempool_s *zone_wplugs_pool;
- struct hlist_head *zone_wplugs_hash;
+ struct hlist_head *zone_wplugs_hash;
struct workqueue_struct *zone_wplugs_wq;
#endif /* CONFIG_BLK_DEV_ZONED */
@@ -373,6 +374,7 @@ struct queue_limits {
unsigned int max_sectors;
unsigned int max_user_sectors;
unsigned int max_segment_size;
+ unsigned int min_segment_size;
unsigned int physical_block_size;
unsigned int logical_block_size;
unsigned int alignment_offset;
diff --git a/include/linux/call_once.h b/include/linux/call_once.h
index 6261aa0b3fb0..13cd6469e7e5 100644
--- a/include/linux/call_once.h
+++ b/include/linux/call_once.h
@@ -26,20 +26,41 @@ do { \
__once_init((once), #once, &__key); \
} while (0)
-static inline void call_once(struct once *once, void (*cb)(struct once *))
+/*
+ * call_once - Ensure a function has been called exactly once
+ *
+ * @once: Tracking struct
+ * @cb: Function to be called
+ *
+ * If @once has never completed successfully before, call @cb and, if
+ * it returns a zero or positive value, mark @once as completed. Return
+ * the value returned by @cb
+ *
+ * If @once has completed succesfully before, return 0.
+ *
+ * The call to @cb is implicitly surrounded by a mutex, though for
+ * efficiency the * function avoids taking it after the first call.
+ */
+static inline int call_once(struct once *once, int (*cb)(struct once *))
{
- /* Pairs with atomic_set_release() below. */
- if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
- return;
-
- guard(mutex)(&once->lock);
- WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
- if (atomic_read(&once->state) != ONCE_NOT_STARTED)
- return;
-
- atomic_set(&once->state, ONCE_RUNNING);
- cb(once);
- atomic_set_release(&once->state, ONCE_COMPLETED);
+ int r, state;
+
+ /* Pairs with atomic_set_release() below. */
+ if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+ return 0;
+
+ guard(mutex)(&once->lock);
+ state = atomic_read(&once->state);
+ if (unlikely(state != ONCE_NOT_STARTED))
+ return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0;
+
+ atomic_set(&once->state, ONCE_RUNNING);
+ r = cb(once);
+ if (r < 0)
+ atomic_set(&once->state, ONCE_NOT_STARTED);
+ else
+ atomic_set_release(&once->state, ONCE_COMPLETED);
+ return r;
}
#endif /* _LINUX_CALL_ONCE_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1b20d2d8ef7c..17960a1e858d 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -71,9 +71,6 @@ enum {
/* Cgroup is frozen. */
CGRP_FROZEN,
-
- /* Control group has to be killed. */
- CGRP_KILL,
};
/* cgroup_root->flags */
@@ -461,6 +458,9 @@ struct cgroup {
int nr_threaded_children; /* # of live threaded child cgroups */
+ /* sequence number for cgroup.kill, serialized by css_set_lock. */
+ unsigned int kill_seq;
+
struct kernfs_node *kn; /* cgroup kernfs entry */
struct cgroup_file procs_file; /* handle for "cgroup.procs" */
struct cgroup_file events_file; /* handle for "cgroup.events" */
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index ec00e3f7af2b..ee2614adb785 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -212,7 +212,7 @@ const volatile void * __must_check_fn(const volatile void *val)
{ return val; }
#define no_free_ptr(p) \
- ((typeof(p)) __must_check_fn(__get_and_null(p, NULL)))
+ ((typeof(p)) __must_check_fn((__force const volatile void *)__get_and_null(p, NULL)))
#define return_ptr(p) return no_free_ptr(p)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index e94776496049..7bf0c521db63 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -80,6 +80,11 @@ static inline unsigned long compact_gap(unsigned int order)
return 2UL << order;
}
+static inline int current_is_kcompactd(void)
+{
+ return current->flags & PF_KCOMPACTD;
+}
+
#ifdef CONFIG_COMPACTION
extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order);
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index b087de2f3e94..155385754824 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -110,7 +110,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
/* Unreachable code */
#ifdef CONFIG_OBJTOOL
/* Annotate a C jump table to allow objtool to follow the code flow */
-#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #")
+#define __annotate_jump_table __section(".data.rel.ro.c_jump_table")
#else /* !CONFIG_OBJTOOL */
#define __annotate_jump_table
#endif /* CONFIG_OBJTOOL */
@@ -191,6 +191,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
__v; \
})
+#ifdef __CHECKER__
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0)
+#else /* __CHECKER__ */
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
+#endif /* __CHECKER__ */
+
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __is_array(a) (!__same_type((a), &(a)[0]))
+#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_array(a), \
+ "must be array")
+
+#define __is_byte_array(a) (__is_array(a) && sizeof((a)[0]) == 1)
+#define __must_be_byte_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_byte_array(a), \
+ "must be byte array")
+
+/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */
+#define __must_be_cstr(p) \
+ __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)")
+
#endif /* __KERNEL__ */
/**
@@ -231,19 +250,6 @@ static inline void *offset_to_ptr(const int *off)
#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym))
-#ifdef __CHECKER__
-#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0)
-#else /* __CHECKER__ */
-#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
-#endif /* __CHECKER__ */
-
-/* &a[0] degrades to a pointer: a different type from an array */
-#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array")
-
-/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */
-#define __must_be_cstr(p) \
- __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)")
-
/*
* This returns a constant expression while determining if an argument is
* a constant expression, most importantly without evaluating the argument.
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 0c3c4b16b469..5658a3bfe803 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -172,18 +172,12 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred)
static inline const struct cred *override_creds(const struct cred *override_cred)
{
- const struct cred *old = current->cred;
-
- rcu_assign_pointer(current->cred, override_cred);
- return old;
+ return rcu_replace_pointer(current->cred, override_cred, 1);
}
static inline const struct cred *revert_creds(const struct cred *revert_cred)
{
- const struct cred *override_cred = current->cred;
-
- rcu_assign_pointer(current->cred, revert_cred);
- return override_cred;
+ return rcu_replace_pointer(current->cred, revert_cred, 1);
}
/**
diff --git a/include/linux/damon.h b/include/linux/damon.h
index af525252b853..c9074d569596 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -470,6 +470,11 @@ struct damos {
unsigned long next_apply_sis;
/* informs if ongoing DAMOS walk for this scheme is finished */
bool walk_completed;
+ /*
+ * If the current region in the filtering stage is allowed by core
+ * layer-handled filters. If true, operations layer allows it, too.
+ */
+ bool core_filters_allowed;
/* public: */
struct damos_quota quota;
struct damos_watermarks wmarks;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4afb60365675..45bff10d3773 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -203,34 +203,34 @@ struct dentry_operations {
#define DCACHE_NFSFS_RENAMED BIT(12)
/* this dentry has been "silly renamed" and has to be deleted on the last
* dput() */
-#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(14)
+#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(13)
/* Parent inode is watched by some fsnotify listener */
-#define DCACHE_DENTRY_KILLED BIT(15)
+#define DCACHE_DENTRY_KILLED BIT(14)
-#define DCACHE_MOUNTED BIT(16) /* is a mountpoint */
-#define DCACHE_NEED_AUTOMOUNT BIT(17) /* handle automount on this dir */
-#define DCACHE_MANAGE_TRANSIT BIT(18) /* manage transit from this dirent */
+#define DCACHE_MOUNTED BIT(15) /* is a mountpoint */
+#define DCACHE_NEED_AUTOMOUNT BIT(16) /* handle automount on this dir */
+#define DCACHE_MANAGE_TRANSIT BIT(17) /* manage transit from this dirent */
#define DCACHE_MANAGED_DENTRY \
(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
-#define DCACHE_LRU_LIST BIT(19)
+#define DCACHE_LRU_LIST BIT(18)
-#define DCACHE_ENTRY_TYPE (7 << 20) /* bits 20..22 are for storing type: */
-#define DCACHE_MISS_TYPE (0 << 20) /* Negative dentry */
-#define DCACHE_WHITEOUT_TYPE (1 << 20) /* Whiteout dentry (stop pathwalk) */
-#define DCACHE_DIRECTORY_TYPE (2 << 20) /* Normal directory */
-#define DCACHE_AUTODIR_TYPE (3 << 20) /* Lookupless directory (presumed automount) */
-#define DCACHE_REGULAR_TYPE (4 << 20) /* Regular file type */
-#define DCACHE_SPECIAL_TYPE (5 << 20) /* Other file type */
-#define DCACHE_SYMLINK_TYPE (6 << 20) /* Symlink */
+#define DCACHE_ENTRY_TYPE (7 << 19) /* bits 19..21 are for storing type: */
+#define DCACHE_MISS_TYPE (0 << 19) /* Negative dentry */
+#define DCACHE_WHITEOUT_TYPE (1 << 19) /* Whiteout dentry (stop pathwalk) */
+#define DCACHE_DIRECTORY_TYPE (2 << 19) /* Normal directory */
+#define DCACHE_AUTODIR_TYPE (3 << 19) /* Lookupless directory (presumed automount) */
+#define DCACHE_REGULAR_TYPE (4 << 19) /* Regular file type */
+#define DCACHE_SPECIAL_TYPE (5 << 19) /* Other file type */
+#define DCACHE_SYMLINK_TYPE (6 << 19) /* Symlink */
-#define DCACHE_NOKEY_NAME BIT(25) /* Encrypted name encoded without key */
-#define DCACHE_OP_REAL BIT(26)
+#define DCACHE_NOKEY_NAME BIT(22) /* Encrypted name encoded without key */
+#define DCACHE_OP_REAL BIT(23)
-#define DCACHE_PAR_LOOKUP BIT(28) /* being looked up (with parent locked shared) */
-#define DCACHE_DENTRY_CURSOR BIT(29)
-#define DCACHE_NORCU BIT(30) /* No RCU delay for freeing */
+#define DCACHE_PAR_LOOKUP BIT(24) /* being looked up (with parent locked shared) */
+#define DCACHE_DENTRY_CURSOR BIT(25)
+#define DCACHE_NORCU BIT(26) /* No RCU delay for freeing */
extern seqlock_t rename_lock;
@@ -253,7 +253,6 @@ extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
const struct qstr *name);
-extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
extern struct dentry *d_find_any_alias(struct inode *inode);
extern struct dentry * d_obtain_alias(struct inode *);
extern struct dentry * d_obtain_root(struct inode *);
diff --git a/include/linux/device/faux.h b/include/linux/device/faux.h
new file mode 100644
index 000000000000..9f43c0e46aa4
--- /dev/null
+++ b/include/linux/device/faux.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (c) 2025 The Linux Foundation
+ *
+ * A "simple" faux bus that allows devices to be created and added
+ * automatically to it. This is to be used whenever you need to create a
+ * device that is not associated with any "real" system resources, and do
+ * not want to have to deal with a bus/driver binding logic. It is
+ * intended to be very simple, with only a create and a destroy function
+ * available.
+ */
+#ifndef _FAUX_DEVICE_H_
+#define _FAUX_DEVICE_H_
+
+#include <linux/container_of.h>
+#include <linux/device.h>
+
+/**
+ * struct faux_device - a "faux" device
+ * @dev: internal struct device of the object
+ *
+ * A simple faux device that can be created/destroyed. To be used when a
+ * driver only needs to have a device to "hang" something off. This can be
+ * used for downloading firmware or other basic tasks. Use this instead of
+ * a struct platform_device if the device has no resources assigned to
+ * it at all.
+ */
+struct faux_device {
+ struct device dev;
+};
+#define to_faux_device(x) container_of_const((x), struct faux_device, dev)
+
+/**
+ * struct faux_device_ops - a set of callbacks for a struct faux_device
+ * @probe: called when a faux device is probed by the driver core
+ * before the device is fully bound to the internal faux bus
+ * code. If probe succeeds, return 0, otherwise return a
+ * negative error number to stop the probe sequence from
+ * succeeding.
+ * @remove: called when a faux device is removed from the system
+ *
+ * Both @probe and @remove are optional, if not needed, set to NULL.
+ */
+struct faux_device_ops {
+ int (*probe)(struct faux_device *faux_dev);
+ void (*remove)(struct faux_device *faux_dev);
+};
+
+struct faux_device *faux_device_create(const char *name,
+ struct device *parent,
+ const struct faux_device_ops *faux_ops);
+struct faux_device *faux_device_create_with_groups(const char *name,
+ struct device *parent,
+ const struct faux_device_ops *faux_ops,
+ const struct attribute_group **groups);
+void faux_device_destroy(struct faux_device *faux_dev);
+
+static inline void *faux_device_get_drvdata(const struct faux_device *faux_dev)
+{
+ return dev_get_drvdata(&faux_dev->dev);
+}
+
+static inline void faux_device_set_drvdata(struct faux_device *faux_dev, void *data)
+{
+ dev_set_drvdata(&faux_dev->dev, data);
+}
+
+#endif /* _FAUX_DEVICE_H_ */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 053c57e61869..7d63d1d75f22 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -114,21 +114,22 @@ typedef struct {
#define EFI_MAX_MEMORY_TYPE 16
/* Attribute values: */
-#define EFI_MEMORY_UC ((u64)0x0000000000000001ULL) /* uncached */
-#define EFI_MEMORY_WC ((u64)0x0000000000000002ULL) /* write-coalescing */
-#define EFI_MEMORY_WT ((u64)0x0000000000000004ULL) /* write-through */
-#define EFI_MEMORY_WB ((u64)0x0000000000000008ULL) /* write-back */
-#define EFI_MEMORY_UCE ((u64)0x0000000000000010ULL) /* uncached, exported */
-#define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */
-#define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */
-#define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */
-#define EFI_MEMORY_NV ((u64)0x0000000000008000ULL) /* non-volatile */
-#define EFI_MEMORY_MORE_RELIABLE \
- ((u64)0x0000000000010000ULL) /* higher reliability */
-#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */
-#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */
-#define EFI_MEMORY_CPU_CRYPTO ((u64)0x0000000000080000ULL) /* supports encryption */
-#define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */
+#define EFI_MEMORY_UC BIT_ULL(0) /* uncached */
+#define EFI_MEMORY_WC BIT_ULL(1) /* write-coalescing */
+#define EFI_MEMORY_WT BIT_ULL(2) /* write-through */
+#define EFI_MEMORY_WB BIT_ULL(3) /* write-back */
+#define EFI_MEMORY_UCE BIT_ULL(4) /* uncached, exported */
+#define EFI_MEMORY_WP BIT_ULL(12) /* write-protect */
+#define EFI_MEMORY_RP BIT_ULL(13) /* read-protect */
+#define EFI_MEMORY_XP BIT_ULL(14) /* execute-protect */
+#define EFI_MEMORY_NV BIT_ULL(15) /* non-volatile */
+#define EFI_MEMORY_MORE_RELIABLE BIT_ULL(16) /* higher reliability */
+#define EFI_MEMORY_RO BIT_ULL(17) /* read-only */
+#define EFI_MEMORY_SP BIT_ULL(18) /* soft reserved */
+#define EFI_MEMORY_CPU_CRYPTO BIT_ULL(19) /* supports encryption */
+#define EFI_MEMORY_HOT_PLUGGABLE BIT_ULL(20) /* supports unplugging at runtime */
+#define EFI_MEMORY_RUNTIME BIT_ULL(63) /* range requires runtime mapping */
+
#define EFI_MEMORY_DESCRIPTOR_VERSION 1
#define EFI_PAGE_SHIFT 12
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 0c0d00fcd131..ccb478eb174b 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -25,6 +25,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t
/* Used to release the epoll bits inside the "struct file" */
void eventpoll_release_file(struct file *file);
+/* Copy ready events to userspace */
+int epoll_sendevents(struct file *file, struct epoll_event __user *events,
+ int maxevents);
+
/*
* This is called from inside fs/file_table.c:__fput() to unlink files
* from the eventpoll interface. We need to have this facility to cleanup
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 78f660ebc318..3c817dc6292e 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -25,7 +25,7 @@
#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET)
-#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
+#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT)
/*
* fanotify_init() flags that require CAP_SYS_ADMIN.
@@ -38,7 +38,8 @@
FAN_REPORT_PIDFD | \
FAN_REPORT_FD_ERROR | \
FAN_UNLIMITED_QUEUE | \
- FAN_UNLIMITED_MARKS)
+ FAN_UNLIMITED_MARKS | \
+ FAN_REPORT_MNT)
/*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@@ -58,7 +59,7 @@
#define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV)
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
- FAN_MARK_FILESYSTEM)
+ FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS)
#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
FAN_MARK_FLUSH)
@@ -109,10 +110,13 @@
/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
+#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH)
+
/* Events that user can request to be notified on */
#define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
FANOTIFY_INODE_EVENTS | \
- FANOTIFY_ERROR_EVENTS)
+ FANOTIFY_ERROR_EVENTS | \
+ FANOTIFY_MOUNT_EVENTS)
/* Extra flags that may be reported with event or control handling of events */
#define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f47fbe16b101..1a0e23a5d02d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
+#include <linux/vfsdebug.h>
#include <linux/linkage.h>
#include <linux/wait_bit.h>
#include <linux/kdev_t.h>
@@ -222,7 +223,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_FSNOTIFY_HSM(mode) 0
#endif
-
/*
* Attribute flags. These should be or-ed together to figure out what
* has been changed!
@@ -791,6 +791,8 @@ struct inode {
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
{
+ VFS_WARN_ON_INODE(strlen(link) != linklen, inode);
+ VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode);
inode->i_link = link;
inode->i_linklen = linklen;
inode->i_opflags |= IOP_CACHED_LINK;
@@ -1055,7 +1057,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
/**
* struct file - Represents a file
- * @f_ref: reference count
* @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
* @f_mode: FMODE_* flags often used in hotpaths
* @f_op: file operations
@@ -1065,12 +1066,12 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_flags: file flags
* @f_iocb_flags: iocb flags
* @f_cred: stashed credentials of creator/opener
+ * @f_owner: file owner
* @f_path: path of the file
* @f_pos_lock: lock protecting file position
* @f_pipe: specific to pipes
* @f_pos: file position
* @f_security: LSM security context of this file
- * @f_owner: file owner
* @f_wb_err: writeback error
* @f_sb_err: per sb writeback errors
* @f_ep: link of all epoll hooks for this file
@@ -1078,9 +1079,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_llist: work queue entrypoint
* @f_ra: file's readahead state
* @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
+ * @f_ref: reference count
*/
struct file {
- file_ref_t f_ref;
spinlock_t f_lock;
fmode_t f_mode;
const struct file_operations *f_op;
@@ -1090,6 +1091,7 @@ struct file {
unsigned int f_flags;
unsigned int f_iocb_flags;
const struct cred *f_cred;
+ struct fown_struct *f_owner;
/* --- cacheline 1 boundary (64 bytes) --- */
struct path f_path;
union {
@@ -1103,7 +1105,6 @@ struct file {
void *f_security;
#endif
/* --- cacheline 2 boundary (128 bytes) --- */
- struct fown_struct *f_owner;
errseq_t f_wb_err;
errseq_t f_sb_err;
#ifdef CONFIG_EPOLL
@@ -1115,6 +1116,7 @@ struct file {
struct file_ra_state f_ra;
freeptr_t f_freeptr;
};
+ file_ref_t f_ref;
/* --- cacheline 3 boundary (192 bytes) --- */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
@@ -1969,8 +1971,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap,
*/
int vfs_create(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, bool);
-int vfs_mkdir(struct mnt_idmap *, struct inode *,
- struct dentry *, umode_t);
+struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
+ struct dentry *, umode_t);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, dev_t);
int vfs_symlink(struct mnt_idmap *, struct inode *,
@@ -2027,7 +2029,7 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group);
int vfs_fchmod(struct file *file, umode_t mode);
int vfs_utimes(const struct path *path, struct timespec64 *times);
-extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
@@ -2199,8 +2201,8 @@ struct inode_operations {
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,
const char *);
- int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,
- umode_t);
+ struct dentry *(*mkdir) (struct mnt_idmap *, struct inode *,
+ struct dentry *, umode_t);
int (*rmdir) (struct inode *,struct dentry *);
int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,
umode_t,dev_t);
@@ -2642,9 +2644,6 @@ static inline bool is_mgtime(const struct inode *inode)
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
-extern struct dentry *mount_single(struct file_system_type *fs_type,
- int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_nodev(struct file_system_type *fs_type,
int flags, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2783,13 +2782,13 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
return mnt_idmap(mnt) != &nop_mnt_idmap;
}
-extern long vfs_truncate(const struct path *, loff_t);
+int vfs_truncate(const struct path *, loff_t);
int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
unsigned int time_attrs, struct file *filp);
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
-extern long do_sys_open(int dfd, const char __user *filename, int flags,
- umode_t mode);
+int do_sys_open(int dfd, const char __user *filename, int flags,
+ umode_t mode);
extern struct file *file_open_name(struct filename *, int, umode_t);
extern struct file *filp_open(const char *, int, umode_t);
extern struct file *file_open_root(const struct path *,
@@ -2840,7 +2839,10 @@ extern int filp_close(struct file *, fl_owner_t id);
extern struct filename *getname_flags(const char __user *, int);
extern struct filename *getname_uflags(const char __user *, int);
-extern struct filename *getname(const char __user *);
+static inline struct filename *getname(const char __user *name)
+{
+ return getname_flags(name, 0);
+}
extern struct filename *getname_kernel(const char *);
extern struct filename *__getname_maybe_null(const char __user *);
static inline struct filename *getname_maybe_null(const char __user *name, int flags)
@@ -2853,6 +2855,13 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f
return __getname_maybe_null(name);
}
extern void putname(struct filename *name);
+DEFINE_FREE(putname, struct filename *, if (!IS_ERR_OR_NULL(_T)) putname(_T))
+
+static inline struct filename *refname(struct filename *name)
+{
+ atomic_inc(&name->refcnt);
+ return name;
+}
extern int finish_open(struct file *file, struct dentry *dentry,
int (*open)(struct inode *, struct file *));
@@ -2964,8 +2973,8 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
} else if (iocb->ki_flags & IOCB_DONTCACHE) {
struct address_space *mapping = iocb->ki_filp->f_mapping;
- filemap_fdatawrite_range_kick(mapping, iocb->ki_pos,
- iocb->ki_pos + count);
+ filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count,
+ iocb->ki_pos - 1);
}
return count;
@@ -3141,6 +3150,12 @@ static inline void exe_file_allow_write_access(struct file *exe_file)
allow_write_access(exe_file);
}
+static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode)
+{
+ file->f_mode &= ~FMODE_FSNOTIFY_MASK;
+ file->f_mode |= mode;
+}
+
static inline bool inode_is_open_for_write(const struct inode *inode)
{
return atomic_read(&inode->i_writecount) > 0;
@@ -3280,7 +3295,11 @@ static inline void __iget(struct inode *inode)
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void __destroy_inode(struct inode *);
-extern struct inode *new_inode_pseudo(struct super_block *sb);
+struct inode *alloc_inode(struct super_block *sb);
+static inline struct inode *new_inode_pseudo(struct super_block *sb)
+{
+ return alloc_inode(sb);
+}
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
@@ -3435,6 +3454,8 @@ extern const struct file_operations generic_ro_fops;
extern int readlink_copy(char __user *, int, const char *, int);
extern int page_readlink(struct dentry *, char __user *, int);
+extern const char *page_get_link_raw(struct dentry *, struct inode *,
+ struct delayed_call *);
extern const char *page_get_link(struct dentry *, struct inode *,
struct delayed_call *);
extern void page_put_link(void *);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 4b4bfef6f053..a19e4bd32e4d 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -144,8 +144,6 @@ extern void put_fs_context(struct fs_context *fc);
extern int vfs_parse_fs_param_source(struct fs_context *fc,
struct fs_parameter *param);
extern void fc_drop_locked(struct fs_context *fc);
-int reconfigure_single(struct super_block *s,
- int flags, void *data);
extern int get_tree_nodev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 1a9ef8f6784d..454d8e466958 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -129,7 +129,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-void file_set_fsnotify_mode(struct file *file);
+void file_set_fsnotify_mode_from_watchers(struct file *file);
/*
* fsnotify_file_area_perm - permission hook before access to file range
@@ -171,6 +171,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
}
/*
+ * fsnotify_mmap_perm - permission hook before mmap of file range
+ */
+static inline int fsnotify_mmap_perm(struct file *file, int prot,
+ const loff_t off, size_t len)
+{
+ /*
+ * mmap() generates only pre-content events.
+ */
+ if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode)))
+ return 0;
+
+ return fsnotify_pre_content(&file->f_path, &off, len);
+}
+
+/*
* fsnotify_truncate_perm - permission hook before file truncate
*/
static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
@@ -213,7 +228,7 @@ static inline int fsnotify_open_perm(struct file *file)
}
#else
-static inline void file_set_fsnotify_mode(struct file *file)
+static inline void file_set_fsnotify_mode_from_watchers(struct file *file)
{
}
@@ -223,6 +238,12 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
return 0;
}
+static inline int fsnotify_mmap_perm(struct file *file, int prot,
+ const loff_t off, size_t len)
+{
+ return 0;
+}
+
static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
{
return 0;
@@ -299,6 +320,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt)
__fsnotify_vfsmount_delete(mnt);
}
+static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{
+ __fsnotify_mntns_delete(mntns);
+}
+
/*
* fsnotify_inoderemove - an inode is going away
*/
@@ -507,4 +533,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
NULL, NULL, NULL, 0);
}
+static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_ATTACH, ns, mnt);
+}
+
+static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_DETACH, ns, mnt);
+}
+
+static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ fsnotify_mnt(FS_MNT_MOVE, ns, mnt);
+}
+
#endif /* _LINUX_FS_NOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0d24a21a8e60..6cd8d1d28b8b 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -59,6 +59,10 @@
#define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */
+#define FS_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FS_MNT_DETACH 0x02000000 /* Mount was detached */
+#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH)
+
/*
* Set on inode mark that cares about things that happen to its children.
* Always set for dnotify and inotify.
@@ -80,6 +84,9 @@
*/
#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
+/* Mount namespace events */
+#define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH)
+
/* Content events can be used to inspect file content */
#define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \
FS_ACCESS_PERM)
@@ -108,6 +115,7 @@
/* Events that can be reported to backends */
#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
+ FSNOTIFY_MNT_EVENTS | \
FS_EVENTS_POSS_ON_CHILD | \
FS_DELETE_SELF | FS_MOVE_SELF | \
FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
@@ -298,6 +306,7 @@ enum fsnotify_data_type {
FSNOTIFY_EVENT_PATH,
FSNOTIFY_EVENT_INODE,
FSNOTIFY_EVENT_DENTRY,
+ FSNOTIFY_EVENT_MNT,
FSNOTIFY_EVENT_ERROR,
};
@@ -318,6 +327,11 @@ static inline const struct path *file_range_path(const struct file_range *range)
return range->path;
}
+struct fsnotify_mnt {
+ const struct mnt_namespace *ns;
+ u64 mnt_id;
+};
+
static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
{
switch (data_type) {
@@ -383,6 +397,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
}
}
+static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data,
+ int data_type)
+{
+ switch (data_type) {
+ case FSNOTIFY_EVENT_MNT:
+ return data;
+ default:
+ return NULL;
+ }
+}
+
+static inline u64 fsnotify_data_mnt_id(const void *data, int data_type)
+{
+ const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
+
+ return mnt_data ? mnt_data->mnt_id : 0;
+}
+
static inline struct fs_error_report *fsnotify_data_error_report(
const void *data,
int data_type)
@@ -420,6 +452,7 @@ enum fsnotify_iter_type {
FSNOTIFY_ITER_TYPE_SB,
FSNOTIFY_ITER_TYPE_PARENT,
FSNOTIFY_ITER_TYPE_INODE2,
+ FSNOTIFY_ITER_TYPE_MNTNS,
FSNOTIFY_ITER_TYPE_COUNT
};
@@ -429,6 +462,7 @@ enum fsnotify_obj_type {
FSNOTIFY_OBJ_TYPE_INODE,
FSNOTIFY_OBJ_TYPE_VFSMOUNT,
FSNOTIFY_OBJ_TYPE_SB,
+ FSNOTIFY_OBJ_TYPE_MNTNS,
FSNOTIFY_OBJ_TYPE_COUNT,
FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
};
@@ -613,8 +647,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data
extern void __fsnotify_inode_delete(struct inode *inode);
extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
extern void fsnotify_sb_delete(struct super_block *sb);
+extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns);
extern void fsnotify_sb_free(struct super_block *sb);
extern u32 fsnotify_get_cookie(void);
+extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt);
static inline __u32 fsnotify_parent_needed_mask(__u32 mask)
{
@@ -928,6 +964,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
static inline void fsnotify_sb_delete(struct super_block *sb)
{}
+static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{}
+
static inline void fsnotify_sb_free(struct super_block *sb)
{}
@@ -942,6 +981,9 @@ static inline u32 fsnotify_get_cookie(void)
static inline void fsnotify_unmount_inodes(struct super_block *sb)
{}
+static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
+{}
+
#endif /* CONFIG_FSNOTIFY */
#endif /* __KERNEL __ */
diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index c3b4b7ed7c16..84a5045f80f3 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -125,6 +125,7 @@ struct hrtimer_cpu_base {
ktime_t softirq_expires_next;
struct hrtimer *softirq_next_timer;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+ call_single_data_t csd;
} ____cacheline_aligned;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..76a75ec03dd6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1004,7 +1005,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
+ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
#endif
@@ -1066,6 +1069,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index c31fd1dba3bd..2b2af24d2a43 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -244,6 +244,7 @@ enum i2c_driver_flags {
* @id_table: List of I2C devices supported by this driver
* @detect: Callback for device detection
* @address_list: The I2C addresses to probe (for detect)
+ * @clients: List of detected clients we created (for i2c-core use only)
* @flags: A bitmask of flags defined in &enum i2c_driver_flags
*
* The driver.owner field should be set to the module owner of this driver.
@@ -298,6 +299,7 @@ struct i2c_driver {
/* Device detection callback for automatic device creation */
int (*detect)(struct i2c_client *client, struct i2c_board_info *info);
const unsigned short *address_list;
+ struct list_head clients;
u32 flags;
};
@@ -313,6 +315,8 @@ struct i2c_driver {
* @dev: Driver model device node for the slave.
* @init_irq: IRQ that was set at initialization
* @irq: indicates the IRQ generated by this device (if any)
+ * @detected: member of an i2c_driver.clients list or i2c-core's
+ * userspace_devices list
* @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter
* calls it to pass on slave events to the slave driver.
* @devres_group_id: id of the devres group that will be created for resources
@@ -332,8 +336,6 @@ struct i2c_client {
#define I2C_CLIENT_SLAVE 0x20 /* we are the slave */
#define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */
#define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */
-#define I2C_CLIENT_AUTO 0x100 /* client was auto-detected */
-#define I2C_CLIENT_USER 0x200 /* client was userspace-created */
#define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */
/* Must match I2C_M_STOP|IGNORE_NAK */
@@ -345,6 +347,7 @@ struct i2c_client {
struct device dev; /* the device structure */
int init_irq; /* irq set at initialization */
int irq; /* irq issued by device */
+ struct list_head detected;
#if IS_ENABLED(CONFIG_I2C_SLAVE)
i2c_slave_cb_t slave_cb; /* callback for slave mode */
#endif
@@ -751,6 +754,9 @@ struct i2c_adapter {
char name[48];
struct completion dev_released;
+ struct mutex userspace_clients_lock;
+ struct list_head userspace_clients;
+
struct i2c_bus_recovery_info *bus_recovery_info;
const struct i2c_adapter_quirks *quirks;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 75bf54e76f3b..02fe001feebb 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -56,6 +56,13 @@ struct vm_fault;
*
* IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
* never be merged with the mapping before it.
+ *
+ * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block
+ * assigned to it yet and the file system will do that in the bio submission
+ * handler, splitting the I/O as needed.
+ *
+ * IOMAP_F_ATOMIC_BIO indicates that (write) I/O will be issued as an atomic
+ * bio, i.e. set REQ_ATOMIC.
*/
#define IOMAP_F_NEW (1U << 0)
#define IOMAP_F_DIRTY (1U << 1)
@@ -68,6 +75,8 @@ struct vm_fault;
#endif /* CONFIG_BUFFER_HEAD */
#define IOMAP_F_XATTR (1U << 5)
#define IOMAP_F_BOUNDARY (1U << 6)
+#define IOMAP_F_ANON_WRITE (1U << 7)
+#define IOMAP_F_ATOMIC_BIO (1U << 8)
/*
* Flags set by the core iomap code during operations:
@@ -111,6 +120,8 @@ struct iomap {
static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
{
+ if (iomap->flags & IOMAP_F_ANON_WRITE)
+ return U64_MAX; /* invalid */
return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
}
@@ -182,7 +193,8 @@ struct iomap_folio_ops {
#else
#define IOMAP_DAX 0
#endif /* CONFIG_FS_DAX */
-#define IOMAP_ATOMIC (1 << 9)
+#define IOMAP_ATOMIC (1 << 9) /* torn-write protection */
+#define IOMAP_DONTCACHE (1 << 10)
struct iomap_ops {
/*
@@ -211,8 +223,10 @@ struct iomap_ops {
* calls to iomap_iter(). Treat as read-only in the body.
* @len: The remaining length of the file segment we're operating on.
* It is updated at the same time as @pos.
- * @processed: The number of bytes processed by the body in the most recent
- * iteration, or a negative errno. 0 causes the iteration to stop.
+ * @iter_start_pos: The original start pos for the current iomap. Used for
+ * incremental iter advance.
+ * @status: Status of the most recent iteration. Zero on success or a negative
+ * errno on error.
* @flags: Zero or more of the iomap_begin flags above.
* @iomap: Map describing the I/O iteration
* @srcmap: Source map for COW operations
@@ -221,7 +235,8 @@ struct iomap_iter {
struct inode *inode;
loff_t pos;
u64 len;
- s64 processed;
+ loff_t iter_start_pos;
+ int status;
unsigned flags;
struct iomap iomap;
struct iomap srcmap;
@@ -229,20 +244,46 @@ struct iomap_iter {
};
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
+int iomap_iter_advance(struct iomap_iter *iter, u64 *count);
/**
- * iomap_length - length of the current iomap iteration
+ * iomap_length_trim - trimmed length of the current iomap iteration
* @iter: iteration structure
+ * @pos: File position to trim from.
+ * @len: Length of the mapping to trim to.
*
- * Returns the length that the operation applies to for the current iteration.
+ * Returns a trimmed length that the operation applies to for the current
+ * iteration.
*/
-static inline u64 iomap_length(const struct iomap_iter *iter)
+static inline u64 iomap_length_trim(const struct iomap_iter *iter, loff_t pos,
+ u64 len)
{
u64 end = iter->iomap.offset + iter->iomap.length;
if (iter->srcmap.type != IOMAP_HOLE)
end = min(end, iter->srcmap.offset + iter->srcmap.length);
- return min(iter->len, end - iter->pos);
+ return min(len, end - pos);
+}
+
+/**
+ * iomap_length - length of the current iomap iteration
+ * @iter: iteration structure
+ *
+ * Returns the length that the operation applies to for the current iteration.
+ */
+static inline u64 iomap_length(const struct iomap_iter *iter)
+{
+ return iomap_length_trim(iter, iter->pos, iter->len);
+}
+
+/**
+ * iomap_iter_advance_full - advance by the full length of current map
+ */
+static inline int iomap_iter_advance_full(struct iomap_iter *iter)
+{
+ u64 length = iomap_length(iter);
+
+ return iomap_iter_advance(iter, &length);
}
/**
@@ -306,12 +347,11 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
- bool *did_zero, const struct iomap_ops *ops);
+ bool *did_zero, const struct iomap_ops *ops, void *private);
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
- const struct iomap_ops *ops);
-vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
- const struct iomap_ops *ops);
-
+ const struct iomap_ops *ops, void *private);
+vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops,
+ void *private);
typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length,
struct iomap *iomap);
void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
@@ -328,16 +368,42 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
const struct iomap_ops *ops);
/*
+ * Flags for iomap_ioend->io_flags.
+ */
+/* shared COW extent */
+#define IOMAP_IOEND_SHARED (1U << 0)
+/* unwritten extent */
+#define IOMAP_IOEND_UNWRITTEN (1U << 1)
+/* don't merge into previous ioend */
+#define IOMAP_IOEND_BOUNDARY (1U << 2)
+/* is direct I/O */
+#define IOMAP_IOEND_DIRECT (1U << 3)
+
+/*
+ * Flags that if set on either ioend prevent the merge of two ioends.
+ * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way)
+ */
+#define IOMAP_IOEND_NOMERGE_FLAGS \
+ (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
+
+/*
* Structure for writeback I/O completions.
+ *
+ * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io
+ * for direct I/O) can split a bio generated by iomap. In that case the parent
+ * ioend it was split from is recorded in ioend->io_parent.
*/
struct iomap_ioend {
struct list_head io_list; /* next ioend in chain */
- u16 io_type;
- u16 io_flags; /* IOMAP_F_* */
+ u16 io_flags; /* IOMAP_IOEND_* */
struct inode *io_inode; /* file being written to */
- size_t io_size; /* size of data within eof */
+ size_t io_size; /* size of the extent */
+ atomic_t io_remaining; /* completetion defer count */
+ int io_error; /* stashed away status */
+ struct iomap_ioend *io_parent; /* parent for completions */
loff_t io_offset; /* offset in the file */
sector_t io_sector; /* start sector of ioend */
+ void *io_private; /* file system private data */
struct bio io_bio; /* MUST BE LAST! */
};
@@ -362,12 +428,14 @@ struct iomap_writeback_ops {
loff_t offset, unsigned len);
/*
- * Optional, allows the file systems to perform actions just before
- * submitting the bio and/or override the bio end_io handler for complex
- * operations like copy on write extent manipulation or unwritten extent
- * conversions.
+ * Optional, allows the file systems to hook into bio submission,
+ * including overriding the bi_end_io handler.
+ *
+ * Returns 0 if the bio was successfully submitted, or a negative
+ * error code if status was non-zero or another error happened and
+ * the bio could not be submitted.
*/
- int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
+ int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
/*
* Optional, allows the file system to discard state on a page where
@@ -383,6 +451,10 @@ struct iomap_writepage_ctx {
u32 nr_folios; /* folios added to the ioend */
};
+struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio,
+ loff_t file_offset, u16 ioend_flags);
+struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
+ unsigned int max_len, bool is_append);
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
struct list_head *more_ioends);
@@ -454,4 +526,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
# define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO)
#endif /* CONFIG_SWAP */
+extern struct bio_set iomap_ioend_bioset;
+
#endif /* LINUX_IOMAP_H */
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index ed945f42e064..0ea8c9887429 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -537,7 +537,7 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m)
*
* Return: jiffies value
*/
-#define secs_to_jiffies(_secs) ((_secs) * HZ)
+#define secs_to_jiffies(_secs) (unsigned long)((_secs) * HZ)
extern unsigned long __usecs_to_jiffies(const unsigned int u);
#if !(USEC_PER_SEC % HZ)
diff --git a/include/linux/key.h b/include/linux/key.h
index 074dca3222b9..ba05de8579ec 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -236,6 +236,7 @@ struct key {
#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */
#define KEY_FLAG_KEEP 8 /* set if key should not be removed */
#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */
+#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */
/* the key type and key description string
* - the desc is used to match a key against search criteria
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3cb9a32a6330..f34f4cfaa513 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1615,7 +1615,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
-int kvm_arch_post_init_vm(struct kvm *kvm);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
void kvm_arch_create_vm_debugfs(struct kvm *kvm);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c1c57f814b98..e5695998acb0 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -88,6 +88,7 @@ enum ata_quirks {
__ATA_QUIRK_MAX_SEC_1024, /* Limit max sects to 1024 */
__ATA_QUIRK_MAX_TRIM_128M, /* Limit max trim size to 128M */
__ATA_QUIRK_NO_NCQ_ON_ATI, /* Disable NCQ on ATI chipset */
+ __ATA_QUIRK_NO_LPM_ON_ATI, /* Disable LPM on ATI chipset */
__ATA_QUIRK_NO_ID_DEV_LOG, /* Identify device log missing */
__ATA_QUIRK_NO_LOG_DIR, /* Do not read log directory */
__ATA_QUIRK_NO_FUA, /* Do not use FUA */
@@ -432,6 +433,7 @@ enum {
ATA_QUIRK_MAX_SEC_1024 = (1U << __ATA_QUIRK_MAX_SEC_1024),
ATA_QUIRK_MAX_TRIM_128M = (1U << __ATA_QUIRK_MAX_TRIM_128M),
ATA_QUIRK_NO_NCQ_ON_ATI = (1U << __ATA_QUIRK_NO_NCQ_ON_ATI),
+ ATA_QUIRK_NO_LPM_ON_ATI = (1U << __ATA_QUIRK_NO_LPM_ON_ATI),
ATA_QUIRK_NO_ID_DEV_LOG = (1U << __ATA_QUIRK_NO_ID_DEV_LOG),
ATA_QUIRK_NO_LOG_DIR = (1U << __ATA_QUIRK_NO_LOG_DIR),
ATA_QUIRK_NO_FUA = (1U << __ATA_QUIRK_NO_FUA),
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index c39f119659ba..676721ee878d 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -37,12 +37,13 @@ struct lockref {
/**
* lockref_init - Initialize a lockref
* @lockref: pointer to lockref structure
- * @count: initial count
+ *
+ * Initializes @lockref->count to 1.
*/
-static inline void lockref_init(struct lockref *lockref, unsigned int count)
+static inline void lockref_init(struct lockref *lockref)
{
spin_lock_init(&lockref->lock);
- lockref->count = count;
+ lockref->count = 1;
}
void lockref_get(struct lockref *lockref);
diff --git a/include/linux/log2.h b/include/linux/log2.h
index 9f30d087a128..1366cb688a6d 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -41,7 +41,7 @@ int __ilog2_u64(u64 n)
* *not* considered a power of two.
* Return: true if @n is a power of 2, otherwise false.
*/
-static inline __attribute__((const))
+static __always_inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
return (n != 0 && ((n & (n - 1)) == 0));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b1068ddcbb7..2edb8d14d165 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1458,7 +1458,10 @@ static inline void folio_get(struct folio *folio)
static inline void get_page(struct page *page)
{
- folio_get(page_folio(page));
+ struct folio *folio = page_folio(page);
+ if (WARN_ON_ONCE(folio_test_slab(folio)))
+ return;
+ folio_get(folio);
}
static inline __must_check bool try_get_page(struct page *page)
@@ -1552,6 +1555,9 @@ static inline void put_page(struct page *page)
{
struct folio *folio = page_folio(page);
+ if (folio_test_slab(folio))
+ return;
+
/*
* For some devmap managed pages we need to catch refcount transition
* from 2 to 1:
@@ -2549,7 +2555,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
struct task_struct *task, bool bypass_rlim);
struct kvec;
-struct page *get_dump_page(unsigned long addr);
+struct page *get_dump_page(unsigned long addr, int *locked);
bool folio_mark_dirty(struct folio *folio);
bool folio_mark_dirty_lock(struct folio *folio);
@@ -3420,7 +3426,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
-extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf);
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6b27db7f9496..0234f14f2aa6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -875,10 +875,11 @@ struct mm_struct {
*/
unsigned int nr_cpus_allowed;
/**
- * @max_nr_cid: Maximum number of concurrency IDs allocated.
+ * @max_nr_cid: Maximum number of allowed concurrency
+ * IDs allocated.
*
- * Track the highest number of concurrency IDs allocated for the
- * mm.
+ * Track the highest number of allowed concurrency IDs
+ * allocated for the mm.
*/
atomic_t max_nr_cid;
/**
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index b1b219bc3422..e71a6070a8f8 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t));
static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val));
static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val));
+static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap)
+{
+ return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap;
+}
+
#ifdef CONFIG_MULTIUSER
static inline uid_t __vfsuid_val(vfsuid_t uid)
{
diff --git a/include/linux/module.h b/include/linux/module.h
index 23792d5d7b74..30e5b19bafa9 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -306,7 +306,10 @@ extern int modules_disabled; /* for sysctl */
/* Get/put a kernel symbol (calls must be symmetric) */
void *__symbol_get(const char *symbol);
void *__symbol_get_gpl(const char *symbol);
-#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x))))
+#define symbol_get(x) ({ \
+ static const char __notrim[] \
+ __used __section(".no_trim_symbol") = __stringify(x); \
+ (typeof(&x))(__symbol_get(__stringify(x))); })
/* modules using other modules: kdb wants to see this. */
struct module_use {
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 8ec8fed3bce8..e3042176cdf4 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -18,35 +18,36 @@ enum { MAX_NESTED_LINKS = 8 };
enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
/* pathwalk mode */
-#define LOOKUP_FOLLOW 0x0001 /* follow links at the end */
-#define LOOKUP_DIRECTORY 0x0002 /* require a directory */
-#define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */
-#define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */
-#define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */
-#define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */
-
-#define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */
-#define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */
+#define LOOKUP_FOLLOW BIT(0) /* follow links at the end */
+#define LOOKUP_DIRECTORY BIT(1) /* require a directory */
+#define LOOKUP_AUTOMOUNT BIT(2) /* force terminal automount */
+#define LOOKUP_EMPTY BIT(3) /* accept empty path [user_... only] */
+#define LOOKUP_LINKAT_EMPTY BIT(4) /* Linkat request with empty path. */
+#define LOOKUP_DOWN BIT(5) /* follow mounts in the starting point */
+#define LOOKUP_MOUNTPOINT BIT(6) /* follow mounts in the end */
+#define LOOKUP_REVAL BIT(7) /* tell ->d_revalidate() to trust no cache */
+#define LOOKUP_RCU BIT(8) /* RCU pathwalk mode; semi-internal */
+#define LOOKUP_CACHED BIT(9) /* Only do cached lookup */
+#define LOOKUP_PARENT BIT(10) /* Looking up final parent in path */
+/* 5 spare bits for pathwalk */
/* These tell filesystem methods that we are dealing with the final component... */
-#define LOOKUP_OPEN 0x0100 /* ... in open */
-#define LOOKUP_CREATE 0x0200 /* ... in object creation */
-#define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */
-#define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */
+#define LOOKUP_OPEN BIT(16) /* ... in open */
+#define LOOKUP_CREATE BIT(17) /* ... in object creation */
+#define LOOKUP_EXCL BIT(18) /* ... in target must not exist */
+#define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */
-/* internal use only */
-#define LOOKUP_PARENT 0x0010
+/* 4 spare bits for intent */
/* Scoping flags for lookup. */
-#define LOOKUP_NO_SYMLINKS 0x010000 /* No symlink crossing. */
-#define LOOKUP_NO_MAGICLINKS 0x020000 /* No nd_jump_link() crossing. */
-#define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */
-#define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */
-#define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */
-#define LOOKUP_CACHED 0x200000 /* Only do cached lookup */
-#define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */
+#define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */
+#define LOOKUP_NO_MAGICLINKS BIT(25) /* No nd_jump_link() crossing. */
+#define LOOKUP_NO_XDEV BIT(26) /* No mountpoint crossing. */
+#define LOOKUP_BENEATH BIT(27) /* No escaping from starting point. */
+#define LOOKUP_IN_ROOT BIT(28) /* Treat dirfd as fs root. */
/* LOOKUP_* flags which do scope-related checks based on the dirfd. */
#define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT)
+/* 3 spare bits for scoping */
extern int path_pts(struct path *path);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2a59034a5fa2..ab550a89b9bf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2664,6 +2664,12 @@ struct net *dev_net(const struct net_device *dev)
}
static inline
+struct net *dev_net_rcu(const struct net_device *dev)
+{
+ return read_pnet_rcu(&dev->nd_net);
+}
+
+static inline
void dev_net_set(struct net_device *dev, struct net *net)
{
write_pnet(&dev->nd_net, net);
@@ -2904,9 +2910,9 @@ struct pcpu_sw_netstats {
struct pcpu_dstats {
u64_stats_t rx_packets;
u64_stats_t rx_bytes;
- u64_stats_t rx_drops;
u64_stats_t tx_packets;
u64_stats_t tx_bytes;
+ u64_stats_t rx_drops;
u64_stats_t tx_drops;
struct u64_stats_sync syncp;
} __aligned(8 * sizeof(u64));
@@ -3269,6 +3275,8 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
}
int netdev_boot_setup_check(struct net_device *dev);
+struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type,
+ const char *hwaddr);
struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
const char *hwaddr);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
@@ -4109,7 +4117,6 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 071d05d81d38..c86a11cfc4a3 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -278,7 +278,7 @@ struct netfs_io_request {
#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
-#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */
+#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */
const struct netfs_request_ops *netfs_ops;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 71fbebfa43c7..9ac83ca88326 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -47,6 +47,7 @@ struct nfs4_acl {
struct nfs4_label {
uint32_t lfs;
uint32_t pi;
+ u32 lsmid;
u32 len;
char *label;
};
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9155a6ffc370..d66c61cbbd1d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1802,7 +1802,7 @@ struct nfs_rpc_ops {
int (*link) (struct inode *, struct inode *, const struct qstr *);
int (*symlink) (struct inode *, struct dentry *, struct folio *,
unsigned int, struct iattr *);
- int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
+ struct dentry *(*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
int (*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *);
int (*mknod) (struct inode *, struct dentry *, struct iattr *,
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
index e07e8978d691..e435250fcb4d 100644
--- a/include/linux/nvme-tcp.h
+++ b/include/linux/nvme-tcp.h
@@ -13,6 +13,8 @@
#define NVME_TCP_ADMIN_CCSZ SZ_8K
#define NVME_TCP_DIGEST_LENGTH 4
#define NVME_TCP_MIN_MAXH2CDATA 4096
+#define NVME_TCP_MIN_C2HTERM_PLEN 24
+#define NVME_TCP_MAX_C2HTERM_PLEN 152
enum nvme_tcp_pfv {
NVME_TCP_PFV_1_0 = 0x0,
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index fe3b60818fdc..2dc05b1c3283 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -199,28 +199,54 @@ enum {
#define NVME_NVM_IOSQES 6
#define NVME_NVM_IOCQES 4
+/*
+ * Controller Configuration (CC) register (Offset 14h)
+ */
enum {
+ /* Enable (EN): bit 0 */
NVME_CC_ENABLE = 1 << 0,
NVME_CC_EN_SHIFT = 0,
+
+ /* Bits 03:01 are reserved (NVMe Base Specification rev 2.1) */
+
+ /* I/O Command Set Selected (CSS): bits 06:04 */
NVME_CC_CSS_SHIFT = 4,
- NVME_CC_MPS_SHIFT = 7,
- NVME_CC_AMS_SHIFT = 11,
- NVME_CC_SHN_SHIFT = 14,
- NVME_CC_IOSQES_SHIFT = 16,
- NVME_CC_IOCQES_SHIFT = 20,
+ NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT,
- NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT,
+
+ /* Memory Page Size (MPS): bits 10:07 */
+ NVME_CC_MPS_SHIFT = 7,
+ NVME_CC_MPS_MASK = 0xf << NVME_CC_MPS_SHIFT,
+
+ /* Arbitration Mechanism Selected (AMS): bits 13:11 */
+ NVME_CC_AMS_SHIFT = 11,
+ NVME_CC_AMS_MASK = 7 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT,
+
+ /* Shutdown Notification (SHN): bits 15:14 */
+ NVME_CC_SHN_SHIFT = 14,
+ NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT,
- NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
+
+ /* I/O Submission Queue Entry Size (IOSQES): bits 19:16 */
+ NVME_CC_IOSQES_SHIFT = 16,
+ NVME_CC_IOSQES_MASK = 0xf << NVME_CC_IOSQES_SHIFT,
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
+
+ /* I/O Completion Queue Entry Size (IOCQES): bits 23:20 */
+ NVME_CC_IOCQES_SHIFT = 20,
+ NVME_CC_IOCQES_MASK = 0xf << NVME_CC_IOCQES_SHIFT,
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
+
+ /* Controller Ready Independent of Media Enable (CRIME): bit 24 */
NVME_CC_CRIME = 1 << 24,
+
+ /* Bits 25:31 are reserved (NVMe Base Specification rev 2.1) */
};
enum {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 47bfc6b1b632..f348e7005306 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1044,21 +1044,23 @@ static inline pgoff_t page_pgoff(const struct folio *folio,
return folio->index + folio_page_idx(folio, page);
}
-/*
- * Return byte-offset into filesystem object for page.
+/**
+ * folio_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
*/
-static inline loff_t page_offset(struct page *page)
+static inline loff_t folio_pos(const struct folio *folio)
{
- return ((loff_t)page->index) << PAGE_SHIFT;
+ return ((loff_t)folio->index) * PAGE_SIZE;
}
-/**
- * folio_pos - Returns the byte position of this folio in its file.
- * @folio: The folio.
+/*
+ * Return byte-offset into filesystem object for page.
*/
-static inline loff_t folio_pos(struct folio *folio)
+static inline loff_t page_offset(struct page *page)
{
- return page_offset(&folio->page);
+ struct folio *folio = page_folio(page);
+
+ return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE;
}
/*
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index de5deb1a0118..1a2594a38199 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -3134,6 +3134,7 @@
#define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828
#define PCI_DEVICE_ID_INTEL_S21152BB 0xb152
#define PCI_DEVICE_ID_INTEL_HDA_BMG 0xe2f7
+#define PCI_DEVICE_ID_INTEL_HDA_PTL_H 0xe328
#define PCI_DEVICE_ID_INTEL_HDA_PTL 0xe428
#define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8
#define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 7c830d0dec9a..05e6f8f4a026 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -6,6 +6,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
void pidfs_add_pid(struct pid *pid);
void pidfs_remove_pid(struct pid *pid);
+void pidfs_exit(struct task_struct *tsk);
extern const struct dentry_operations pidfs_dentry_operations;
#endif /* _LINUX_PID_FS_H */
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8ff23bf5a819..9d42d473d201 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -31,6 +31,33 @@ struct pipe_buffer {
unsigned long private;
};
+/*
+ * Really only alpha needs 32-bit fields, but
+ * might as well do it for 64-bit architectures
+ * since that's what we've historically done,
+ * and it makes 'head_tail' always be a simple
+ * 'unsigned long'.
+ */
+#ifdef CONFIG_64BIT
+typedef unsigned int pipe_index_t;
+#else
+typedef unsigned short pipe_index_t;
+#endif
+
+/*
+ * We have to declare this outside 'struct pipe_inode_info',
+ * but then we can't use 'union pipe_index' for an anonymous
+ * union, so we end up having to duplicate this declaration
+ * below. Annoying.
+ */
+union pipe_index {
+ unsigned long head_tail;
+ struct {
+ pipe_index_t head;
+ pipe_index_t tail;
+ };
+};
+
/**
* struct pipe_inode_info - a linux kernel pipe
* @mutex: mutex protecting the whole thing
@@ -38,6 +65,7 @@ struct pipe_buffer {
* @wr_wait: writer wait point in case of full pipe
* @head: The point of buffer production
* @tail: The point of buffer consumption
+ * @head_tail: unsigned long union of @head and @tail
* @note_loss: The next read() should insert a data-lost message
* @max_usage: The maximum number of slots that may be used in the ring
* @ring_size: total number of buffers (should be a power of 2)
@@ -58,8 +86,16 @@ struct pipe_buffer {
struct pipe_inode_info {
struct mutex mutex;
wait_queue_head_t rd_wait, wr_wait;
- unsigned int head;
- unsigned int tail;
+
+ /* This has to match the 'union pipe_index' above */
+ union {
+ unsigned long head_tail;
+ struct {
+ pipe_index_t head;
+ pipe_index_t tail;
+ };
+ };
+
unsigned int max_usage;
unsigned int ring_size;
unsigned int nr_accounted;
@@ -72,7 +108,7 @@ struct pipe_inode_info {
#ifdef CONFIG_WATCH_QUEUE
bool note_loss;
#endif
- struct page *tmp_page;
+ struct page *tmp_page[2];
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
struct pipe_buffer *bufs;
@@ -141,23 +177,23 @@ static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe)
}
/**
- * pipe_empty - Return true if the pipe is empty
+ * pipe_occupancy - Return number of slots used in the pipe
* @head: The pipe ring head pointer
* @tail: The pipe ring tail pointer
*/
-static inline bool pipe_empty(unsigned int head, unsigned int tail)
+static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail)
{
- return head == tail;
+ return (pipe_index_t)(head - tail);
}
/**
- * pipe_occupancy - Return number of slots used in the pipe
+ * pipe_empty - Return true if the pipe is empty
* @head: The pipe ring head pointer
* @tail: The pipe ring tail pointer
*/
-static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail)
+static inline bool pipe_empty(unsigned int head, unsigned int tail)
{
- return head - tail;
+ return !pipe_occupancy(head, tail);
}
/**
@@ -173,6 +209,33 @@ static inline bool pipe_full(unsigned int head, unsigned int tail,
}
/**
+ * pipe_is_full - Return true if the pipe is full
+ * @pipe: the pipe
+ */
+static inline bool pipe_is_full(const struct pipe_inode_info *pipe)
+{
+ return pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+}
+
+/**
+ * pipe_is_empty - Return true if the pipe is empty
+ * @pipe: the pipe
+ */
+static inline bool pipe_is_empty(const struct pipe_inode_info *pipe)
+{
+ return pipe_empty(pipe->head, pipe->tail);
+}
+
+/**
+ * pipe_buf_usage - Return how many pipe buffers are in use
+ * @pipe: the pipe
+ */
+static inline unsigned int pipe_buf_usage(const struct pipe_inode_info *pipe)
+{
+ return pipe_occupancy(pipe->head, pipe->tail);
+}
+
+/**
* pipe_buf - Return the pipe buffer for the specified slot in the pipe ring
* @pipe: The pipe to access
* @slot: The slot of interest
@@ -245,15 +308,6 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
return buf->ops->try_steal(pipe, buf);
}
-static inline void pipe_discard_from(struct pipe_inode_info *pipe,
- unsigned int old_head)
-{
- unsigned int mask = pipe->ring_size - 1;
-
- while (pipe->head > old_head)
- pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]);
-}
-
/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE
diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h
index 8ab5b0e8eb2c..8c9df7dadd5d 100644
--- a/include/linux/platform_profile.h
+++ b/include/linux/platform_profile.h
@@ -33,6 +33,8 @@ enum platform_profile_option {
* @probe: Callback to setup choices available to the new class device. These
* choices will only be enforced when setting a new profile, not when
* getting the current one.
+ * @hidden_choices: Callback to setup choices that are not visible to the user
+ * but can be set by the driver.
* @profile_get: Callback that will be called when showing the current platform
* profile in sysfs.
* @profile_set: Callback that will be called when storing a new platform
@@ -40,6 +42,7 @@ enum platform_profile_option {
*/
struct platform_profile_ops {
int (*probe)(void *drvdata, unsigned long *choices);
+ int (*hidden_choices)(void *drvdata, unsigned long *choices);
int (*profile_get)(struct device *dev, enum platform_profile_option *profile);
int (*profile_set)(struct device *dev, enum platform_profile_option profile);
};
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 0b2a89854440..ea62201c74c4 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -20,10 +20,13 @@ enum {
* If in doubt, ignore this flag.
*/
#ifdef MODULE
- PROC_ENTRY_PERMANENT = 0U,
+ PROC_ENTRY_PERMANENT = 0U,
#else
- PROC_ENTRY_PERMANENT = 1U << 0,
+ PROC_ENTRY_PERMANENT = 1U << 0,
#endif
+
+ PROC_ENTRY_proc_read_iter = 1U << 1,
+ PROC_ENTRY_proc_compat_ioctl = 1U << 2,
};
struct proc_ops {
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 903ddfea8585..f3cad182d4ef 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -815,6 +815,15 @@ struct sev_data_snp_commit {
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
/**
+ * sev_module_init - perform PSP SEV module initialization
+ *
+ * Returns:
+ * 0 if the PSP module is successfully initialized
+ * negative value if the PSP module initialization fails
+ */
+int sev_module_init(void);
+
+/**
* sev_platform_init - perform SEV INIT command
*
* @args: struct sev_platform_init_args to pass in arguments
diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
index 2c8bfd0f1b6b..6322d8c1c6b4 100644
--- a/include/linux/rcuref.h
+++ b/include/linux/rcuref.h
@@ -71,27 +71,30 @@ static inline __must_check bool rcuref_get(rcuref_t *ref)
return rcuref_get_slowpath(ref);
}
-extern __must_check bool rcuref_put_slowpath(rcuref_t *ref);
+extern __must_check bool rcuref_put_slowpath(rcuref_t *ref, unsigned int cnt);
/*
* Internal helper. Do not invoke directly.
*/
static __always_inline __must_check bool __rcuref_put(rcuref_t *ref)
{
+ int cnt;
+
RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(),
"suspicious rcuref_put_rcusafe() usage");
/*
* Unconditionally decrease the reference count. The saturation and
* dead zones provide enough tolerance for this.
*/
- if (likely(!atomic_add_negative_release(-1, &ref->refcnt)))
+ cnt = atomic_sub_return_release(1, &ref->refcnt);
+ if (likely(cnt >= 0))
return false;
/*
* Handle the last reference drop and cases inside the saturation
* and dead zones.
*/
- return rcuref_put_slowpath(ref);
+ return rcuref_put_slowpath(ref, cnt);
}
/**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9632e3318e0d..9c15365a30c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1701,7 +1701,7 @@ extern struct pid *cad_pid;
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
#define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
-#define PF__HOLE__00010000 0x00010000
+#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0f2aeb37bbb0..ca1db4b92c32 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -43,6 +43,7 @@ struct kernel_clone_args {
void *fn_arg;
struct cgroup *cgrp;
struct css_set *cset;
+ unsigned int kill_seq;
};
/*
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 2cbe0c22a32f..0b9095a281b8 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -91,6 +91,8 @@ struct sk_psock {
struct sk_psock_progs progs;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
struct strparser strp;
+ u32 copied_seq;
+ u32 ingress_bytes;
#endif
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
diff --git a/include/linux/socket.h b/include/linux/socket.h
index d18cc47e89bd..c3322eb3d686 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -392,6 +392,8 @@ struct ucred {
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len,
+ void *data);
struct timespec64;
struct __kernel_timespec;
diff --git a/include/linux/string.h b/include/linux/string.h
index 86d5d352068b..f8e21e80942f 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -414,7 +414,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
* must be discoverable by the compiler.
*/
#define strtomem_pad(dest, src, pad) do { \
- const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _dest_len = __must_be_byte_array(dest) + \
+ ARRAY_SIZE(dest); \
const size_t _src_len = __builtin_object_size(src, 1); \
\
BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
@@ -437,7 +438,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
* must be discoverable by the compiler.
*/
#define strtomem(dest, src) do { \
- const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _dest_len = __must_be_byte_array(dest) + \
+ ARRAY_SIZE(dest); \
const size_t _src_len = __builtin_object_size(src, 1); \
\
BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
@@ -456,7 +458,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
* Note that sizes of @dest and @src must be known at compile-time.
*/
#define memtostr(dest, src) do { \
- const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _dest_len = __must_be_byte_array(dest) + \
+ ARRAY_SIZE(dest); \
const size_t _src_len = __builtin_object_size(src, 1); \
const size_t _src_chars = strnlen(src, _src_len); \
const size_t _copy_len = min(_dest_len - 1, _src_chars); \
@@ -481,7 +484,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
* Note that sizes of @dest and @src must be known at compile-time.
*/
#define memtostr_pad(dest, src) do { \
- const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _dest_len = __must_be_byte_array(dest) + \
+ ARRAY_SIZE(dest); \
const size_t _src_len = __builtin_object_size(src, 1); \
const size_t _src_chars = strnlen(src, _src_len); \
const size_t _copy_len = min(_dest_len - 1, _src_chars); \
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index fec1e8a1570c..eac57914dcf3 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -158,7 +158,6 @@ enum {
RPC_TASK_NEED_XMIT,
RPC_TASK_NEED_RECV,
RPC_TASK_MSG_PIN_WAIT,
- RPC_TASK_SIGNALLED,
};
#define rpc_test_and_set_running(t) \
@@ -171,7 +170,7 @@ enum {
#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
-#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
+#define RPC_SIGNALLED(t) (READ_ONCE(task->tk_rpc_status) == -ERESTARTSYS)
/*
* Task priorities.
diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h
index b5ec038069da..91cdf12190a0 100644
--- a/include/linux/swap_cgroup.h
+++ b/include/linux/swap_cgroup.h
@@ -6,7 +6,7 @@
#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
-extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent);
+extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent);
extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents);
extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
@@ -15,7 +15,7 @@ extern void swap_cgroup_swapoff(int type);
#else
static inline
-void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
+void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent)
{
}
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c6333204d451..e5603cc91963 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -951,6 +951,10 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
int flags, uint32_t sig);
asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags);
+asmlinkage long sys_open_tree_attr(int dfd, const char __user *path,
+ unsigned flags,
+ struct mount_attr __user *uattr,
+ size_t usize);
asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path,
int to_dfd, const char __user *to_path,
unsigned int ms_flags);
@@ -1266,14 +1270,14 @@ static inline long ksys_lchown(const char __user *filename, uid_t user,
AT_SYMLINK_NOFOLLOW);
}
-extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
+int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
static inline long ksys_ftruncate(unsigned int fd, loff_t length)
{
return do_sys_ftruncate(fd, length, 1);
}
-extern long do_sys_truncate(const char __user *pathname, loff_t length);
+int do_sys_truncate(const char __user *pathname, loff_t length);
static inline long ksys_truncate(const char __user *pathname, loff_t length)
{
diff --git a/include/linux/sysv_fs.h b/include/linux/sysv_fs.h
deleted file mode 100644
index 5cf77dbb8d86..000000000000
--- a/include/linux/sysv_fs.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_SYSV_FS_H
-#define _LINUX_SYSV_FS_H
-
-#define __packed2__ __attribute__((packed, aligned(2)))
-
-
-#ifndef __KERNEL__
-typedef u16 __fs16;
-typedef u32 __fs16;
-#endif
-
-/* inode numbers are 16 bit */
-typedef __fs16 sysv_ino_t;
-
-/* Block numbers are 24 bit, sometimes stored in 32 bit.
- On Coherent FS, they are always stored in PDP-11 manner: the least
- significant 16 bits come last. */
-typedef __fs32 sysv_zone_t;
-
-/* 0 is non-existent */
-#define SYSV_BADBL_INO 1 /* inode of bad blocks file */
-#define SYSV_ROOT_INO 2 /* inode of root directory */
-
-
-/* Xenix super-block data on disk */
-#define XENIX_NICINOD 100 /* number of inode cache entries */
-#define XENIX_NICFREE 100 /* number of free block list chunk entries */
-struct xenix_super_block {
- __fs16 s_isize; /* index of first data zone */
- __fs32 s_fsize __packed2__; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= XENIX_NICFREE */
- sysv_zone_t s_free[XENIX_NICFREE]; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= XENIX_NICINOD */
- sysv_ino_t s_inode[XENIX_NICINOD]; /* some free inodes */
- /* locks, not used by Linux: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time __packed2__; /* time of last super block update */
- __fs32 s_tfree __packed2__; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- __fs16 s_dinfo[4]; /* device information ?? */
- char s_fname[6]; /* file system volume name */
- char s_fpack[6]; /* file system pack name */
- char s_clean; /* set to 0x46 when filesystem is properly unmounted */
- char s_fill[371];
- s32 s_magic; /* version of file system */
- __fs32 s_type; /* type of file system: 1 for 512 byte blocks
- 2 for 1024 byte blocks
- 3 for 2048 byte blocks */
-
-};
-
-/*
- * SystemV FS comes in two variants:
- * sysv2: System V Release 2 (e.g. Microport), structure elements aligned(2).
- * sysv4: System V Release 4 (e.g. Consensys), structure elements aligned(4).
- */
-#define SYSV_NICINOD 100 /* number of inode cache entries */
-#define SYSV_NICFREE 50 /* number of free block list chunk entries */
-
-/* SystemV4 super-block data on disk */
-struct sysv4_super_block {
- __fs16 s_isize; /* index of first data zone */
- u16 s_pad0;
- __fs32 s_fsize; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= SYSV_NICFREE */
- u16 s_pad1;
- sysv_zone_t s_free[SYSV_NICFREE]; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= SYSV_NICINOD */
- u16 s_pad2;
- sysv_ino_t s_inode[SYSV_NICINOD]; /* some free inodes */
- /* locks, not used by Linux: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time; /* time of last super block update */
- __fs16 s_dinfo[4]; /* device information ?? */
- __fs32 s_tfree; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- u16 s_pad3;
- char s_fname[6]; /* file system volume name */
- char s_fpack[6]; /* file system pack name */
- s32 s_fill[12];
- __fs32 s_state; /* file system state: 0x7c269d38-s_time means clean */
- s32 s_magic; /* version of file system */
- __fs32 s_type; /* type of file system: 1 for 512 byte blocks
- 2 for 1024 byte blocks */
-};
-
-/* SystemV2 super-block data on disk */
-struct sysv2_super_block {
- __fs16 s_isize; /* index of first data zone */
- __fs32 s_fsize __packed2__; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= SYSV_NICFREE */
- sysv_zone_t s_free[SYSV_NICFREE]; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= SYSV_NICINOD */
- sysv_ino_t s_inode[SYSV_NICINOD]; /* some free inodes */
- /* locks, not used by Linux: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time __packed2__; /* time of last super block update */
- __fs16 s_dinfo[4]; /* device information ?? */
- __fs32 s_tfree __packed2__; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- char s_fname[6]; /* file system volume name */
- char s_fpack[6]; /* file system pack name */
- s32 s_fill[14];
- __fs32 s_state; /* file system state: 0xcb096f43 means clean */
- s32 s_magic; /* version of file system */
- __fs32 s_type; /* type of file system: 1 for 512 byte blocks
- 2 for 1024 byte blocks */
-};
-
-/* V7 super-block data on disk */
-#define V7_NICINOD 100 /* number of inode cache entries */
-#define V7_NICFREE 50 /* number of free block list chunk entries */
-struct v7_super_block {
- __fs16 s_isize; /* index of first data zone */
- __fs32 s_fsize __packed2__; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= V7_NICFREE */
- sysv_zone_t s_free[V7_NICFREE]; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= V7_NICINOD */
- sysv_ino_t s_inode[V7_NICINOD]; /* some free inodes */
- /* locks, not used by Linux or V7: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time __packed2__; /* time of last super block update */
- /* the following fields are not maintained by V7: */
- __fs32 s_tfree __packed2__; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- __fs16 s_m; /* interleave factor */
- __fs16 s_n; /* interleave factor */
- char s_fname[6]; /* file system name */
- char s_fpack[6]; /* file system pack name */
-};
-/* Constants to aid sanity checking */
-/* This is not a hard limit, nor enforced by v7 kernel. It's actually just
- * the limit used by Seventh Edition's ls, though is high enough to assume
- * that no reasonable file system would have that much entries in root
- * directory. Thus, if we see anything higher, we just probably got the
- * endiannes wrong. */
-#define V7_NFILES 1024
-/* The disk addresses are three-byte (despite direct block addresses being
- * aligned word-wise in inode). If the most significant byte is non-zero,
- * something is most likely wrong (not a filesystem, bad bytesex). */
-#define V7_MAXSIZE 0x00ffffff
-
-/* Coherent super-block data on disk */
-#define COH_NICINOD 100 /* number of inode cache entries */
-#define COH_NICFREE 64 /* number of free block list chunk entries */
-struct coh_super_block {
- __fs16 s_isize; /* index of first data zone */
- __fs32 s_fsize __packed2__; /* total number of zones of this fs */
- /* the start of the free block list: */
- __fs16 s_nfree; /* number of free blocks in s_free, <= COH_NICFREE */
- sysv_zone_t s_free[COH_NICFREE] __packed2__; /* first free block list chunk */
- /* the cache of free inodes: */
- __fs16 s_ninode; /* number of free inodes in s_inode, <= COH_NICINOD */
- sysv_ino_t s_inode[COH_NICINOD]; /* some free inodes */
- /* locks, not used by Linux: */
- char s_flock; /* lock during free block list manipulation */
- char s_ilock; /* lock during inode cache manipulation */
- char s_fmod; /* super-block modified flag */
- char s_ronly; /* flag whether fs is mounted read-only */
- __fs32 s_time __packed2__; /* time of last super block update */
- __fs32 s_tfree __packed2__; /* total number of free zones */
- __fs16 s_tinode; /* total number of free inodes */
- __fs16 s_interleave_m; /* interleave factor */
- __fs16 s_interleave_n;
- char s_fname[6]; /* file system volume name */
- char s_fpack[6]; /* file system pack name */
- __fs32 s_unique; /* zero, not used */
-};
-
-/* SystemV/Coherent inode data on disk */
-struct sysv_inode {
- __fs16 i_mode;
- __fs16 i_nlink;
- __fs16 i_uid;
- __fs16 i_gid;
- __fs32 i_size;
- u8 i_data[3*(10+1+1+1)];
- u8 i_gen;
- __fs32 i_atime; /* time of last access */
- __fs32 i_mtime; /* time of last modification */
- __fs32 i_ctime; /* time of creation */
-};
-
-/* SystemV/Coherent directory entry on disk */
-#define SYSV_NAMELEN 14 /* max size of name in struct sysv_dir_entry */
-struct sysv_dir_entry {
- sysv_ino_t inode;
- char name[SYSV_NAMELEN]; /* up to 14 characters, the rest are zeroes */
-};
-
-#define SYSV_DIRSIZE sizeof(struct sysv_dir_entry) /* size of every directory entry */
-
-#endif /* _LINUX_SYSV_FS_H */
diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index f85ec5613721..2dc767e08f54 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -132,6 +132,7 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
u32 map_id_down(struct uid_gid_map *map, u32 id);
u32 map_id_up(struct uid_gid_map *map, u32 id);
+u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count);
#else
@@ -186,6 +187,11 @@ static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
return id;
}
+static inline u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count)
+{
+ return id;
+}
+
static inline u32 map_id_up(struct uid_gid_map *map, u32 id)
{
return id;
diff --git a/include/linux/vfsdebug.h b/include/linux/vfsdebug.h
new file mode 100644
index 000000000000..9cf22d3eb9dd
--- /dev/null
+++ b/include/linux/vfsdebug.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_VFS_DEBUG_H
+#define LINUX_VFS_DEBUG_H 1
+
+#include <linux/bug.h>
+
+struct inode;
+
+#ifdef CONFIG_DEBUG_VFS
+void dump_inode(struct inode *inode, const char *reason);
+
+#define VFS_BUG_ON(cond) BUG_ON(cond)
+#define VFS_WARN_ON(cond) (void)WARN_ON(cond)
+#define VFS_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
+#define VFS_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
+#define VFS_WARN(cond, format...) (void)WARN(cond, format)
+
+#define VFS_BUG_ON_INODE(cond, inode) ({ \
+ if (unlikely(!!(cond))) { \
+ dump_inode(inode, "VFS_BUG_ON_INODE(" #cond")");\
+ BUG_ON(1); \
+ } \
+})
+
+#define VFS_WARN_ON_INODE(cond, inode) ({ \
+ int __ret_warn = !!(cond); \
+ \
+ if (unlikely(__ret_warn)) { \
+ dump_inode(inode, "VFS_WARN_ON_INODE(" #cond")");\
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn); \
+})
+#else
+#define VFS_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
+#define VFS_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
+#define VFS_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VFS_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+#define VFS_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
+
+#define VFS_BUG_ON_INODE(cond, inode) VFS_BUG_ON(cond)
+#define VFS_WARN_ON_INODE(cond, inode) BUILD_BUG_ON_INVALID(cond)
+#endif /* CONFIG_DEBUG_VFS */
+
+#endif
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6d90ad974408..3503fe822e38 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -316,6 +316,9 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
} \
\
cmd; \
+ \
+ if (condition) \
+ break; \
} \
finish_wait(&wq_head, &__wq_entry); \
__out: __ret; \