summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/acpi/processor.h34
-rw-r--r--include/asm-generic/vmlinux.lds.h5
-rw-r--r--include/drm/Makefile2
-rw-r--r--include/drm/intel/pciids.h5
-rw-r--r--include/linux/arm_ffa.h21
-rw-r--r--include/linux/ata.h1
-rw-r--r--include/linux/backing-dev-defs.h2
-rw-r--r--include/linux/backing-dev.h5
-rw-r--r--include/linux/blk_types.h11
-rw-r--r--include/linux/ceph/libceph.h3
-rw-r--r--include/linux/cgroup-defs.h2
-rw-r--r--include/linux/cleanup.h22
-rw-r--r--include/linux/compiler_types.h11
-rw-r--r--include/linux/cred.h22
-rw-r--r--include/linux/dma-mapping.h2
-rw-r--r--include/linux/entry-virt.h2
-rw-r--r--include/linux/ethtool.h2
-rw-r--r--include/linux/exportfs.h7
-rw-r--r--include/linux/fbcon.h2
-rw-r--r--include/linux/file.h126
-rw-r--r--include/linux/filelock.h98
-rw-r--r--include/linux/filter.h20
-rw-r--r--include/linux/fs.h727
-rw-r--r--include/linux/fs/super.h238
-rw-r--r--include/linux/fs/super_types.h336
-rw-r--r--include/linux/fs_dirent.h (renamed from include/linux/fs_types.h)11
-rw-r--r--include/linux/fs_struct.h6
-rw-r--r--include/linux/ftrace.h10
-rw-r--r--include/linux/gfp.h3
-rw-r--r--include/linux/gpio/regmap.h5
-rw-r--r--include/linux/highmem.h6
-rw-r--r--include/linux/huge_mm.h55
-rw-r--r--include/linux/hung_task.h8
-rw-r--r--include/linux/iio/buffer-dma.h1
-rw-r--r--include/linux/iio/buffer_impl.h2
-rw-r--r--include/linux/init_task.h1
-rw-r--r--include/linux/iomap.h86
-rw-r--r--include/linux/mailbox/mtk-cmdq-mailbox.h10
-rw-r--r--include/linux/map_benchmark.h1
-rw-r--r--include/linux/misc_cgroup.h2
-rw-r--r--include/linux/mlx5/cq.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h4
-rw-r--r--include/linux/mm.h21
-rw-r--r--include/linux/namei.h83
-rw-r--r--include/linux/net/intel/libie/fwlog.h12
-rw-r--r--include/linux/ns/ns_common_types.h196
-rw-r--r--include/linux/ns/nstree_types.h55
-rw-r--r--include/linux/ns_common.h233
-rw-r--r--include/linux/nsfs.h3
-rw-r--r--include/linux/nsproxy.h9
-rw-r--r--include/linux/nstree.h52
-rw-r--r--include/linux/pagemap.h18
-rw-r--r--include/linux/pci.h2
-rw-r--r--include/linux/pid_namespace.h3
-rw-r--r--include/linux/pipe_fs_i.h23
-rw-r--r--include/linux/platform_data/x86/int3472.h1
-rw-r--r--include/linux/pm_runtime.h8
-rw-r--r--include/linux/pseudo_fs.h1
-rw-r--r--include/linux/regmap.h2
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/sched/coredump.h2
-rw-r--r--include/linux/shmem_fs.h2
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/syscalls.h4
-rw-r--r--include/linux/types.h1
-rw-r--r--include/linux/usb/gadget.h5
-rw-r--r--include/linux/user_namespace.h4
-rw-r--r--include/linux/virtio_net.h12
-rw-r--r--include/linux/writeback.h15
-rw-r--r--include/linux/xattr.h4
-rw-r--r--include/net/bluetooth/hci.h6
-rw-r--r--include/net/bluetooth/hci_core.h22
-rw-r--r--include/net/bluetooth/l2cap.h4
-rw-r--r--include/net/bluetooth/mgmt.h4
-rw-r--r--include/net/cfg80211.h78
-rw-r--r--include/net/libeth/xdp.h2
-rw-r--r--include/net/pkt_cls.h2
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/net/tls.h25
-rw-r--r--include/net/xfrm.h3
-rw-r--r--include/scsi/scsi_device.h10
-rw-r--r--include/trace/events/tcp.h9
-rw-r--r--include/trace/events/writeback.h8
-rw-r--r--include/uapi/asm-generic/posix_types.h1
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/drm/drm_fourcc.h25
-rw-r--r--include/uapi/drm/xe_drm.h15
-rw-r--r--include/uapi/linux/fb.h2
-rw-r--r--include/uapi/linux/fcntl.h16
-rw-r--r--include/uapi/linux/input-event-codes.h14
-rw-r--r--include/uapi/linux/io_uring.h12
-rw-r--r--include/uapi/linux/io_uring/query.h3
-rw-r--r--include/uapi/linux/isst_if.h50
-rw-r--r--include/uapi/linux/mount.h2
-rw-r--r--include/uapi/linux/nsfs.h58
-rw-r--r--include/uapi/linux/pidfd.h11
-rw-r--r--include/uapi/linux/tee.h23
-rw-r--r--include/uapi/linux/virtio_net.h3
-rw-r--r--include/ufs/ufshcd.h7
99 files changed, 2124 insertions, 993 deletions
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 7146a8e9e9c2..d0eccbd920e5 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -417,15 +417,32 @@ static inline void acpi_processor_throttling_init(void) {}
#endif /* CONFIG_ACPI_CPU_FREQ_PSS */
/* in processor_idle.c */
+extern struct cpuidle_driver acpi_idle_driver;
#ifdef CONFIG_ACPI_PROCESSOR_IDLE
-void acpi_processor_power_init(struct acpi_processor *pr);
-void acpi_processor_power_exit(struct acpi_processor *pr);
+int acpi_processor_power_init(struct acpi_processor *pr);
+int acpi_processor_power_exit(struct acpi_processor *pr);
int acpi_processor_power_state_has_changed(struct acpi_processor *pr);
int acpi_processor_hotplug(struct acpi_processor *pr);
-void acpi_processor_register_idle_driver(void);
-void acpi_processor_unregister_idle_driver(void);
-int acpi_processor_ffh_lpi_probe(unsigned int cpu);
-int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi);
+#else
+static inline int acpi_processor_power_init(struct acpi_processor *pr)
+{
+ return -ENODEV;
+}
+
+static inline int acpi_processor_power_exit(struct acpi_processor *pr)
+{
+ return -ENODEV;
+}
+
+static inline int acpi_processor_power_state_has_changed(struct acpi_processor *pr)
+{
+ return -ENODEV;
+}
+
+static inline int acpi_processor_hotplug(struct acpi_processor *pr)
+{
+ return -ENODEV;
+}
#endif /* CONFIG_ACPI_PROCESSOR_IDLE */
/* in processor_thermal.c */
@@ -448,6 +465,11 @@ static inline void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
}
#endif /* CONFIG_CPU_FREQ */
+#ifdef CONFIG_ACPI_PROCESSOR_IDLE
+extern int acpi_processor_ffh_lpi_probe(unsigned int cpu);
+extern int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi);
+#endif
+
void acpi_processor_init_invariance_cppc(void);
#endif
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8a9a2e732a65..caef183a73bc 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -832,7 +832,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
/* Required sections not related to debugging. */
#define ELF_DETAILS \
- .modinfo : { *(.modinfo) } \
+ .modinfo : { *(.modinfo) . = ALIGN(8); } \
.comment 0 : { *(.comment) } \
.symtab 0 : { *(.symtab) } \
.strtab 0 : { *(.strtab) } \
@@ -955,7 +955,8 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
#define RUNTIME_CONST_VARIABLES \
RUNTIME_CONST(shift, d_hash_shift) \
- RUNTIME_CONST(ptr, dentry_hashtable)
+ RUNTIME_CONST(ptr, dentry_hashtable) \
+ RUNTIME_CONST(ptr, __dentry_cache)
/* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
#define KUNIT_TABLE() \
diff --git a/include/drm/Makefile b/include/drm/Makefile
index 1df6962556ef..48fae3f167c7 100644
--- a/include/drm/Makefile
+++ b/include/drm/Makefile
@@ -11,7 +11,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \
quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
cmd_hdrtest = \
$(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \
- PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
+ PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
touch $@
$(obj)/%.hdrtest: $(src)/%.h FORCE
diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h
index da6301a6fcea..69d4ae92d822 100644
--- a/include/drm/intel/pciids.h
+++ b/include/drm/intel/pciids.h
@@ -877,7 +877,10 @@
MACRO__(0xB08F, ## __VA_ARGS__), \
MACRO__(0xB090, ## __VA_ARGS__), \
MACRO__(0xB0A0, ## __VA_ARGS__), \
- MACRO__(0xB0B0, ## __VA_ARGS__), \
+ MACRO__(0xB0B0, ## __VA_ARGS__)
+
+/* WCL */
+#define INTEL_WCL_IDS(MACRO__, ...) \
MACRO__(0xFD80, ## __VA_ARGS__), \
MACRO__(0xFD81, ## __VA_ARGS__)
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index cd7ee4df9045..81e603839c4a 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -338,6 +338,7 @@ struct ffa_mem_region_attributes {
* an `struct ffa_mem_region_addr_range`.
*/
u32 composite_off;
+ u8 impdef_val[16];
u64 reserved;
};
@@ -417,15 +418,31 @@ struct ffa_mem_region {
#define CONSTITUENTS_OFFSET(x) \
(offsetof(struct ffa_composite_mem_region, constituents[x]))
+#define FFA_EMAD_HAS_IMPDEF_FIELD(version) ((version) >= FFA_VERSION_1_2)
+#define FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version) ((version) > FFA_VERSION_1_0)
+
+static inline u32 ffa_emad_size_get(u32 ffa_version)
+{
+ u32 sz;
+ struct ffa_mem_region_attributes *ep_mem_access;
+
+ if (FFA_EMAD_HAS_IMPDEF_FIELD(ffa_version))
+ sz = sizeof(*ep_mem_access);
+ else
+ sz = sizeof(*ep_mem_access) - sizeof(ep_mem_access->impdef_val);
+
+ return sz;
+}
+
static inline u32
ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version)
{
- u32 offset = count * sizeof(struct ffa_mem_region_attributes);
+ u32 offset = count * ffa_emad_size_get(ffa_version);
/*
* Earlier to v1.1, the endpoint memory descriptor array started at
* offset 32(i.e. offset of ep_mem_offset in the current structure)
*/
- if (ffa_version <= FFA_VERSION_1_0)
+ if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(ffa_version))
offset += offsetof(struct ffa_mem_region, ep_mem_offset);
else
offset += sizeof(struct ffa_mem_region);
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 792e10a09787..c9013e472aa3 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -566,6 +566,7 @@ struct ata_bmdma_prd {
#define ata_id_has_ncq(id) ((id)[ATA_ID_SATA_CAPABILITY] & (1 << 8))
#define ata_id_queue_depth(id) (((id)[ATA_ID_QUEUE_DEPTH] & 0x1f) + 1)
#define ata_id_removable(id) ((id)[ATA_ID_CONFIG] & (1 << 7))
+#define ata_id_is_locked(id) (((id)[ATA_ID_DLF] & 0x7) == 0x7)
#define ata_id_has_atapi_AN(id) \
((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \
((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index c5c9d89c73ed..610ef62b6a32 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -63,6 +63,8 @@ enum wb_reason {
struct wb_completion {
atomic_t cnt;
wait_queue_head_t *waitq;
+ unsigned long progress_stamp; /* The jiffies when slow progress is detected */
+ unsigned long wait_start; /* The jiffies when waiting for the writeback work to finish */
};
#define __WB_COMPLETION_INIT(_waitq) \
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3e64f14739dd..0c8342747cab 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -277,10 +277,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
rcu_read_lock();
/*
- * Paired with store_release in inode_switch_wbs_work_fn() and
+ * Paired with a release fence in inode_do_switch_wbs() and
* ensures that we see the new wb if we see cleared I_WB_SWITCH.
*/
- cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
+ cookie->locked = inode_state_read_once(inode) & I_WB_SWITCH;
+ smp_rmb();
if (unlikely(cookie->locked))
xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 8e8d1cc8b06c..44c30183ecc3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -341,15 +341,15 @@ enum req_op {
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9,
/* Open a zone */
- REQ_OP_ZONE_OPEN = (__force blk_opf_t)10,
+ REQ_OP_ZONE_OPEN = (__force blk_opf_t)11,
/* Close a zone */
- REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11,
+ REQ_OP_ZONE_CLOSE = (__force blk_opf_t)13,
/* Transition a zone to full */
- REQ_OP_ZONE_FINISH = (__force blk_opf_t)13,
+ REQ_OP_ZONE_FINISH = (__force blk_opf_t)15,
/* reset a zone write pointer */
- REQ_OP_ZONE_RESET = (__force blk_opf_t)15,
+ REQ_OP_ZONE_RESET = (__force blk_opf_t)17,
/* reset all the zone present on the device */
- REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17,
+ REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)19,
/* Driver private requests */
REQ_OP_DRV_IN = (__force blk_opf_t)34,
@@ -478,6 +478,7 @@ static inline bool op_is_zone_mgmt(enum req_op op)
{
switch (op & REQ_OP_MASK) {
case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_RESET_ALL:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
case REQ_OP_ZONE_FINISH:
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 733e7f93db66..63e0e2aa1ce9 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -306,8 +306,7 @@ struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
u64 ceph_client_gid(struct ceph_client *client);
extern void ceph_destroy_client(struct ceph_client *client);
extern void ceph_reset_client_addr(struct ceph_client *client);
-extern int __ceph_open_session(struct ceph_client *client,
- unsigned long started);
+extern int __ceph_open_session(struct ceph_client *client);
extern int ceph_open_session(struct ceph_client *client);
int ceph_wait_for_latest_osdmap(struct ceph_client *client,
unsigned long timeout);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 93318fce31f3..b760a3c470a5 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -452,7 +452,7 @@ struct cgroup_freezer_state {
int nr_frozen_tasks;
/* Freeze time data consistency protection */
- seqcount_t freeze_seq;
+ seqcount_spinlock_t freeze_seq;
/*
* Most recent time the cgroup was requested to freeze.
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 2573585b7f06..b8bd2f15f91f 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -261,6 +261,10 @@ const volatile void * __must_check_fn(const volatile void *val)
* CLASS(name, var)(args...):
* declare the variable @var as an instance of the named class
*
+ * CLASS_INIT(name, var, init_expr):
+ * declare the variable @var as an instance of the named class with
+ * custom initialization expression.
+ *
* Ex.
*
* DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd)
@@ -290,15 +294,19 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
class_##_name##_t var __cleanup(class_##_name##_destructor) = \
class_##_name##_constructor
-#define scoped_class(_name, var, args) \
- for (CLASS(_name, var)(args); \
- __guard_ptr(_name)(&var) || !__is_cond_ptr(_name); \
- ({ goto _label; })) \
- if (0) { \
-_label: \
- break; \
+#define CLASS_INIT(_name, _var, _init_expr) \
+ class_##_name##_t _var __cleanup(class_##_name##_destructor) = (_init_expr)
+
+#define __scoped_class(_name, var, _label, args...) \
+ for (CLASS(_name, var)(args); ; ({ goto _label; })) \
+ if (0) { \
+_label: \
+ break; \
} else
+#define scoped_class(_name, var, args...) \
+ __scoped_class(_name, var, __UNIQUE_ID(label), args)
+
/*
* DEFINE_GUARD(name, type, lock, unlock):
* trivial wrapper around DEFINE_CLASS() above specifically
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 59288a2c1ad2..0a1b9598940d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -250,10 +250,9 @@ struct ftrace_likely_data {
/*
* GCC does not warn about unused static inline functions for -Wunused-function.
* Suppress the warning in clang as well by using __maybe_unused, but enable it
- * for W=1 build. This will allow clang to find unused functions. Remove the
- * __inline_maybe_unused entirely after fixing most of -Wunused-function warnings.
+ * for W=2 build. This will allow clang to find unused functions.
*/
-#ifdef KBUILD_EXTRA_WARN1
+#ifdef KBUILD_EXTRA_WARN2
#define __inline_maybe_unused
#else
#define __inline_maybe_unused __maybe_unused
@@ -461,6 +460,12 @@ struct ftrace_likely_data {
# define __nocfi
#endif
+#if defined(CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS)
+# define __nocfi_generic __nocfi
+#else
+# define __nocfi_generic
+#endif
+
/*
* Any place that could be marked with the "alloc_size" attribute is also
* a place to be marked with the "malloc" attribute, except those that may
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 89ae50ad2ace..343a140a6ba2 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -20,6 +20,8 @@
struct cred;
struct inode;
+extern struct task_struct init_task;
+
/*
* COW Supplementary groups list
*/
@@ -156,6 +158,11 @@ extern struct cred *prepare_exec_creds(void);
extern int commit_creds(struct cred *);
extern void abort_creds(struct cred *);
extern struct cred *prepare_kernel_cred(struct task_struct *);
+static inline const struct cred *kernel_cred(void)
+{
+ /* shut up sparse */
+ return rcu_dereference_raw(init_task.cred);
+}
extern int set_security_override(struct cred *, u32);
extern int set_security_override_from_ctx(struct cred *, const char *);
extern int set_create_files_as(struct cred *, struct inode *);
@@ -180,6 +187,16 @@ static inline const struct cred *revert_creds(const struct cred *revert_cred)
return rcu_replace_pointer(current->cred, revert_cred, 1);
}
+DEFINE_CLASS(override_creds,
+ const struct cred *,
+ revert_creds(_T),
+ override_creds(override_cred), const struct cred *override_cred)
+
+#define scoped_with_creds(cred) \
+ scoped_class(override_creds, __UNIQUE_ID(label), cred)
+
+#define scoped_with_kernel_creds() scoped_with_creds(kernel_cred())
+
/**
* get_cred_many - Get references on a set of credentials
* @cred: The credentials to reference
@@ -263,6 +280,11 @@ static inline void put_cred(const struct cred *cred)
put_cred_many(cred, 1);
}
+DEFINE_CLASS(prepare_creds,
+ struct cred *,
+ if (_T) put_cred(_T),
+ prepare_creds(), void)
+
DEFINE_FREE(put_cred, struct cred *, if (!IS_ERR_OR_NULL(_T)) put_cred(_T))
/**
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 8248ff9363ee..2ceda49c609f 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -90,7 +90,7 @@
*/
#define DMA_MAPPING_ERROR (~(dma_addr_t)0)
-#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
+#define DMA_BIT_MASK(n) GENMASK_ULL(n - 1, 0)
struct dma_iova_state {
dma_addr_t addr;
diff --git a/include/linux/entry-virt.h b/include/linux/entry-virt.h
index 42c89e3e5ca7..bfa767702d9a 100644
--- a/include/linux/entry-virt.h
+++ b/include/linux/entry-virt.h
@@ -32,7 +32,7 @@
*/
static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work);
-#ifndef arch_xfer_to_guest_mode_work
+#ifndef arch_xfer_to_guest_mode_handle_work
static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work)
{
return 0;
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c2d8b4ec62eb..5c9162193d26 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -492,7 +492,7 @@ struct ethtool_pause_stats {
};
#define ETHTOOL_MAX_LANES 8
-/**
+/*
* IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for
* the end-of-list marker, total 17 items
*/
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index d0cf10d5e0f7..f0cf2714ec52 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -320,9 +320,6 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop)
static inline bool exportfs_can_encode_fh(const struct export_operations *nop,
int fh_flags)
{
- if (!nop)
- return false;
-
/*
* If a non-decodeable file handle was requested, we only need to make
* sure that filesystem did not opt-out of encoding fid.
@@ -330,6 +327,10 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop,
if (fh_flags & EXPORT_FH_FID)
return exportfs_can_encode_fid(nop);
+ /* Normal file handles cannot be created without export ops */
+ if (!nop)
+ return false;
+
/*
* If a connectable file handle was requested, we need to make sure that
* filesystem can also decode connected file handles.
diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h
index 81f0e698acbf..f206370060e1 100644
--- a/include/linux/fbcon.h
+++ b/include/linux/fbcon.h
@@ -18,6 +18,7 @@ void fbcon_suspended(struct fb_info *info);
void fbcon_resumed(struct fb_info *info);
int fbcon_mode_deleted(struct fb_info *info,
struct fb_videomode *mode);
+void fbcon_delete_modelist(struct list_head *head);
void fbcon_new_modelist(struct fb_info *info);
void fbcon_get_requirement(struct fb_info *info,
struct fb_blit_caps *caps);
@@ -38,6 +39,7 @@ static inline void fbcon_suspended(struct fb_info *info) {}
static inline void fbcon_resumed(struct fb_info *info) {}
static inline int fbcon_mode_deleted(struct fb_info *info,
struct fb_videomode *mode) { return 0; }
+static inline void fbcon_delete_modelist(struct list_head *head) {}
static inline void fbcon_new_modelist(struct fb_info *info) {}
static inline void fbcon_get_requirement(struct fb_info *info,
struct fb_blit_caps *caps) {}
diff --git a/include/linux/file.h b/include/linux/file.h
index af1768d934a0..cf389fde9bc2 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -127,4 +127,130 @@ extern void __fput_sync(struct file *);
extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
+/*
+ * fd_prepare: Combined fd + file allocation cleanup class.
+ * @err: Error code to indicate if allocation succeeded.
+ * @__fd: Allocated fd (may not be accessed directly)
+ * @__file: Allocated struct file pointer (may not be accessed directly)
+ *
+ * Allocates an fd and a file together. On error paths, automatically cleans
+ * up whichever resource was successfully allocated. Allows flexible file
+ * allocation with different functions per usage.
+ *
+ * Do not use directly.
+ */
+struct fd_prepare {
+ s32 err;
+ s32 __fd; /* do not access directly */
+ struct file *__file; /* do not access directly */
+};
+
+/* Typedef for fd_prepare cleanup guards. */
+typedef struct fd_prepare class_fd_prepare_t;
+
+/*
+ * Accessors for fd_prepare class members.
+ * _Generic() is used for zero-cost type safety.
+ */
+#define fd_prepare_fd(_fdf) \
+ (_Generic((_fdf), struct fd_prepare: (_fdf).__fd))
+
+#define fd_prepare_file(_fdf) \
+ (_Generic((_fdf), struct fd_prepare: (_fdf).__file))
+
+/* Do not use directly. */
+static inline void class_fd_prepare_destructor(const struct fd_prepare *fdf)
+{
+ if (unlikely(fdf->err)) {
+ if (likely(fdf->__fd >= 0))
+ put_unused_fd(fdf->__fd);
+ if (unlikely(!IS_ERR_OR_NULL(fdf->__file)))
+ fput(fdf->__file);
+ }
+}
+
+/* Do not use directly. */
+static inline int class_fd_prepare_lock_err(const struct fd_prepare *fdf)
+{
+ if (unlikely(fdf->err))
+ return fdf->err;
+ if (unlikely(fdf->__fd < 0))
+ return fdf->__fd;
+ if (unlikely(IS_ERR(fdf->__file)))
+ return PTR_ERR(fdf->__file);
+ if (unlikely(!fdf->__file))
+ return -ENOMEM;
+ return 0;
+}
+
+/*
+ * __FD_PREPARE_INIT - Helper to initialize fd_prepare class.
+ * @_fd_flags: flags for get_unused_fd_flags()
+ * @_file_owned: expression that returns struct file *
+ *
+ * Returns a struct fd_prepare with fd, file, and err set.
+ * If fd allocation fails, fd will be negative and err will be set. If
+ * fd succeeds but file_init_expr fails, file will be ERR_PTR and err
+ * will be set. The err field is the single source of truth for error
+ * checking.
+ */
+#define __FD_PREPARE_INIT(_fd_flags, _file_owned) \
+ ({ \
+ struct fd_prepare fdf = { \
+ .__fd = get_unused_fd_flags((_fd_flags)), \
+ }; \
+ if (likely(fdf.__fd >= 0)) \
+ fdf.__file = (_file_owned); \
+ fdf.err = ACQUIRE_ERR(fd_prepare, &fdf); \
+ fdf; \
+ })
+
+/*
+ * FD_PREPARE - Macro to declare and initialize an fd_prepare variable.
+ *
+ * Declares and initializes an fd_prepare variable with automatic
+ * cleanup. No separate scope required - cleanup happens when variable
+ * goes out of scope.
+ *
+ * @_fdf: name of struct fd_prepare variable to define
+ * @_fd_flags: flags for get_unused_fd_flags()
+ * @_file_owned: struct file to take ownership of (can be expression)
+ */
+#define FD_PREPARE(_fdf, _fd_flags, _file_owned) \
+ CLASS_INIT(fd_prepare, _fdf, __FD_PREPARE_INIT(_fd_flags, _file_owned))
+
+/*
+ * fd_publish - Publish prepared fd and file to the fd table.
+ * @_fdf: struct fd_prepare variable
+ */
+#define fd_publish(_fdf) \
+ ({ \
+ struct fd_prepare *fdp = &(_fdf); \
+ VFS_WARN_ON_ONCE(fdp->err); \
+ VFS_WARN_ON_ONCE(fdp->__fd < 0); \
+ VFS_WARN_ON_ONCE(IS_ERR_OR_NULL(fdp->__file)); \
+ fd_install(fdp->__fd, fdp->__file); \
+ fdp->__fd; \
+ })
+
+/* Do not use directly. */
+#define __FD_ADD(_fdf, _fd_flags, _file_owned) \
+ ({ \
+ FD_PREPARE(_fdf, _fd_flags, _file_owned); \
+ s32 ret = _fdf.err; \
+ if (likely(!ret)) \
+ ret = fd_publish(_fdf); \
+ ret; \
+ })
+
+/*
+ * FD_ADD - Allocate and install an fd and file in one step.
+ * @_fd_flags: flags for get_unused_fd_flags()
+ * @_file_owned: struct file to take ownership of
+ *
+ * Returns the allocated fd number, or negative error code on failure.
+ */
+#define FD_ADD(_fd_flags, _file_owned) \
+ __FD_ADD(__UNIQUE_ID(fd_prepare), _fd_flags, _file_owned)
+
#endif /* __LINUX_FILE_H */
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index c2ce8ba05d06..54b824c05299 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -159,6 +159,8 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int,
int fcntl_setlease(unsigned int fd, struct file *filp, int arg);
int fcntl_getlease(struct file *filp);
+int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg);
+int fcntl_getdeleg(struct file *filp, struct delegation *deleg);
static inline bool lock_is_unlock(struct file_lock *fl)
{
@@ -212,7 +214,14 @@ int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
void locks_init_lease(struct file_lease *);
void locks_free_lease(struct file_lease *fl);
struct file_lease *locks_alloc_lease(void);
-int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
+
+#define LEASE_BREAK_LEASE BIT(0) // break leases and delegations
+#define LEASE_BREAK_DELEG BIT(1) // break delegations only
+#define LEASE_BREAK_LAYOUT BIT(2) // break layouts only
+#define LEASE_BREAK_NONBLOCK BIT(3) // non-blocking break
+#define LEASE_BREAK_OPEN_RDONLY BIT(4) // readonly open event
+
+int __break_lease(struct inode *inode, unsigned int flags);
void lease_get_mtime(struct inode *, struct timespec64 *time);
int generic_setlease(struct file *, int, struct file_lease **, void **priv);
int kernel_setlease(struct file *, int, struct file_lease **, void **);
@@ -271,6 +280,16 @@ static inline int fcntl_getlease(struct file *filp)
return F_UNLCK;
}
+static inline int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg)
+{
+ return -EINVAL;
+}
+
+static inline int fcntl_getdeleg(struct file *filp, struct delegation *deleg)
+{
+ return -EINVAL;
+}
+
static inline bool lock_is_unlock(struct file_lock *fl)
{
return false;
@@ -367,7 +386,7 @@ static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *f
return -ENOLCK;
}
-static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
+static inline int __break_lease(struct inode *inode, unsigned int flags)
{
return 0;
}
@@ -428,6 +447,17 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
}
#ifdef CONFIG_FILE_LOCKING
+static inline unsigned int openmode_to_lease_flags(unsigned int mode)
+{
+ unsigned int flags = 0;
+
+ if ((mode & O_ACCMODE) == O_RDONLY)
+ flags |= LEASE_BREAK_OPEN_RDONLY;
+ if (mode & O_NONBLOCK)
+ flags |= LEASE_BREAK_NONBLOCK;
+ return flags;
+}
+
static inline int break_lease(struct inode *inode, unsigned int mode)
{
struct file_lock_context *flctx;
@@ -443,11 +473,11 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
return 0;
smp_mb();
if (!list_empty_careful(&flctx->flc_lease))
- return __break_lease(inode, mode, FL_LEASE);
+ return __break_lease(inode, LEASE_BREAK_LEASE | openmode_to_lease_flags(mode));
return 0;
}
-static inline int break_deleg(struct inode *inode, unsigned int mode)
+static inline int break_deleg(struct inode *inode, unsigned int flags)
{
struct file_lock_context *flctx;
@@ -461,60 +491,84 @@ static inline int break_deleg(struct inode *inode, unsigned int mode)
if (!flctx)
return 0;
smp_mb();
- if (!list_empty_careful(&flctx->flc_lease))
- return __break_lease(inode, mode, FL_DELEG);
+ if (!list_empty_careful(&flctx->flc_lease)) {
+ flags |= LEASE_BREAK_DELEG;
+ return __break_lease(inode, flags);
+ }
return 0;
}
-static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+struct delegated_inode {
+ struct inode *di_inode;
+};
+
+static inline bool is_delegated(struct delegated_inode *di)
+{
+ return di->di_inode;
+}
+
+static inline int try_break_deleg(struct inode *inode,
+ struct delegated_inode *di)
{
int ret;
- ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
- if (ret == -EWOULDBLOCK && delegated_inode) {
- *delegated_inode = inode;
+ ret = break_deleg(inode, LEASE_BREAK_NONBLOCK);
+ if (ret == -EWOULDBLOCK && di) {
+ di->di_inode = inode;
ihold(inode);
}
return ret;
}
-static inline int break_deleg_wait(struct inode **delegated_inode)
+static inline int break_deleg_wait(struct delegated_inode *di)
{
int ret;
- ret = break_deleg(*delegated_inode, O_WRONLY);
- iput(*delegated_inode);
- *delegated_inode = NULL;
+ ret = break_deleg(di->di_inode, 0);
+ iput(di->di_inode);
+ di->di_inode = NULL;
return ret;
}
static inline int break_layout(struct inode *inode, bool wait)
{
smp_mb();
- if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
- return __break_lease(inode,
- wait ? O_WRONLY : O_WRONLY | O_NONBLOCK,
- FL_LAYOUT);
+ if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) {
+ unsigned int flags = LEASE_BREAK_LAYOUT;
+
+ if (!wait)
+ flags |= LEASE_BREAK_NONBLOCK;
+
+ return __break_lease(inode, flags);
+ }
return 0;
}
#else /* !CONFIG_FILE_LOCKING */
-static inline int break_lease(struct inode *inode, unsigned int mode)
+struct delegated_inode { };
+
+static inline bool is_delegated(struct delegated_inode *di)
+{
+ return false;
+}
+
+static inline int break_lease(struct inode *inode, bool wait)
{
return 0;
}
-static inline int break_deleg(struct inode *inode, unsigned int mode)
+static inline int break_deleg(struct inode *inode, unsigned int flags)
{
return 0;
}
-static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+static inline int try_break_deleg(struct inode *inode,
+ struct delegated_inode *delegated_inode)
{
return 0;
}
-static inline int break_deleg_wait(struct inode **delegated_inode)
+static inline int break_deleg_wait(struct delegated_inode *delegated_inode)
{
BUG();
return 0;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f5c859b8131a..973233b82dc1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -901,6 +901,26 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
cb->data_end = skb->data + skb_headlen(skb);
}
+static inline int bpf_prog_run_data_pointers(
+ const struct bpf_prog *prog,
+ struct sk_buff *skb)
+{
+ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+ void *save_data_meta, *save_data_end;
+ int res;
+
+ save_data_meta = cb->data_meta;
+ save_data_end = cb->data_end;
+
+ bpf_compute_data_pointers(skb);
+ res = bpf_prog_run(prog, skb);
+
+ cb->data_meta = save_data_meta;
+ cb->data_end = save_data_end;
+
+ return res;
+}
+
/* Similar to bpf_compute_data_pointers(), except that save orginal
* data in cb->data and cb->meta_data for restore.
*/
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..ce25feb06727 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
+#include <linux/fs/super.h>
#include <linux/vfsdebug.h>
#include <linux/linkage.h>
#include <linux/wait_bit.h>
@@ -11,7 +12,6 @@
#include <linux/stat.h>
#include <linux/cache.h>
#include <linux/list.h>
-#include <linux/list_lru.h>
#include <linux/llist.h>
#include <linux/radix-tree.h>
#include <linux/xarray.h>
@@ -37,7 +37,6 @@
#include <linux/uuid.h>
#include <linux/errseq.h>
#include <linux/ioprio.h>
-#include <linux/fs_types.h>
#include <linux/build_bug.h>
#include <linux/stddef.h>
#include <linux/mount.h>
@@ -52,11 +51,9 @@
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
-struct backing_dev_info;
struct bdi_writeback;
struct bio;
struct io_comp_batch;
-struct export_operations;
struct fiemap_extent_info;
struct hd_geometry;
struct iovec;
@@ -70,16 +67,13 @@ struct vfsmount;
struct cred;
struct swap_info_struct;
struct seq_file;
-struct workqueue_struct;
struct iov_iter;
-struct fscrypt_operations;
-struct fsverity_operations;
struct fsnotify_mark_connector;
-struct fsnotify_sb_info;
struct fs_context;
struct fs_parameter_spec;
struct file_kattr;
struct iomap_ops;
+struct delegated_inode;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
@@ -299,11 +293,6 @@ struct iattr {
};
/*
- * Includes for diskquotas.
- */
-#include <linux/quota.h>
-
-/*
* Maximum number of layers of fs stack. Needs to be limited to
* prevent kernel stack overflow
*/
@@ -367,23 +356,9 @@ struct readahead_control;
#define IOCB_NOIO (1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE (1 << 21)
-/*
- * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
- * iocb completion can be passed back to the owner for execution from a safe
- * context rather than needing to be punted through a workqueue. If this
- * flag is set, the bio completion handling may set iocb->dio_complete to a
- * handler function and iocb->private to context information for that handler.
- * The issuer should call the handler with that context information from task
- * context to complete the processing of the iocb. Note that while this
- * provides a task context for the dio_complete() callback, it should only be
- * used on the completion side for non-IO generating completions. It's fine to
- * call blocking functions from this callback, but they should not wait for
- * unrelated IO (like cache flushing, new IO generation, etc).
- */
-#define IOCB_DIO_CALLER_COMP (1 << 22)
/* kiocb is a read or write operation submitted by fs/aio.c. */
-#define IOCB_AIO_RW (1 << 23)
-#define IOCB_HAS_METADATA (1 << 24)
+#define IOCB_AIO_RW (1 << 22)
+#define IOCB_HAS_METADATA (1 << 23)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
@@ -400,7 +375,6 @@ struct readahead_control;
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
{ IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
- { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \
{ IOCB_AIO_RW, "AIO_RW" }, \
{ IOCB_HAS_METADATA, "AIO_HAS_METADATA" }
@@ -412,23 +386,13 @@ struct kiocb {
int ki_flags;
u16 ki_ioprio; /* See linux/ioprio.h */
u8 ki_write_stream;
- union {
- /*
- * Only used for async buffered reads, where it denotes the
- * page waitqueue associated with completing the read. Valid
- * IFF IOCB_WAITQ is set.
- */
- struct wait_page_queue *ki_waitq;
- /*
- * Can be used for O_DIRECT IO, where the completion handling
- * is punted back to the issuer of the IO. May only be set
- * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
- * must then check for presence of this handler when ki_complete
- * is invoked. The data passed in to this handler must be
- * assigned to ->private when dio_complete is assigned.
- */
- ssize_t (*dio_complete)(void *data);
- };
+
+ /*
+ * Only used for async buffered reads, where it denotes the page
+ * waitqueue associated with completing the read.
+ * Valid IFF IOCB_WAITQ is set.
+ */
+ struct wait_page_queue *ki_waitq;
};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -659,13 +623,14 @@ is_uncached_acl(struct posix_acl *acl)
return (long)acl & 1;
}
-#define IOP_FASTPERM 0x0001
-#define IOP_LOOKUP 0x0002
-#define IOP_NOFOLLOW 0x0004
-#define IOP_XATTR 0x0008
+#define IOP_FASTPERM 0x0001
+#define IOP_LOOKUP 0x0002
+#define IOP_NOFOLLOW 0x0004
+#define IOP_XATTR 0x0008
#define IOP_DEFAULT_READLINK 0x0010
-#define IOP_MGTIME 0x0020
-#define IOP_CACHED_LINK 0x0040
+#define IOP_MGTIME 0x0020
+#define IOP_CACHED_LINK 0x0040
+#define IOP_FASTPERM_MAY_EXEC 0x0080
/*
* Inode state bits. Protected by inode->i_lock
@@ -759,7 +724,7 @@ enum inode_state_bits {
/* reserved wait address bit 3 */
};
-enum inode_state_flags_t {
+enum inode_state_flags_enum {
I_NEW = (1U << __I_NEW),
I_SYNC = (1U << __I_SYNC),
I_LRU_ISOLATING = (1U << __I_LRU_ISOLATING),
@@ -786,6 +751,13 @@ enum inode_state_flags_t {
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
/*
+ * Use inode_state_read() & friends to access.
+ */
+struct inode_state_flags {
+ enum inode_state_flags_enum __state;
+};
+
+/*
* Keep mostly read-only and often accessed (especially for
* the RCU path lookup and 'stat' data) fields at the beginning
* of the 'struct inode'
@@ -793,14 +765,13 @@ enum inode_state_flags_t {
struct inode {
umode_t i_mode;
unsigned short i_opflags;
- kuid_t i_uid;
- kgid_t i_gid;
unsigned int i_flags;
-
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif
+ kuid_t i_uid;
+ kgid_t i_gid;
const struct inode_operations *i_op;
struct super_block *i_sb;
@@ -843,7 +814,7 @@ struct inode {
#endif
/* Misc */
- enum inode_state_flags_t i_state;
+ struct inode_state_flags i_state;
/* 32-bit hole */
struct rw_semaphore i_rwsem;
@@ -902,6 +873,80 @@ struct inode {
void *i_private; /* fs or device private pointer */
} __randomize_layout;
+/*
+ * i_state handling
+ *
+ * We hide all of it behind helpers so that we can validate consumers.
+ */
+static inline enum inode_state_flags_enum inode_state_read_once(struct inode *inode)
+{
+ return READ_ONCE(inode->i_state.__state);
+}
+
+static inline enum inode_state_flags_enum inode_state_read(struct inode *inode)
+{
+ lockdep_assert_held(&inode->i_lock);
+ return inode->i_state.__state;
+}
+
+static inline void inode_state_set_raw(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ WRITE_ONCE(inode->i_state.__state, inode->i_state.__state | flags);
+}
+
+static inline void inode_state_set(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ lockdep_assert_held(&inode->i_lock);
+ inode_state_set_raw(inode, flags);
+}
+
+static inline void inode_state_clear_raw(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ WRITE_ONCE(inode->i_state.__state, inode->i_state.__state & ~flags);
+}
+
+static inline void inode_state_clear(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ lockdep_assert_held(&inode->i_lock);
+ inode_state_clear_raw(inode, flags);
+}
+
+static inline void inode_state_assign_raw(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ WRITE_ONCE(inode->i_state.__state, flags);
+}
+
+static inline void inode_state_assign(struct inode *inode,
+ enum inode_state_flags_enum flags)
+{
+ lockdep_assert_held(&inode->i_lock);
+ inode_state_assign_raw(inode, flags);
+}
+
+static inline void inode_state_replace_raw(struct inode *inode,
+ enum inode_state_flags_enum clearflags,
+ enum inode_state_flags_enum setflags)
+{
+ enum inode_state_flags_enum flags;
+ flags = inode->i_state.__state;
+ flags &= ~clearflags;
+ flags |= setflags;
+ inode_state_assign_raw(inode, flags);
+}
+
+static inline void inode_state_replace(struct inode *inode,
+ enum inode_state_flags_enum clearflags,
+ enum inode_state_flags_enum setflags)
+{
+ lockdep_assert_held(&inode->i_lock);
+ inode_state_replace_raw(inode, clearflags, setflags);
+}
+
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
{
VFS_WARN_ON_INODE(strlen(link) != linklen, inode);
@@ -949,6 +994,8 @@ static inline void inode_fake_hash(struct inode *inode)
hlist_add_fake(&inode->i_hash);
}
+void wait_on_new_inode(struct inode *inode);
+
/*
* inode->i_rwsem nesting subclasses for the lock validator:
*
@@ -1348,49 +1395,6 @@ extern pid_t f_getown(struct file *filp);
extern int send_sigurg(struct file *file);
/*
- * sb->s_flags. Note that these mirror the equivalent MS_* flags where
- * represented in both.
- */
-#define SB_RDONLY BIT(0) /* Mount read-only */
-#define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */
-#define SB_NODEV BIT(2) /* Disallow access to device special files */
-#define SB_NOEXEC BIT(3) /* Disallow program execution */
-#define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */
-#define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */
-#define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */
-#define SB_NOATIME BIT(10) /* Do not update access times. */
-#define SB_NODIRATIME BIT(11) /* Do not update directory access times */
-#define SB_SILENT BIT(15)
-#define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */
-#define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */
-#define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */
-#define SB_I_VERSION BIT(23) /* Update inode I_version field */
-#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
-
-/* These sb flags are internal to the kernel */
-#define SB_DEAD BIT(21)
-#define SB_DYING BIT(24)
-#define SB_FORCE BIT(27)
-#define SB_NOSEC BIT(28)
-#define SB_BORN BIT(29)
-#define SB_ACTIVE BIT(30)
-#define SB_NOUSER BIT(31)
-
-/* These flags relate to encoding and casefolding */
-#define SB_ENC_STRICT_MODE_FL (1 << 0)
-#define SB_ENC_NO_COMPAT_FALLBACK_FL (1 << 1)
-
-#define sb_has_strict_encoding(sb) \
- (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
-
-#if IS_ENABLED(CONFIG_UNICODE)
-#define sb_no_casefold_compat_fallback(sb) \
- (sb->s_encoding_flags & SB_ENC_NO_COMPAT_FALLBACK_FL)
-#else
-#define sb_no_casefold_compat_fallback(sb) (1)
-#endif
-
-/*
* Umount options
*/
@@ -1400,191 +1404,6 @@ extern int send_sigurg(struct file *file);
#define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */
#define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */
-/* sb->s_iflags */
-#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */
-#define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */
-#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */
-#define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */
-
-/* sb->s_iflags to limit user namespace mounts */
-#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
-#define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020
-#define SB_I_UNTRUSTED_MOUNTER 0x00000040
-#define SB_I_EVM_HMAC_UNSUPPORTED 0x00000080
-
-#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
-#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
-#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
-#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
-#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
-#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */
-#define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */
-
-/* Possible states of 'frozen' field */
-enum {
- SB_UNFROZEN = 0, /* FS is unfrozen */
- SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */
- SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */
- SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop
- * internal threads if needed) */
- SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */
-};
-
-#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
-
-struct sb_writers {
- unsigned short frozen; /* Is sb frozen? */
- int freeze_kcount; /* How many kernel freeze requests? */
- int freeze_ucount; /* How many userspace freeze requests? */
- const void *freeze_owner; /* Owner of the freeze */
- struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
-};
-
-struct mount;
-
-struct super_block {
- struct list_head s_list; /* Keep this first */
- dev_t s_dev; /* search index; _not_ kdev_t */
- unsigned char s_blocksize_bits;
- unsigned long s_blocksize;
- loff_t s_maxbytes; /* Max file size */
- struct file_system_type *s_type;
- const struct super_operations *s_op;
- const struct dquot_operations *dq_op;
- const struct quotactl_ops *s_qcop;
- const struct export_operations *s_export_op;
- unsigned long s_flags;
- unsigned long s_iflags; /* internal SB_I_* flags */
- unsigned long s_magic;
- struct dentry *s_root;
- struct rw_semaphore s_umount;
- int s_count;
- atomic_t s_active;
-#ifdef CONFIG_SECURITY
- void *s_security;
-#endif
- const struct xattr_handler * const *s_xattr;
-#ifdef CONFIG_FS_ENCRYPTION
- const struct fscrypt_operations *s_cop;
- struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */
-#endif
-#ifdef CONFIG_FS_VERITY
- const struct fsverity_operations *s_vop;
-#endif
-#if IS_ENABLED(CONFIG_UNICODE)
- struct unicode_map *s_encoding;
- __u16 s_encoding_flags;
-#endif
- struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
- struct mount *s_mounts; /* list of mounts; _not_ for fs use */
- struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
- struct file *s_bdev_file;
- struct backing_dev_info *s_bdi;
- struct mtd_info *s_mtd;
- struct hlist_node s_instances;
- unsigned int s_quota_types; /* Bitmask of supported quota types */
- struct quota_info s_dquot; /* Diskquota specific options */
-
- struct sb_writers s_writers;
-
- /*
- * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
- * s_fsnotify_info together for cache efficiency. They are frequently
- * accessed and rarely modified.
- */
- void *s_fs_info; /* Filesystem private info */
-
- /* Granularity of c/m/atime in ns (cannot be worse than a second) */
- u32 s_time_gran;
- /* Time limits for c/m/atime in seconds */
- time64_t s_time_min;
- time64_t s_time_max;
-#ifdef CONFIG_FSNOTIFY
- u32 s_fsnotify_mask;
- struct fsnotify_sb_info *s_fsnotify_info;
-#endif
-
- /*
- * q: why are s_id and s_sysfs_name not the same? both are human
- * readable strings that identify the filesystem
- * a: s_id is allowed to change at runtime; it's used in log messages,
- * and we want to when a device starts out as single device (s_id is dev
- * name) but then a device is hot added and we have to switch to
- * identifying it by UUID
- * but s_sysfs_name is a handle for programmatic access, and can't
- * change at runtime
- */
- char s_id[32]; /* Informational name */
- uuid_t s_uuid; /* UUID */
- u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */
-
- /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */
- char s_sysfs_name[UUID_STRING_LEN + 1];
-
- unsigned int s_max_links;
- unsigned int s_d_flags; /* default d_flags for dentries */
-
- /*
- * The next field is for VFS *only*. No filesystems have any business
- * even looking at it. You had been warned.
- */
- struct mutex s_vfs_rename_mutex; /* Kludge */
-
- /*
- * Filesystem subtype. If non-empty the filesystem type field
- * in /proc/mounts will be "type.subtype"
- */
- const char *s_subtype;
-
- const struct dentry_operations *__s_d_op; /* default d_op for dentries */
-
- struct shrinker *s_shrink; /* per-sb shrinker handle */
-
- /* Number of inodes with nlink == 0 but still referenced */
- atomic_long_t s_remove_count;
-
- /* Read-only state of the superblock is being changed */
- int s_readonly_remount;
-
- /* per-sb errseq_t for reporting writeback errors via syncfs */
- errseq_t s_wb_err;
-
- /* AIO completions deferred from interrupt context */
- struct workqueue_struct *s_dio_done_wq;
- struct hlist_head s_pins;
-
- /*
- * Owning user namespace and default context in which to
- * interpret filesystem uids, gids, quotas, device nodes,
- * xattrs and security labels.
- */
- struct user_namespace *s_user_ns;
-
- /*
- * The list_lru structure is essentially just a pointer to a table
- * of per-node lru lists, each of which has its own spinlock.
- * There is no need to put them into separate cachelines.
- */
- struct list_lru s_dentry_lru;
- struct list_lru s_inode_lru;
- struct rcu_head rcu;
- struct work_struct destroy_work;
-
- struct mutex s_sync_lock; /* sync serialisation lock */
-
- /*
- * Indicates how deep in a filesystem stack this SB is
- */
- int s_stack_depth;
-
- /* s_inode_list_lock protects s_inodes */
- spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp;
- struct list_head s_inodes; /* all inodes */
-
- spinlock_t s_inode_wblist_lock;
- struct list_head s_inodes_wb; /* writeback inodes */
-} __randomize_layout;
-
static inline struct user_namespace *i_user_ns(const struct inode *inode)
{
return inode->i_sb->s_user_ns;
@@ -1902,66 +1721,6 @@ struct timespec64 simple_inode_init_ts(struct inode *inode);
* Snapshotting support.
*/
-/*
- * These are internal functions, please use sb_start_{write,pagefault,intwrite}
- * instead.
- */
-static inline void __sb_end_write(struct super_block *sb, int level)
-{
- percpu_up_read(sb->s_writers.rw_sem + level-1);
-}
-
-static inline void __sb_start_write(struct super_block *sb, int level)
-{
- percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true);
-}
-
-static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
-{
- return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1);
-}
-
-#define __sb_writers_acquired(sb, lev) \
- percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
-#define __sb_writers_release(sb, lev) \
- percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_)
-
-/**
- * __sb_write_started - check if sb freeze level is held
- * @sb: the super we write to
- * @level: the freeze level
- *
- * * > 0 - sb freeze level is held
- * * 0 - sb freeze level is not held
- * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
- */
-static inline int __sb_write_started(const struct super_block *sb, int level)
-{
- return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1);
-}
-
-/**
- * sb_write_started - check if SB_FREEZE_WRITE is held
- * @sb: the super we write to
- *
- * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
- */
-static inline bool sb_write_started(const struct super_block *sb)
-{
- return __sb_write_started(sb, SB_FREEZE_WRITE);
-}
-
-/**
- * sb_write_not_started - check if SB_FREEZE_WRITE is not held
- * @sb: the super we write to
- *
- * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
- */
-static inline bool sb_write_not_started(const struct super_block *sb)
-{
- return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0;
-}
-
/**
* file_write_started - check if SB_FREEZE_WRITE is held
* @file: the file we write to
@@ -1992,137 +1751,26 @@ static inline bool file_write_not_started(const struct file *file)
return sb_write_not_started(file_inode(file)->i_sb);
}
-/**
- * sb_end_write - drop write access to a superblock
- * @sb: the super we wrote to
- *
- * Decrement number of writers to the filesystem. Wake up possible waiters
- * wanting to freeze the filesystem.
- */
-static inline void sb_end_write(struct super_block *sb)
-{
- __sb_end_write(sb, SB_FREEZE_WRITE);
-}
-
-/**
- * sb_end_pagefault - drop write access to a superblock from a page fault
- * @sb: the super we wrote to
- *
- * Decrement number of processes handling write page fault to the filesystem.
- * Wake up possible waiters wanting to freeze the filesystem.
- */
-static inline void sb_end_pagefault(struct super_block *sb)
-{
- __sb_end_write(sb, SB_FREEZE_PAGEFAULT);
-}
-
-/**
- * sb_end_intwrite - drop write access to a superblock for internal fs purposes
- * @sb: the super we wrote to
- *
- * Decrement fs-internal number of writers to the filesystem. Wake up possible
- * waiters wanting to freeze the filesystem.
- */
-static inline void sb_end_intwrite(struct super_block *sb)
-{
- __sb_end_write(sb, SB_FREEZE_FS);
-}
-
-/**
- * sb_start_write - get write access to a superblock
- * @sb: the super we write to
- *
- * When a process wants to write data or metadata to a file system (i.e. dirty
- * a page or an inode), it should embed the operation in a sb_start_write() -
- * sb_end_write() pair to get exclusion against file system freezing. This
- * function increments number of writers preventing freezing. If the file
- * system is already frozen, the function waits until the file system is
- * thawed.
- *
- * Since freeze protection behaves as a lock, users have to preserve
- * ordering of freeze protection and other filesystem locks. Generally,
- * freeze protection should be the outermost lock. In particular, we have:
- *
- * sb_start_write
- * -> i_rwsem (write path, truncate, directory ops, ...)
- * -> s_umount (freeze_super, thaw_super)
- */
-static inline void sb_start_write(struct super_block *sb)
-{
- __sb_start_write(sb, SB_FREEZE_WRITE);
-}
-
-static inline bool sb_start_write_trylock(struct super_block *sb)
-{
- return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
-}
-
-/**
- * sb_start_pagefault - get write access to a superblock from a page fault
- * @sb: the super we write to
- *
- * When a process starts handling write page fault, it should embed the
- * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
- * exclusion against file system freezing. This is needed since the page fault
- * is going to dirty a page. This function increments number of running page
- * faults preventing freezing. If the file system is already frozen, the
- * function waits until the file system is thawed.
- *
- * Since page fault freeze protection behaves as a lock, users have to preserve
- * ordering of freeze protection and other filesystem locks. It is advised to
- * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
- * handling code implies lock dependency:
- *
- * mmap_lock
- * -> sb_start_pagefault
- */
-static inline void sb_start_pagefault(struct super_block *sb)
-{
- __sb_start_write(sb, SB_FREEZE_PAGEFAULT);
-}
-
-/**
- * sb_start_intwrite - get write access to a superblock for internal fs purposes
- * @sb: the super we write to
- *
- * This is the third level of protection against filesystem freezing. It is
- * free for use by a filesystem. The only requirement is that it must rank
- * below sb_start_pagefault.
- *
- * For example filesystem can call sb_start_intwrite() when starting a
- * transaction which somewhat eases handling of freezing for internal sources
- * of filesystem changes (internal fs threads, discarding preallocation on file
- * close, etc.).
- */
-static inline void sb_start_intwrite(struct super_block *sb)
-{
- __sb_start_write(sb, SB_FREEZE_FS);
-}
-
-static inline bool sb_start_intwrite_trylock(struct super_block *sb)
-{
- return __sb_start_write_trylock(sb, SB_FREEZE_FS);
-}
-
bool inode_owner_or_capable(struct mnt_idmap *idmap,
const struct inode *inode);
/*
* VFS helper functions..
*/
-int vfs_create(struct mnt_idmap *, struct inode *,
- struct dentry *, umode_t, bool);
+int vfs_create(struct mnt_idmap *, struct dentry *, umode_t,
+ struct delegated_inode *);
struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
- struct dentry *, umode_t);
+ struct dentry *, umode_t, struct delegated_inode *);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
- umode_t, dev_t);
+ umode_t, dev_t, struct delegated_inode *);
int vfs_symlink(struct mnt_idmap *, struct inode *,
- struct dentry *, const char *);
+ struct dentry *, const char *, struct delegated_inode *);
int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *,
- struct dentry *, struct inode **);
-int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *);
+ struct dentry *, struct delegated_inode *);
+int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *,
+ struct delegated_inode *);
int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
- struct inode **);
+ struct delegated_inode *);
/**
* struct renamedata - contains all information required for renaming
@@ -2140,7 +1788,7 @@ struct renamedata {
struct dentry *old_dentry;
struct dentry *new_parent;
struct dentry *new_dentry;
- struct inode **delegated_inode;
+ struct delegated_inode *delegated_inode;
unsigned int flags;
} __randomize_layout;
@@ -2150,7 +1798,7 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry)
{
return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE,
- WHITEOUT_DEV);
+ WHITEOUT_DEV, NULL);
}
struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
@@ -2431,72 +2079,6 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct file *dst_file, loff_t dst_pos,
loff_t len, unsigned int remap_flags);
-/**
- * enum freeze_holder - holder of the freeze
- * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
- * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
- * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
- * @FREEZE_EXCL: a freeze that can only be undone by the owner
- *
- * Indicate who the owner of the freeze or thaw request is and whether
- * the freeze needs to be exclusive or can nest.
- * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the
- * same holder aren't allowed. It is however allowed to hold a single
- * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at
- * the same time. This is relied upon by some filesystems during online
- * repair or similar.
- */
-enum freeze_holder {
- FREEZE_HOLDER_KERNEL = (1U << 0),
- FREEZE_HOLDER_USERSPACE = (1U << 1),
- FREEZE_MAY_NEST = (1U << 2),
- FREEZE_EXCL = (1U << 3),
-};
-
-struct super_operations {
- struct inode *(*alloc_inode)(struct super_block *sb);
- void (*destroy_inode)(struct inode *);
- void (*free_inode)(struct inode *);
-
- void (*dirty_inode) (struct inode *, int flags);
- int (*write_inode) (struct inode *, struct writeback_control *wbc);
- int (*drop_inode) (struct inode *);
- void (*evict_inode) (struct inode *);
- void (*put_super) (struct super_block *);
- int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner);
- int (*freeze_fs) (struct super_block *);
- int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner);
- int (*unfreeze_fs) (struct super_block *);
- int (*statfs) (struct dentry *, struct kstatfs *);
- int (*remount_fs) (struct super_block *, int *, char *);
- void (*umount_begin) (struct super_block *);
-
- int (*show_options)(struct seq_file *, struct dentry *);
- int (*show_devname)(struct seq_file *, struct dentry *);
- int (*show_path)(struct seq_file *, struct dentry *);
- int (*show_stats)(struct seq_file *, struct dentry *);
-#ifdef CONFIG_QUOTA
- ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
- ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
- struct dquot __rcu **(*get_dquots)(struct inode *);
-#endif
- long (*nr_cached_objects)(struct super_block *,
- struct shrink_control *);
- long (*free_cached_objects)(struct super_block *,
- struct shrink_control *);
- /*
- * If a filesystem can support graceful removal of a device and
- * continue read-write operations, implement this callback.
- *
- * Return 0 if the filesystem can continue read-write.
- * Non-zero return value or no such callback means the fs will be shutdown
- * as usual.
- */
- int (*remove_bdev)(struct super_block *sb, struct block_device *bdev);
- void (*shutdown)(struct super_block *sb);
-};
-
/*
* Inode flags - they have no relation to superblock flags now
*/
@@ -2539,7 +2121,6 @@ struct super_operations {
*/
#define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg))
-static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; }
#define IS_RDONLY(inode) sb_rdonly((inode)->i_sb)
#define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \
((inode)->i_flags & S_SYNC))
@@ -2635,8 +2216,8 @@ static inline int icount_read(const struct inode *inode)
*/
static inline bool inode_is_dirtytime_only(struct inode *inode)
{
- return (inode->i_state & (I_DIRTY_TIME | I_NEW |
- I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
+ return (inode_state_read_once(inode) &
+ (I_DIRTY_TIME | I_NEW | I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
}
extern void inc_nlink(struct inode *inode);
@@ -2689,6 +2270,7 @@ struct file_system_type {
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
#define FS_MGTIME 64 /* FS uses multigrain timestamps */
#define FS_LBS 128 /* FS supports LBS */
+#define FS_POWER_FREEZE 256 /* Always freeze on suspend/hibernate */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2773,10 +2355,6 @@ extern int unregister_filesystem(struct file_system_type *);
extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
-int freeze_super(struct super_block *super, enum freeze_holder who,
- const void *freeze_owner);
-int thaw_super(struct super_block *super, enum freeze_holder who,
- const void *freeze_owner);
extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
@@ -2819,10 +2397,9 @@ static inline void super_set_sysfs_name_generic(struct super_block *sb, const ch
va_end(args);
}
-extern int current_umask(void);
-
extern void ihold(struct inode * inode);
extern void iput(struct inode *);
+void iput_not_last(struct inode *);
int inode_update_timestamps(struct inode *inode, int flags);
int generic_update_time(struct inode *, int);
@@ -2963,12 +2540,6 @@ extern struct kmem_cache *names_cachep;
#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
-extern struct super_block *blockdev_superblock;
-static inline bool sb_is_blkdev_sb(struct super_block *sb)
-{
- return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
-}
-
void emergency_thaw_all(void);
extern int sync_filesystem(struct super_block *);
extern const struct file_operations def_blk_fops;
@@ -3014,7 +2585,7 @@ extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
extern int __must_check file_check_and_advance_wb_err(struct file *file);
extern int __must_check file_write_and_wait_range(struct file *file,
loff_t start, loff_t end);
-int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start,
+int filemap_flush_range(struct address_space *mapping, loff_t start,
loff_t end);
static inline int file_write_and_wait(struct file *file)
@@ -3051,8 +2622,8 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
} else if (iocb->ki_flags & IOCB_DONTCACHE) {
struct address_space *mapping = iocb->ki_filp->f_mapping;
- filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count,
- iocb->ki_pos - 1);
+ filemap_flush_range(mapping, iocb->ki_pos - count,
+ iocb->ki_pos - 1);
}
return count;
@@ -3071,7 +2642,7 @@ static inline int bmap(struct inode *inode, sector_t *block)
#endif
int notify_change(struct mnt_idmap *, struct dentry *,
- struct iattr *, struct inode **);
+ struct iattr *, struct delegated_inode *);
int inode_permission(struct mnt_idmap *, struct inode *, int);
int generic_permission(struct mnt_idmap *, struct inode *, int);
static inline int file_permission(struct file *file, int mask)
@@ -3101,7 +2672,7 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
* file_start_write - get write access to a superblock for regular file io
* @file: the file we want to write to
*
- * This is a variant of sb_start_write() which is a noop on non-regualr file.
+ * This is a variant of sb_start_write() which is a noop on non-regular file.
* Should be matched with a call to file_end_write().
*/
static inline void file_start_write(struct file *file)
@@ -3269,6 +2840,7 @@ extern struct file * open_exec(const char *);
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
extern bool path_is_under(const struct path *, const struct path *);
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
extern char *file_path(struct file *, char *, int);
@@ -3326,7 +2898,7 @@ extern void d_mark_dontcache(struct inode *inode);
extern struct inode *ilookup5_nowait(struct super_block *sb,
unsigned long hashval, int (*test)(struct inode *, void *),
- void *data);
+ void *data, bool *isnew);
extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data);
extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
@@ -3378,11 +2950,9 @@ static inline bool is_zero_ino(ino_t ino)
return (u32)ino == 0;
}
-/*
- * inode->i_lock must be held
- */
static inline void __iget(struct inode *inode)
{
+ lockdep_assert_held(&inode->i_lock);
atomic_inc(&inode->i_count);
}
@@ -3421,10 +2991,7 @@ static inline void remove_inode_hash(struct inode *inode)
}
extern void inode_sb_list_add(struct inode *inode);
-extern void inode_add_lru(struct inode *inode);
-
-extern int sb_set_blocksize(struct super_block *, int);
-extern int sb_min_blocksize(struct super_block *, int);
+extern void inode_lru_list_add(struct inode *inode);
int generic_file_mmap(struct file *, struct vm_area_struct *);
int generic_file_mmap_prepare(struct vm_area_desc *desc);
@@ -3606,9 +3173,11 @@ extern void drop_super_exclusive(struct super_block *sb);
extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg);
extern void iterate_supers_type(struct file_system_type *,
void (*)(struct super_block *, void *), void *);
-void filesystems_freeze(void);
+void filesystems_freeze(bool freeze_all);
void filesystems_thaw(void);
+void end_dirop(struct dentry *de);
+
extern int dcache_dir_open(struct inode *, struct file *);
extern int dcache_dir_close(struct inode *, struct file *);
extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
@@ -3745,38 +3314,6 @@ static inline bool generic_ci_validate_strict_name(struct inode *dir,
}
#endif
-static inline struct unicode_map *sb_encoding(const struct super_block *sb)
-{
-#if IS_ENABLED(CONFIG_UNICODE)
- return sb->s_encoding;
-#else
- return NULL;
-#endif
-}
-
-static inline bool sb_has_encoding(const struct super_block *sb)
-{
- return !!sb_encoding(sb);
-}
-
-/*
- * Compare if two super blocks have the same encoding and flags
- */
-static inline bool sb_same_encoding(const struct super_block *sb1,
- const struct super_block *sb2)
-{
-#if IS_ENABLED(CONFIG_UNICODE)
- if (sb1->s_encoding == sb2->s_encoding)
- return true;
-
- return (sb1->s_encoding && sb2->s_encoding &&
- (sb1->s_encoding->version == sb2->s_encoding->version) &&
- (sb1->s_encoding_flags == sb2->s_encoding_flags));
-#else
- return true;
-#endif
-}
-
int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
unsigned int ia_valid);
int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *);
diff --git a/include/linux/fs/super.h b/include/linux/fs/super.h
new file mode 100644
index 000000000000..f21ffbb6dea5
--- /dev/null
+++ b/include/linux/fs/super.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FS_SUPER_H
+#define _LINUX_FS_SUPER_H
+
+#include <linux/fs/super_types.h>
+#include <linux/unicode.h>
+
+/*
+ * These are internal functions, please use sb_start_{write,pagefault,intwrite}
+ * instead.
+ */
+static inline void __sb_end_write(struct super_block *sb, int level)
+{
+ percpu_up_read(sb->s_writers.rw_sem + level - 1);
+}
+
+static inline void __sb_start_write(struct super_block *sb, int level)
+{
+ percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true);
+}
+
+static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
+{
+ return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1);
+}
+
+#define __sb_writers_acquired(sb, lev) \
+ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev) - 1], 1, _THIS_IP_)
+#define __sb_writers_release(sb, lev) \
+ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev) - 1], _THIS_IP_)
+
+/**
+ * __sb_write_started - check if sb freeze level is held
+ * @sb: the super we write to
+ * @level: the freeze level
+ *
+ * * > 0 - sb freeze level is held
+ * * 0 - sb freeze level is not held
+ * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
+ */
+static inline int __sb_write_started(const struct super_block *sb, int level)
+{
+ return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1);
+}
+
+/**
+ * sb_write_started - check if SB_FREEZE_WRITE is held
+ * @sb: the super we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ */
+static inline bool sb_write_started(const struct super_block *sb)
+{
+ return __sb_write_started(sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * sb_write_not_started - check if SB_FREEZE_WRITE is not held
+ * @sb: the super we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ */
+static inline bool sb_write_not_started(const struct super_block *sb)
+{
+ return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0;
+}
+
+/**
+ * sb_end_write - drop write access to a superblock
+ * @sb: the super we wrote to
+ *
+ * Decrement number of writers to the filesystem. Wake up possible waiters
+ * wanting to freeze the filesystem.
+ */
+static inline void sb_end_write(struct super_block *sb)
+{
+ __sb_end_write(sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * sb_end_pagefault - drop write access to a superblock from a page fault
+ * @sb: the super we wrote to
+ *
+ * Decrement number of processes handling write page fault to the filesystem.
+ * Wake up possible waiters wanting to freeze the filesystem.
+ */
+static inline void sb_end_pagefault(struct super_block *sb)
+{
+ __sb_end_write(sb, SB_FREEZE_PAGEFAULT);
+}
+
+/**
+ * sb_end_intwrite - drop write access to a superblock for internal fs purposes
+ * @sb: the super we wrote to
+ *
+ * Decrement fs-internal number of writers to the filesystem. Wake up possible
+ * waiters wanting to freeze the filesystem.
+ */
+static inline void sb_end_intwrite(struct super_block *sb)
+{
+ __sb_end_write(sb, SB_FREEZE_FS);
+}
+
+/**
+ * sb_start_write - get write access to a superblock
+ * @sb: the super we write to
+ *
+ * When a process wants to write data or metadata to a file system (i.e. dirty
+ * a page or an inode), it should embed the operation in a sb_start_write() -
+ * sb_end_write() pair to get exclusion against file system freezing. This
+ * function increments number of writers preventing freezing. If the file
+ * system is already frozen, the function waits until the file system is
+ * thawed.
+ *
+ * Since freeze protection behaves as a lock, users have to preserve
+ * ordering of freeze protection and other filesystem locks. Generally,
+ * freeze protection should be the outermost lock. In particular, we have:
+ *
+ * sb_start_write
+ * -> i_rwsem (write path, truncate, directory ops, ...)
+ * -> s_umount (freeze_super, thaw_super)
+ */
+static inline void sb_start_write(struct super_block *sb)
+{
+ __sb_start_write(sb, SB_FREEZE_WRITE);
+}
+
+DEFINE_GUARD(super_write,
+ struct super_block *,
+ sb_start_write(_T),
+ sb_end_write(_T))
+
+static inline bool sb_start_write_trylock(struct super_block *sb)
+{
+ return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * sb_start_pagefault - get write access to a superblock from a page fault
+ * @sb: the super we write to
+ *
+ * When a process starts handling write page fault, it should embed the
+ * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
+ * exclusion against file system freezing. This is needed since the page fault
+ * is going to dirty a page. This function increments number of running page
+ * faults preventing freezing. If the file system is already frozen, the
+ * function waits until the file system is thawed.
+ *
+ * Since page fault freeze protection behaves as a lock, users have to preserve
+ * ordering of freeze protection and other filesystem locks. It is advised to
+ * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
+ * handling code implies lock dependency:
+ *
+ * mmap_lock
+ * -> sb_start_pagefault
+ */
+static inline void sb_start_pagefault(struct super_block *sb)
+{
+ __sb_start_write(sb, SB_FREEZE_PAGEFAULT);
+}
+
+/**
+ * sb_start_intwrite - get write access to a superblock for internal fs purposes
+ * @sb: the super we write to
+ *
+ * This is the third level of protection against filesystem freezing. It is
+ * free for use by a filesystem. The only requirement is that it must rank
+ * below sb_start_pagefault.
+ *
+ * For example filesystem can call sb_start_intwrite() when starting a
+ * transaction which somewhat eases handling of freezing for internal sources
+ * of filesystem changes (internal fs threads, discarding preallocation on file
+ * close, etc.).
+ */
+static inline void sb_start_intwrite(struct super_block *sb)
+{
+ __sb_start_write(sb, SB_FREEZE_FS);
+}
+
+static inline bool sb_start_intwrite_trylock(struct super_block *sb)
+{
+ return __sb_start_write_trylock(sb, SB_FREEZE_FS);
+}
+
+static inline bool sb_rdonly(const struct super_block *sb)
+{
+ return sb->s_flags & SB_RDONLY;
+}
+
+static inline bool sb_is_blkdev_sb(struct super_block *sb)
+{
+ return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
+}
+
+#if IS_ENABLED(CONFIG_UNICODE)
+static inline struct unicode_map *sb_encoding(const struct super_block *sb)
+{
+ return sb->s_encoding;
+}
+
+/* Compare if two super blocks have the same encoding and flags */
+static inline bool sb_same_encoding(const struct super_block *sb1,
+ const struct super_block *sb2)
+{
+ if (sb1->s_encoding == sb2->s_encoding)
+ return true;
+
+ return (sb1->s_encoding && sb2->s_encoding &&
+ (sb1->s_encoding->version == sb2->s_encoding->version) &&
+ (sb1->s_encoding_flags == sb2->s_encoding_flags));
+}
+#else
+static inline struct unicode_map *sb_encoding(const struct super_block *sb)
+{
+ return NULL;
+}
+
+static inline bool sb_same_encoding(const struct super_block *sb1,
+ const struct super_block *sb2)
+{
+ return true;
+}
+#endif
+
+static inline bool sb_has_encoding(const struct super_block *sb)
+{
+ return !!sb_encoding(sb);
+}
+
+int sb_set_blocksize(struct super_block *sb, int size);
+int __must_check sb_min_blocksize(struct super_block *sb, int size);
+
+int freeze_super(struct super_block *super, enum freeze_holder who,
+ const void *freeze_owner);
+int thaw_super(struct super_block *super, enum freeze_holder who,
+ const void *freeze_owner);
+
+#endif /* _LINUX_FS_SUPER_H */
diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h
new file mode 100644
index 000000000000..6bd3009e09b3
--- /dev/null
+++ b/include/linux/fs/super_types.h
@@ -0,0 +1,336 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FS_SUPER_TYPES_H
+#define _LINUX_FS_SUPER_TYPES_H
+
+#include <linux/fs_dirent.h>
+#include <linux/errseq.h>
+#include <linux/list_lru.h>
+#include <linux/list.h>
+#include <linux/list_bl.h>
+#include <linux/llist.h>
+#include <linux/uidgid.h>
+#include <linux/uuid.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/workqueue_types.h>
+#include <linux/quota.h>
+
+struct backing_dev_info;
+struct block_device;
+struct dentry;
+struct dentry_operations;
+struct dquot_operations;
+struct export_operations;
+struct file;
+struct file_system_type;
+struct fscrypt_operations;
+struct fsnotify_sb_info;
+struct fsverity_operations;
+struct kstatfs;
+struct mount;
+struct mtd_info;
+struct quotactl_ops;
+struct shrinker;
+struct unicode_map;
+struct user_namespace;
+struct workqueue_struct;
+struct writeback_control;
+struct xattr_handler;
+
+extern struct super_block *blockdev_superblock;
+
+/* Possible states of 'frozen' field */
+enum {
+ SB_UNFROZEN = 0, /* FS is unfrozen */
+ SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */
+ SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */
+ SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop internal threads if needed) */
+ SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */
+};
+
+#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
+
+struct sb_writers {
+ unsigned short frozen; /* Is sb frozen? */
+ int freeze_kcount; /* How many kernel freeze requests? */
+ int freeze_ucount; /* How many userspace freeze requests? */
+ const void *freeze_owner; /* Owner of the freeze */
+ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
+};
+
+/**
+ * enum freeze_holder - holder of the freeze
+ * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
+ * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
+ * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
+ * @FREEZE_EXCL: a freeze that can only be undone by the owner
+ *
+ * Indicate who the owner of the freeze or thaw request is and whether
+ * the freeze needs to be exclusive or can nest.
+ * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the
+ * same holder aren't allowed. It is however allowed to hold a single
+ * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at
+ * the same time. This is relied upon by some filesystems during online
+ * repair or similar.
+ */
+enum freeze_holder {
+ FREEZE_HOLDER_KERNEL = (1U << 0),
+ FREEZE_HOLDER_USERSPACE = (1U << 1),
+ FREEZE_MAY_NEST = (1U << 2),
+ FREEZE_EXCL = (1U << 3),
+};
+
+struct super_operations {
+ struct inode *(*alloc_inode)(struct super_block *sb);
+ void (*destroy_inode)(struct inode *inode);
+ void (*free_inode)(struct inode *inode);
+ void (*dirty_inode)(struct inode *inode, int flags);
+ int (*write_inode)(struct inode *inode, struct writeback_control *wbc);
+ int (*drop_inode)(struct inode *inode);
+ void (*evict_inode)(struct inode *inode);
+ void (*put_super)(struct super_block *sb);
+ int (*sync_fs)(struct super_block *sb, int wait);
+ int (*freeze_super)(struct super_block *sb, enum freeze_holder who,
+ const void *owner);
+ int (*freeze_fs)(struct super_block *sb);
+ int (*thaw_super)(struct super_block *sb, enum freeze_holder who,
+ const void *owner);
+ int (*unfreeze_fs)(struct super_block *sb);
+ int (*statfs)(struct dentry *dentry, struct kstatfs *kstatfs);
+ int (*remount_fs) (struct super_block *, int *, char *);
+ void (*umount_begin)(struct super_block *sb);
+
+ int (*show_options)(struct seq_file *seq, struct dentry *dentry);
+ int (*show_devname)(struct seq_file *seq, struct dentry *dentry);
+ int (*show_path)(struct seq_file *seq, struct dentry *dentry);
+ int (*show_stats)(struct seq_file *seq, struct dentry *dentry);
+#ifdef CONFIG_QUOTA
+ ssize_t (*quota_read)(struct super_block *sb, int type, char *data,
+ size_t len, loff_t off);
+ ssize_t (*quota_write)(struct super_block *sb, int type,
+ const char *data, size_t len, loff_t off);
+ struct dquot __rcu **(*get_dquots)(struct inode *inode);
+#endif
+ long (*nr_cached_objects)(struct super_block *sb,
+ struct shrink_control *sc);
+ long (*free_cached_objects)(struct super_block *sb,
+ struct shrink_control *sc);
+ /*
+ * If a filesystem can support graceful removal of a device and
+ * continue read-write operations, implement this callback.
+ *
+ * Return 0 if the filesystem can continue read-write.
+ * Non-zero return value or no such callback means the fs will be shutdown
+ * as usual.
+ */
+ int (*remove_bdev)(struct super_block *sb, struct block_device *bdev);
+ void (*shutdown)(struct super_block *sb);
+};
+
+struct super_block {
+ struct list_head s_list; /* Keep this first */
+ dev_t s_dev; /* search index; _not_ kdev_t */
+ unsigned char s_blocksize_bits;
+ unsigned long s_blocksize;
+ loff_t s_maxbytes; /* Max file size */
+ struct file_system_type *s_type;
+ const struct super_operations *s_op;
+ const struct dquot_operations *dq_op;
+ const struct quotactl_ops *s_qcop;
+ const struct export_operations *s_export_op;
+ unsigned long s_flags;
+ unsigned long s_iflags; /* internal SB_I_* flags */
+ unsigned long s_magic;
+ struct dentry *s_root;
+ struct rw_semaphore s_umount;
+ int s_count;
+ atomic_t s_active;
+#ifdef CONFIG_SECURITY
+ void *s_security;
+#endif
+ const struct xattr_handler *const *s_xattr;
+#ifdef CONFIG_FS_ENCRYPTION
+ const struct fscrypt_operations *s_cop;
+ struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */
+#endif
+#ifdef CONFIG_FS_VERITY
+ const struct fsverity_operations *s_vop;
+#endif
+#if IS_ENABLED(CONFIG_UNICODE)
+ struct unicode_map *s_encoding;
+ __u16 s_encoding_flags;
+#endif
+ struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
+ struct mount *s_mounts; /* list of mounts; _not_ for fs use */
+ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
+ struct file *s_bdev_file;
+ struct backing_dev_info *s_bdi;
+ struct mtd_info *s_mtd;
+ struct hlist_node s_instances;
+ unsigned int s_quota_types; /* Bitmask of supported quota types */
+ struct quota_info s_dquot; /* Diskquota specific options */
+
+ struct sb_writers s_writers;
+
+ /*
+ * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
+ * s_fsnotify_info together for cache efficiency. They are frequently
+ * accessed and rarely modified.
+ */
+ void *s_fs_info; /* Filesystem private info */
+
+ /* Granularity of c/m/atime in ns (cannot be worse than a second) */
+ u32 s_time_gran;
+ /* Time limits for c/m/atime in seconds */
+ time64_t s_time_min;
+ time64_t s_time_max;
+#ifdef CONFIG_FSNOTIFY
+ u32 s_fsnotify_mask;
+ struct fsnotify_sb_info *s_fsnotify_info;
+#endif
+
+ /*
+ * q: why are s_id and s_sysfs_name not the same? both are human
+ * readable strings that identify the filesystem
+ * a: s_id is allowed to change at runtime; it's used in log messages,
+ * and we want to when a device starts out as single device (s_id is dev
+ * name) but then a device is hot added and we have to switch to
+ * identifying it by UUID
+ * but s_sysfs_name is a handle for programmatic access, and can't
+ * change at runtime
+ */
+ char s_id[32]; /* Informational name */
+ uuid_t s_uuid; /* UUID */
+ u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */
+
+ /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */
+ char s_sysfs_name[UUID_STRING_LEN + 1];
+
+ unsigned int s_max_links;
+ unsigned int s_d_flags; /* default d_flags for dentries */
+
+ /*
+ * The next field is for VFS *only*. No filesystems have any business
+ * even looking at it. You had been warned.
+ */
+ struct mutex s_vfs_rename_mutex; /* Kludge */
+
+ /*
+ * Filesystem subtype. If non-empty the filesystem type field
+ * in /proc/mounts will be "type.subtype"
+ */
+ const char *s_subtype;
+
+ const struct dentry_operations *__s_d_op; /* default d_op for dentries */
+
+ struct shrinker *s_shrink; /* per-sb shrinker handle */
+
+ /* Number of inodes with nlink == 0 but still referenced */
+ atomic_long_t s_remove_count;
+
+ /* Read-only state of the superblock is being changed */
+ int s_readonly_remount;
+
+ /* per-sb errseq_t for reporting writeback errors via syncfs */
+ errseq_t s_wb_err;
+
+ /* AIO completions deferred from interrupt context */
+ struct workqueue_struct *s_dio_done_wq;
+ struct hlist_head s_pins;
+
+ /*
+ * Owning user namespace and default context in which to
+ * interpret filesystem uids, gids, quotas, device nodes,
+ * xattrs and security labels.
+ */
+ struct user_namespace *s_user_ns;
+
+ /*
+ * The list_lru structure is essentially just a pointer to a table
+ * of per-node lru lists, each of which has its own spinlock.
+ * There is no need to put them into separate cachelines.
+ */
+ struct list_lru s_dentry_lru;
+ struct list_lru s_inode_lru;
+ struct rcu_head rcu;
+ struct work_struct destroy_work;
+
+ struct mutex s_sync_lock; /* sync serialisation lock */
+
+ /*
+ * Indicates how deep in a filesystem stack this SB is
+ */
+ int s_stack_depth;
+
+ /* s_inode_list_lock protects s_inodes */
+ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp;
+ struct list_head s_inodes; /* all inodes */
+
+ spinlock_t s_inode_wblist_lock;
+ struct list_head s_inodes_wb; /* writeback inodes */
+ long s_min_writeback_pages;
+} __randomize_layout;
+
+/*
+ * sb->s_flags. Note that these mirror the equivalent MS_* flags where
+ * represented in both.
+ */
+#define SB_RDONLY BIT(0) /* Mount read-only */
+#define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */
+#define SB_NODEV BIT(2) /* Disallow access to device special files */
+#define SB_NOEXEC BIT(3) /* Disallow program execution */
+#define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */
+#define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */
+#define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */
+#define SB_NOATIME BIT(10) /* Do not update access times. */
+#define SB_NODIRATIME BIT(11) /* Do not update directory access times */
+#define SB_SILENT BIT(15)
+#define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */
+#define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */
+#define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */
+#define SB_I_VERSION BIT(23) /* Update inode I_version field */
+#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define SB_DEAD BIT(21)
+#define SB_DYING BIT(24)
+#define SB_FORCE BIT(27)
+#define SB_NOSEC BIT(28)
+#define SB_BORN BIT(29)
+#define SB_ACTIVE BIT(30)
+#define SB_NOUSER BIT(31)
+
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL (1 << 0)
+#define SB_ENC_NO_COMPAT_FALLBACK_FL (1 << 1)
+
+#define sb_has_strict_encoding(sb) \
+ (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+
+#if IS_ENABLED(CONFIG_UNICODE)
+#define sb_no_casefold_compat_fallback(sb) \
+ (sb->s_encoding_flags & SB_ENC_NO_COMPAT_FALLBACK_FL)
+#else
+#define sb_no_casefold_compat_fallback(sb) (1)
+#endif
+
+/* sb->s_iflags */
+#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */
+#define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */
+#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */
+#define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */
+
+/* sb->s_iflags to limit user namespace mounts */
+#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
+#define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020
+#define SB_I_UNTRUSTED_MOUNTER 0x00000040
+#define SB_I_EVM_HMAC_UNSUPPORTED 0x00000080
+
+#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
+#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
+#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
+#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
+#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
+#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */
+#define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */
+
+#endif /* _LINUX_FS_SUPER_TYPES_H */
diff --git a/include/linux/fs_types.h b/include/linux/fs_dirent.h
index 54816791196f..92f75c5bac19 100644
--- a/include/linux/fs_types.h
+++ b/include/linux/fs_dirent.h
@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_FS_TYPES_H
-#define _LINUX_FS_TYPES_H
+#ifndef _LINUX_FS_DIRENT_H
+#define _LINUX_FS_DIRENT_H
+
+#include <linux/stat.h>
+#include <linux/types.h>
/*
* This is a header for the common implementation of dirent
@@ -66,10 +69,10 @@
/*
* declarations for helper functions, accompanying implementation
- * is in fs/fs_types.c
+ * is in fs/fs_dirent.c
*/
extern unsigned char fs_ftype_to_dtype(unsigned int filetype);
extern unsigned char fs_umode_to_ftype(umode_t mode);
extern unsigned char fs_umode_to_dtype(umode_t mode);
-#endif
+#endif /* _LINUX_FS_DIRENT_H */
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index baf200ab5c77..0070764b790a 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_FS_STRUCT_H
#define _LINUX_FS_STRUCT_H
+#include <linux/sched.h>
#include <linux/path.h>
#include <linux/spinlock.h>
#include <linux/seqlock.h>
@@ -41,4 +42,9 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd)
extern bool current_chrooted(void);
+static inline int current_umask(void)
+{
+ return current->fs->umask;
+}
+
#endif /* _LINUX_FS_STRUCT_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7ded7df6e9b5..07f8c309e432 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -193,6 +193,10 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS)
+#ifndef arch_ftrace_partial_regs
+#define arch_ftrace_partial_regs(regs) do {} while (0)
+#endif
+
static __always_inline struct pt_regs *
ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
{
@@ -202,7 +206,11 @@ ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
* Since arch_ftrace_get_regs() will check some members and may return
* NULL, we can not use it.
*/
- return &arch_ftrace_regs(fregs)->regs;
+ regs = &arch_ftrace_regs(fregs)->regs;
+
+ /* Allow arch specific updates to regs. */
+ arch_ftrace_partial_regs(regs);
+ return regs;
}
#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0ceb4e09306c..623bee335383 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -7,6 +7,7 @@
#include <linux/mmzone.h>
#include <linux/topology.h>
#include <linux/alloc_tag.h>
+#include <linux/cleanup.h>
#include <linux/sched.h>
struct vm_area_struct;
@@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
/* This should be paired with folio_put() rather than free_contig_range(). */
#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
+DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
+
#endif /* __LINUX_GFP_H */
diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h
index 622a2939ebe0..87983a5f3681 100644
--- a/include/linux/gpio/regmap.h
+++ b/include/linux/gpio/regmap.h
@@ -38,6 +38,10 @@ struct regmap;
* offset to a register/bitmask pair. If not
* given the default gpio_regmap_simple_xlate()
* is used.
+ * @fixed_direction_output:
+ * (Optional) Bitmap representing the fixed direction of
+ * the GPIO lines. Useful when there are GPIO lines with a
+ * fixed direction mixed together in the same register.
* @drvdata: (Optional) Pointer to driver specific data which is
* not used by gpio-remap but is provided "as is" to the
* driver callback(s).
@@ -85,6 +89,7 @@ struct gpio_regmap_config {
int reg_stride;
int ngpio_per_reg;
struct irq_domain *irq_domain;
+ unsigned long *fixed_direction_output;
#ifdef CONFIG_REGMAP_IRQ
struct regmap_irq_chip *regmap_irq_chip;
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 105cc4c00cc3..abc20f9810fd 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -249,10 +249,12 @@ static inline void clear_highpage_kasan_tagged(struct page *page)
kunmap_local(kaddr);
}
-#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
+#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGES
-static inline void tag_clear_highpage(struct page *page)
+/* Return false to let people know we did not initialize the pages */
+static inline bool tag_clear_highpages(struct page *page, int numpages)
{
+ return false;
}
#endif
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f327d62fc985..71ac78b9f834 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -376,45 +376,30 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
struct list_head *list);
/*
- * try_folio_split - try to split a @folio at @page using non uniform split.
+ * try_folio_split_to_order - try to split a @folio at @page to @new_order using
+ * non uniform split.
* @folio: folio to be split
- * @page: split to order-0 at the given page
- * @list: store the after-split folios
+ * @page: split to @new_order at the given page
+ * @new_order: the target split order
*
- * Try to split a @folio at @page using non uniform split to order-0, if
- * non uniform split is not supported, fall back to uniform split.
+ * Try to split a @folio at @page using non uniform split to @new_order, if
+ * non uniform split is not supported, fall back to uniform split. After-split
+ * folios are put back to LRU list. Use min_order_for_split() to get the lower
+ * bound of @new_order.
*
* Return: 0: split is successful, otherwise split failed.
*/
-static inline int try_folio_split(struct folio *folio, struct page *page,
- struct list_head *list)
+static inline int try_folio_split_to_order(struct folio *folio,
+ struct page *page, unsigned int new_order)
{
- int ret = min_order_for_split(folio);
-
- if (ret < 0)
- return ret;
-
- if (!non_uniform_split_supported(folio, 0, false))
- return split_huge_page_to_list_to_order(&folio->page, list,
- ret);
- return folio_split(folio, ret, page, list);
+ if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
+ return split_huge_page_to_list_to_order(&folio->page, NULL,
+ new_order);
+ return folio_split(folio, new_order, page, NULL);
}
static inline int split_huge_page(struct page *page)
{
- struct folio *folio = page_folio(page);
- int ret = min_order_for_split(folio);
-
- if (ret < 0)
- return ret;
-
- /*
- * split_huge_page() locks the page before splitting and
- * expects the same page that has been split to be locked when
- * returned. split_folio(page_folio(page)) cannot be used here
- * because it converts the page to folio and passes the head
- * page to be split.
- */
- return split_huge_page_to_list_to_order(page, NULL, ret);
+ return split_huge_page_to_list_to_order(page, NULL, 0);
}
void deferred_split_folio(struct folio *folio, bool partially_mapped);
@@ -597,14 +582,20 @@ static inline int split_huge_page(struct page *page)
return -EINVAL;
}
+static inline int min_order_for_split(struct folio *folio)
+{
+ VM_WARN_ON_ONCE_FOLIO(1, folio);
+ return -EINVAL;
+}
+
static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
{
VM_WARN_ON_ONCE_FOLIO(1, folio);
return -EINVAL;
}
-static inline int try_folio_split(struct folio *folio, struct page *page,
- struct list_head *list)
+static inline int try_folio_split_to_order(struct folio *folio,
+ struct page *page, unsigned int new_order)
{
VM_WARN_ON_ONCE_FOLIO(1, folio);
return -EINVAL;
diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h
index 34e615c76ca5..c4403eeb7144 100644
--- a/include/linux/hung_task.h
+++ b/include/linux/hung_task.h
@@ -20,6 +20,10 @@
* always zero. So we can use these bits to encode the specific blocking
* type.
*
+ * Note that on architectures where this is not guaranteed, or for any
+ * unaligned lock, this tracking mechanism is silently skipped for that
+ * lock.
+ *
* Type encoding:
* 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX)
* 01 - Blocked on semaphore (BLOCKER_TYPE_SEM)
@@ -45,7 +49,7 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type)
* If the lock pointer matches the BLOCKER_TYPE_MASK, return
* without writing anything.
*/
- if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK))
+ if (lock_ptr & BLOCKER_TYPE_MASK)
return;
WRITE_ONCE(current->blocker, lock_ptr | type);
@@ -53,8 +57,6 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type)
static inline void hung_task_clear_blocker(void)
{
- WARN_ON_ONCE(!READ_ONCE(current->blocker));
-
WRITE_ONCE(current->blocker, 0UL);
}
diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h
index 5eb66a399002..4f33e6a39797 100644
--- a/include/linux/iio/buffer-dma.h
+++ b/include/linux/iio/buffer-dma.h
@@ -174,5 +174,6 @@ int iio_dma_buffer_enqueue_dmabuf(struct iio_buffer *buffer,
size_t size, bool cyclic);
void iio_dma_buffer_lock_queue(struct iio_buffer *buffer);
void iio_dma_buffer_unlock_queue(struct iio_buffer *buffer);
+struct device *iio_dma_buffer_get_dma_dev(struct iio_buffer *buffer);
#endif
diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h
index e72552e026f3..8d770ced66b2 100644
--- a/include/linux/iio/buffer_impl.h
+++ b/include/linux/iio/buffer_impl.h
@@ -50,6 +50,7 @@ struct sg_table;
* @enqueue_dmabuf: called from userspace via ioctl to queue this DMABUF
* object to this buffer. Requires a valid DMABUF fd, that
* was previouly attached to this buffer.
+ * @get_dma_dev: called to get the DMA channel associated with this buffer.
* @lock_queue: called when the core needs to lock the buffer queue;
* it is used when enqueueing DMABUF objects.
* @unlock_queue: used to unlock a previously locked buffer queue
@@ -90,6 +91,7 @@ struct iio_buffer_access_funcs {
struct iio_dma_buffer_block *block,
struct dma_fence *fence, struct sg_table *sgt,
size_t size, bool cyclic);
+ struct device * (*get_dma_dev)(struct iio_buffer *buffer);
void (*lock_queue)(struct iio_buffer *buffer);
void (*unlock_queue)(struct iio_buffer *buffer);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index bccb3f1f6262..a6cb241ea00c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -25,7 +25,6 @@
extern struct files_struct init_files;
extern struct fs_struct init_fs;
extern struct nsproxy init_nsproxy;
-extern struct cred init_cred;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define INIT_PREV_CPUTIME(x) .prev_cputime = { \
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 73dceabc21c8..520e967cb501 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/mm_types.h>
#include <linux/blkdev.h>
+#include <linux/pagevec.h>
struct address_space;
struct fiemap_extent_info;
@@ -16,6 +17,7 @@ struct inode;
struct iomap_iter;
struct iomap_dio;
struct iomap_writepage_ctx;
+struct iomap_read_folio_ctx;
struct iov_iter;
struct kiocb;
struct page;
@@ -241,11 +243,12 @@ struct iomap_iter {
unsigned flags;
struct iomap iomap;
struct iomap srcmap;
+ struct folio_batch *fbatch;
void *private;
};
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
-int iomap_iter_advance(struct iomap_iter *iter, u64 *count);
+int iomap_iter_advance(struct iomap_iter *iter, u64 count);
/**
* iomap_length_trim - trimmed length of the current iomap iteration
@@ -282,9 +285,7 @@ static inline u64 iomap_length(const struct iomap_iter *iter)
*/
static inline int iomap_iter_advance_full(struct iomap_iter *iter)
{
- u64 length = iomap_length(iter);
-
- return iomap_iter_advance(iter, &length);
+ return iomap_iter_advance(iter, iomap_length(iter));
}
/**
@@ -339,8 +340,10 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter)
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops,
const struct iomap_write_ops *write_ops, void *private);
-int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
-void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
+void iomap_read_folio(const struct iomap_ops *ops,
+ struct iomap_read_folio_ctx *ctx);
+void iomap_readahead(const struct iomap_ops *ops,
+ struct iomap_read_folio_ctx *ctx);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
@@ -349,6 +352,8 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops,
const struct iomap_write_ops *write_ops);
+loff_t iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t offset,
+ loff_t length);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
bool *did_zero, const struct iomap_ops *ops,
const struct iomap_write_ops *write_ops, void *private);
@@ -430,6 +435,10 @@ struct iomap_writeback_ops {
* An existing mapping from a previous call to this method can be reused
* by the file system if it is still valid.
*
+ * If this succeeds, iomap_finish_folio_write() must be called once
+ * writeback completes for the range, regardless of whether the
+ * writeback succeeded or failed.
+ *
* Returns the number of bytes processed or a negative errno.
*/
ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc,
@@ -467,14 +476,41 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
loff_t pos, loff_t end_pos, unsigned int dirty_len);
int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error);
-void iomap_start_folio_write(struct inode *inode, struct folio *folio,
- size_t len);
+void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
+ int error);
void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
size_t len);
int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio);
int iomap_writepages(struct iomap_writepage_ctx *wpc);
+struct iomap_read_folio_ctx {
+ const struct iomap_read_ops *ops;
+ struct folio *cur_folio;
+ struct readahead_control *rac;
+ void *read_ctx;
+};
+
+struct iomap_read_ops {
+ /*
+ * Read in a folio range.
+ *
+ * If this succeeds, iomap_finish_folio_read() must be called after the
+ * range is read in, regardless of whether the read succeeded or failed.
+ *
+ * Returns 0 on success or a negative error on failure.
+ */
+ int (*read_folio_range)(const struct iomap_iter *iter,
+ struct iomap_read_folio_ctx *ctx, size_t len);
+
+ /*
+ * Submit any pending read requests.
+ *
+ * This is optional.
+ */
+ void (*submit_read)(struct iomap_read_folio_ctx *ctx);
+};
+
/*
* Flags for direct I/O ->end_io:
*/
@@ -518,6 +554,14 @@ struct iomap_dio_ops {
*/
#define IOMAP_DIO_PARTIAL (1 << 2)
+/*
+ * Ensure each bio is aligned to fs block size.
+ *
+ * For filesystems which need to calculate/verify the checksum of each fs
+ * block. Otherwise they may not be able to handle unaligned bios.
+ */
+#define IOMAP_DIO_FSBLOCK_ALIGNED (1 << 3)
+
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
unsigned int dio_flags, void *private, size_t done_before);
@@ -540,4 +584,30 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
extern struct bio_set iomap_ioend_bioset;
+#ifdef CONFIG_BLOCK
+extern const struct iomap_read_ops iomap_bio_read_ops;
+
+static inline void iomap_bio_read_folio(struct folio *folio,
+ const struct iomap_ops *ops)
+{
+ struct iomap_read_folio_ctx ctx = {
+ .ops = &iomap_bio_read_ops,
+ .cur_folio = folio,
+ };
+
+ iomap_read_folio(ops, &ctx);
+}
+
+static inline void iomap_bio_readahead(struct readahead_control *rac,
+ const struct iomap_ops *ops)
+{
+ struct iomap_read_folio_ctx ctx = {
+ .ops = &iomap_bio_read_ops,
+ .rac = rac,
+ };
+
+ iomap_readahead(ops, &ctx);
+}
+#endif /* CONFIG_BLOCK */
+
#endif /* LINUX_IOMAP_H */
diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h
index 4c1a91b07de3..e1555e06e7e5 100644
--- a/include/linux/mailbox/mtk-cmdq-mailbox.h
+++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
@@ -77,6 +77,16 @@ struct cmdq_pkt {
size_t buf_size; /* real buffer size */
};
+/**
+ * cmdq_get_shift_pa() - get the shift bits of physical address
+ * @chan: mailbox channel
+ *
+ * GCE can only fetch the command buffer address from a 32-bit register.
+ * Some SOCs support more than 32-bit command buffer address for GCE, which
+ * requires some shift bits to make the address fit into the 32-bit register.
+ *
+ * Return: the shift bits of physical address
+ */
u8 cmdq_get_shift_pa(struct mbox_chan *chan);
#endif /* __MTK_CMDQ_MAILBOX_H__ */
diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h
index 62674c83bde4..48e2ff95332f 100644
--- a/include/linux/map_benchmark.h
+++ b/include/linux/map_benchmark.h
@@ -27,5 +27,6 @@ struct map_benchmark {
__u32 dma_dir; /* DMA data direction */
__u32 dma_trans_ns; /* time for DMA transmission in ns */
__u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
+ __u8 expansion[76]; /* For future use */
};
#endif /* _KERNEL_DMA_BENCHMARK_H */
diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index 71cf5bfc6349..0cb36a3ffc47 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -19,7 +19,7 @@ enum misc_res_type {
MISC_CG_RES_SEV_ES,
#endif
#ifdef CONFIG_INTEL_TDX_HOST
- /* Intel TDX HKIDs resource */
+ /** @MISC_CG_RES_TDX: Intel TDX HKIDs resource */
MISC_CG_RES_TDX,
#endif
/** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 7ef2c7c7d803..9d47cdc727ad 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -183,6 +183,7 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
complete(&cq->free);
}
+void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe);
int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen, u32 *out, int outlen);
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 07614cd95bed..1b0b36aa2a76 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10833,7 +10833,9 @@ struct mlx5_ifc_pcam_regs_5000_to_507f_bits {
u8 port_access_reg_cap_mask_127_to_96[0x20];
u8 port_access_reg_cap_mask_95_to_64[0x20];
- u8 port_access_reg_cap_mask_63_to_36[0x1c];
+ u8 port_access_reg_cap_mask_63[0x1];
+ u8 pphcr[0x1];
+ u8 port_access_reg_cap_mask_61_to_36[0x1a];
u8 pplm[0x1];
u8 port_access_reg_cap_mask_34_to_32[0x3];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d16b33bacc32..ed3d0cc1ebe5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2074,7 +2074,7 @@ static inline unsigned long folio_nr_pages(const struct folio *folio)
return folio_large_nr_pages(folio);
}
-#if !defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE)
+#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS)
/*
* We don't expect any folios that exceed buddy sizes (and consequently
* memory sections).
@@ -2087,10 +2087,17 @@ static inline unsigned long folio_nr_pages(const struct folio *folio)
* pages are guaranteed to be contiguous.
*/
#define MAX_FOLIO_ORDER PFN_SECTION_SHIFT
-#else
+#elif defined(CONFIG_HUGETLB_PAGE)
/*
* There is no real limit on the folio size. We limit them to the maximum we
- * currently expect (e.g., hugetlb, dax).
+ * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect
+ * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit.
+ */
+#define MAX_FOLIO_ORDER get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G)
+#else
+/*
+ * Without hugetlb, gigantic folios that are bigger than a single PUD are
+ * currently impossible.
*/
#define MAX_FOLIO_ORDER PUD_ORDER
#endif
@@ -3495,10 +3502,10 @@ struct vm_unmapped_area_info {
extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
/* truncate.c */
-extern void truncate_inode_pages(struct address_space *, loff_t);
-extern void truncate_inode_pages_range(struct address_space *,
- loff_t lstart, loff_t lend);
-extern void truncate_inode_pages_final(struct address_space *);
+void truncate_inode_pages(struct address_space *mapping, loff_t lstart);
+void truncate_inode_pages_range(struct address_space *mapping, loff_t lstart,
+ uoff_t lend);
+void truncate_inode_pages_final(struct address_space *mapping);
/* generic vm_area_ops exported for stackable file systems */
extern vm_fault_t filemap_fault(struct vm_fault *vmf);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index fed86221c69c..58600cf234bc 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -7,6 +7,7 @@
#include <linux/path.h>
#include <linux/fcntl.h>
#include <linux/errno.h>
+#include <linux/fs_struct.h>
enum { MAX_NESTED_LINKS = 8 };
@@ -88,6 +89,81 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
struct qstr *name,
struct dentry *base);
+struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name);
+struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name);
+struct dentry *start_creating_killable(struct mnt_idmap *idmap,
+ struct dentry *parent,
+ struct qstr *name);
+struct dentry *start_removing_killable(struct mnt_idmap *idmap,
+ struct dentry *parent,
+ struct qstr *name);
+struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
+struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
+struct dentry *start_creating_dentry(struct dentry *parent,
+ struct dentry *child);
+struct dentry *start_removing_dentry(struct dentry *parent,
+ struct dentry *child);
+
+/* end_creating - finish action started with start_creating
+ * @child: dentry returned by start_creating() or vfs_mkdir()
+ *
+ * Unlock and release the child. This can be called after
+ * start_creating() whether that function succeeded or not,
+ * but it is not needed on failure.
+ *
+ * If vfs_mkdir() was called then the value returned from that function
+ * should be given for @child rather than the original dentry, as vfs_mkdir()
+ * may have provided a new dentry.
+ *
+ *
+ * If vfs_mkdir() was not called, then @child will be a valid dentry and
+ * @parent will be ignored.
+ */
+static inline void end_creating(struct dentry *child)
+{
+ end_dirop(child);
+}
+
+/* end_creating_keep - finish action started with start_creating() and return result
+ * @child: dentry returned by start_creating() or vfs_mkdir()
+ *
+ * Unlock and return the child. This can be called after
+ * start_creating() whether that function succeeded or not,
+ * but it is not needed on failure.
+ *
+ * If vfs_mkdir() was called then the value returned from that function
+ * should be given for @child rather than the original dentry, as vfs_mkdir()
+ * may have provided a new dentry.
+ *
+ * Returns: @child, which may be a dentry or an error.
+ *
+ */
+static inline struct dentry *end_creating_keep(struct dentry *child)
+{
+ if (!IS_ERR(child))
+ dget(child);
+ end_dirop(child);
+ return child;
+}
+
+/**
+ * end_removing - finish action started with start_removing
+ * @child: dentry returned by start_removing()
+ * @parent: dentry given to start_removing()
+ *
+ * Unlock and release the child.
+ *
+ * This is identical to end_dirop(). It can be passed the result of
+ * start_removing() whether that was successful or not, but it not needed
+ * if start_removing() failed.
+ */
+static inline void end_removing(struct dentry *child)
+{
+ end_dirop(child);
+}
+
extern int follow_down_one(struct path *);
extern int follow_down(struct path *path, unsigned int flags);
extern int follow_up(struct path *);
@@ -95,6 +171,13 @@ extern int follow_up(struct path *);
extern struct dentry *lock_rename(struct dentry *, struct dentry *);
extern struct dentry *lock_rename_child(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *);
+int start_renaming(struct renamedata *rd, int lookup_flags,
+ struct qstr *old_last, struct qstr *new_last);
+int start_renaming_dentry(struct renamedata *rd, int lookup_flags,
+ struct dentry *old_dentry, struct qstr *new_last);
+int start_renaming_two_dentries(struct renamedata *rd,
+ struct dentry *old_dentry, struct dentry *new_dentry);
+void end_renaming(struct renamedata *rd);
/**
* mode_strip_umask - handle vfs umask stripping
diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h
index 36b13fabca9e..7273c78c826b 100644
--- a/include/linux/net/intel/libie/fwlog.h
+++ b/include/linux/net/intel/libie/fwlog.h
@@ -78,8 +78,20 @@ struct libie_fwlog {
);
};
+#if IS_ENABLED(CONFIG_LIBIE_FWLOG)
int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api);
void libie_fwlog_deinit(struct libie_fwlog *fwlog);
void libie_fwlog_reregister(struct libie_fwlog *fwlog);
void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len);
+#else
+static inline int libie_fwlog_init(struct libie_fwlog *fwlog,
+ struct libie_fwlog_api *api)
+{
+ return -EOPNOTSUPP;
+}
+static inline void libie_fwlog_deinit(struct libie_fwlog *fwlog) { }
+static inline void libie_fwlog_reregister(struct libie_fwlog *fwlog) { }
+static inline void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf,
+ u16 len) { }
+#endif /* CONFIG_LIBIE_FWLOG */
#endif /* _LIBIE_FWLOG_H_ */
diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h
new file mode 100644
index 000000000000..b332b019b29c
--- /dev/null
+++ b/include/linux/ns/ns_common_types.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NS_COMMON_TYPES_H
+#define _LINUX_NS_COMMON_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/ns/nstree_types.h>
+#include <linux/rbtree.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+
+struct cgroup_namespace;
+struct dentry;
+struct ipc_namespace;
+struct mnt_namespace;
+struct net;
+struct pid_namespace;
+struct proc_ns_operations;
+struct time_namespace;
+struct user_namespace;
+struct uts_namespace;
+
+extern struct cgroup_namespace init_cgroup_ns;
+extern struct ipc_namespace init_ipc_ns;
+extern struct mnt_namespace init_mnt_ns;
+extern struct net init_net;
+extern struct pid_namespace init_pid_ns;
+extern struct time_namespace init_time_ns;
+extern struct user_namespace init_user_ns;
+extern struct uts_namespace init_uts_ns;
+
+extern const struct proc_ns_operations cgroupns_operations;
+extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations mntns_operations;
+extern const struct proc_ns_operations netns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
+extern const struct proc_ns_operations timens_operations;
+extern const struct proc_ns_operations timens_for_children_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations utsns_operations;
+
+/*
+ * Namespace lifetimes are managed via a two-tier reference counting model:
+ *
+ * (1) __ns_ref (refcount_t): Main reference count tracking memory
+ * lifetime. Controls when the namespace structure itself is freed.
+ * It also pins the namespace on the namespace trees whereas (2)
+ * only regulates their visibility to userspace.
+ *
+ * (2) __ns_ref_active (atomic_t): Reference count tracking active users.
+ * Controls visibility of the namespace in the namespace trees.
+ * Any live task that uses the namespace (via nsproxy or cred) holds
+ * an active reference. Any open file descriptor or bind-mount of
+ * the namespace holds an active reference. Once all tasks have
+ * called exited their namespaces and all file descriptors and
+ * bind-mounts have been released the active reference count drops
+ * to zero and the namespace becomes inactive. IOW, the namespace
+ * cannot be listed or opened via file handles anymore.
+ *
+ * Note that it is valid to transition from active to inactive and
+ * back from inactive to active e.g., when resurrecting an inactive
+ * namespace tree via the SIOCGSKNS ioctl().
+ *
+ * Relationship and lifecycle states:
+ *
+ * - Active (__ns_ref_active > 0):
+ * Namespace is actively used and visible to userspace. The namespace
+ * can be reopened via /proc/<pid>/ns/<ns_type>, via namespace file
+ * handles, or discovered via listns().
+ *
+ * - Inactive (__ns_ref_active == 0, __ns_ref > 0):
+ * No tasks are actively using the namespace and it isn't pinned by
+ * any bind-mounts or open file descriptors anymore. But the namespace
+ * is still kept alive by internal references. For example, the user
+ * namespace could be pinned by an open file through file->f_cred
+ * references when one of the now defunct tasks had opened a file and
+ * handed the file descriptor off to another process via a UNIX
+ * sockets. Such references keep the namespace structure alive through
+ * __ns_ref but will not hold an active reference.
+ *
+ * - Destroyed (__ns_ref == 0):
+ * No references remain. The namespace is removed from the tree and freed.
+ *
+ * State transitions:
+ *
+ * Active -> Inactive:
+ * When the last task using the namespace exits it drops its active
+ * references to all namespaces. However, user and pid namespaces
+ * remain accessible until the task has been reaped.
+ *
+ * Inactive -> Active:
+ * An inactive namespace tree might be resurrected due to e.g., the
+ * SIOCGSKNS ioctl() on a socket.
+ *
+ * Inactive -> Destroyed:
+ * When __ns_ref drops to zero the namespace is removed from the
+ * namespaces trees and the memory is freed (after RCU grace period).
+ *
+ * Initial namespaces:
+ * Boot-time namespaces (init_net, init_pid_ns, etc.) start with
+ * __ns_ref_active = 1 and remain active forever.
+ *
+ * @ns_type: type of namespace (e.g., CLONE_NEWNET)
+ * @stashed: cached dentry to be used by the vfs
+ * @ops: namespace operations
+ * @inum: namespace inode number (quickly recycled for non-initial namespaces)
+ * @__ns_ref: main reference count (do not use directly)
+ * @ns_tree: namespace tree nodes and active reference count
+ */
+struct ns_common {
+ u32 ns_type;
+ struct dentry *stashed;
+ const struct proc_ns_operations *ops;
+ unsigned int inum;
+ refcount_t __ns_ref; /* do not use directly */
+ union {
+ struct ns_tree;
+ struct rcu_head ns_rcu;
+ };
+};
+
+#define to_ns_common(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(__ns)->ns, \
+ const struct cgroup_namespace *: &(__ns)->ns, \
+ struct ipc_namespace *: &(__ns)->ns, \
+ const struct ipc_namespace *: &(__ns)->ns, \
+ struct mnt_namespace *: &(__ns)->ns, \
+ const struct mnt_namespace *: &(__ns)->ns, \
+ struct net *: &(__ns)->ns, \
+ const struct net *: &(__ns)->ns, \
+ struct pid_namespace *: &(__ns)->ns, \
+ const struct pid_namespace *: &(__ns)->ns, \
+ struct time_namespace *: &(__ns)->ns, \
+ const struct time_namespace *: &(__ns)->ns, \
+ struct user_namespace *: &(__ns)->ns, \
+ const struct user_namespace *: &(__ns)->ns, \
+ struct uts_namespace *: &(__ns)->ns, \
+ const struct uts_namespace *: &(__ns)->ns)
+
+#define ns_init_inum(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
+ struct ipc_namespace *: IPC_NS_INIT_INO, \
+ struct mnt_namespace *: MNT_NS_INIT_INO, \
+ struct net *: NET_NS_INIT_INO, \
+ struct pid_namespace *: PID_NS_INIT_INO, \
+ struct time_namespace *: TIME_NS_INIT_INO, \
+ struct user_namespace *: USER_NS_INIT_INO, \
+ struct uts_namespace *: UTS_NS_INIT_INO)
+
+#define ns_init_ns(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &init_cgroup_ns, \
+ struct ipc_namespace *: &init_ipc_ns, \
+ struct mnt_namespace *: &init_mnt_ns, \
+ struct net *: &init_net, \
+ struct pid_namespace *: &init_pid_ns, \
+ struct time_namespace *: &init_time_ns, \
+ struct user_namespace *: &init_user_ns, \
+ struct uts_namespace *: &init_uts_ns)
+
+#define ns_init_id(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CGROUP_NS_INIT_ID, \
+ struct ipc_namespace *: IPC_NS_INIT_ID, \
+ struct mnt_namespace *: MNT_NS_INIT_ID, \
+ struct net *: NET_NS_INIT_ID, \
+ struct pid_namespace *: PID_NS_INIT_ID, \
+ struct time_namespace *: TIME_NS_INIT_ID, \
+ struct user_namespace *: USER_NS_INIT_ID, \
+ struct uts_namespace *: UTS_NS_INIT_ID)
+
+#define to_ns_operations(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
+ struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
+ struct mnt_namespace *: &mntns_operations, \
+ struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
+ struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
+ struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
+ struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
+ struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
+
+#define ns_common_type(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CLONE_NEWCGROUP, \
+ struct ipc_namespace *: CLONE_NEWIPC, \
+ struct mnt_namespace *: CLONE_NEWNS, \
+ struct net *: CLONE_NEWNET, \
+ struct pid_namespace *: CLONE_NEWPID, \
+ struct time_namespace *: CLONE_NEWTIME, \
+ struct user_namespace *: CLONE_NEWUSER, \
+ struct uts_namespace *: CLONE_NEWUTS)
+
+#endif /* _LINUX_NS_COMMON_TYPES_H */
diff --git a/include/linux/ns/nstree_types.h b/include/linux/ns/nstree_types.h
new file mode 100644
index 000000000000..2fb28ee31efb
--- /dev/null
+++ b/include/linux/ns/nstree_types.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
+#ifndef _LINUX_NSTREE_TYPES_H
+#define _LINUX_NSTREE_TYPES_H
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+/**
+ * struct ns_tree_root - Root of a namespace tree
+ * @ns_rb: Red-black tree root for efficient lookups
+ * @ns_list_head: List head for sequential iteration
+ *
+ * Each namespace tree maintains both an rbtree (for O(log n) lookups)
+ * and a list (for efficient sequential iteration). The list is kept in
+ * the same sorted order as the rbtree.
+ */
+struct ns_tree_root {
+ struct rb_root ns_rb;
+ struct list_head ns_list_head;
+};
+
+/**
+ * struct ns_tree_node - Node in a namespace tree
+ * @ns_node: Red-black tree node
+ * @ns_list_entry: List entry for sequential iteration
+ *
+ * Represents a namespace's position in a tree. Each namespace has
+ * multiple tree nodes for different trees (unified, per-type, owner).
+ */
+struct ns_tree_node {
+ struct rb_node ns_node;
+ struct list_head ns_list_entry;
+};
+
+/**
+ * struct ns_tree - Namespace tree nodes and active reference count
+ * @ns_id: Unique namespace identifier
+ * @__ns_ref_active: Active reference count (do not use directly)
+ * @ns_unified_node: Node in the global namespace tree
+ * @ns_tree_node: Node in the per-type namespace tree
+ * @ns_owner_node: Node in the owner namespace's tree of owned namespaces
+ * @ns_owner_root: Root of the tree of namespaces owned by this namespace
+ * (only used when this namespace is an owner)
+ */
+struct ns_tree {
+ u64 ns_id;
+ atomic_t __ns_ref_active;
+ struct ns_tree_node ns_unified_node;
+ struct ns_tree_node ns_tree_node;
+ struct ns_tree_node ns_owner_node;
+ struct ns_tree_root ns_owner_root;
+};
+
+#endif /* _LINUX_NSTREE_TYPES_H */
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index f5b68b8abb54..825f5865bfc5 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -2,122 +2,44 @@
#ifndef _LINUX_NS_COMMON_H
#define _LINUX_NS_COMMON_H
+#include <linux/ns/ns_common_types.h>
#include <linux/refcount.h>
-#include <linux/rbtree.h>
+#include <linux/vfsdebug.h>
#include <uapi/linux/sched.h>
+#include <uapi/linux/nsfs.h>
-struct proc_ns_operations;
-
-struct cgroup_namespace;
-struct ipc_namespace;
-struct mnt_namespace;
-struct net;
-struct pid_namespace;
-struct time_namespace;
-struct user_namespace;
-struct uts_namespace;
-
-extern struct cgroup_namespace init_cgroup_ns;
-extern struct ipc_namespace init_ipc_ns;
-extern struct mnt_namespace init_mnt_ns;
-extern struct net init_net;
-extern struct pid_namespace init_pid_ns;
-extern struct time_namespace init_time_ns;
-extern struct user_namespace init_user_ns;
-extern struct uts_namespace init_uts_ns;
-
-extern const struct proc_ns_operations netns_operations;
-extern const struct proc_ns_operations utsns_operations;
-extern const struct proc_ns_operations ipcns_operations;
-extern const struct proc_ns_operations pidns_operations;
-extern const struct proc_ns_operations pidns_for_children_operations;
-extern const struct proc_ns_operations userns_operations;
-extern const struct proc_ns_operations mntns_operations;
-extern const struct proc_ns_operations cgroupns_operations;
-extern const struct proc_ns_operations timens_operations;
-extern const struct proc_ns_operations timens_for_children_operations;
-
-struct ns_common {
- u32 ns_type;
- struct dentry *stashed;
- const struct proc_ns_operations *ops;
- unsigned int inum;
- refcount_t __ns_ref; /* do not use directly */
- union {
- struct {
- u64 ns_id;
- struct rb_node ns_tree_node;
- struct list_head ns_list_node;
- };
- struct rcu_head ns_rcu;
- };
-};
-
+bool is_current_namespace(struct ns_common *ns);
int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum);
void __ns_common_free(struct ns_common *ns);
+struct ns_common *__must_check ns_owner(struct ns_common *ns);
+
+static __always_inline bool is_ns_init_inum(const struct ns_common *ns)
+{
+ VFS_WARN_ON_ONCE(ns->inum == 0);
+ return unlikely(in_range(ns->inum, MNT_NS_INIT_INO,
+ IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1));
+}
+
+static __always_inline bool is_ns_init_id(const struct ns_common *ns)
+{
+ VFS_WARN_ON_ONCE(ns->ns_id == 0);
+ return ns->ns_id <= NS_LAST_INIT_ID;
+}
-#define to_ns_common(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: &(__ns)->ns, \
- const struct cgroup_namespace *: &(__ns)->ns, \
- struct ipc_namespace *: &(__ns)->ns, \
- const struct ipc_namespace *: &(__ns)->ns, \
- struct mnt_namespace *: &(__ns)->ns, \
- const struct mnt_namespace *: &(__ns)->ns, \
- struct net *: &(__ns)->ns, \
- const struct net *: &(__ns)->ns, \
- struct pid_namespace *: &(__ns)->ns, \
- const struct pid_namespace *: &(__ns)->ns, \
- struct time_namespace *: &(__ns)->ns, \
- const struct time_namespace *: &(__ns)->ns, \
- struct user_namespace *: &(__ns)->ns, \
- const struct user_namespace *: &(__ns)->ns, \
- struct uts_namespace *: &(__ns)->ns, \
- const struct uts_namespace *: &(__ns)->ns)
-
-#define ns_init_inum(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
- struct ipc_namespace *: IPC_NS_INIT_INO, \
- struct mnt_namespace *: MNT_NS_INIT_INO, \
- struct net *: NET_NS_INIT_INO, \
- struct pid_namespace *: PID_NS_INIT_INO, \
- struct time_namespace *: TIME_NS_INIT_INO, \
- struct user_namespace *: USER_NS_INIT_INO, \
- struct uts_namespace *: UTS_NS_INIT_INO)
-
-#define ns_init_ns(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: &init_cgroup_ns, \
- struct ipc_namespace *: &init_ipc_ns, \
- struct mnt_namespace *: &init_mnt_ns, \
- struct net *: &init_net, \
- struct pid_namespace *: &init_pid_ns, \
- struct time_namespace *: &init_time_ns, \
- struct user_namespace *: &init_user_ns, \
- struct uts_namespace *: &init_uts_ns)
-
-#define to_ns_operations(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
- struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
- struct mnt_namespace *: &mntns_operations, \
- struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
- struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
- struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
- struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
- struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
-
-#define ns_common_type(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: CLONE_NEWCGROUP, \
- struct ipc_namespace *: CLONE_NEWIPC, \
- struct mnt_namespace *: CLONE_NEWNS, \
- struct net *: CLONE_NEWNET, \
- struct pid_namespace *: CLONE_NEWPID, \
- struct time_namespace *: CLONE_NEWTIME, \
- struct user_namespace *: CLONE_NEWUSER, \
- struct uts_namespace *: CLONE_NEWUTS)
+#define NS_COMMON_INIT(nsname) \
+{ \
+ .ns_type = ns_common_type(&nsname), \
+ .ns_id = ns_init_id(&nsname), \
+ .inum = ns_init_inum(&nsname), \
+ .ops = to_ns_operations(&nsname), \
+ .stashed = NULL, \
+ .__ns_ref = REFCOUNT_INIT(1), \
+ .__ns_ref_active = ATOMIC_INIT(1), \
+ .ns_unified_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_unified_node.ns_list_entry), \
+ .ns_tree_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_tree_node.ns_list_entry), \
+ .ns_owner_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_node.ns_list_entry), \
+ .ns_owner_root.ns_list_head = LIST_HEAD_INIT(nsname.ns.ns_owner_root.ns_list_head), \
+}
#define ns_common_init(__ns) \
__ns_common_init(to_ns_common(__ns), \
@@ -133,21 +55,96 @@ void __ns_common_free(struct ns_common *ns);
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
+static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
+{
+ return atomic_read(&ns->__ns_ref_active);
+}
+
+static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns)
+{
+ return refcount_read(&ns->__ns_ref);
+}
+
static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
{
- return refcount_dec_and_test(&ns->__ns_ref);
+ if (is_ns_init_id(ns)) {
+ VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
+ return false;
+ }
+ if (refcount_dec_and_test(&ns->__ns_ref)) {
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns));
+ return true;
+ }
+ return false;
}
static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
{
- return refcount_inc_not_zero(&ns->__ns_ref);
+ if (is_ns_init_id(ns)) {
+ VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
+ return true;
+ }
+ if (refcount_inc_not_zero(&ns->__ns_ref))
+ return true;
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns));
+ return false;
}
-#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref)
-#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref)
-#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns)))
-#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns)))
-#define ns_ref_put_and_lock(__ns, __lock) \
- refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock))
+static __always_inline void __ns_ref_inc(struct ns_common *ns)
+{
+ if (is_ns_init_id(ns)) {
+ VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
+ return;
+ }
+ refcount_inc(&ns->__ns_ref);
+}
+
+static __always_inline __must_check bool __ns_ref_dec_and_lock(struct ns_common *ns,
+ spinlock_t *ns_lock)
+{
+ if (is_ns_init_id(ns)) {
+ VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
+ return false;
+ }
+ return refcount_dec_and_lock(&ns->__ns_ref, ns_lock);
+}
+
+#define ns_ref_read(__ns) __ns_ref_read(to_ns_common((__ns)))
+#define ns_ref_inc(__ns) \
+ do { if (__ns) __ns_ref_inc(to_ns_common((__ns))); } while (0)
+#define ns_ref_get(__ns) \
+ ((__ns) ? __ns_ref_get(to_ns_common((__ns))) : false)
+#define ns_ref_put(__ns) \
+ ((__ns) ? __ns_ref_put(to_ns_common((__ns))) : false)
+#define ns_ref_put_and_lock(__ns, __ns_lock) \
+ ((__ns) ? __ns_ref_dec_and_lock(to_ns_common((__ns)), __ns_lock) : false)
+
+#define ns_ref_active_read(__ns) \
+ ((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0)
+
+void __ns_ref_active_put(struct ns_common *ns);
+
+#define ns_ref_active_put(__ns) \
+ do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0)
+
+static __always_inline struct ns_common *__must_check ns_get_unless_inactive(struct ns_common *ns)
+{
+ if (!__ns_ref_active_read(ns)) {
+ VFS_WARN_ON_ONCE(is_ns_init_id(ns));
+ return NULL;
+ }
+ if (!__ns_ref_get(ns))
+ return NULL;
+ return ns;
+}
+
+void __ns_ref_active_get(struct ns_common *ns);
+
+#define ns_ref_active_get(__ns) \
+ do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0)
#endif
diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h
index e5a5fa83d36b..731b67fc2fec 100644
--- a/include/linux/nsfs.h
+++ b/include/linux/nsfs.h
@@ -37,4 +37,7 @@ void nsfs_init(void);
#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns)
+void nsproxy_ns_active_get(struct nsproxy *ns);
+void nsproxy_ns_active_put(struct nsproxy *ns);
+
#endif /* _LINUX_NSFS_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index bd118a187dec..5a67648721c7 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -93,10 +93,13 @@ static inline struct cred *nsset_cred(struct nsset *set)
*/
int copy_namespaces(u64 flags, struct task_struct *tsk);
-void exit_task_namespaces(struct task_struct *tsk);
+void switch_cred_namespaces(const struct cred *old, const struct cred *new);
+void exit_nsproxy_namespaces(struct task_struct *tsk);
+void get_cred_namespaces(struct task_struct *tsk);
+void exit_cred_namespaces(struct task_struct *tsk);
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
int exec_task_namespaces(void);
-void free_nsproxy(struct nsproxy *ns);
+void deactivate_nsproxy(struct nsproxy *ns);
int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
struct cred *, struct fs_struct *);
int __init nsproxy_cache_init(void);
@@ -104,7 +107,7 @@ int __init nsproxy_cache_init(void);
static inline void put_nsproxy(struct nsproxy *ns)
{
if (refcount_dec_and_test(&ns->count))
- free_nsproxy(ns);
+ deactivate_nsproxy(ns);
}
static inline void get_nsproxy(struct nsproxy *ns)
diff --git a/include/linux/nstree.h b/include/linux/nstree.h
index 8b8636690473..175e4625bfa6 100644
--- a/include/linux/nstree.h
+++ b/include/linux/nstree.h
@@ -1,22 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
#ifndef _LINUX_NSTREE_H
#define _LINUX_NSTREE_H
-#include <linux/ns_common.h>
+#include <linux/ns/nstree_types.h>
#include <linux/nsproxy.h>
#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/rculist.h>
#include <linux/cookie.h>
+#include <uapi/linux/nsfs.h>
-extern struct ns_tree cgroup_ns_tree;
-extern struct ns_tree ipc_ns_tree;
-extern struct ns_tree mnt_ns_tree;
-extern struct ns_tree net_ns_tree;
-extern struct ns_tree pid_ns_tree;
-extern struct ns_tree time_ns_tree;
-extern struct ns_tree user_ns_tree;
-extern struct ns_tree uts_ns_tree;
+struct ns_common;
+
+extern struct ns_tree_root cgroup_ns_tree;
+extern struct ns_tree_root ipc_ns_tree;
+extern struct ns_tree_root mnt_ns_tree;
+extern struct ns_tree_root net_ns_tree;
+extern struct ns_tree_root pid_ns_tree;
+extern struct ns_tree_root time_ns_tree;
+extern struct ns_tree_root user_ns_tree;
+extern struct ns_tree_root uts_ns_tree;
+
+void ns_tree_node_init(struct ns_tree_node *node);
+void ns_tree_root_init(struct ns_tree_root *root);
+bool ns_tree_node_empty(const struct ns_tree_node *node);
+struct rb_node *ns_tree_node_add(struct ns_tree_node *node,
+ struct ns_tree_root *root,
+ int (*cmp)(struct rb_node *, const struct rb_node *));
+void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root);
#define to_ns_tree(__ns) \
_Generic((__ns), \
@@ -29,17 +41,21 @@ extern struct ns_tree uts_ns_tree;
struct user_namespace *: &(user_ns_tree), \
struct uts_namespace *: &(uts_ns_tree))
-u64 ns_tree_gen_id(struct ns_common *ns);
-void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree);
-void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree);
+#define ns_tree_gen_id(__ns) \
+ __ns_tree_gen_id(to_ns_common(__ns), \
+ (((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0))
+
+u64 __ns_tree_gen_id(struct ns_common *ns, u64 id);
+void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree);
+void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree);
struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type);
struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
- struct ns_tree *ns_tree,
+ struct ns_tree_root *ns_tree,
bool previous);
-static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
+static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_tree, u64 id)
{
- ns_tree_gen_id(ns);
+ __ns_tree_gen_id(ns, id);
__ns_tree_add_raw(ns, ns_tree);
}
@@ -59,7 +75,9 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
* This function assigns a new id to the namespace and adds it to the
* appropriate namespace tree and list.
*/
-#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns))
+#define ns_tree_add(__ns) \
+ __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns), \
+ (((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0))
/**
* ns_tree_remove - Remove a namespace from a namespace tree
@@ -73,6 +91,6 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
#define ns_tree_adjoined_rcu(__ns, __previous) \
__ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous)
-#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node))
+#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node.ns_node))
#endif /* _LINUX_NSTREE_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 09b581c1d878..e601a3144f28 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -38,6 +38,7 @@ int filemap_invalidate_pages(struct address_space *mapping,
int write_inode_now(struct inode *, int sync);
int filemap_fdatawrite(struct address_space *);
int filemap_flush(struct address_space *);
+int filemap_flush_nr(struct address_space *mapping, long *nr_to_write);
int filemap_fdatawait_keep_errors(struct address_space *mapping);
int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
@@ -53,14 +54,10 @@ static inline int filemap_fdatawait(struct address_space *mapping)
bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
-int __filemap_fdatawrite_range(struct address_space *mapping,
- loff_t start, loff_t end, int sync_mode);
int filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end);
int filemap_check_errors(struct address_space *mapping);
void __filemap_set_wb_err(struct address_space *mapping, int err);
-int filemap_fdatawrite_wbc(struct address_space *mapping,
- struct writeback_control *wbc);
int kiocb_write_and_wait(struct kiocb *iocb, size_t count);
static inline int filemap_write_and_wait(struct address_space *mapping)
@@ -942,6 +939,17 @@ static inline pgoff_t folio_next_index(const struct folio *folio)
}
/**
+ * folio_next_pos - Get the file position of the next folio.
+ * @folio: The current folio.
+ *
+ * Return: The position of the folio which follows this folio in the file.
+ */
+static inline loff_t folio_next_pos(const struct folio *folio)
+{
+ return (loff_t)folio_next_index(folio) << PAGE_SHIFT;
+}
+
+/**
* folio_file_page - The page for a particular index.
* @folio: The folio which contains this index.
* @index: The index we want to look up.
@@ -977,6 +985,8 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);
+unsigned filemap_get_folios_dirty(struct address_space *mapping,
+ pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
struct folio *read_cache_folio(struct address_space *, pgoff_t index,
filler_t *filler, struct file *file);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d1fdf81fbe1e..bf97d49c23cf 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -412,6 +412,8 @@ struct pci_dev {
u16 l1ss; /* L1SS Capability pointer */
#ifdef CONFIG_PCIEASPM
struct pcie_link_state *link_state; /* ASPM link state */
+ unsigned int aspm_l0s_support:1; /* ASPM L0s support */
+ unsigned int aspm_l1_support:1; /* ASPM L1 support */
unsigned int ltr_path:1; /* Latency Tolerance Reporting
supported from root to here */
#endif
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 445517a72ad0..0e7ae12c96d2 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -61,8 +61,7 @@ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
- if (ns != &init_pid_ns)
- ns_ref_inc(ns);
+ ns_ref_inc(ns);
return ns;
}
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 9d42d473d201..7f6a92ac9704 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -44,11 +44,11 @@ typedef unsigned int pipe_index_t;
typedef unsigned short pipe_index_t;
#endif
-/*
- * We have to declare this outside 'struct pipe_inode_info',
- * but then we can't use 'union pipe_index' for an anonymous
- * union, so we end up having to duplicate this declaration
- * below. Annoying.
+/**
+ * struct pipe_index - pipe indeces
+ * @head: The point of buffer production
+ * @tail: The point of buffer consumption
+ * @head_tail: unsigned long union of @head and @tail
*/
union pipe_index {
unsigned long head_tail;
@@ -63,9 +63,7 @@ union pipe_index {
* @mutex: mutex protecting the whole thing
* @rd_wait: reader wait point in case of empty pipe
* @wr_wait: writer wait point in case of full pipe
- * @head: The point of buffer production
- * @tail: The point of buffer consumption
- * @head_tail: unsigned long union of @head and @tail
+ * @pipe_index: the pipe indeces
* @note_loss: The next read() should insert a data-lost message
* @max_usage: The maximum number of slots that may be used in the ring
* @ring_size: total number of buffers (should be a power of 2)
@@ -87,14 +85,7 @@ struct pipe_inode_info {
struct mutex mutex;
wait_queue_head_t rd_wait, wr_wait;
- /* This has to match the 'union pipe_index' above */
- union {
- unsigned long head_tail;
- struct {
- pipe_index_t head;
- pipe_index_t tail;
- };
- };
+ union pipe_index;
unsigned int max_usage;
unsigned int ring_size;
diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h
index 1571e9157fa5..b1b837583d54 100644
--- a/include/linux/platform_data/x86/int3472.h
+++ b/include/linux/platform_data/x86/int3472.h
@@ -100,7 +100,6 @@ struct int3472_gpio_regulator {
struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2];
char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH];
char regulator_name[GPIO_REGULATOR_NAME_LENGTH];
- struct gpio_desc *ena_gpio;
struct regulator_dev *rdev;
struct regulator_desc rdesc;
};
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index a3f44f6c2da1..0b436e15f4cd 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -629,13 +629,13 @@ DEFINE_GUARD(pm_runtime_active_auto, struct device *,
* device.
*/
DEFINE_GUARD_COND(pm_runtime_active, _try,
- pm_runtime_get_active(_T, RPM_TRANSPARENT))
+ pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0)
DEFINE_GUARD_COND(pm_runtime_active, _try_enabled,
- pm_runtime_resume_and_get(_T))
+ pm_runtime_resume_and_get(_T), _RET == 0)
DEFINE_GUARD_COND(pm_runtime_active_auto, _try,
- pm_runtime_get_active(_T, RPM_TRANSPARENT))
+ pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0)
DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled,
- pm_runtime_resume_and_get(_T))
+ pm_runtime_resume_and_get(_T), _RET == 0)
/**
* pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0.
diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h
index 2503f7625d65..a651e60d9410 100644
--- a/include/linux/pseudo_fs.h
+++ b/include/linux/pseudo_fs.h
@@ -9,6 +9,7 @@ struct pseudo_fs_context {
const struct xattr_handler * const *xattr;
const struct dentry_operations *dops;
unsigned long magic;
+ unsigned int s_d_flags;
};
struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 4e1ac1fbcec4..55343795644b 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1643,7 +1643,7 @@ struct regmap_irq_chip_data;
* @status_invert: Inverted status register: cleared bits are active interrupts.
* @status_is_level: Status register is actuall signal level: Xor status
* register with previous value to get active interrupts.
- * @wake_invert: Inverted wake register: cleared bits are wake enabled.
+ * @wake_invert: Inverted wake register: cleared bits are wake disabled.
* @type_in_mask: Use the mask registers for controlling irq type. Use this if
* the hardware provides separate bits for rising/falling edge
* or low/high level interrupts and they should be combined into
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cbb7340c5866..b469878de25c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2407,12 +2407,12 @@ static inline void __migrate_enable(void) { }
* be defined in kernel/sched/core.c.
*/
#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
-static inline void migrate_disable(void)
+static __always_inline void migrate_disable(void)
{
__migrate_disable();
}
-static inline void migrate_enable(void)
+static __always_inline void migrate_enable(void)
{
__migrate_enable();
}
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index b7fafe999073..624fda17a785 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -8,7 +8,7 @@
#define SUID_DUMP_USER 1 /* Dump as user of process */
#define SUID_DUMP_ROOT 2 /* Dump as root */
-static inline unsigned long __mm_flags_get_dumpable(struct mm_struct *mm)
+static inline unsigned long __mm_flags_get_dumpable(const struct mm_struct *mm)
{
/*
* By convention, dumpable bits are contained in first 32 bits of the
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 0e47465ef0fd..774efe592a9a 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -111,7 +111,7 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
int shmem_writeout(struct folio *folio, struct swap_iocb **plug,
struct list_head *folio_list);
-void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
+void shmem_truncate_range(struct inode *inode, loff_t start, uoff_t end);
int shmem_unuse(unsigned int type);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fb3fec9affaa..a7cc3d1f4fd1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4204,6 +4204,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk,
struct sk_buff_head *sk_queue,
unsigned int flags, int *off, int *err);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err);
+__poll_t datagram_poll_queue(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait,
+ struct sk_buff_head *rcv_queue);
__poll_t datagram_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 66c06fcdfe19..cf84d98964b2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -77,6 +77,7 @@ struct cachestat_range;
struct cachestat;
struct statmount;
struct mnt_id_req;
+struct ns_id_req;
struct xattr_args;
struct file_attr;
@@ -437,6 +438,9 @@ asmlinkage long sys_statmount(const struct mnt_id_req __user *req,
asmlinkage long sys_listmount(const struct mnt_id_req __user *req,
u64 __user *mnt_ids, size_t nr_mnt_ids,
unsigned int flags);
+asmlinkage long sys_listns(const struct ns_id_req __user *req,
+ u64 __user *ns_ids, size_t nr_ns_ids,
+ unsigned int flags);
asmlinkage long sys_truncate(const char __user *path, long length);
asmlinkage long sys_ftruncate(unsigned int fd, off_t length);
#if BITS_PER_LONG == 32
diff --git a/include/linux/types.h b/include/linux/types.h
index 6dfdb8e8e4c3..d4437e9c452c 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -50,6 +50,7 @@ typedef __kernel_old_gid_t old_gid_t;
#if defined(__GNUC__)
typedef __kernel_loff_t loff_t;
+typedef __kernel_uoff_t uoff_t;
#endif
/*
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 3aaf19e77558..8285b19a25e0 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -376,6 +376,9 @@ struct usb_gadget_ops {
* can handle. The UDC must support this and all slower speeds and lower
* number of lanes.
* @state: the state we are now (attached, suspended, configured, etc)
+ * @state_lock: Spinlock protecting the `state` and `teardown` members.
+ * @teardown: True if the device is undergoing teardown, used to prevent
+ * new work from being scheduled during cleanup.
* @name: Identifies the controller hardware type. Used in diagnostics
* and sometimes configuration.
* @dev: Driver model state for this abstract device.
@@ -451,6 +454,8 @@ struct usb_gadget {
enum usb_ssp_rate max_ssp_rate;
enum usb_device_state state;
+ spinlock_t state_lock;
+ bool teardown;
const char *name;
struct device dev;
unsigned isoch_delay;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 9a9aebbf96b9..9c3be157397e 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -166,13 +166,13 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX;
}
-#ifdef CONFIG_USER_NS
-
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
return container_of(ns, struct user_namespace, ns);
}
+#ifdef CONFIG_USER_NS
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 20e0584db1dd..75dabb763c65 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -384,7 +384,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
struct virtio_net_hdr_v1_hash_tunnel *vhdr,
bool tnl_hdr_negotiated,
bool little_endian,
- int vlan_hlen)
+ int vlan_hlen,
+ bool has_data_valid)
{
struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr;
unsigned int inner_nh, outer_th;
@@ -394,13 +395,18 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM);
if (!tnl_gso_type)
- return virtio_net_hdr_from_skb(skb, hdr, little_endian, false,
- vlan_hlen);
+ return virtio_net_hdr_from_skb(skb, hdr, little_endian,
+ has_data_valid, vlan_hlen);
/* Tunnel support not negotiated but skb ask for it. */
if (!tnl_hdr_negotiated)
return -EINVAL;
+ vhdr->hash_hdr.hash_value_lo = 0;
+ vhdr->hash_hdr.hash_value_hi = 0;
+ vhdr->hash_hdr.hash_report = 0;
+ vhdr->hash_hdr.padding = 0;
+
/* Let the basic parsing deal with plain GSO features. */
skb_shinfo(skb)->gso_type &= ~tnl_gso_type;
ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 22dd4adc5667..f48e8ccffe81 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -189,11 +189,11 @@ void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
void inode_wait_for_writeback(struct inode *inode);
void inode_io_list_del(struct inode *inode);
-/* writeback.h requires fs.h; it, too, is not included from here. */
-static inline void wait_on_inode(struct inode *inode)
+static inline xa_mark_t wbc_to_tag(struct writeback_control *wbc)
{
- wait_var_event(inode_state_wait_address(inode, __I_NEW),
- !(READ_ONCE(inode->i_state) & I_NEW));
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+ return PAGECACHE_TAG_TOWRITE;
+ return PAGECACHE_TAG_DIRTY;
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -234,7 +234,7 @@ static inline void inode_attach_wb(struct inode *inode, struct folio *folio)
static inline void inode_detach_wb(struct inode *inode)
{
if (inode->i_wb) {
- WARN_ON_ONCE(!(inode->i_state & I_CLEAR));
+ WARN_ON_ONCE(!(inode_state_read_once(inode) & I_CLEAR));
wb_put(inode->i_wb);
inode->i_wb = NULL;
}
@@ -374,4 +374,9 @@ bool redirty_page_for_writepage(struct writeback_control *, struct page *);
void sb_mark_inode_writeback(struct inode *inode);
void sb_clear_inode_writeback(struct inode *inode);
+/*
+ * 4MB minimal write chunk size
+ */
+#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
+
#endif /* WRITEBACK_H */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 86b0d47984a1..64e9afe7d647 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -85,12 +85,12 @@ int __vfs_setxattr_noperm(struct mnt_idmap *, struct dentry *,
const char *, const void *, size_t, int);
int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *,
const char *, const void *, size_t, int,
- struct inode **);
+ struct delegated_inode *);
int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *,
const void *, size_t, int);
int __vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *);
int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *,
- const char *, struct inode **);
+ const char *, struct delegated_inode *);
int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *);
ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 9ecc70baaca9..cb4c02d00759 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -434,6 +434,7 @@ enum {
HCI_USER_CHANNEL,
HCI_EXT_CONFIGURED,
HCI_LE_ADV,
+ HCI_LE_ADV_0,
HCI_LE_PER_ADV,
HCI_LE_SCAN,
HCI_SSP_ENABLED,
@@ -2782,6 +2783,11 @@ struct hci_ev_le_per_adv_report {
__u8 data[];
} __packed;
+#define HCI_EV_LE_PA_SYNC_LOST 0x10
+struct hci_ev_le_pa_sync_lost {
+ __le16 handle;
+} __packed;
+
#define LE_PA_DATA_COMPLETE 0x00
#define LE_PA_DATA_MORE_TO_COME 0x01
#define LE_PA_DATA_TRUNCATED 0x02
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2924c2bf2a98..0cb87687837f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -244,6 +244,7 @@ struct adv_info {
bool enabled;
bool pending;
bool periodic;
+ bool periodic_enabled;
__u8 mesh;
__u8 instance;
__u8 handle;
@@ -748,7 +749,6 @@ struct hci_conn {
__u8 remote_cap;
__u8 remote_auth;
- __u8 remote_id;
unsigned int sent;
@@ -856,11 +856,12 @@ extern struct mutex hci_cb_list_lock;
/* ----- HCI interface to upper protocols ----- */
int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
int l2cap_disconn_ind(struct hci_conn *hcon);
-void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags);
+int l2cap_recv_acldata(struct hci_dev *hdev, u16 handle, struct sk_buff *skb,
+ u16 flags);
#if IS_ENABLED(CONFIG_BT_BREDR)
int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags);
-void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb);
+int sco_recv_scodata(struct hci_dev *hdev, u16 handle, struct sk_buff *skb);
#else
static inline int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,
__u8 *flags)
@@ -868,23 +869,30 @@ static inline int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,
return 0;
}
-static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
+static inline int sco_recv_scodata(struct hci_dev *hdev, u16 handle,
+ struct sk_buff *skb)
{
+ kfree_skb(skb);
+ return -ENOENT;
}
#endif
#if IS_ENABLED(CONFIG_BT_LE)
int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags);
-void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags);
+int iso_recv(struct hci_dev *hdev, u16 handle, struct sk_buff *skb,
+ u16 flags);
#else
static inline int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,
__u8 *flags)
{
return 0;
}
-static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb,
- u16 flags)
+
+static inline int iso_recv(struct hci_dev *hdev, u16 handle,
+ struct sk_buff *skb, u16 flags)
{
+ kfree_skb(skb);
+ return -ENOENT;
}
#endif
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 4bb0eaedda18..00e182a22720 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -38,8 +38,8 @@
#define L2CAP_DEFAULT_TX_WINDOW 63
#define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF
#define L2CAP_DEFAULT_MAX_TX 3
-#define L2CAP_DEFAULT_RETRANS_TO 2 /* seconds */
-#define L2CAP_DEFAULT_MONITOR_TO 12 /* seconds */
+#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */
+#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */
#define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */
#define L2CAP_DEFAULT_ACK_TO 200
#define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 74edea06985b..f5be96f08b9d 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -780,7 +780,7 @@ struct mgmt_adv_pattern {
__u8 ad_type;
__u8 offset;
__u8 length;
- __u8 value[31];
+ __u8 value[HCI_MAX_AD_LENGTH];
} __packed;
#define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052
@@ -853,7 +853,7 @@ struct mgmt_cp_set_mesh {
__le16 window;
__le16 period;
__u8 num_ad_types;
- __u8 ad_types[];
+ __u8 ad_types[] __counted_by(num_ad_types);
} __packed;
#define MGMT_SET_MESH_RECEIVER_SIZE 6
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 781624f5913a..820e299f06b5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6435,6 +6435,11 @@ static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork,
* after wiphy_lock() was called. Therefore, wiphy_cancel_work() can
* use just cancel_work() instead of cancel_work_sync(), it requires
* being in a section protected by wiphy_lock().
+ *
+ * Note that these are scheduled with a timer where the accuracy
+ * becomes less the longer in the future the scheduled timer is. Use
+ * wiphy_hrtimer_work_queue() if the timer must be not be late by more
+ * than approximately 10 percent.
*/
void wiphy_delayed_work_queue(struct wiphy *wiphy,
struct wiphy_delayed_work *dwork,
@@ -6506,6 +6511,79 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
bool wiphy_delayed_work_pending(struct wiphy *wiphy,
struct wiphy_delayed_work *dwork);
+struct wiphy_hrtimer_work {
+ struct wiphy_work work;
+ struct wiphy *wiphy;
+ struct hrtimer timer;
+};
+
+enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t);
+
+static inline void wiphy_hrtimer_work_init(struct wiphy_hrtimer_work *hrwork,
+ wiphy_work_func_t func)
+{
+ hrtimer_setup(&hrwork->timer, wiphy_hrtimer_work_timer,
+ CLOCK_BOOTTIME, HRTIMER_MODE_REL);
+ wiphy_work_init(&hrwork->work, func);
+}
+
+/**
+ * wiphy_hrtimer_work_queue - queue hrtimer work for the wiphy
+ * @wiphy: the wiphy to queue for
+ * @hrwork: the high resolution timer worker
+ * @delay: the delay given as a ktime_t
+ *
+ * Please refer to wiphy_delayed_work_queue(). The difference is that
+ * the hrtimer work uses a high resolution timer for scheduling. This
+ * may be needed if timeouts might be scheduled further in the future
+ * and the accuracy of the normal timer is not sufficient.
+ *
+ * Expect a delay of a few milliseconds as the timer is scheduled
+ * with some slack and some more time may pass between queueing the
+ * work and its start.
+ */
+void wiphy_hrtimer_work_queue(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork,
+ ktime_t delay);
+
+/**
+ * wiphy_hrtimer_work_cancel - cancel previously queued hrtimer work
+ * @wiphy: the wiphy, for debug purposes
+ * @hrtimer: the hrtimer work to cancel
+ *
+ * Cancel the work *without* waiting for it, this assumes being
+ * called under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_hrtimer_work_cancel(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrtimer);
+
+/**
+ * wiphy_hrtimer_work_flush - flush previously queued hrtimer work
+ * @wiphy: the wiphy, for debug purposes
+ * @hrwork: the hrtimer work to flush
+ *
+ * Flush the work (i.e. run it if pending). This must be called
+ * under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_hrtimer_work_flush(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork);
+
+/**
+ * wiphy_hrtimer_work_pending - Find out whether a wiphy hrtimer
+ * work item is currently pending.
+ *
+ * @wiphy: the wiphy, for debug purposes
+ * @hrwork: the hrtimer work in question
+ *
+ * Return: true if timer is pending, false otherwise
+ *
+ * Please refer to the wiphy_delayed_work_pending() documentation as
+ * this is the equivalent function for hrtimer based delayed work
+ * items.
+ */
+bool wiphy_hrtimer_work_pending(struct wiphy *wiphy,
+ struct wiphy_hrtimer_work *hrwork);
+
/**
* enum ieee80211_ap_reg_power - regulatory power for an Access Point
*
diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h
index bc3507edd589..898723ab62e8 100644
--- a/include/net/libeth/xdp.h
+++ b/include/net/libeth/xdp.h
@@ -513,7 +513,7 @@ struct libeth_xdp_tx_desc {
* can't fail, but can send less frames if there's no enough free descriptors
* available. The actual free space is returned by @prep from the driver.
*/
-static __always_inline u32
+static __always_inline __nocfi_generic u32
libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq,
u32 n, bool unroll, u64 priv,
u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index c64fd896b1f9..99ac747b7906 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -536,6 +536,8 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
case TCF_LAYER_NETWORK:
return skb_network_header(skb);
case TCF_LAYER_TRANSPORT:
+ if (!skb_transport_header_was_set(skb))
+ break;
return skb_transport_header(skb);
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5ca230ed526a..ab20f549b8f9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -370,7 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
-void tcp_rcvbuf_grow(struct sock *sk);
+void tcp_rcvbuf_grow(struct sock *sk, u32 newval);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
diff --git a/include/net/tls.h b/include/net/tls.h
index 857340338b69..c7bcdb3afad7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -451,25 +451,26 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
/* Log all TLS record header TCP sequences in [seq, seq+len] */
static inline void
-tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len)
+tls_offload_rx_resync_async_request_start(struct tls_offload_resync_async *resync_async,
+ __be32 seq, u16 len)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
-
- atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) |
+ atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) |
((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC);
- rx_ctx->resync_async->loglen = 0;
- rx_ctx->resync_async->rcd_delta = 0;
+ resync_async->loglen = 0;
+ resync_async->rcd_delta = 0;
}
static inline void
-tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq)
+tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_async,
+ __be32 seq)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+ atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ);
+}
- atomic64_set(&rx_ctx->resync_async->req,
- ((u64)ntohl(seq) << 32) | RESYNC_REQ);
+static inline void
+tls_offload_rx_resync_async_request_cancel(struct tls_offload_resync_async *resync_async)
+{
+ atomic64_set(&resync_async->req, 0);
}
static inline void
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index f3014e4f54fc..0a14daaa5dd4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -536,7 +536,8 @@ static inline int xfrm_af2proto(unsigned int family)
static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto)
{
- if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) ||
+ if ((x->sel.family != AF_UNSPEC) ||
+ (ipproto == IPPROTO_IPIP && x->props.family == AF_INET) ||
(ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6))
return &x->inner_mode;
else
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 6d6500148c4b..993008cdea65 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -252,8 +252,8 @@ struct scsi_device {
unsigned int queue_stopped; /* request queue is quiesced */
bool offline_already; /* Device offline message logged */
- unsigned int ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */
- unsigned int ua_por_ctr; /* Counter for Power On / Reset UAs */
+ atomic_t ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */
+ atomic_t ua_por_ctr; /* Counter for Power On / Reset UAs */
atomic_t disk_events_disable_depth; /* disable depth for disk events */
@@ -693,10 +693,8 @@ static inline int scsi_device_busy(struct scsi_device *sdev)
}
/* Macros to access the UNIT ATTENTION counters */
-#define scsi_get_ua_new_media_ctr(sdev) \
- ((const unsigned int)(sdev->ua_new_media_ctr))
-#define scsi_get_ua_por_ctr(sdev) \
- ((const unsigned int)(sdev->ua_por_ctr))
+#define scsi_get_ua_new_media_ctr(sdev) atomic_read(&sdev->ua_new_media_ctr)
+#define scsi_get_ua_por_ctr(sdev) atomic_read(&sdev->ua_por_ctr)
#define MODULE_ALIAS_SCSI_DEVICE(type) \
MODULE_ALIAS("scsi:t-" __stringify(type) "*")
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 9d2c36c6a0ed..6757233bd064 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -218,6 +218,9 @@ TRACE_EVENT(tcp_rcvbuf_grow,
__field(__u32, space)
__field(__u32, ooo_space)
__field(__u32, rcvbuf)
+ __field(__u32, rcv_ssthresh)
+ __field(__u32, window_clamp)
+ __field(__u32, rcv_wnd)
__field(__u8, scaling_ratio)
__field(__u16, sport)
__field(__u16, dport)
@@ -245,6 +248,9 @@ TRACE_EVENT(tcp_rcvbuf_grow,
tp->rcv_nxt;
__entry->rcvbuf = sk->sk_rcvbuf;
+ __entry->rcv_ssthresh = tp->rcv_ssthresh;
+ __entry->window_clamp = tp->window_clamp;
+ __entry->rcv_wnd = tp->rcv_wnd;
__entry->scaling_ratio = tp->scaling_ratio;
__entry->sport = ntohs(inet->inet_sport);
__entry->dport = ntohs(inet->inet_dport);
@@ -264,11 +270,14 @@ TRACE_EVENT(tcp_rcvbuf_grow,
),
TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u "
+ "rcv_ssthresh=%u window_clamp=%u rcv_wnd=%u "
"family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 "
"saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx",
__entry->time, __entry->rtt_us, __entry->copied,
__entry->inq, __entry->space, __entry->ooo_space,
__entry->scaling_ratio, __entry->rcvbuf,
+ __entry->rcv_ssthresh, __entry->window_clamp,
+ __entry->rcv_wnd,
show_family_name(__entry->family),
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index c08aff044e80..311a341e6fe4 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -120,7 +120,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
/* may be called for files on pseudo FSes w/ unregistered bdi */
strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->ino = inode->i_ino;
- __entry->state = inode->i_state;
+ __entry->state = inode_state_read_once(inode);
__entry->flags = flags;
),
@@ -748,7 +748,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
strscpy_pad(__entry->name,
bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
- __entry->state = inode->i_state;
+ __entry->state = inode_state_read_once(inode);
__entry->dirtied_when = inode->dirtied_when;
__entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode));
),
@@ -787,7 +787,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
strscpy_pad(__entry->name,
bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
- __entry->state = inode->i_state;
+ __entry->state = inode_state_read_once(inode);
__entry->dirtied_when = inode->dirtied_when;
__entry->writeback_index = inode->i_mapping->writeback_index;
__entry->nr_to_write = nr_to_write;
@@ -839,7 +839,7 @@ DECLARE_EVENT_CLASS(writeback_inode_template,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->state = inode->i_state;
+ __entry->state = inode_state_read_once(inode);
__entry->mode = inode->i_mode;
__entry->dirtied_when = inode->dirtied_when;
),
diff --git a/include/uapi/asm-generic/posix_types.h b/include/uapi/asm-generic/posix_types.h
index b5f7594eee7a..0a90ad92dbf3 100644
--- a/include/uapi/asm-generic/posix_types.h
+++ b/include/uapi/asm-generic/posix_types.h
@@ -86,6 +86,7 @@ typedef struct {
*/
typedef __kernel_long_t __kernel_off_t;
typedef long long __kernel_loff_t;
+typedef unsigned long long __kernel_uoff_t;
typedef __kernel_long_t __kernel_old_time_t;
#ifndef __KERNEL__
typedef __kernel_long_t __kernel_time_t;
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 04e0077fb4c9..942370b3f5d2 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -857,9 +857,11 @@ __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
__SYSCALL(__NR_file_getattr, sys_file_getattr)
#define __NR_file_setattr 469
__SYSCALL(__NR_file_setattr, sys_file_setattr)
+#define __NR_listns 470
+__SYSCALL(__NR_listns, sys_listns)
#undef __NR_syscalls
-#define __NR_syscalls 470
+#define __NR_syscalls 471
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index ea91aa8afde9..e527b24bd824 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -979,14 +979,20 @@ extern "C" {
* 2 = Gob Height 8, Turing+ Page Kind mapping
* 3 = Reserved for future use.
*
- * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further
- * bit remapping step that occurs at an even lower level than the
- * page kind and block linear swizzles. This causes the layout of
- * surfaces mapped in those SOC's GPUs to be incompatible with the
- * equivalent mapping on other GPUs in the same system.
- *
- * 0 = Tegra K1 - Tegra Parker/TX2 Layout.
- * 1 = Desktop GPU and Tegra Xavier+ Layout
+ * 22:22 s Sector layout. There is a further bit remapping step that occurs
+ * 26:27 at an even lower level than the page kind and block linear
+ * swizzles. This causes the bit arrangement of surfaces in memory
+ * to differ subtly, and prevents direct sharing of surfaces between
+ * GPUs with different layouts.
+ *
+ * 0 = Tegra K1 - Tegra Parker/TX2 Layout
+ * 1 = Pre-GB20x, GB20x 32+ bpp, GB10, Tegra Xavier-Orin Layout
+ * 2 = GB20x(Blackwell 2)+ 8 bpp surface layout
+ * 3 = GB20x(Blackwell 2)+ 16 bpp surface layout
+ * 4 = Reserved for future use.
+ * 5 = Reserved for future use.
+ * 6 = Reserved for future use.
+ * 7 = Reserved for future use.
*
* 25:23 c Lossless Framebuffer Compression type.
*
@@ -1001,7 +1007,7 @@ extern "C" {
* 6 = Reserved for future use
* 7 = Reserved for future use
*
- * 55:25 - Reserved for future use. Must be zero.
+ * 55:28 - Reserved for future use. Must be zero.
*/
#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \
fourcc_mod_code(NVIDIA, (0x10 | \
@@ -1009,6 +1015,7 @@ extern "C" {
(((k) & 0xff) << 12) | \
(((g) & 0x3) << 20) | \
(((s) & 0x1) << 22) | \
+ (((s) & 0x6) << 25) | \
(((c) & 0x7) << 23)))
/* To grandfather in prior block linear format modifiers to the above layout,
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 40ff19f52a8d..517489a7ec60 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1013,6 +1013,20 @@ struct drm_xe_vm_destroy {
* valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address
* mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO
* handle MBZ, and the BO offset MBZ.
+ * - %DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET - Can be used in combination with
+ * %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR to reset madvises when the underlying
+ * CPU address space range is unmapped (typically with munmap(2) or brk(2)).
+ * The madvise values set with &DRM_IOCTL_XE_MADVISE are reset to the values
+ * that were present immediately after the &DRM_IOCTL_XE_VM_BIND.
+ * The reset GPU virtual address range is the intersection of the range bound
+ * using &DRM_IOCTL_XE_VM_BIND and the virtual CPU address space range
+ * unmapped.
+ * This functionality is present to mimic the behaviour of CPU address space
+ * madvises set using madvise(2), which are typically reset on unmap.
+ * Note: free(3) may or may not call munmap(2) and/or brk(2), and may thus
+ * not invoke autoreset. Neither will stack variables going out of scope.
+ * Therefore it's recommended to always explicitly reset the madvises when
+ * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call.
*
* The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be:
* - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in
@@ -1119,6 +1133,7 @@ struct drm_xe_vm_bind_op {
#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
#define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4)
#define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5)
+#define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6)
/** @flags: Bind flags */
__u32 flags;
diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h
index cde8f173f566..22acaaec7b1c 100644
--- a/include/uapi/linux/fb.h
+++ b/include/uapi/linux/fb.h
@@ -319,7 +319,7 @@ enum {
#define FB_VBLANK_HAVE_VCOUNT 0x020 /* the vcount field is valid */
#define FB_VBLANK_HAVE_HCOUNT 0x040 /* the hcount field is valid */
#define FB_VBLANK_VSYNCING 0x080 /* currently in a vsync */
-#define FB_VBLANK_HAVE_VSYNC 0x100 /* verical syncs can be detected */
+#define FB_VBLANK_HAVE_VSYNC 0x100 /* vertical syncs can be detected */
struct fb_vblank {
__u32 flags; /* FB_VBLANK flags */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 3741ea1b73d8..5e277fd955aa 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -4,6 +4,11 @@
#include <asm/fcntl.h>
#include <linux/openat2.h>
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
@@ -79,6 +84,17 @@
*/
#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
+/* Set/Get delegations */
+#define F_GETDELEG (F_LINUX_SPECIFIC_BASE + 15)
+#define F_SETDELEG (F_LINUX_SPECIFIC_BASE + 16)
+
+/* Argument structure for F_GETDELEG and F_SETDELEG */
+struct delegation {
+ uint32_t d_flags; /* Must be 0 */
+ uint16_t d_type; /* F_RDLCK, F_WRLCK, F_UNLCK */
+ uint16_t __pad; /* Must be 0 */
+};
+
/*
* Types of directory notifications that may be requested.
*/
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 4a9fbf42aa9f..30f3c9eaafaa 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -27,7 +27,7 @@
#define INPUT_PROP_TOPBUTTONPAD 0x04 /* softbuttons at top of pad */
#define INPUT_PROP_POINTING_STICK 0x05 /* is a pointing stick */
#define INPUT_PROP_ACCELEROMETER 0x06 /* has accelerometer */
-#define INPUT_PROP_HAPTIC_TOUCHPAD 0x07 /* is a haptic touchpad */
+#define INPUT_PROP_PRESSUREPAD 0x07 /* pressure triggers clicks */
#define INPUT_PROP_MAX 0x1f
#define INPUT_PROP_CNT (INPUT_PROP_MAX + 1)
@@ -631,6 +631,18 @@
#define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */
#define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */
+/*
+ * Keycodes for hotkeys toggling the electronic privacy screen found on some
+ * laptops on/off. Note when the embedded-controller turns on/off the eprivacy
+ * screen itself then the state should be reported through drm connecter props:
+ * https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties
+ * Except when implementing the drm connecter properties API is not possible
+ * because e.g. the firmware does not allow querying the presence and/or status
+ * of the eprivacy screen at boot.
+ */
+#define KEY_EPRIVACY_SCREEN_ON 0x252
+#define KEY_EPRIVACY_SCREEN_OFF 0x253
+
#define KEY_KBDINPUTASSIST_PREV 0x260
#define KEY_KBDINPUTASSIST_NEXT 0x261
#define KEY_KBDINPUTASSIST_PREVGROUP 0x262
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 263bed13473e..b7c8dad26690 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -689,9 +689,6 @@ enum io_uring_register_op {
/* query various aspects of io_uring, see linux/io_uring/query.h */
IORING_REGISTER_QUERY = 35,
- /* return zcrx buffers back into circulation */
- IORING_REGISTER_ZCRX_REFILL = 36,
-
/* this goes last */
IORING_REGISTER_LAST,
@@ -1073,15 +1070,6 @@ struct io_uring_zcrx_ifq_reg {
__u64 __resv[3];
};
-struct io_uring_zcrx_sync_refill {
- __u32 zcrx_id;
- /* the number of entries to return */
- __u32 nr_entries;
- /* pointer to an array of struct io_uring_zcrx_rqe */
- __u64 rqes;
- __u64 __resv[2];
-};
-
#ifdef __cplusplus
}
#endif
diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
index 5d754322a27c..3539ccbfd064 100644
--- a/include/uapi/linux/io_uring/query.h
+++ b/include/uapi/linux/io_uring/query.h
@@ -36,6 +36,9 @@ struct io_uring_query_opcode {
__u64 enter_flags;
/* Bitmask of all supported IOSQE_* flags */
__u64 sqe_flags;
+ /* The number of available query opcodes */
+ __u32 nr_query_opcodes;
+ __u32 __pad;
};
#endif
diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h
index 8197a4800604..40aa545101a3 100644
--- a/include/uapi/linux/isst_if.h
+++ b/include/uapi/linux/isst_if.h
@@ -52,7 +52,7 @@ struct isst_if_cpu_map {
/**
* struct isst_if_cpu_maps - structure for CPU map IOCTL
* @cmd_count: Number of CPU mapping command in cpu_map[]
- * @cpu_map[]: Holds one or more CPU map data structure
+ * @cpu_map: Holds one or more CPU map data structure
*
* This structure used with ioctl ISST_IF_GET_PHY_ID to send
* one or more CPU mapping commands. Here IOCTL return value indicates
@@ -82,8 +82,8 @@ struct isst_if_io_reg {
/**
* struct isst_if_io_regs - structure for IO register commands
- * @cmd_count: Number of io reg commands in io_reg[]
- * @io_reg[]: Holds one or more io_reg command structure
+ * @req_count: Number of io reg commands in io_reg[]
+ * @io_reg: Holds one or more io_reg command structure
*
* This structure used with ioctl ISST_IF_IO_CMD to send
* one or more read/write commands to PUNIT. Here IOCTL return value
@@ -120,7 +120,7 @@ struct isst_if_mbox_cmd {
/**
* struct isst_if_mbox_cmds - structure for mailbox commands
* @cmd_count: Number of mailbox commands in mbox_cmd[]
- * @mbox_cmd[]: Holds one or more mbox commands
+ * @mbox_cmd: Holds one or more mbox commands
*
* This structure used with ioctl ISST_IF_MBOX_COMMAND to send
* one or more mailbox commands to PUNIT. Here IOCTL return value
@@ -152,7 +152,7 @@ struct isst_if_msr_cmd {
/**
* struct isst_if_msr_cmds - structure for msr commands
* @cmd_count: Number of mailbox commands in msr_cmd[]
- * @msr_cmd[]: Holds one or more msr commands
+ * @msr_cmd: Holds one or more msr commands
*
* This structure used with ioctl ISST_IF_MSR_COMMAND to send
* one or more MSR commands. IOCTL return value indicates number of
@@ -167,8 +167,9 @@ struct isst_if_msr_cmds {
* struct isst_core_power - Structure to get/set core_power feature
* @get_set: 0: Get, 1: Set
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @enable: Feature enable status
+ * @supported: Power domain supports SST_CP interface
* @priority_type: Priority type for the feature (ordered/proportional)
*
* Structure to get/set core_power feature state using IOCTL
@@ -187,11 +188,11 @@ struct isst_core_power {
* struct isst_clos_param - Structure to get/set clos praram
* @get_set: 0: Get, 1: Set
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
- * clos: Clos ID for the parameters
- * min_freq_mhz: Minimum frequency in MHz
- * max_freq_mhz: Maximum frequency in MHz
- * prop_prio: Proportional priority from 0-15
+ * @power_domain_id: Power Domain id
+ * @clos: Clos ID for the parameters
+ * @min_freq_mhz: Minimum frequency in MHz
+ * @max_freq_mhz: Maximum frequency in MHz
+ * @prop_prio: Proportional priority from 0-15
*
* Structure to get/set per clos property using IOCTL
* ISST_IF_CLOS_PARAM.
@@ -209,7 +210,7 @@ struct isst_clos_param {
/**
* struct isst_if_clos_assoc - Structure to assign clos to a CPU
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @logical_cpu: CPU number
* @clos: Clos ID to assign to the logical CPU
*
@@ -228,6 +229,7 @@ struct isst_if_clos_assoc {
* @get_set: Request is for get or set
* @punit_cpu_map: Set to 1 if the CPU number is punit numbering not
* Linux CPU number
+ * @assoc_info: CLOS data for this CPU
*
* Structure used to get/set associate CPUs to clos using IOCTL
* ISST_IF_CLOS_ASSOC.
@@ -257,7 +259,7 @@ struct isst_tpmi_instance_count {
/**
* struct isst_perf_level_info - Structure to get information on SST-PP levels
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @logical_cpu: CPU number
* @clos: Clos ID to assign to the logical CPU
* @max_level: Maximum performance level supported by the platform
@@ -267,8 +269,8 @@ struct isst_tpmi_instance_count {
* @feature_state: SST-BF and SST-TF (enabled/disabled) status at current level
* @locked: SST-PP performance level change is locked/unlocked
* @enabled: SST-PP feature is enabled or not
- * @sst-tf_support: SST-TF support status at this level
- * @sst-bf_support: SST-BF support status at this level
+ * @sst_tf_support: SST-TF support status at this level
+ * @sst_bf_support: SST-BF support status at this level
*
* Structure to get SST-PP details using IOCTL ISST_IF_PERF_LEVELS.
*/
@@ -289,7 +291,7 @@ struct isst_perf_level_info {
/**
* struct isst_perf_level_control - Structure to set SST-PP level
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @level: level to set
*
* Structure used change SST-PP level using IOCTL ISST_IF_PERF_SET_LEVEL.
@@ -303,7 +305,7 @@ struct isst_perf_level_control {
/**
* struct isst_perf_feature_control - Structure to activate SST-BF/SST-TF
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @feature: bit 0 = SST-BF state, bit 1 = SST-TF state
*
* Structure used to enable SST-BF/SST-TF using IOCTL ISST_IF_PERF_SET_FEATURE.
@@ -320,7 +322,7 @@ struct isst_perf_feature_control {
/**
* struct isst_perf_level_data_info - Structure to get SST-PP level details
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @level: SST-PP level for which caller wants to get information
* @tdp_ratio: TDP Ratio
* @base_freq_mhz: Base frequency in MHz
@@ -341,8 +343,8 @@ struct isst_perf_feature_control {
* @pm_fabric_freq_mhz: Fabric (Uncore) minimum frequency
* @max_buckets: Maximum trl buckets
* @max_trl_levels: Maximum trl levels
- * @bucket_core_counts[TRL_MAX_BUCKETS]: Number of cores per bucket
- * @trl_freq_mhz[TRL_MAX_LEVELS][TRL_MAX_BUCKETS]: maximum frequency
+ * @bucket_core_counts: Number of cores per bucket
+ * @trl_freq_mhz: maximum frequency
* for a bucket and trl level
*
* Structure used to get information on frequencies and TDP for a SST-PP
@@ -402,7 +404,7 @@ struct isst_perf_level_fabric_info {
/**
* struct isst_perf_level_cpu_mask - Structure to get SST-PP level CPU mask
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @level: SST-PP level for which caller wants to get information
* @punit_cpu_map: Set to 1 if the CPU number is punit numbering not
* Linux CPU number. If 0 CPU buffer is copied to user space
@@ -430,7 +432,7 @@ struct isst_perf_level_cpu_mask {
/**
* struct isst_base_freq_info - Structure to get SST-BF frequencies
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @level: SST-PP level for which caller wants to get information
* @high_base_freq_mhz: High priority CPU base frequency
* @low_base_freq_mhz: Low priority CPU base frequency
@@ -453,9 +455,11 @@ struct isst_base_freq_info {
/**
* struct isst_turbo_freq_info - Structure to get SST-TF frequencies
* @socket_id: Socket/package id
- * @power_domain: Power Domain id
+ * @power_domain_id: Power Domain id
* @level: SST-PP level for which caller wants to get information
* @max_clip_freqs: Maximum number of low priority core clipping frequencies
+ * @max_buckets: Maximum trl buckets
+ * @max_trl_levels: Maximum trl levels
* @lp_clip_freq_mhz: Clip frequencies per trl level
* @bucket_core_counts: Maximum number of cores for a bucket
* @trl_freq_mhz: Frequencies per trl level for each bucket
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 7fa67c2031a5..5d3f8c9e3a62 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -197,7 +197,7 @@ struct statmount {
*/
struct mnt_id_req {
__u32 size;
- __u32 spare;
+ __u32 mnt_ns_fd;
__u64 mnt_id;
__u64 param;
__u64 mnt_ns_id;
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index e098759ec917..a25e38d1c874 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -67,4 +67,62 @@ struct nsfs_file_handle {
#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
+enum init_ns_id {
+ IPC_NS_INIT_ID = 1ULL,
+ UTS_NS_INIT_ID = 2ULL,
+ USER_NS_INIT_ID = 3ULL,
+ PID_NS_INIT_ID = 4ULL,
+ CGROUP_NS_INIT_ID = 5ULL,
+ TIME_NS_INIT_ID = 6ULL,
+ NET_NS_INIT_ID = 7ULL,
+ MNT_NS_INIT_ID = 8ULL,
+#ifdef __KERNEL__
+ NS_LAST_INIT_ID = MNT_NS_INIT_ID,
+#endif
+};
+
+enum ns_type {
+ TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */
+ MNT_NS = (1ULL << 17), /* CLONE_NEWNS */
+ CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */
+ UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */
+ IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */
+ USER_NS = (1ULL << 28), /* CLONE_NEWUSER */
+ PID_NS = (1ULL << 29), /* CLONE_NEWPID */
+ NET_NS = (1ULL << 30), /* CLONE_NEWNET */
+};
+
+/**
+ * struct ns_id_req - namespace ID request structure
+ * @size: size of this structure
+ * @spare: reserved for future use
+ * @filter: filter mask
+ * @ns_id: last namespace id
+ * @user_ns_id: owning user namespace ID
+ *
+ * Structure for passing namespace ID and miscellaneous parameters to
+ * statns(2) and listns(2).
+ *
+ * For statns(2) @param represents the request mask.
+ * For listns(2) @param represents the last listed mount id (or zero).
+ */
+struct ns_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 ns_id;
+ struct /* listns */ {
+ __u32 ns_type;
+ __u32 spare2;
+ __u64 user_ns_id;
+ };
+};
+
+/*
+ * Special @user_ns_id value that can be passed to listns()
+ */
+#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
+
+/* List of all ns_id_req versions. */
+#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
+
#endif /* __LINUX_NSFS_H */
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 957db425d459..ea9a6811fc76 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -26,8 +26,12 @@
#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
#define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */
#define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */
+#define PIDFD_INFO_SUPPORTED_MASK (1UL << 5) /* Want/got supported mask flags */
+#define PIDFD_INFO_COREDUMP_SIGNAL (1UL << 6) /* Always returned if PIDFD_INFO_COREDUMP is requested. */
#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
+#define PIDFD_INFO_SIZE_VER1 72 /* sizeof second published struct */
+#define PIDFD_INFO_SIZE_VER2 80 /* sizeof third published struct */
/*
* Values for @coredump_mask in pidfd_info.
@@ -91,8 +95,11 @@ struct pidfd_info {
__u32 fsuid;
__u32 fsgid;
__s32 exit_code;
- __u32 coredump_mask;
- __u32 __spare1;
+ struct /* coredump info */ {
+ __u32 coredump_mask;
+ __u32 coredump_signal;
+ };
+ __u64 supported_mask; /* Mask flags that this kernel supports */
};
#define PIDFS_IOCTL_MAGIC 0xFF
diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
index 386ad36f1a0a..cab5cadca8ef 100644
--- a/include/uapi/linux/tee.h
+++ b/include/uapi/linux/tee.h
@@ -249,8 +249,9 @@ struct tee_ioctl_param {
* @cancel_id: [in] Cancellation id, a unique value to identify this request
* @session: [out] Session id
* @ret: [out] return value
- * @ret_origin [out] origin of the return value
- * @num_params [in] number of parameters following this struct
+ * @ret_origin: [out] origin of the return value
+ * @num_params: [in] number of &struct tee_ioctl_param entries in @params
+ * @params: array of ioctl parameters
*/
struct tee_ioctl_open_session_arg {
__u8 uuid[TEE_IOCTL_UUID_LEN];
@@ -276,14 +277,14 @@ struct tee_ioctl_open_session_arg {
struct tee_ioctl_buf_data)
/**
- * struct tee_ioctl_invoke_func_arg - Invokes a function in a Trusted
- * Application
+ * struct tee_ioctl_invoke_arg - Invokes a function in a Trusted Application
* @func: [in] Trusted Application function, specific to the TA
* @session: [in] Session id
* @cancel_id: [in] Cancellation id, a unique value to identify this request
* @ret: [out] return value
- * @ret_origin [out] origin of the return value
- * @num_params [in] number of parameters following this struct
+ * @ret_origin: [out] origin of the return value
+ * @num_params: [in] number of parameters following this struct
+ * @params: array of ioctl parameters
*/
struct tee_ioctl_invoke_arg {
__u32 func;
@@ -338,7 +339,8 @@ struct tee_ioctl_close_session_arg {
/**
* struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function
* @func: [in] supplicant function
- * @num_params [in/out] number of parameters following this struct
+ * @num_params: [in/out] number of &struct tee_ioctl_param entries in @params
+ * @params: array of ioctl parameters
*
* @num_params is the number of params that tee-supplicant has room to
* receive when input, @num_params is the number of actual params
@@ -363,7 +365,8 @@ struct tee_iocl_supp_recv_arg {
/**
* struct tee_iocl_supp_send_arg - Send a response to a received request
* @ret: [out] return value
- * @num_params [in] number of parameters following this struct
+ * @num_params: [in] number of &struct tee_ioctl_param entries in @params
+ * @params: array of ioctl parameters
*/
struct tee_iocl_supp_send_arg {
__u32 ret;
@@ -454,11 +457,13 @@ struct tee_ioctl_shm_register_fd_data {
*/
/**
- * struct tee_ioctl_invoke_func_arg - Invokes an object in a Trusted Application
+ * struct tee_ioctl_object_invoke_arg - Invokes an object in a
+ * Trusted Application
* @id: [in] Object id
* @op: [in] Object operation, specific to the object
* @ret: [out] return value
* @num_params: [in] number of parameters following this struct
+ * @params: array of ioctl parameters
*/
struct tee_ioctl_object_invoke_arg {
__u64 id;
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 8bf27ab8bcb4..1db45b01532b 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -193,7 +193,8 @@ struct virtio_net_hdr_v1 {
struct virtio_net_hdr_v1_hash {
struct virtio_net_hdr_v1 hdr;
- __le32 hash_value;
+ __le16 hash_value_lo;
+ __le16 hash_value_hi;
#define VIRTIO_NET_HASH_REPORT_NONE 0
#define VIRTIO_NET_HASH_REPORT_IPv4 1
#define VIRTIO_NET_HASH_REPORT_TCPv4 2
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9425cfd9d00e..0f95576bf1f6 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -688,6 +688,13 @@ enum ufshcd_quirks {
* single doorbell mode.
*/
UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25,
+
+ /*
+ * This quirk indicates that DME_LINKSTARTUP should not be issued a 2nd
+ * time (refer link_startup_again) after the 1st time was successful,
+ * because it causes link startup to become unreliable.
+ */
+ UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE = 1 << 26,
};
enum ufshcd_caps {