summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-16 13:00:36 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-16 13:00:36 -0800
commit45a43ac5acc90b8f4835eea92692f620e561a06b (patch)
tree254f816efd2f9ce1363509520921d7a88192ac56 /include
parent543b9b63394ee67ecf5298fe42cbe65b21a16eac (diff)
parentdedfae78f00960d703badc500422d10e1f12b2bc (diff)
Merge tag 'vfs-7.0-rc1.misc.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull more misc vfs updates from Christian Brauner: "Features: - Optimize close_range() from O(range size) to O(active FDs) by using find_next_bit() on the open_fds bitmap instead of linearly scanning the entire requested range. This is a significant improvement for large-range close operations on sparse file descriptor tables. - Add FS_XFLAG_VERITY file attribute for fs-verity files, retrievable via FS_IOC_FSGETXATTR and file_getattr(). The flag is read-only. Add tracepoints for fs-verity enable and verify operations, replacing the previously removed debug printk's. - Prevent nfsd from exporting special kernel filesystems like pidfs and nsfs. These filesystems have custom ->open() and ->permission() export methods that are designed for open_by_handle_at(2) only and are incompatible with nfsd. Update the exportfs documentation accordingly. Fixes: - Fix KMSAN uninit-value in ovl_fill_real() where strcmp() was used on a non-null-terminated decrypted directory entry name from fscrypt. This triggered on encrypted lower layers when the decrypted name buffer contained uninitialized tail data. The fix also adds VFS-level name_is_dot(), name_is_dotdot(), and name_is_dot_dotdot() helpers, replacing various open-coded "." and ".." checks across the tree. - Fix read-only fsflags not being reset together with xflags in vfs_fileattr_set(). Currently harmless since no read-only xflags overlap with flags, but this would cause inconsistencies for any future shared read-only flag - Return -EREMOTE instead of -ESRCH from PIDFD_GET_INFO when the target process is in a different pid namespace. This lets userspace distinguish "process exited" from "process in another namespace", matching glibc's pidfd_getpid() behavior Cleanups: - Use C-string literals in the Rust seq_file bindings, replacing the kernel::c_str!() macro (available since Rust 1.77) - Fix typo in d_walk_ret enum comment, add porting notes for the readlink_copy() calling convention change" * tag 'vfs-7.0-rc1.misc.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: fs: add porting notes about readlink_copy() pidfs: return -EREMOTE when PIDFD_GET_INFO is called on another ns nfsd: do not allow exporting of special kernel filesystems exportfs: clarify the documentation of open()/permission() expotrfs ops fsverity: add tracepoints fs: add FS_XFLAG_VERITY for fs-verity files rust: seq_file: replace `kernel::c_str!` with C-Strings fs: dcache: fix typo in enum d_walk_ret comment ovl: use name_is_dot* helpers in readdir code fs: add helpers name_is_dot{,dot,_dotdot} ovl: Fix uninit-value in ovl_fill_real fs: reset read-only fsflags together with xflags fs/file: optimize close_range() complexity from O(N) to O(Sparse)
Diffstat (limited to 'include')
-rw-r--r--include/linux/exportfs.h21
-rw-r--r--include/linux/fileattr.h6
-rw-r--r--include/linux/fs.h14
-rw-r--r--include/trace/events/fsverity.h146
-rw-r--r--include/uapi/linux/fs.h1
5 files changed, 181 insertions, 7 deletions
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 262e24d83313..8bcdba28b406 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -200,6 +200,10 @@ struct handle_to_path_ctx {
* @get_parent: find the parent of a given directory
* @commit_metadata: commit metadata changes to stable storage
*
+ * Methods for open_by_handle(2) syscall with special kernel file systems:
+ * @permission: custom permission for opening a file by handle
+ * @open: custom open routine for opening file by handle
+ *
* See Documentation/filesystems/nfs/exporting.rst for details on how to use
* this interface correctly and the definition of the flags.
*
@@ -244,10 +248,14 @@ struct handle_to_path_ctx {
* space cannot be allocated, a %ERR_PTR should be returned.
*
* @permission:
- * Allow filesystems to specify a custom permission function.
+ * Allow filesystems to specify a custom permission function for the
+ * open_by_handle_at(2) syscall instead of the default permission check.
+ * This custom permission function is not respected by nfsd.
*
* @open:
- * Allow filesystems to specify a custom open function.
+ * Allow filesystems to specify a custom open function for the
+ * open_by_handle_at(2) syscall instead of the default file_open_root().
+ * This custom open function is not respected by nfsd.
*
* @commit_metadata:
* @commit_metadata should commit metadata changes to stable storage.
@@ -330,6 +338,15 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop)
return nop && nop->fh_to_dentry;
}
+static inline bool exportfs_may_export(const struct export_operations *nop)
+{
+ /*
+ * Do not allow nfs export for filesystems with custom ->open() or
+ * ->permission() ops, which nfsd does not respect (e.g. pidfs, nsfs).
+ */
+ return exportfs_can_decode_fh(nop) && !nop->open && !nop->permission;
+}
+
static inline bool exportfs_can_encode_fh(const struct export_operations *nop,
int fh_flags)
{
diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h
index f89dcfad3f8f..3780904a63a6 100644
--- a/include/linux/fileattr.h
+++ b/include/linux/fileattr.h
@@ -7,16 +7,16 @@
#define FS_COMMON_FL \
(FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NODUMP_FL | FS_NOATIME_FL | FS_DAX_FL | \
- FS_PROJINHERIT_FL)
+ FS_PROJINHERIT_FL | FS_VERITY_FL)
#define FS_XFLAG_COMMON \
(FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | \
FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \
- FS_XFLAG_PROJINHERIT)
+ FS_XFLAG_PROJINHERIT | FS_XFLAG_VERITY)
/* Read-only inode flags */
#define FS_XFLAG_RDONLY_MASK \
- (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR)
+ (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR | FS_XFLAG_VERITY)
/* Flags to indicate valid value of fsx_ fields */
#define FS_XFLAG_VALUES_MASK \
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2e4d1e8b0e71..a2af5ddd5323 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2872,12 +2872,22 @@ u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
extern char *file_path(struct file *, char *, int);
+static inline bool name_is_dot(const char *name, size_t len)
+{
+ return unlikely(len == 1 && name[0] == '.');
+}
+
+static inline bool name_is_dotdot(const char *name, size_t len)
+{
+ return unlikely(len == 2 && name[0] == '.' && name[1] == '.');
+}
+
/**
- * is_dot_dotdot - returns true only if @name is "." or ".."
+ * name_is_dot_dotdot - returns true only if @name is "." or ".."
* @name: file name to check
* @len: length of file name, in bytes
*/
-static inline bool is_dot_dotdot(const char *name, size_t len)
+static inline bool name_is_dot_dotdot(const char *name, size_t len)
{
return len && unlikely(name[0] == '.') &&
(len == 1 || (len == 2 && name[1] == '.'));
diff --git a/include/trace/events/fsverity.h b/include/trace/events/fsverity.h
new file mode 100644
index 000000000000..a8c52f21cbd5
--- /dev/null
+++ b/include/trace/events/fsverity.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fsverity
+
+#if !defined(_TRACE_FSVERITY_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FSVERITY_H
+
+#include <linux/tracepoint.h>
+
+struct fsverity_descriptor;
+struct merkle_tree_params;
+struct fsverity_info;
+
+TRACE_EVENT(fsverity_enable,
+ TP_PROTO(const struct inode *inode,
+ const struct merkle_tree_params *params),
+ TP_ARGS(inode, params),
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __field(u64, data_size)
+ __field(u64, tree_size)
+ __field(unsigned int, merkle_block)
+ __field(unsigned int, num_levels)
+ ),
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->data_size = i_size_read(inode);
+ __entry->tree_size = params->tree_size;
+ __entry->merkle_block = params->block_size;
+ __entry->num_levels = params->num_levels;
+ ),
+ TP_printk("ino %lu data_size %llu tree_size %llu merkle_block %u levels %u",
+ (unsigned long) __entry->ino,
+ __entry->data_size,
+ __entry->tree_size,
+ __entry->merkle_block,
+ __entry->num_levels)
+);
+
+TRACE_EVENT(fsverity_tree_done,
+ TP_PROTO(const struct inode *inode, const struct fsverity_info *vi,
+ const struct merkle_tree_params *params),
+ TP_ARGS(inode, vi, params),
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __field(u64, data_size)
+ __field(u64, tree_size)
+ __field(unsigned int, merkle_block)
+ __field(unsigned int, levels)
+ __dynamic_array(u8, root_hash, params->digest_size)
+ __dynamic_array(u8, file_digest, params->digest_size)
+ ),
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->data_size = i_size_read(inode);
+ __entry->tree_size = params->tree_size;
+ __entry->merkle_block = params->block_size;
+ __entry->levels = params->num_levels;
+ memcpy(__get_dynamic_array(root_hash), vi->root_hash, __get_dynamic_array_len(root_hash));
+ memcpy(__get_dynamic_array(file_digest), vi->file_digest, __get_dynamic_array_len(file_digest));
+ ),
+ TP_printk("ino %lu data_size %llu tree_size %lld merkle_block %u levels %u root_hash %s digest %s",
+ (unsigned long) __entry->ino,
+ __entry->data_size,
+ __entry->tree_size,
+ __entry->merkle_block,
+ __entry->levels,
+ __print_hex_str(__get_dynamic_array(root_hash), __get_dynamic_array_len(root_hash)),
+ __print_hex_str(__get_dynamic_array(file_digest), __get_dynamic_array_len(file_digest)))
+);
+
+TRACE_EVENT(fsverity_verify_data_block,
+ TP_PROTO(const struct inode *inode,
+ const struct merkle_tree_params *params,
+ u64 data_pos),
+ TP_ARGS(inode, params, data_pos),
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __field(u64, data_pos)
+ __field(unsigned int, merkle_block)
+ ),
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->data_pos = data_pos;
+ __entry->merkle_block = params->block_size;
+ ),
+ TP_printk("ino %lu data_pos %llu merkle_block %u",
+ (unsigned long) __entry->ino,
+ __entry->data_pos,
+ __entry->merkle_block)
+);
+
+TRACE_EVENT(fsverity_merkle_hit,
+ TP_PROTO(const struct inode *inode, u64 data_pos,
+ unsigned long hblock_idx, unsigned int level,
+ unsigned int hidx),
+ TP_ARGS(inode, data_pos, hblock_idx, level, hidx),
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __field(u64, data_pos)
+ __field(unsigned long, hblock_idx)
+ __field(unsigned int, level)
+ __field(unsigned int, hidx)
+ ),
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->data_pos = data_pos;
+ __entry->hblock_idx = hblock_idx;
+ __entry->level = level;
+ __entry->hidx = hidx;
+ ),
+ TP_printk("ino %lu data_pos %llu hblock_idx %lu level %u hidx %u",
+ (unsigned long) __entry->ino,
+ __entry->data_pos,
+ __entry->hblock_idx,
+ __entry->level,
+ __entry->hidx)
+);
+
+TRACE_EVENT(fsverity_verify_merkle_block,
+ TP_PROTO(const struct inode *inode, unsigned long hblock_idx,
+ unsigned int level, unsigned int hidx),
+ TP_ARGS(inode, hblock_idx, level, hidx),
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __field(unsigned long, hblock_idx)
+ __field(unsigned int, level)
+ __field(unsigned int, hidx)
+ ),
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->hblock_idx = hblock_idx;
+ __entry->level = level;
+ __entry->hidx = hidx;
+ ),
+ TP_printk("ino %lu hblock_idx %lu level %u hidx %u",
+ (unsigned long) __entry->ino,
+ __entry->hblock_idx,
+ __entry->level,
+ __entry->hidx)
+);
+
+#endif /* _TRACE_FSVERITY_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 66ca526cf786..70b2b661f42c 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -253,6 +253,7 @@ struct file_attr {
#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */
#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */
+#define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */
#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/* the read-only stuff doesn't really belong here, but any other place is