summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- fs/Makefile 2
-rw-r--r-- fs/mount.h 1
-rw-r--r-- fs/namespace.c 82
-rw-r--r-- fs/nullfs.c 70
-rw-r--r-- include/uapi/linux/magic.h 1
-rw-r--r-- init/do_mounts.c 14
-rw-r--r-- init/do_mounts.h 1
7 files changed, 159 insertions, 12 deletions
diff --git a/fs/Makefile b/fs/Makefile
index a04274a3c854..becf133e4791 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -16,7 +16,7 @@ obj-y := open.o read_write.o file_table.o super.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_dirent.o fs_context.o fs_parser.o fsopen.o init.o \
kernel_read_file.o mnt_idmapping.o remap_range.o pidfs.o \
- file_attr.o
+ file_attr.o nullfs.o
obj-$(CONFIG_BUFFER_HEAD) += buffer.o mpage.o
obj-$(CONFIG_PROC_FS) += proc_namespace.o
diff --git a/fs/mount.h b/fs/mount.h
index 2d28ef2a3aed..e0816c11a198 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -5,6 +5,7 @@
#include <linux/ns_common.h>
#include <linux/fs_pin.h>
+extern struct file_system_type nullfs_fs_type;
extern struct list_head notify_list;
struct mnt_namespace {
diff --git a/fs/namespace.c b/fs/namespace.c
index 9261f56ccc81..a44ebb2f1161 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -75,6 +75,17 @@ static int __init initramfs_options_setup(char *str)
__setup("initramfs_options=", initramfs_options_setup);
+/*
+ * Set to true when "nullfs_rootfs" is given on the kernel command line (see
+ * nullfs_rootfs_setup()). Deliberately left without an initialiser: objects
+ * with static storage duration start out zeroed, i.e. false.
+ */
+bool nullfs_rootfs;
+
+/*
+ * Handler registered for the "nullfs_rootfs" boot option.
+ *
+ * @str: presumably the text following the "nullfs_rootfs" prefix on the
+ *       command line - confirm against the __setup() documentation.
+ *
+ * The option is a bare flag. If @str is non-empty (for example
+ * "nullfs_rootfs=1"), the flag is left untouched and 0 is returned.
+ * Otherwise nullfs_rootfs is set to true and 1 is returned.
+ *
+ * NOTE(review): by the usual __setup() convention 1 means "handled" and 0
+ * means "not handled" - confirm.
+ */
+static int __init nullfs_rootfs_setup(char *str)
+{
+	if (*str)
+		return 0;
+
+	nullfs_rootfs = true;
+	return 1;
+}
+__setup("nullfs_rootfs", nullfs_rootfs_setup);
+
static u64 event;
static DEFINE_XARRAY_FLAGS(mnt_id_xa, XA_FLAGS_ALLOC);
static DEFINE_IDA(mnt_group_ida);
@@ -4582,8 +4593,9 @@ int path_pivot_root(struct path *new, struct path *old)
* pointed to by put_old must yield the same directory as new_root. No other
* file system may be mounted on put_old. After all, new_root is a mountpoint.
*
- * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
- * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
+ * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem
+ * unless the kernel was booted with "nullfs_rootfs". See
+ * Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
* in this situation.
*
* Notes:
@@ -5976,24 +5988,72 @@ struct mnt_namespace init_mnt_ns = {
static void __init init_mount_tree(void)
{
- struct vfsmount *mnt;
- struct mount *m;
+ struct vfsmount *mnt, *nullfs_mnt;
+ struct mount *mnt_root;
struct path root;
+ /*
+ * When nullfs is used, we create two mounts:
+ *
+ * (1) nullfs with mount id 1
+ * (2) mutable rootfs with mount id 2
+ *
+ * with (2) mounted on top of (1).
+ */
+ if (nullfs_rootfs) {
+ nullfs_mnt = vfs_kern_mount(&nullfs_fs_type, 0, "nullfs", NULL);
+ if (IS_ERR(nullfs_mnt))
+ panic("VFS: Failed to create nullfs");
+ }
+
mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
if (IS_ERR(mnt))
panic("Can't create rootfs");
- m = real_mount(mnt);
- init_mnt_ns.root = m;
- init_mnt_ns.nr_mounts = 1;
- mnt_add_to_ns(&init_mnt_ns, m);
+ if (nullfs_rootfs) {
+ VFS_WARN_ON_ONCE(real_mount(nullfs_mnt)->mnt_id != 1);
+ VFS_WARN_ON_ONCE(real_mount(mnt)->mnt_id != 2);
+
+ /* The namespace root is the nullfs mnt. */
+ mnt_root = real_mount(nullfs_mnt);
+ init_mnt_ns.root = mnt_root;
+
+ /* Mount mutable rootfs on top of nullfs. */
+ root.mnt = nullfs_mnt;
+ root.dentry = nullfs_mnt->mnt_root;
+
+ LOCK_MOUNT_EXACT(mp, &root);
+ if (unlikely(IS_ERR(mp.parent)))
+ panic("VFS: Failed to mount rootfs on nullfs");
+ scoped_guard(mount_writer)
+ attach_mnt(real_mount(mnt), mp.parent, mp.mp);
+
+ pr_info("VFS: Finished mounting rootfs on nullfs\n");
+ } else {
+ VFS_WARN_ON_ONCE(real_mount(mnt)->mnt_id != 1);
+
+ /* The namespace root is the mutable rootfs. */
+ mnt_root = real_mount(mnt);
+ init_mnt_ns.root = mnt_root;
+ }
+
+ /*
+ * We've dropped all locks here but that's fine. Not only are we
+ * the only task that's running, there is also no other mount
+ * namespace in existence and the initial mount namespace is
+ * completely empty until we add the mounts we just created.
+ */
+ for (struct mount *p = mnt_root; p; p = next_mnt(p, mnt_root)) {
+ mnt_add_to_ns(&init_mnt_ns, p);
+ init_mnt_ns.nr_mounts++;
+ }
+
init_task.nsproxy->mnt_ns = &init_mnt_ns;
get_mnt_ns(&init_mnt_ns);
- root.mnt = mnt;
- root.dentry = mnt->mnt_root;
-
+ /* The root and pwd always point to the mutable rootfs. */
+ root.mnt = mnt;
+ root.dentry = mnt->mnt_root;
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
diff --git a/fs/nullfs.c b/fs/nullfs.c
new file mode 100644
index 000000000000..fdbd3e5d3d71
--- /dev/null
+++ b/fs/nullfs.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+#include <linux/fs/super_types.h>
+#include <linux/fs_context.h>
+#include <linux/magic.h>
+
+/* Superblock operations for nullfs: only .statfs is set, to simple_statfs. */
+static const struct super_operations nullfs_super_operations = {
+ .statfs = simple_statfs,
+};
+
+/*
+ * Fill in the nullfs superblock and attach its root directory.
+ *
+ * @s:  superblock to initialise
+ * @fc: filesystem context; not used by this function
+ *
+ * Returns 0 on success, or -ENOMEM if new_inode() or d_make_root() fails.
+ */
+static int nullfs_fs_fill_super(struct super_block *s, struct fs_context *fc)
+{
+	struct inode *root;
+
+	/* Identity and operations: no export ops, no xattr handlers. */
+	s->s_magic		= NULL_FS_MAGIC;
+	s->s_op			= &nullfs_super_operations;
+	s->s_export_op		= NULL;
+	s->s_xattr		= NULL;
+	s->s_d_flags		= 0;
+
+	/* Size, block and timestamp parameters. */
+	s->s_maxbytes		= MAX_LFS_FILESIZE;
+	s->s_blocksize		= PAGE_SIZE;
+	s->s_blocksize_bits	= PAGE_SHIFT;
+	s->s_time_gran		= 1;
+
+	root = new_inode(s);
+	if (!root)
+		return -ENOMEM;
+
+	/* The root is an empty directory, inode number 1 ... */
+	make_empty_dir_inode(root);
+	simple_inode_init_ts(root);
+	root->i_ino = 1;
+	/* ... and it is flagged immutable. */
+	root->i_flags |= S_IMMUTABLE;
+
+	/*
+	 * NOTE(review): no explicit iput() on failure; this relies on
+	 * d_make_root() disposing of the inode itself - confirm.
+	 */
+	s->s_root = d_make_root(root);
+	return s->s_root ? 0 : -ENOMEM;
+}
+
+
+/*
+ * Obtain the nullfs superblock by delegating to get_tree_single() with
+ * nullfs_fs_fill_super() as the fill callback; the result of that call is
+ * returned unchanged.
+ *
+ * For now this is a single global instance. If needed we can make it
+ * mountable by userspace at which point we will need to make it
+ * multi-instance.
+ */
+static int nullfs_fs_get_tree(struct fs_context *fc)
+{
+ return get_tree_single(fc, nullfs_fs_fill_super);
+}
+
+/* Filesystem context operations for nullfs: only .get_tree is implemented. */
+static const struct fs_context_operations nullfs_fs_context_ops = {
+ .get_tree = nullfs_fs_get_tree,
+};
+
+/*
+ * Prepare a filesystem context for nullfs.
+ *
+ * Installs the nullfs context operations, marks the context global, and
+ * sets sb_flags to exactly SB_NOUSER and s_iflags to exactly
+ * SB_I_NOEXEC | SB_I_NODEV. Both flag fields are plain assignments, so
+ * anything previously stored in them is replaced. Always returns 0.
+ */
+static int nullfs_init_fs_context(struct fs_context *fc)
+{
+	fc->sb_flags	= SB_NOUSER;
+	fc->s_iflags	= SB_I_NOEXEC | SB_I_NODEV;
+	fc->global	= true;
+	fc->ops		= &nullfs_fs_context_ops;
+
+	return 0;
+}
+
+/*
+ * The nullfs filesystem type. Not static: fs/mount.h declares it extern and
+ * init_mount_tree() in fs/namespace.c passes it to vfs_kern_mount().
+ * Contexts are set up by nullfs_init_fs_context(); .kill_sb is
+ * kill_anon_super.
+ */
+struct file_system_type nullfs_fs_type = {
+ .name = "nullfs",
+ .init_fs_context = nullfs_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 638ca21b7a90..4f2da935a76c 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -104,5 +104,6 @@
#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
#define PID_FS_MAGIC 0x50494446 /* "PIDF" */
#define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */
+#define NULL_FS_MAGIC 0x4E554C4C /* "NULL" */
#endif /* __LINUX_MAGIC_H__ */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index defbbf1d55f7..675397c8a7a4 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -492,6 +492,20 @@ void __init prepare_namespace(void)
mount_root(saved_root_name);
out:
devtmpfs_mount();
+
+ if (nullfs_rootfs) {
+ if (init_pivot_root(".", ".")) {
+ pr_err("VFS: Failed to pivot into new rootfs\n");
+ return;
+ }
+ if (init_umount(".", MNT_DETACH)) {
+ pr_err("VFS: Failed to unmount old rootfs\n");
+ return;
+ }
+ pr_info("VFS: Pivoted into new rootfs\n");
+ return;
+ }
+
init_mount(".", "/", NULL, MS_MOVE, NULL);
init_chroot(".");
}
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 6069ea3eb80d..fbfee810aa89 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -15,6 +15,7 @@
void mount_root_generic(char *name, char *pretty_name, int flags);
void mount_root(char *root_device_name);
extern int root_mountflags;
+extern bool nullfs_rootfs;
static inline __init int create_dev(char *name, dev_t dev)
{