From fd1a1dc6f5aa7361e3562790336e116935f8fcfa Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 19 Aug 2020 18:19:49 -0400 Subject: virtiofs: implement FUSE_INIT map_alignment field The device communicates FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING alignment constraints via the FUSE_INIT map_alignment field. Parse this field and ensure our DAX mappings meet the alignment constraints. We don't actually align anything differently since our mappings are already 2MB aligned. Just check the value when the connection is established. If it becomes necessary to honor arbitrary alignments in the future we'll have to adjust how mappings are sized. The upshot of this commit is that we can be confident that mappings will work even when emulating x86 on Power and similar combinations where the host page sizes are different. Signed-off-by: Stefan Hajnoczi Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/fuse.h') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 373cada89815..5b85819e045f 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -313,7 +313,9 @@ struct fuse_file_lock { * FUSE_CACHE_SYMLINKS: cache READLINK responses * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request - * FUSE_MAP_ALIGNMENT: map_alignment field is valid + * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for + * foffset and moffset fields in struct + * fuse_setupmapping_out and fuse_removemapping_one. 
*/ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) -- cgit v1.2.3 From ceec02d4354a317cacce4b053a580ea3c7fc6cdc Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 19 Aug 2020 18:19:50 -0400 Subject: virtiofs: introduce setupmapping/removemapping commands Introduce two new fuse commands to setup/remove memory mappings. This will be used to setup/tear down file mapping in dax window. Signed-off-by: Vivek Goyal Signed-off-by: Peng Tao Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/linux/fuse.h') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5b85819e045f..60a7bfc787ce 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -894,4 +894,33 @@ struct fuse_copy_file_range_in { uint64_t flags; }; +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0) +struct fuse_setupmapping_in { + /* An already open handle */ + uint64_t fh; + /* Offset into the file to start the mapping */ + uint64_t foffset; + /* Length of mapping required */ + uint64_t len; + /* Flags, FUSE_SETUPMAPPING_FLAG_* */ + uint64_t flags; + /* Offset in Memory Window */ + uint64_t moffset; +}; + +struct fuse_removemapping_in { + /* number of fuse_removemapping_one follows */ + uint32_t count; +}; + +struct fuse_removemapping_one { + /* Offset into the dax window start the unmapping */ + uint64_t moffset; + /* Length of mapping required */ + uint64_t len; +}; + +#define FUSE_REMOVEMAPPING_MAX_ENTRY \ + (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From c2d0ad00d948de73c78f05d2b3e5bdfa605035cc Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 19 Aug 2020 18:19:51 -0400 Subject: virtiofs: implement dax read/write operations This patch implements basic DAX support. mmap() is not implemented yet and will come in later patches. This patch looks into implementing read/write. 
We make use of interval tree to keep track of per inode dax mappings. Do not use dax for file extending writes, instead just send WRITE message to daemon (like we do for direct I/O path). This will keep write and i_size change atomic w.r.t crash. Signed-off-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Vivek Goyal Signed-off-by: Liu Bo Signed-off-by: Peng Tao Cc: Dave Chinner Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/fuse.h') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 60a7bfc787ce..8899e4862309 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -895,6 +895,7 @@ struct fuse_copy_file_range_in { }; #define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0) +#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1) struct fuse_setupmapping_in { /* An already open handle */ uint64_t fh; -- cgit v1.2.3 From c6ff213fe5b8696c9539a1b34ff03de9306dfff9 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 8 Sep 2020 18:01:48 +0200 Subject: fuse: add submount support to <uapi/linux/fuse.h> - Add fuse_attr.flags - Add FUSE_ATTR_SUBMOUNT This is a flag for fuse_attr.flags that indicates that the given entry resides on a different filesystem than the parent, and as such should have a different st_dev. - Add FUSE_SUBMOUNTS The client sets this flag if it supports automounting directories. 
Signed-off-by: Max Reitz Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux/fuse.h') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 8899e4862309..7233502ea991 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -172,6 +172,9 @@ * - add FUSE_WRITE_KILL_PRIV flag * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag + * + * 7.32 + * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS */ #ifndef _LINUX_FUSE_H @@ -207,7 +210,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 31 +#define FUSE_KERNEL_MINOR_VERSION 32 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -231,7 +234,7 @@ struct fuse_attr { uint32_t gid; uint32_t rdev; uint32_t blksize; - uint32_t padding; + uint32_t flags; }; struct fuse_kstatfs { @@ -316,6 +319,7 @@ struct fuse_file_lock { * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for * foffset and moffset fields in struct * fuse_setupmapping_out and fuse_removemapping_one. 
+ * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -344,6 +348,7 @@ struct fuse_file_lock { #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) #define FUSE_MAP_ALIGNMENT (1 << 26) +#define FUSE_SUBMOUNTS (1 << 27) /** * CUSE INIT request/reply flags @@ -419,6 +424,13 @@ struct fuse_file_lock { */ #define FUSE_FSYNC_FDATASYNC (1 << 0) +/** + * fuse_attr flags + * + * FUSE_ATTR_SUBMOUNT: Object is a submount root + */ +#define FUSE_ATTR_SUBMOUNT (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ -- cgit v1.2.3 From 63f9909ff602082597849f684655e93336c50b11 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 9 Oct 2020 14:15:07 -0400 Subject: fuse: introduce the notion of FUSE_HANDLE_KILLPRIV_V2 We already have FUSE_HANDLE_KILLPRIV flag that says that file server will remove suid/sgid/caps on truncate/chown/write. But that's little different from what Linux VFS implements. To be consistent with Linux VFS behavior what we want is. - caps are always cleared on chown/write/truncate - suid is always cleared on chown, while for truncate/write it is cleared only if caller does not have CAP_FSETID. - sgid is always cleared on chown, while for truncate/write it is cleared only if caller does not have CAP_FSETID as well as file has group execute permission. As previous flag did not provide above semantics. Implement a V2 of the protocol with above said constraints. Server does not know if caller has CAP_FSETID or not. So for the case of write()/truncate(), client will send information in special flag to indicate whether to kill privileges or not. These changes are in subsequent patches. FUSE_HANDLE_KILLPRIV_V2 relies on WRITE being sent to server to clear suid/sgid/security.capability. But with ->writeback_cache, WRITES are cached in guest. So it is not recommended to use FUSE_HANDLE_KILLPRIV_V2 and writeback_cache together. 
Though it probably might be good enough for lot of use cases. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/fuse.h') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 7233502ea991..29bd2e007947 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -175,6 +175,9 @@ * * 7.32 * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS + * + * 7.33 + * - add FUSE_HANDLE_KILLPRIV_V2 */ #ifndef _LINUX_FUSE_H @@ -210,7 +213,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 32 +#define FUSE_KERNEL_MINOR_VERSION 33 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -320,6 +323,11 @@ struct fuse_file_lock { * foffset and moffset fields in struct * fuse_setupmapping_out and fuse_removemapping_one. * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts + * FUSE_HANDLE_KILLPRIV_V2: fs kills suid/sgid/cap on write/chown/trunc. + * Upon write/truncate suid/sgid is only killed if caller + * does not have CAP_FSETID. Additionally upon + * write/truncate sgid is killed only if file has group + * execute permission. (Same as Linux VFS behavior). 
*/ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -349,6 +357,7 @@ struct fuse_file_lock { #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) #define FUSE_MAP_ALIGNMENT (1 << 26) #define FUSE_SUBMOUNTS (1 << 27) +#define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) /** * CUSE INIT request/reply flags -- cgit v1.2.3 From 10c52c84e3f4872689a64ac7666b34d67e630691 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 Nov 2020 17:22:32 +0100 Subject: fuse: rename FUSE_WRITE_KILL_PRIV to FUSE_WRITE_KILL_SUIDGID Kernel has: ATTR_KILL_PRIV -> clear "security.capability" ATTR_KILL_SUID -> clear S_ISUID ATTR_KILL_SGID -> clear S_ISGID if executable Fuse has: FUSE_WRITE_KILL_PRIV -> clear S_ISUID and S_ISGID if executable So FUSE_WRITE_KILL_PRIV implies the complement of ATTR_KILL_PRIV, which is somewhat confusing. Also PRIV implies all privileges, including "security.capability". Change the name to FUSE_WRITE_KILL_SUIDGID and make FUSE_WRITE_KILL_PRIV an alias to preserve API compatibility. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux/fuse.h') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 29bd2e007947..2623c75b94a5 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -177,7 +177,7 @@ * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS * * 7.33 - * - add FUSE_HANDLE_KILLPRIV_V2 + * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID */ #ifndef _LINUX_FUSE_H @@ -387,11 +387,14 @@ struct fuse_file_lock { * * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed * FUSE_WRITE_LOCKOWNER: lock_owner field is valid - * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits + * FUSE_WRITE_KILL_SUIDGID: kill suid and sgid bits */ #define FUSE_WRITE_CACHE (1 << 0) #define FUSE_WRITE_LOCKOWNER (1 << 1) -#define FUSE_WRITE_KILL_PRIV (1 << 2) +#define FUSE_WRITE_KILL_SUIDGID (1 << 2) + +/* 
Obsolete alias; this flag implies killing suid/sgid only. */ +#define FUSE_WRITE_KILL_PRIV FUSE_WRITE_KILL_SUIDGID /** * Read flags -- cgit v1.2.3 From 3179216135ec09825d7c7875580951a6e69dc5df Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 9 Oct 2020 14:15:09 -0400 Subject: fuse: setattr should set FATTR_KILL_SUIDGID If fc->handle_killpriv_v2 is enabled, we expect file server to clear suid/sgid/security.capability upon chown/truncate/write as appropriate. Upon truncate (ATTR_SIZE), suid/sgid are cleared only if caller does not have CAP_FSETID. File server does not know whether caller has CAP_FSETID or not. Hence set FATTR_KILL_SUIDGID upon truncate to let file server know that caller does not have CAP_FSETID and it should kill suid/sgid as appropriate. On chown (ATTR_UID/ATTR_GID) suid/sgid need to be cleared irrespective of capabilities of calling process, so set FATTR_KILL_SUIDGID unconditionally in that case. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/fuse.h') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2623c75b94a5..9eb96e0564be 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -177,7 +177,7 @@ * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS * * 7.33 - * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID + * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID */ #ifndef _LINUX_FUSE_H @@ -274,6 +274,7 @@ struct fuse_file_lock { #define FATTR_MTIME_NOW (1 << 8) #define FATTR_LOCKOWNER (1 << 9) #define FATTR_CTIME (1 << 10) +#define FATTR_KILL_SUIDGID (1 << 11) /** * Flags returned by the OPEN request -- cgit v1.2.3 From 643a666a89c358ef588d2b3ef9f2dc1efc421e61 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 9 Oct 2020 14:15:11 -0400 Subject: fuse: add a flag FUSE_OPEN_KILL_SUIDGID for open() request With 
FUSE_HANDLE_KILLPRIV_V2 support, server will need to kill suid/sgid/ security.capability on open(O_TRUNC), if server supports FUSE_ATOMIC_O_TRUNC. But server needs to kill suid/sgid only if caller does not have CAP_FSETID. Given server does not have this information, client needs to send this info to server. So add a flag FUSE_OPEN_KILL_SUIDGID to fuse_open_in request which tells server to kill suid/sgid (only if group execute is set). This flag is added to the FUSE_OPEN request, as well as the FUSE_CREATE request if the create was non-exclusive, since that might result in an existing file being opened/truncated. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux/fuse.h') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 9eb96e0564be..98ca64d1beb6 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -178,6 +178,7 @@ * * 7.33 * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID + * - add FUSE_OPEN_KILL_SUIDGID */ #ifndef _LINUX_FUSE_H @@ -444,6 +445,12 @@ struct fuse_file_lock { */ #define FUSE_ATTR_SUBMOUNT (1 << 0) +/** + * Open flags + * FUSE_OPEN_KILL_SUIDGID: Kill suid and sgid if executable + */ +#define FUSE_OPEN_KILL_SUIDGID (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -605,14 +612,14 @@ struct fuse_setattr_in { struct fuse_open_in { uint32_t flags; - uint32_t unused; + uint32_t open_flags; /* FUSE_OPEN_... */ }; struct fuse_create_in { uint32_t flags; uint32_t mode; uint32_t umask; - uint32_t padding; + uint32_t open_flags; /* FUSE_OPEN_... */ }; struct fuse_open_out { -- cgit v1.2.3