From 88bc7d5097a11d9bdcf08ecf85c81ba998353437 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Tue, 21 Aug 2018 14:36:31 +0200 Subject: fuse: add support for copy_file_range() There are several FUSE filesystems that can implement server-side copy or other efficient copy/duplication/clone methods. The copy_file_range() syscall is the standard interface that users have access to while not depending on external libraries that bypass FUSE. Signed-off-by: Niels de Vos Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 106 ++++++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 92fa24c24c92..d27b50a44f74 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -116,6 +116,9 @@ * * 7.27 * - add FUSE_ABORT_ERROR + * + * 7.28 + * - add FUSE_COPY_FILE_RANGE */ #ifndef _LINUX_FUSE_H @@ -151,7 +154,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 27 +#define FUSE_KERNEL_MINOR_VERSION 28 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -337,53 +340,54 @@ struct fuse_file_lock { #define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) enum fuse_opcode { - FUSE_LOOKUP = 1, - FUSE_FORGET = 2, /* no reply */ - FUSE_GETATTR = 3, - FUSE_SETATTR = 4, - FUSE_READLINK = 5, - FUSE_SYMLINK = 6, - FUSE_MKNOD = 8, - FUSE_MKDIR = 9, - FUSE_UNLINK = 10, - FUSE_RMDIR = 11, - FUSE_RENAME = 12, - FUSE_LINK = 13, - FUSE_OPEN = 14, - FUSE_READ = 15, - FUSE_WRITE = 16, - FUSE_STATFS = 17, - FUSE_RELEASE = 18, - FUSE_FSYNC = 20, - FUSE_SETXATTR = 21, - FUSE_GETXATTR = 22, - FUSE_LISTXATTR = 23, - FUSE_REMOVEXATTR = 24, - FUSE_FLUSH = 25, - FUSE_INIT = 26, - FUSE_OPENDIR = 27, - FUSE_READDIR = 28, - FUSE_RELEASEDIR = 29, - FUSE_FSYNCDIR = 30, - FUSE_GETLK = 31, - FUSE_SETLK = 32, - FUSE_SETLKW = 33, - FUSE_ACCESS = 34, - FUSE_CREATE = 35, - FUSE_INTERRUPT = 36, - FUSE_BMAP = 37, - FUSE_DESTROY = 38, - FUSE_IOCTL = 39, - FUSE_POLL = 40, - FUSE_NOTIFY_REPLY = 41, - FUSE_BATCH_FORGET = 42, - FUSE_FALLOCATE = 43, - FUSE_READDIRPLUS = 44, - FUSE_RENAME2 = 45, - FUSE_LSEEK = 46, + FUSE_LOOKUP = 1, + FUSE_FORGET = 2, /* no reply */ + FUSE_GETATTR = 3, + FUSE_SETATTR = 4, + FUSE_READLINK = 5, + FUSE_SYMLINK = 6, + FUSE_MKNOD = 8, + FUSE_MKDIR = 9, + FUSE_UNLINK = 10, + FUSE_RMDIR = 11, + FUSE_RENAME = 12, + FUSE_LINK = 13, + FUSE_OPEN = 14, + FUSE_READ = 15, + FUSE_WRITE = 16, + FUSE_STATFS = 17, + FUSE_RELEASE = 18, + FUSE_FSYNC = 20, + FUSE_SETXATTR = 21, + FUSE_GETXATTR = 22, + FUSE_LISTXATTR = 23, + FUSE_REMOVEXATTR = 24, + FUSE_FLUSH = 25, + FUSE_INIT = 26, + FUSE_OPENDIR = 27, + FUSE_READDIR = 28, + FUSE_RELEASEDIR = 29, + FUSE_FSYNCDIR = 30, + FUSE_GETLK = 31, + FUSE_SETLK = 32, + FUSE_SETLKW = 33, + FUSE_ACCESS = 34, + FUSE_CREATE = 35, + FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, + FUSE_DESTROY = 38, + FUSE_IOCTL = 39, + FUSE_POLL = 40, + FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, + FUSE_RENAME2 = 45, + FUSE_LSEEK = 46, + FUSE_COPY_FILE_RANGE = 47, /* CUSE specific operations */ - CUSE_INIT = 4096, + CUSE_INIT = 4096, }; enum fuse_notify_code { @@ -792,4 +796,14 @@ struct fuse_lseek_out { uint64_t offset; }; +struct fuse_copy_file_range_in { + uint64_t fh_in; + uint64_t off_in; + uint64_t nodeid_out; + uint64_t fh_out; + uint64_t off_out; + uint64_t len; + uint64_t flags; +}; + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From 6433b8998a21dc597002731c4ceb4144e856edc4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Sep 2018 16:43:23 +0200 Subject: fuse: add FOPEN_CACHE_DIR Add flag returned by OPENDIR request to allow kernel to cache directory contents in page cache. The effect of FOPEN_CACHE_DIR is twofold: a) if not already cached, it writes entries into the cache b) if already cached, it allows reading entries from the cache The FOPEN_KEEP_CACHE has the same effect as on regular files: unless this flag is given the cache is cleared upon completion of open. So FOPEN_KEEP_CACHE and FOPEN_KEEP_CACHE flags should be used together to make use of the directory caching facility introduced in the following patches. The FUSE_AUTO_INVAL_DATA flag returned in INIT reply also has the same affect on the directory cache as it has on data cache for regular files. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d27b50a44f74..31a504f1ee60 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -119,6 +119,7 @@ * * 7.28 * - add FUSE_COPY_FILE_RANGE + * - add FOPEN_CACHE_DIR */ #ifndef _LINUX_FUSE_H @@ -222,10 +223,12 @@ struct fuse_file_lock { * FOPEN_DIRECT_IO: bypass page cache for this open file * FOPEN_KEEP_CACHE: don't invalidate the data cache on open * FOPEN_NONSEEKABLE: the file is not seekable + * FOPEN_CACHE_DIR: allow caching this directory */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) +#define FOPEN_CACHE_DIR (1 << 3) /** * INIT request/reply flags -- cgit v1.2.3 From 5da784cce4308ae10a79e3c8c41b13fb9568e4e0 Mon Sep 17 00:00:00 2001 From: Constantine Shulyupin Date: Thu, 6 Sep 2018 15:37:06 +0300 Subject: fuse: add max_pages to init_out Replace FUSE_MAX_PAGES_PER_REQ with the configurable parameter max_pages to improve performance. Old RFC with detailed description of the problem and many fixes by Mitsuo Hayasaka (mitsuo.hayasaka.hu@hitachi.com): - https://lkml.org/lkml/2012/7/5/136 We've encountered performance degradation and fixed it on a big and complex virtual environment. Environment to reproduce degradation and improvement: 1. Add lag to user mode FUSE Add nanosleep(&(struct timespec){ 0, 1000 }, NULL); to xmp_write_buf in passthrough_fh.c 2. patch UM fuse with configurable max_pages parameter. The patch will be provided latter. 3. run test script and perform test on tmpfs fuse_test() { cd /tmp mkdir -p fusemnt passthrough_fh -o max_pages=$1 /tmp/fusemnt grep fuse /proc/self/mounts dd conv=fdatasync oflag=dsync if=/dev/zero of=fusemnt/tmp/tmp \ count=1K bs=1M 2>&1 | grep -v records rm fusemnt/tmp/tmp killall passthrough_fh } Test results: passthrough_fh /tmp/fusemnt fuse.passthrough_fh \ rw,nosuid,nodev,relatime,user_id=0,group_id=0 0 0 1073741824 bytes (1.1 GB) copied, 1.73867 s, 618 MB/s passthrough_fh /tmp/fusemnt fuse.passthrough_fh \ rw,nosuid,nodev,relatime,user_id=0,group_id=0,max_pages=256 0 0 1073741824 bytes (1.1 GB) copied, 1.15643 s, 928 MB/s Obviously with bigger lag the difference between 'before' and 'after' will be more significant. Mitsuo Hayasaka, in 2012 (https://lkml.org/lkml/2012/7/5/136), observed improvement from 400-550 to 520-740. Signed-off-by: Constantine Shulyupin Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 31a504f1ee60..76f46f159992 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -120,6 +120,7 @@ * 7.28 * - add FUSE_COPY_FILE_RANGE * - add FOPEN_CACHE_DIR + * - add FUSE_MAX_PAGES, add max_pages to init_out */ #ifndef _LINUX_FUSE_H @@ -255,6 +256,7 @@ struct fuse_file_lock { * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc * FUSE_POSIX_ACL: filesystem supports posix acls * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED + * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -278,6 +280,7 @@ struct fuse_file_lock { #define FUSE_HANDLE_KILLPRIV (1 << 19) #define FUSE_POSIX_ACL (1 << 20) #define FUSE_ABORT_ERROR (1 << 21) +#define FUSE_MAX_PAGES (1 << 22) /** * CUSE INIT request/reply flags @@ -617,7 +620,9 @@ struct fuse_init_out { uint16_t congestion_threshold; uint32_t max_write; uint32_t time_gran; - uint32_t unused[9]; + uint16_t max_pages; + uint16_t padding; + uint32_t unused[8]; }; #define CUSE_INIT_INFO_MAX 4096 -- cgit v1.2.3 From 18127429a854e7607b859484880b8e26cee9ddab Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 15 Oct 2018 15:43:06 +0200 Subject: bitops: protect variables in set_mask_bits() macro Unprotected naming of local variables within the set_mask_bits() can easily lead to using the wrong scope. Noticed this when "set_mask_bits(&foo->bar, 0, mask)" behaved as no-op. Signed-off-by: Miklos Szeredi Fixes: 00a1a053ebe5 ("ext4: atomically set inode->i_flags in ext4_set_inode_flags()") Cc: Theodore Ts'o --- include/linux/bitops.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 7ddb1349394d..d18ee0e63c32 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -236,17 +236,17 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, #ifdef __KERNEL__ #ifndef set_mask_bits -#define set_mask_bits(ptr, _mask, _bits) \ +#define set_mask_bits(ptr, mask, bits) \ ({ \ - const typeof(*ptr) mask = (_mask), bits = (_bits); \ - typeof(*ptr) old, new; \ + const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ + typeof(*(ptr)) old__, new__; \ \ do { \ - old = READ_ONCE(*ptr); \ - new = (old & ~mask) | bits; \ - } while (cmpxchg(ptr, old, new) != old); \ + old__ = READ_ONCE(*(ptr)); \ + new__ = (old__ & ~mask__) | bits__; \ + } while (cmpxchg(ptr, old__, new__) != old__); \ \ - new; \ + new__; \ }) #endif -- cgit v1.2.3 From edfa87281f4fa1b78a21f6db999935a2faa2f6b8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 15 Oct 2018 15:43:06 +0200 Subject: bitops: protect variables in bit_clear_unless() macro Unprotected naming of local variables within bit_clear_unless() can easily lead to using the wrong scope. Noticed this by code review after having hit this issue in set_mask_bits() Signed-off-by: Miklos Szeredi Fixes: 85ad1d13ee9b ("md: set MD_CHANGE_PENDING in a atomic region") Cc: Guoqing Jiang --- include/linux/bitops.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index d18ee0e63c32..705f7c442691 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -251,18 +251,18 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, #endif #ifndef bit_clear_unless -#define bit_clear_unless(ptr, _clear, _test) \ +#define bit_clear_unless(ptr, clear, test) \ ({ \ - const typeof(*ptr) clear = (_clear), test = (_test); \ - typeof(*ptr) old, new; \ + const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ + typeof(*(ptr)) old__, new__; \ \ do { \ - old = READ_ONCE(*ptr); \ - new = old & ~clear; \ - } while (!(old & test) && \ - cmpxchg(ptr, old, new) != old); \ + old__ = READ_ONCE(*(ptr)); \ + new__ = old__ & ~clear__; \ + } while (!(old__ & test__) && \ + cmpxchg(ptr, old__, new__) != old__); \ \ - !(old & test); \ + !(old__ & test__); \ }) #endif -- cgit v1.2.3 From 5571f1e65486be025f73fa6aa30fb03725d362a2 Mon Sep 17 00:00:00 2001 From: Dan Schatzberg Date: Thu, 11 Oct 2018 08:17:00 -0700 Subject: fuse: enable caching of symlinks FUSE file reads are cached in the page cache, but symlink reads are not. This patch enables FUSE READLINK operations to be cached which can improve performance of some FUSE workloads. In particular, I'm working on a FUSE filesystem for access to source code and discovered that about a 10% improvement to build times is achieved with this patch (there are a lot of symlinks in the source tree). Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 76f46f159992..b4967d48bfda 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -121,6 +121,7 @@ * - add FUSE_COPY_FILE_RANGE * - add FOPEN_CACHE_DIR * - add FUSE_MAX_PAGES, add max_pages to init_out + * - add FUSE_CACHE_SYMLINKS */ #ifndef _LINUX_FUSE_H @@ -257,6 +258,7 @@ struct fuse_file_lock { * FUSE_POSIX_ACL: filesystem supports posix acls * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages + * FUSE_CACHE_SYMLINKS: cache READLINK responses */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -281,6 +283,7 @@ struct fuse_file_lock { #define FUSE_POSIX_ACL (1 << 20) #define FUSE_ABORT_ERROR (1 << 21) #define FUSE_MAX_PAGES (1 << 22) +#define FUSE_CACHE_SYMLINKS (1 << 23) /** * CUSE INIT request/reply flags -- cgit v1.2.3