From 4f8d37020e1fd0bf6ee9381ba918135ef3712efd Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 28 Oct 2022 14:25:21 +0200
Subject: fuse: add "expire only" mode to FUSE_NOTIFY_INVAL_ENTRY

Add a flag to entry expiration that lets the filesystem expire a dentry
without kicking it out from the cache immediately.

This makes a difference for overmounted dentries, where plain invalidation
would detach all submounts before dropping the dentry from the cache.  If
only expiry is set on the dentry, then any overmounts are left alone and
until ->d_revalidate() is called.

Note: ->d_revalidate() is not called for the case of following a submount,
so invalidation will only be triggered for the non-overmounted case.  The
dentry could also be mounted in a different mount instance, in which case
any submounts will still be detached.

Suggested-by: Jakob Blomer <jblomer@cern.ch>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 76ee8f9e024a..39cfb343faa8 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -197,6 +197,9 @@
  *
  *  7.37
  *  - add FUSE_TMPFILE
+ *
+ *  7.38
+ *  - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry
  */
 
 #ifndef _LINUX_FUSE_H
@@ -232,7 +235,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 37
+#define FUSE_KERNEL_MINOR_VERSION 38
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -491,6 +494,12 @@ struct fuse_file_lock {
  */
 #define FUSE_SETXATTR_ACL_KILL_SGID	(1 << 0)
 
+/**
+ * notify_inval_entry flags
+ * FUSE_EXPIRE_ONLY
+ */
+#define FUSE_EXPIRE_ONLY		(1 << 0)
+
 enum fuse_opcode {
 	FUSE_LOOKUP		= 1,
 	FUSE_FORGET		= 2,  /* no reply */
@@ -919,7 +928,7 @@ struct fuse_notify_inval_inode_out {
 struct fuse_notify_inval_entry_out {
 	uint64_t	parent;
 	uint32_t	namelen;
-	uint32_t	padding;
+	uint32_t	flags;
 };
 
 struct fuse_notify_delete_out {
-- 
cgit v1.2.3


From 153524053bbb0d27bb2e0be36d1b46862e9ce74c Mon Sep 17 00:00:00 2001
From: Dharmendra Singh <dsingh@ddn.com>
Date: Fri, 17 Jun 2022 12:40:27 +0530
Subject: fuse: allow non-extending parallel direct writes on the same file

In general, as of now, in FUSE, direct writes on the same file are
serialized over inode lock i.e we hold inode lock for the full duration of
the write request.  I could not find in fuse code and git history a comment
which clearly explains why this exclusive lock is taken for direct writes.
Following might be the reasons for acquiring an exclusive lock but not be
limited to

 1) Our guess is some USER space fuse implementations might be relying on
    this lock for serialization.

 2) The lock protects against file read/write size races.

 3) Ruling out any issues arising from partial write failures.

This patch relaxes the exclusive lock for direct non-extending writes only.
File size extending writes might not need the lock either, but we are not
entirely sure if there is a risk to introduce any kind of regression.
Furthermore, benchmarking with fio does not show a difference between patch
versions that take on file size extension a) an exclusive lock and b) a
shared lock.

A possible example of an issue with i_size extending writes are write error
cases.  Some writes might succeed and others might fail for file system
internal reasons - for example ENOSPACE.  With parallel file size extending
writes it _might_ be difficult to revert the action of the failing write,
especially to restore the right i_size.

With these changes, we allow non-extending parallel direct writes on the
same file with the help of a flag called FOPEN_PARALLEL_DIRECT_WRITES.  If
this flag is set on the file (flag is passed from libfuse to fuse kernel as
part of file open/create), we do not take exclusive lock anymore, but
instead use a shared lock that allows non-extending writes to run in
parallel.  FUSE implementations which rely on this inode lock for
serialization can continue to do so and serialized direct writes are still
the default.  Implementations that do not do write serialization need to be
updated and need to set the FOPEN_PARALLEL_DIRECT_WRITES flag in their file
open/create reply.

On patch review there were concerns that network file systems (or vfs
multiple mounts of the same file system) might have issues with parallel
writes.  We believe this is not the case, as this is just a local lock,
which network file systems could not rely on anyway.  I.e. this lock is
just for local consistency.

Signed-off-by: Dharmendra Singh <dsingh@ddn.com>
Signed-off-by: Bernd Schubert <bschubert@ddn.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 39cfb343faa8..e3c54109bae9 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -200,6 +200,7 @@
  *
  *  7.38
  *  - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry
+ *  - add FOPEN_PARALLEL_DIRECT_WRITES
  */
 
 #ifndef _LINUX_FUSE_H
@@ -307,6 +308,7 @@ struct fuse_file_lock {
  * FOPEN_CACHE_DIR: allow caching this directory
  * FOPEN_STREAM: the file is stream-like (no file position at all)
  * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
+ * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
@@ -314,6 +316,7 @@ struct fuse_file_lock {
 #define FOPEN_CACHE_DIR		(1 << 3)
 #define FOPEN_STREAM		(1 << 4)
 #define FOPEN_NOFLUSH		(1 << 5)
+#define FOPEN_PARALLEL_DIRECT_WRITES	(1 << 6)
 
 /**
  * INIT request/reply flags
-- 
cgit v1.2.3