From daab110e47f8d7aa6da66923e3ac1a8dbd2b2a72 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:14 -0500 Subject: nfsd: add a new EXPORT_OP_NOWCC flag to struct export_operations With NFSv3 nfsd will always attempt to send along WCC data to the client. This generally involves saving off the in-core inode information prior to doing the operation on the given filehandle, and then issuing a vfs_getattr to it after the op. Some filesystems (particularly clustered or networked ones) have an expensive ->getattr inode operation. Atomicity is also often difficult or impossible to guarantee on such filesystems. For those, we're best off not trying to provide WCC information to the client at all, and to simply allow it to poll for that information as needed with a GETATTR RPC. This patch adds a new flags field to struct export_operations, and defines a new EXPORT_OP_NOWCC flag that filesystems can use to indicate that nfsd should not attempt to provide WCC info in NFSv3 replies. It also adds a blurb about the new flags field and flag to the exporting documentation. The server will also now skip collecting this information for NFSv2 as well, since that info is never used there anyway. Note that this patch does not add this flag to any filesystem export_operations structures. This was originally developed to allow reexporting nfs via nfsd. Other filesystems may want to consider enabling this flag too. It's hard to tell however which ones have export operations to enable export via knfsd and which ones mostly rely on them for open-by-filehandle support, so I'm leaving that up to the individual maintainers to decide. I am cc'ing the relevant lists for those filesystems that I think may want to consider adding this though. Cc: HPDD-discuss@lists.01.org Cc: ceph-devel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: fuse-devel@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/exportfs.h') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3ceb72b67a7a..e7de0103a32e 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,6 +213,8 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); +#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ + unsigned long flags; }; extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, -- cgit v1.2.3 From ba5e8187c55555519ae0b63c0fb681391bc42af9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:15 -0500 Subject: nfsd: allow filesystems to opt out of subtree checking When we start allowing NFS to be reexported, then we have some problems when it comes to subtree checking. In principle, we could allow it, but it would mean encoding parent info in the filehandles and there may not be enough space for that in a NFSv3 filehandle. To enforce this at export upcall time, we add a new export_ops flag that declares the filesystem ineligible for subtree checking. Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/exportfs.h') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index e7de0103a32e..2fcbab0f6b61 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -214,6 +214,7 @@ struct export_operations { int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); #define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ +#define EXPORT_OP_NOSUBTREECHK (0x2) /* Subtree checking is not supported! */ unsigned long flags; }; -- cgit v1.2.3 From 7f84b488f9add1d5cca3e6197c95914c7bd3c1cf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:16 -0500 Subject: nfsd: close cached files prior to a REMOVE or RENAME that would replace target It's not uncommon for some workloads to do a bunch of I/O to a file and delete it just afterward. If knfsd has a cached open file however, then the file may still be open when the dentry is unlinked. If the underlying filesystem is nfs, then that could trigger it to do a sillyrename. On a REMOVE or RENAME scan the nfsd_file cache for open files that correspond to the inode, and proactively unhash and put their references. This should prevent any delete-on-last-close activity from occurring, solely due to knfsd's open file cache. This must be done synchronously though so we use the variants that call flush_delayed_fput. There are deadlock possibilities if you call flush_delayed_fput while holding locks, however. In the case of nfsd_rename, we don't even do the lookups of the dentries to be renamed until we've locked for rename. Once we've figured out what the target dentry is for a rename, check to see whether there are cached open files associated with it. If there are, then unwind all of the locking, close them all, and then reattempt the rename. None of this is really necessary for "typical" filesystems though. It's mostly of use for NFS, so declare a new export op flag and use that to determine whether to close the files beforehand. Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/exportfs.h') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 2fcbab0f6b61..d829403ffd3b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,8 +213,9 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); -#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ -#define EXPORT_OP_NOSUBTREECHK (0x2) /* Subtree checking is not supported! */ +#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ +#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ +#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ unsigned long flags; }; -- cgit v1.2.3 From d045465fc6cbfa4acfb5a7d817a7c1a57a078109 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:17 -0500 Subject: exportfs: Add a function to return the raw output from fh_to_dentry() In order to allow nfsd to accept return values that are not acceptable to overlayfs and others, add a new function. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/exportfs.h') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d829403ffd3b..846df3c96730 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -223,6 +223,11 @@ extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent); extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int connectable); +extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, + struct fid *fid, int fh_len, + int fileid_type, + int (*acceptable)(void *, struct dentry *), + void *context); extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), void *context); -- cgit v1.2.3 From 01cbf3853959feec40ec9b9a399e12a021cd4d81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:19 -0500 Subject: nfsd: Set PF_LOCAL_THROTTLE on local filesystems only Don't set PF_LOCAL_THROTTLE on remote filesystems like NFS, since they aren't expected to ever be subject to double buffering. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/exportfs.h') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 846df3c96730..d93e8a6737bb 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -216,6 +216,7 @@ struct export_operations { #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ +#define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ unsigned long flags; }; -- cgit v1.2.3 From 716a8bc7f706eeef80ab42c99d9f210eda845c81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 23:14:27 -0500 Subject: nfsd: Record NFSv4 pre/post-op attributes as non-atomic For the case of NFSv4, specify to the client that the pre/post-op attributes were not recorded atomically with the main operation. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/exportfs.h') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d93e8a6737bb..9f4d4bcbf251 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -217,6 +217,9 @@ struct export_operations { #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ #define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ +#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply + atomic attribute updates + */ unsigned long flags; }; -- cgit v1.2.3