From 7e40145eb111a5192e6d819f764db9d6828d1abb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Jun 2011 19:12:17 -0400 Subject: ->permission() sanitizing: don't pass flags to ->check_acl() not used in the instances anymore. Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 57d827d6071d..9b6ed7c9f34f 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -52,7 +52,7 @@ ata *); void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, unsigned int); - int (*check_acl)(struct inode *, int, unsigned int); + int (*check_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 88b9f5519af9..8b4c8e04d879 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -334,7 +334,7 @@ struct inode_operations { void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, unsigned int); - int (*check_acl)(struct inode *, int, unsigned int); + int (*check_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); -- cgit v1.2.3 From 10556cb21a0d0b24d95f00ea6df16f599a3345b2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Jun 2011 19:28:19 -0400 Subject: ->permission() sanitizing: don't pass flags to ->permission() not used by the instances anymore. Signed-off-by: Al Viro --- Documentation/filesystems/vfs.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 8b4c8e04d879..d56151fe77dc 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -333,7 +333,7 @@ struct inode_operations { void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, unsigned int); + int (*permission) (struct inode *, int); int (*check_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); @@ -423,7 +423,7 @@ otherwise noted. permission: called by the VFS to check for access rights on a POSIX-like filesystem. - May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk + May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk mode, the filesystem must check the permission without blocking or storing to the inode. -- cgit v1.2.3 From 76fe3276be26cff2e609cdcfbc1265cf1dd72b2c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Jun 2011 21:56:31 -0400 Subject: ->permission() sanitizing: document API changes Signed-off-by: Al Viro --- Documentation/filesystems/porting | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 6e29954851a2..0eeb3954dea3 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -398,12 +398,16 @@ Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set, so the i_size should not change when hole punching, even when puching the end of a file off. --- -[mandatory] - -- [mandatory] ->get_sb() is gone. Switch to use of ->mount(). Typically it's just a matter of switching from calling get_sb_... to mount_... and changing the function type. If you were doing it manually, just switch from setting ->mnt_root to some pointer to returning that pointer. On errors return ERR_PTR(...). + +-- +[mandatory] + ->permission(), generic_permission() and ->check_acl() have lost flags +argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask. + generic_permission() has also lost the check_acl argument; if you want +non-NULL to be used for that inode, put it into ->i_op->check_acl. -- cgit v1.2.3 From 0e1fdafd93980eac62e778798549ce0f6073905c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jul 2011 14:14:44 +1000 Subject: superblock: add filesystem shrinker operations Now we have a per-superblock shrinker implementation, we can add a filesystem specific callout to it to allow filesystem internal caches to be shrunk by the superblock shrinker. Rather than perpetuate the multipurpose shrinker callback API (i.e. nr_to_scan == 0 meaning "tell me how many objects freeable in the cache), two operations will be added. The first will return the number of objects that are freeable, the second is the actual shrinker call. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- Documentation/filesystems/vfs.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index d56151fe77dc..fd24f34f120f 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -229,6 +229,8 @@ struct super_operations { ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); + int (*nr_cached_objects)(struct super_block *); + void (*free_cached_objects)(struct super_block *, int); }; All methods are called without any locks being held, unless otherwise @@ -301,6 +303,20 @@ or bottom half). quota_write: called by the VFS to write to filesystem quota file. + nr_cached_objects: called by the sb cache shrinking function for the + filesystem to return the number of freeable cached objects it contains. + Optional. + + free_cache_objects: called by the sb cache shrinking function for the + filesystem to scan the number of objects indicated to try to free them. + Optional, but any filesystem implementing this method needs to also + implement ->nr_cached_objects for it to be called correctly. + + We can't do anything with any errors that the filesystem might + encountered, hence the void return type. This will never be called if + the VM is trying to reclaim under GFP_NOFS conditions, hence this + method does not need to handle that situation itself. + Whoever sets up the inode is responsible for filling in the "i_op" field. This is a pointer to a "struct inode_operations" which describes the methods that can be performed on individual inodes. -- cgit v1.2.3 From 8ab47664d51a69ea79fe70bb07ca80664f74f76b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jul 2011 14:14:45 +1000 Subject: vfs: increase shrinker batch size Now that the per-sb shrinker is responsible for shrinking 2 or more caches, increase the batch size to keep econmies of scale for shrinking each cache. Increase the shrinker batch size to 1024 objects. To allow for a large increase in batch size, add a conditional reschedule to prune_icache_sb() so that we don't hold the LRU spin lock for too long. This mirrors the behaviour of the __shrink_dcache_sb(), and allows us to increase the batch size without needing to worry about problems caused by long lock hold times. To ensure that filesystems using the per-sb shrinker callouts don't cause problems, document that the object freeing method must reschedule appropriately inside loops. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- Documentation/filesystems/vfs.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index fd24f34f120f..6bf85b78cfea 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -317,6 +317,12 @@ or bottom half). the VM is trying to reclaim under GFP_NOFS conditions, hence this method does not need to handle that situation itself. + Implementations must include conditional reschedule calls inside any + scanning loop that is done. This allows the VFS to determine + appropriate scan batch sizes without having to worry about whether + implementations will cause holdoff problems due to large scan batch + sizes. + Whoever sets up the inode is responsible for filling in the "i_op" field. This is a pointer to a "struct inode_operations" which describes the methods that can be performed on individual inodes. -- cgit v1.2.3 From 982d816581eeeacfe5b2b7c6d47d13a157616eff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 18 Jul 2011 13:21:35 -0400 Subject: fs: add SEEK_HOLE and SEEK_DATA flags This just gets us ready to support the SEEK_HOLE and SEEK_DATA flags. Turns out using fiemap in things like cp cause more problems than it solves, so lets try and give userspace an interface that doesn't suck. We need to match solaris here, and the definitions are *o* If /whence/ is SEEK_HOLE, the offset of the start of the next hole greater than or equal to the supplied offset is returned. The definition of a hole is provided near the end of the DESCRIPTION. *o* If /whence/ is SEEK_DATA, the file pointer is set to the start of the next non-hole file region greater than or equal to the supplied offset. So in the generic case the entire file is data and there is a virtual hole at the end. That means we will just return i_size for SEEK_HOLE and will return the same offset for SEEK_DATA. This is how Solaris does it so we have to do it the same way. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Al Viro --- Documentation/filesystems/porting | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 0eeb3954dea3..6b96773e27cb 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -411,3 +411,13 @@ to some pointer to returning that pointer. On errors return ERR_PTR(...). argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask. generic_permission() has also lost the check_acl argument; if you want non-NULL to be used for that inode, put it into ->i_op->check_acl. + +-- +[mandatory] + If you implement your own ->llseek() you must handle SEEK_HOLE and +SEEK_DATA. You can hanle this by returning -EINVAL, but it would be nicer to +support it in some way. The generic handler assumes that the entire file is +data and there is a virtual hole at the end of the file. So if the provided +offset is less than i_size and SEEK_DATA is specified, return the same offset. +If the above is true for the offset and you are given SEEK_HOLE, return the end +of the file. If the offset is i_size or greater return -ENXIO in either case. -- cgit v1.2.3 From 02c24a82187d5a628c68edfe71ae60dc135cd178 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 16 Jul 2011 20:44:56 -0400 Subject: fs: push i_mutex and filemap_write_and_wait down into ->fsync() handlers Btrfs needs to be able to control how filemap_write_and_wait_range() is called in fsync to make it less of a painful operation, so push down taking i_mutex and the calling of filemap_write_and_wait() down into the ->fsync() handlers. Some file systems can drop taking the i_mutex altogether it seems, like ext3 and ocfs2. For correctness sake I just pushed everything down in all cases to make sure that we keep the current behavior the same for everybody, and then each individual fs maintainer can make up their mind about what to do from there. Thanks, Acked-by: Jan Kara Signed-off-by: Josef Bacik Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 6 ++---- Documentation/filesystems/porting | 7 +++++++ Documentation/filesystems/vfs.txt | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 9b6ed7c9f34f..ca7e25292542 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -412,7 +412,7 @@ prototypes: int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, int datasync); + int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); @@ -438,9 +438,7 @@ prototypes: locking rules: All may block except for ->setlease. - No VFS locks held on entry except for ->fsync and ->setlease. - -->fsync() has i_mutex on inode. + No VFS locks held on entry except for ->setlease. ->setlease has the file_list_lock held and must not sleep. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 6b96773e27cb..7f8861d341ea 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -421,3 +421,10 @@ data and there is a virtual hole at the end of the file. So if the provided offset is less than i_size and SEEK_DATA is specified, return the same offset. If the above is true for the offset and you are given SEEK_HOLE, return the end of the file. If the offset is i_size or greater return -ENXIO in either case. + +[mandatory] + If you have your own ->fsync() you must make sure to call +filemap_write_and_wait_range() so that all dirty pages are synced out properly. +You must also keep in mind that ->fsync() is not called with i_mutex held +anymore, so if you require i_mutex locking you must make sure to take it and +release it yourself. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 6bf85b78cfea..eff6617c9a0f 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -777,7 +777,7 @@ struct file_operations { int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, int datasync); + int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); -- cgit v1.2.3