diff options
Diffstat (limited to 'fs')
163 files changed, 3753 insertions, 3668 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index ab5547ff29a1..38d695d66a0b 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -37,7 +37,6 @@ #include <linux/mount.h> #include <linux/idr.h> #include <linux/sched.h> -#include <linux/smp_lock.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -231,10 +230,8 @@ v9fs_umount_begin(struct super_block *sb) { struct v9fs_session_info *v9ses; - lock_kernel(); v9ses = sb->s_fs_info; v9fs_session_cancel(v9ses); - unlock_kernel(); } static const struct super_operations v9fs_super_ops = { diff --git a/fs/Kconfig b/fs/Kconfig index 525da2e8f73b..d78e950402c1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -39,6 +39,13 @@ config FS_POSIX_ACL bool default n +source "fs/xfs/Kconfig" +source "fs/gfs2/Kconfig" +source "fs/ocfs2/Kconfig" +source "fs/btrfs/Kconfig" + +endif # BLOCK + config FILE_LOCKING bool "Enable POSIX file locking API" if EMBEDDED default y @@ -47,13 +54,6 @@ config FILE_LOCKING for filesystems like NFS and for the flock() system call. Disabling this option saves about 11k. -source "fs/xfs/Kconfig" -source "fs/gfs2/Kconfig" -source "fs/ocfs2/Kconfig" -source "fs/btrfs/Kconfig" - -endif # BLOCK - source "fs/notify/Kconfig" source "fs/quota/Kconfig" @@ -134,7 +134,7 @@ config TMPFS_POSIX_ACL config HUGETLBFS bool "HugeTLB file system support" depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \ - (S390 && 64BIT) || BROKEN + (S390 && 64BIT) || SYS_SUPPORTS_HUGETLBFS || BROKEN help hugetlbfs is a filesystem backing for HugeTLB pages, based on ramfs. For architectures that support it, say Y here and read diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index a6665f37f456..9cc18775b832 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -1,3 +1,6 @@ +#include <linux/fs.h> +#include <linux/adfs_fs.h> + /* Internal data structures for ADFS */ #define ADFS_FREE_FRAG 0 @@ -17,6 +20,58 @@ struct buffer_head; /* + * adfs file system inode data in memory + */ +struct adfs_inode_info { + loff_t mmu_private; + unsigned long parent_id; /* object id of parent */ + __u32 loadaddr; /* RISC OS load address */ + __u32 execaddr; /* RISC OS exec address */ + unsigned int filetype; /* RISC OS file type */ + unsigned int attr; /* RISC OS permissions */ + unsigned int stamped:1; /* RISC OS file has date/time */ + struct inode vfs_inode; +}; + +/* + * Forward-declare this + */ +struct adfs_discmap; +struct adfs_dir_ops; + +/* + * ADFS file system superblock data in memory + */ +struct adfs_sb_info { + struct adfs_discmap *s_map; /* bh list containing map */ + struct adfs_dir_ops *s_dir; /* directory operations */ + + uid_t s_uid; /* owner uid */ + gid_t s_gid; /* owner gid */ + umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ + umode_t s_other_mask; /* ADFS other perm -> unix perm */ + + __u32 s_ids_per_zone; /* max. no ids in one zone */ + __u32 s_idlen; /* length of ID in map */ + __u32 s_map_size; /* sector size of a map */ + unsigned long s_size; /* total size (in blocks) of this fs */ + signed int s_map2blk; /* shift left by this for map->sector */ + unsigned int s_log2sharesize;/* log2 share size */ + __le32 s_version; /* disc format version */ + unsigned int s_namelen; /* maximum number of characters in name */ +}; + +static inline struct adfs_sb_info *ADFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct adfs_inode_info *ADFS_I(struct inode *inode) +{ + return container_of(inode, struct adfs_inode_info, vfs_inode); +} + +/* * Directory handling */ struct adfs_dir { diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 4d4073447d1a..23aa52f548a0 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -9,15 +9,7 @@ * * Common directory handling for ADFS */ -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/adfs_fs.h> -#include <linux/time.h> -#include <linux/stat.h> -#include <linux/spinlock.h> #include <linux/smp_lock.h> -#include <linux/buffer_head.h> /* for file_fsync() */ - #include "adfs.h" /* diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index 31df6adf0de6..bafc71222e25 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -9,15 +9,7 @@ * * E and F format directory handling */ -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/adfs_fs.h> -#include <linux/time.h> -#include <linux/stat.h> -#include <linux/spinlock.h> #include <linux/buffer_head.h> -#include <linux/string.h> - #include "adfs.h" #include "dir_f.h" diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index 139e0f345f18..1796bb352d05 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -7,15 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/adfs_fs.h> -#include <linux/time.h> -#include <linux/stat.h> -#include <linux/spinlock.h> #include <linux/buffer_head.h> -#include <linux/string.h> - #include "adfs.h" #include "dir_fplus.h" diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 8224d54a2afb..005ea34d1758 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -19,10 +19,6 @@ * * adfs regular file handling primitives */ -#include <linux/fs.h> -#include <linux/buffer_head.h> /* for file_fsync() */ -#include <linux/adfs_fs.h> - #include "adfs.h" const struct file_operations adfs_file_operations = { diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 05b3a677201d..798cb071d132 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -7,17 +7,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/adfs_fs.h> -#include <linux/time.h> -#include <linux/stat.h> -#include <linux/string.h> -#include <linux/mm.h> #include <linux/smp_lock.h> -#include <linux/module.h> #include <linux/buffer_head.h> - #include "adfs.h" /* @@ -395,4 +386,3 @@ int adfs_write_inode(struct inode *inode, int wait) unlock_kernel(); return ret; } -MODULE_LICENSE("GPL"); diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 568081b93f73..d1a5932bb0f1 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -7,14 +7,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/adfs_fs.h> -#include <linux/spinlock.h> #include <linux/buffer_head.h> - #include <asm/unaligned.h> - #include "adfs.h" /* diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 0ec5aaf47aa7..aad92f0a1048 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -8,26 +8,12 @@ * published by the Free Software Foundation. */ #include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/adfs_fs.h> -#include <linux/slab.h> -#include <linux/time.h> -#include <linux/stat.h> -#include <linux/string.h> #include <linux/init.h> #include <linux/buffer_head.h> -#include <linux/vfs.h> #include <linux/parser.h> -#include <linux/bitops.h> #include <linux/mount.h> #include <linux/seq_file.h> - -#include <asm/uaccess.h> -#include <asm/system.h> - -#include <stdarg.h> - +#include <linux/statfs.h> #include "adfs.h" #include "dir_f.h" #include "dir_fplus.h" @@ -534,3 +520,4 @@ static void __exit exit_adfs_fs(void) module_init(init_adfs_fs) module_exit(exit_adfs_fs) +MODULE_LICENSE("GPL"); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 2d33a5f7d218..0dd4dafee10b 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> +#include <rxrpc/packet.h> #include "internal.h" #include "afs_fs.h" @@ -54,6 +55,21 @@ int afs_abort_to_error(u32 abort_code) case 0x2f6df24: return -ENOLCK; case 0x2f6df26: return -ENOTEMPTY; case 0x2f6df78: return -EDQUOT; + + case RXKADINCONSISTENCY: return -EPROTO; + case RXKADPACKETSHORT: return -EPROTO; + case RXKADLEVELFAIL: return -EKEYREJECTED; + case RXKADTICKETLEN: return -EKEYREJECTED; + case RXKADOUTOFSEQUENCE: return -EPROTO; + case RXKADNOAUTH: return -EKEYREJECTED; + case RXKADBADKEY: return -EKEYREJECTED; + case RXKADBADTICKET: return -EKEYREJECTED; + case RXKADUNKNOWNKEY: return -EKEYREJECTED; + case RXKADEXPIRED: return -EKEYEXPIRED; + case RXKADSEALEDINCON: return -EKEYREJECTED; + case RXKADDATALEN: return -EKEYREJECTED; + case RXKADILLEGALLEVEL: return -EKEYREJECTED; + default: return -EREMOTEIO; } } diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index ec2a7431e458..6e689208def2 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -65,6 +65,8 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, goto out; goto rotate; case -ENOMEDIUM: + case -EKEYREJECTED: + case -EKEYEXPIRED: goto out; default: ret = -EIO; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 9367b6297d84..615d5496fe0f 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in, { struct nls_table *nls = BEFS_SB(sb)->nls; int i, o; - wchar_t uni; + unicode_t uni; int unilen, utflen; char *result; /* The utf8->nls conversion won't make the final nls string bigger @@ -539,16 +539,16 @@ befs_utf2nls(struct super_block *sb, const char *in, for (i = o = 0; i < in_len; i += utflen, o += unilen) { /* convert from UTF-8 to Unicode */ - utflen = utf8_mbtowc(&uni, &in[i], in_len - i); - if (utflen < 0) { + utflen = utf8_to_utf32(&in[i], in_len - i, &uni); + if (utflen < 0) goto conv_err; - } /* convert from Unicode to nls */ + if (uni > MAX_WCHAR_T) + goto conv_err; unilen = nls->uni2char(uni, &result[o], in_len - o); - if (unilen < 0) { + if (unilen < 0) goto conv_err; - } } result[o] = '\0'; *out_len = o; @@ -619,15 +619,13 @@ befs_nls2utf(struct super_block *sb, const char *in, /* convert from nls to unicode */ unilen = nls->char2uni(&in[i], in_len - i, &uni); - if (unilen < 0) { + if (unilen < 0) goto conv_err; - } /* convert from unicode to UTF-8 */ - utflen = utf8_wctomb(&result[o], uni, 3); - if (utflen <= 0) { + utflen = utf32_to_utf8(uni, &result[o], 3); + if (utflen <= 0) goto conv_err; - } } result[o] = '\0'; @@ -737,8 +735,6 @@ parse_options(char *options, befs_mount_options * opts) static void befs_put_super(struct super_block *sb) { - lock_kernel(); - kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; @@ -749,8 +745,6 @@ befs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; - - unlock_kernel(); } /* Allocate private field of the superblock, fill it. @@ -25,7 +25,6 @@ #include <linux/module.h> #include <linux/mempool.h> #include <linux/workqueue.h> -#include <linux/blktrace_api.h> #include <scsi/sg.h> /* for struct sg_iovec */ #include <trace/events/block.h> @@ -358,9 +357,9 @@ static void bio_kmalloc_destructor(struct bio *bio) * * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate * a bio. This is due to the mempool guarantees. To make this work, callers - * must never allocate more than 1 bio at the time from this pool. Callers + * must never allocate more than 1 bio at a time from this pool. Callers * that need to allocate more than 1 bio must always submit the previously - * allocate bio for IO before attempting to allocate a new one. Failure to + * allocated bio for IO before attempting to allocate a new one. Failure to * do so can cause livelocks under memory pressure. * **/ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d50d49d990a..d28d29c95f7c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -42,6 +42,8 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); + /* * end_io_wq structs are used to do processing in task context when an IO is * complete. This is used during reads to verify checksums, and it is used @@ -1342,12 +1344,25 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) free_extent_map(em); } +/* + * If this fails, caller must call bdi_destroy() to get rid of the + * bdi again. + */ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { - bdi_init(bdi); + int err; + + bdi->capabilities = BDI_CAP_MAP_COPY; + err = bdi_init(bdi); + if (err) + return err; + + err = bdi_register(bdi, NULL, "btrfs-%d", + atomic_inc_return(&btrfs_bdi_num)); + if (err) + return err; + bdi->ra_pages = default_backing_dev_info.ra_pages; - bdi->state = 0; - bdi->capabilities = default_backing_dev_info.capabilities; bdi->unplug_io_fn = btrfs_unplug_io_fn; bdi->unplug_io_data = info; bdi->congested_fn = btrfs_congested_fn; @@ -1569,7 +1584,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - setup_bdi(fs_info, &fs_info->bdi); + if (setup_bdi(fs_info, &fs_info->bdi)) + goto fail_bdi; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; @@ -1946,8 +1962,8 @@ fail_iput: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); +fail_bdi: bdi_destroy(&fs_info->bdi); - fail: kfree(extent_root); kfree(tree_root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2e177d7f4bb9..4e83457ea253 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -543,13 +543,13 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, btrfs_free_log(trans, root); btrfs_update_reloc_root(trans, root); - if (root->commit_root == root->node) - continue; - - free_extent_buffer(root->commit_root); - root->commit_root = btrfs_root_node(root); + if (root->commit_root != root->node) { + free_extent_buffer(root->commit_root); + root->commit_root = btrfs_root_node(root); + btrfs_set_root_node(&root->root_item, + root->node); + } - btrfs_set_root_node(&root->root_item, root->node); err = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); diff --git a/fs/compat.c b/fs/compat.c index 6aefb776dfeb..cdd51a3a7c53 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -471,7 +471,7 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, ret = sys_fcntl(fd, cmd, (unsigned long)&f); set_fs(old_fs); if (cmd == F_GETLK && ret == 0) { - /* GETLK was successfule and we need to return the data... + /* GETLK was successful and we need to return the data... * but it needs to fit in the compat structure. * l_start shouldn't be too big, unless the original * start + end is greater than COMPAT_OFF_T_MAX, in which diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 905523cc281f..c5ded5ff72b5 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1759,7 +1759,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that - * use compatiable ioctls + * use compatible ioctls */ static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 762d287123ca..da6061a6df40 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -39,6 +39,9 @@ struct configfs_dirent { umode_t s_mode; struct dentry * s_dentry; struct iattr * s_iattr; +#ifdef CONFIG_LOCKDEP + int s_depth; +#endif }; #define CONFIGFS_ROOT 0x0001 diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 05373db21a4e..8e48b52205aa 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -78,11 +78,97 @@ static const struct dentry_operations configfs_dentry_ops = { .d_delete = configfs_d_delete, }; +#ifdef CONFIG_LOCKDEP + +/* + * Helpers to make lockdep happy with our recursive locking of default groups' + * inodes (see configfs_attach_group() and configfs_detach_group()). + * We put default groups i_mutexes in separate classes according to their depth + * from the youngest non-default group ancestor. + * + * For a non-default group A having default groups A/B, A/C, and A/C/D, default + * groups A/B and A/C will have their inode's mutex in class + * default_group_class[0], and default group A/C/D will be in + * default_group_class[1]. + * + * The lock classes are declared and assigned in inode.c, according to the + * s_depth value. + * The s_depth value is initialized to -1, adjusted to >= 0 when attaching + * default groups, and reset to -1 when all default groups are attached. During + * attachment, if configfs_create() sees s_depth > 0, the lock class of the new + * inode's mutex is set to default_group_class[s_depth - 1]. + */ + +static void configfs_init_dirent_depth(struct configfs_dirent *sd) +{ + sd->s_depth = -1; +} + +static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, + struct configfs_dirent *sd) +{ + int parent_depth = parent_sd->s_depth; + + if (parent_depth >= 0) + sd->s_depth = parent_depth + 1; +} + +static void +configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) +{ + /* + * item's i_mutex class is already setup, so s_depth is now only + * used to set new sub-directories s_depth, which is always done + * with item's i_mutex locked. + */ + /* + * sd->s_depth == -1 iff we are a non default group. + * else (we are a default group) sd->s_depth > 0 (see + * create_dir()). + */ + if (sd->s_depth == -1) + /* + * We are a non default group and we are going to create + * default groups. + */ + sd->s_depth = 0; +} + +static void +configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) +{ + /* We will not create default groups anymore. */ + sd->s_depth = -1; +} + +#else /* CONFIG_LOCKDEP */ + +static void configfs_init_dirent_depth(struct configfs_dirent *sd) +{ +} + +static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, + struct configfs_dirent *sd) +{ +} + +static void +configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) +{ +} + +static void +configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) +{ +} + +#endif /* CONFIG_LOCKDEP */ + /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ -static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd, - void * element) +static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, + void *element, int type) { struct configfs_dirent * sd; @@ -94,6 +180,8 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare INIT_LIST_HEAD(&sd->s_links); INIT_LIST_HEAD(&sd->s_children); sd->s_element = element; + sd->s_type = type; + configfs_init_dirent_depth(sd); spin_lock(&configfs_dirent_lock); if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { spin_unlock(&configfs_dirent_lock); @@ -138,12 +226,11 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd, { struct configfs_dirent * sd; - sd = configfs_new_dirent(parent_sd, element); + sd = configfs_new_dirent(parent_sd, element, type); if (IS_ERR(sd)) return PTR_ERR(sd); sd->s_mode = mode; - sd->s_type = type; sd->s_dentry = dentry; if (dentry) { dentry->d_fsdata = configfs_get(sd); @@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p, error = configfs_make_dirent(p->d_fsdata, d, k, mode, CONFIGFS_DIR | CONFIGFS_USET_CREATING); if (!error) { + configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata); error = configfs_create(d, mode, init_dir); if (!error) { inc_nlink(p->d_inode); @@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item, * error, as rmdir() would. */ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + configfs_adjust_dir_dirent_depth_before_populate(sd); ret = populate_groups(to_config_group(item)); if (ret) { configfs_detach_item(item); dentry->d_inode->i_flags |= S_DEAD; } + configfs_adjust_dir_dirent_depth_after_populate(sd); mutex_unlock(&dentry->d_inode->i_mutex); if (ret) d_delete(dentry); @@ -916,11 +1006,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level) * Note, btw, that this can be called at *any* time, even when a configfs * subsystem isn't registered, or when configfs is loading or unloading. * Just like configfs_register_subsystem(). So we take the same - * precautions. We pin the filesystem. We lock each i_mutex _in_order_ - * on our way down the tree. If we can find the target item in the + * precautions. We pin the filesystem. We lock configfs_dirent_lock. + * If we can find the target item in the * configfs tree, it must be part of the subsystem tree as well, so we - * do not need the subsystem semaphore. Holding the i_mutex chain locks - * out mkdir() and rmdir(), who might be racing us. + * do not need the subsystem semaphore. Holding configfs_dirent_lock helps + * locking out mkdir() and rmdir(), who might be racing us. */ /* @@ -933,17 +1023,21 @@ static int configfs_dump(struct configfs_dirent *sd, int level) * do that so we can unlock it if we find nothing. * * Here we do a depth-first search of the dentry hierarchy looking for - * our object. We take i_mutex on each step of the way down. IT IS - * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, - * we'll drop the i_mutex. + * our object. + * We deliberately ignore items tagged as dropping since they are virtually + * dead, as well as items in the middle of attachment since they virtually + * do not exist yet. This completes the locking out of racing mkdir() and + * rmdir(). + * Note: subdirectories in the middle of attachment start with s_type = + * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When + * CONFIGFS_USET_CREATING is set, we ignore the item. The actual set of + * s_type is in configfs_new_dirent(), which has configfs_dirent_lock. * - * If the target is not found, -ENOENT is bubbled up and we have released - * all locks. If the target was found, the locks will be cleared by - * configfs_depend_rollback(). + * If the target is not found, -ENOENT is bubbled up. * * This adds a requirement that all config_items be unique! * - * This is recursive because the locking traversal is tricky. There isn't + * This is recursive. There isn't * much on the stack, though, so folks that need this function - be careful * about your stack! Patches will be accepted to make it iterative. */ @@ -955,13 +1049,13 @@ static int configfs_depend_prep(struct dentry *origin, BUG_ON(!origin || !sd); - /* Lock this guy on the way down */ - mutex_lock(&sd->s_dentry->d_inode->i_mutex); if (sd->s_element == target) /* Boo-yah */ goto out; list_for_each_entry(child_sd, &sd->s_children, s_sibling) { - if (child_sd->s_type & CONFIGFS_DIR) { + if ((child_sd->s_type & CONFIGFS_DIR) && + !(child_sd->s_type & CONFIGFS_USET_DROPPING) && + !(child_sd->s_type & CONFIGFS_USET_CREATING)) { ret = configfs_depend_prep(child_sd->s_dentry, target); if (!ret) @@ -970,33 +1064,12 @@ static int configfs_depend_prep(struct dentry *origin, } /* We looped all our children and didn't find target */ - mutex_unlock(&sd->s_dentry->d_inode->i_mutex); ret = -ENOENT; out: return ret; } -/* - * This is ONLY called if configfs_depend_prep() did its job. So we can - * trust the entire path from item back up to origin. - * - * We walk backwards from item, unlocking each i_mutex. We finish by - * unlocking origin. - */ -static void configfs_depend_rollback(struct dentry *origin, - struct config_item *item) -{ - struct dentry *dentry = item->ci_dentry; - - while (dentry != origin) { - mutex_unlock(&dentry->d_inode->i_mutex); - dentry = dentry->d_parent; - } - - mutex_unlock(&origin->d_inode->i_mutex); -} - int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target) { @@ -1037,17 +1110,21 @@ int configfs_depend_item(struct configfs_subsystem *subsys, /* Ok, now we can trust subsys/s_item */ - /* Scan the tree, locking i_mutex recursively, return 0 if found */ + spin_lock(&configfs_dirent_lock); + /* Scan the tree, return 0 if found */ ret = configfs_depend_prep(subsys_sd->s_dentry, target); if (ret) - goto out_unlock_fs; + goto out_unlock_dirent_lock; - /* We hold all i_mutexes from the subsystem down to the target */ + /* + * We are sure that the item is not about to be removed by rmdir(), and + * not in the middle of attachment by mkdir(). + */ p = target->ci_dentry->d_fsdata; p->s_dependent_count += 1; - configfs_depend_rollback(subsys_sd->s_dentry, target); - +out_unlock_dirent_lock: + spin_unlock(&configfs_dirent_lock); out_unlock_fs: mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); @@ -1072,10 +1149,10 @@ void configfs_undepend_item(struct configfs_subsystem *subsys, struct configfs_dirent *sd; /* - * Since we can trust everything is pinned, we just need i_mutex - * on the item. + * Since we can trust everything is pinned, we just need + * configfs_dirent_lock. */ - mutex_lock(&target->ci_dentry->d_inode->i_mutex); + spin_lock(&configfs_dirent_lock); sd = target->ci_dentry->d_fsdata; BUG_ON(sd->s_dependent_count < 1); @@ -1086,7 +1163,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys, * After this unlock, we cannot trust the item to stay alive! * DO NOT REFERENCE item after this unlock. */ - mutex_unlock(&target->ci_dentry->d_inode->i_mutex); + spin_unlock(&configfs_dirent_lock); } EXPORT_SYMBOL(configfs_undepend_item); @@ -1286,13 +1363,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; - /* - * Here's where we check for dependents. We're protected by - * i_mutex. - */ - if (sd->s_dependent_count) - return -EBUSY; - /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; @@ -1316,9 +1386,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); - ret = configfs_detach_prep(dentry, &wait_mutex); - if (ret) - configfs_detach_rollback(dentry); + /* + * Here's where we check for dependents. We're protected by + * configfs_dirent_lock. + * If no dependent, atomically tag the item as dropping. + */ + ret = sd->s_dependent_count ? -EBUSY : 0; + if (!ret) { + ret = configfs_detach_prep(dentry, &wait_mutex); + if (ret) + configfs_detach_rollback(dentry); + } spin_unlock(&configfs_dirent_lock); mutex_unlock(&configfs_symlink_mutex); @@ -1429,7 +1507,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) */ err = -ENOENT; if (configfs_dirent_is_ready(parent_sd)) { - file->private_data = configfs_new_dirent(parent_sd, NULL); + file->private_data = configfs_new_dirent(parent_sd, NULL, 0); if (IS_ERR(file->private_data)) err = PTR_ERR(file->private_data); else diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 5d349d38e056..4921e7426d95 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -33,10 +33,15 @@ #include <linux/backing-dev.h> #include <linux/capability.h> #include <linux/sched.h> +#include <linux/lockdep.h> #include <linux/configfs.h> #include "configfs_internal.h" +#ifdef CONFIG_LOCKDEP +static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; +#endif + extern struct super_block * configfs_sb; static const struct address_space_operations configfs_aops = { @@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) return inode; } +#ifdef CONFIG_LOCKDEP + +static void configfs_set_inode_lock_class(struct configfs_dirent *sd, + struct inode *inode) +{ + int depth = sd->s_depth; + + if (depth > 0) { + if (depth <= ARRAY_SIZE(default_group_class)) { + lockdep_set_class(&inode->i_mutex, + &default_group_class[depth - 1]); + } else { + /* + * In practice the maximum level of locking depth is + * already reached. Just inform about possible reasons. + */ + printk(KERN_INFO "configfs: Too many levels of inodes" + " for the locking correctness validator.\n"); + printk(KERN_INFO "Spurious warnings may appear.\n"); + } + } +} + +#else /* CONFIG_LOCKDEP */ + +static void configfs_set_inode_lock_class(struct configfs_dirent *sd, + struct inode *inode) +{ +} + +#endif /* CONFIG_LOCKDEP */ + int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) { int error = 0; @@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode * struct inode *p_inode = dentry->d_parent->d_inode; p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; } + configfs_set_inode_lock_class(sd, inode); goto Proceed; } else diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 33a90120f6ad..4d74fc72c195 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -67,6 +67,8 @@ static int debugfs_u8_get(void *data, u64 *val) return 0; } DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n"); /** * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value @@ -95,6 +97,13 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); struct dentry *debugfs_create_u8(const char *name, mode_t mode, struct dentry *parent, u8 *value) { + /* if there are no write bits set, make read only */ + if (!(mode & S_IWUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_u8_ro); + /* if there are no read bits set, make write only */ + if (!(mode & S_IRUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_u8_wo); + return debugfs_create_file(name, mode, parent, value, &fops_u8); } EXPORT_SYMBOL_GPL(debugfs_create_u8); @@ -110,6 +119,8 @@ static int debugfs_u16_get(void *data, u64 *val) return 0; } DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n"); /** * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value @@ -138,6 +149,13 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); struct dentry *debugfs_create_u16(const char *name, mode_t mode, struct dentry *parent, u16 *value) { + /* if there are no write bits set, make read only */ + if (!(mode & S_IWUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_u16_ro); + /* if there are no read bits set, make write only */ + if (!(mode & S_IRUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_u16_wo); + return debugfs_create_file(name, mode, parent, value, &fops_u16); } EXPORT_SYMBOL_GPL(debugfs_create_u16); @@ -153,6 +171,8 @@ static int debugfs_u32_get(void *data, u64 *val) return 0; } DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); /** * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value @@ -181,6 +201,13 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); struct dentry *debugfs_create_u32(const char *name, mode_t mode, struct dentry *parent, u32 *value) { + /* if there are no write bits set, make read only */ + if (!(mode & S_IWUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_u32_ro); + /* if there are no read bits set, make write only */ + if (!(mode & S_IRUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_u32_wo); + return debugfs_create_file(name, mode, parent, value, &fops_u32); } EXPORT_SYMBOL_GPL(debugfs_create_u32); @@ -197,6 +224,8 @@ static int debugfs_u64_get(void *data, u64 *val) return 0; } DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); /** * debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value @@ -225,15 +254,28 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); struct dentry *debugfs_create_u64(const char *name, mode_t mode, struct dentry *parent, u64 *value) { + /* if there are no write bits set, make read only */ + if (!(mode & S_IWUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_u64_ro); + /* if there are no read bits set, make write only */ + if (!(mode & S_IRUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_u64_wo); + return debugfs_create_file(name, mode, parent, value, &fops_u64); } EXPORT_SYMBOL_GPL(debugfs_create_u64); DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n"); DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n"); DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); /* * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value @@ -256,6 +298,13 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n" struct dentry *debugfs_create_x8(const char *name, mode_t mode, struct dentry *parent, u8 *value) { + /* if there are no write bits set, make read only */ + if (!(mode & S_IWUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_x8_ro); + /* if there are no read bits set, make write only */ + if (!(mode & S_IRUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_x8_wo); + return debugfs_create_file(name, mode, parent, value, &fops_x8); } EXPORT_SYMBOL_GPL(debugfs_create_x8); @@ -273,6 +322,13 @@ EXPORT_SYMBOL_GPL(debugfs_create_x8); struct dentry *debugfs_create_x16(const char *name, mode_t mode, struct dentry *parent, u16 *value) { + /* if there are no write bits set, make read only */ + if (!(mode & S_IWUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_x16_ro); + /* if there are no read bits set, make write only */ + if (!(mode & S_IRUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_x16_wo); + return debugfs_create_file(name, mode, parent, value, &fops_x16); } EXPORT_SYMBOL_GPL(debugfs_create_x16); @@ -290,6 +346,13 @@ EXPORT_SYMBOL_GPL(debugfs_create_x16); struct dentry *debugfs_create_x32(const char *name, mode_t mode, struct dentry *parent, u32 *value) { + /* if there are no write bits set, make read only */ + if (!(mode & S_IWUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_x32_ro); + /* if there are no read bits set, make write only */ + if (!(mode & S_IRUGO)) + return debugfs_create_file(name, mode, parent, value, &fops_x32_wo); + return debugfs_create_file(name, mode, parent, value, &fops_x32); } EXPORT_SYMBOL_GPL(debugfs_create_x32); @@ -419,7 +482,7 @@ static const struct file_operations fops_blob = { }; /** - * debugfs_create_blob - create a debugfs file that is used to read and write a binary blob + * debugfs_create_blob - create a debugfs file that is used to read a binary blob * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 0662ba6de85a..d22438ef7674 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -403,6 +403,7 @@ void debugfs_remove_recursive(struct dentry *dentry) } child = list_entry(parent->d_subdirs.next, struct dentry, d_u.d_child); + next_sibling: /* * If "child" isn't empty, walk down the tree and @@ -417,6 +418,16 @@ void debugfs_remove_recursive(struct dentry *dentry) __debugfs_remove(child, parent); if (parent->d_subdirs.next == &child->d_u.d_child) { /* + * Try the next sibling. + */ + if (child->d_u.d_child.next != &parent->d_subdirs) { + child = list_entry(child->d_u.d_child.next, + struct dentry, + d_u.d_child); + goto next_sibling; + } + + /* * Avoid infinite loop if we fail to remove * one dentry. */ diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 858fba14aaa6..c4dfa1dcc86f 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -49,7 +49,8 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) spin_unlock(&ls->ls_recover_list_lock); if (!found) - de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); + de = kzalloc(sizeof(struct dlm_direntry) + len, + ls->ls_allocation); return de; } @@ -211,7 +212,7 @@ int dlm_recover_directory(struct dlm_ls *ls) dlm_dir_clear(ls); - last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); + last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation); if (!last_name) goto out; @@ -322,7 +323,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, if (namelen > DLM_RESNAME_MAXLEN) return -EINVAL; - de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); + de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation); if (!de) return -ENOMEM; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index cd8e2df3c295..d489fcc86713 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -384,7 +384,7 @@ static void threads_stop(void) dlm_astd_stop(); } -static int new_lockspace(char *name, int namelen, void **lockspace, +static int new_lockspace(const char *name, int namelen, void **lockspace, uint32_t flags, int lvblen) { struct dlm_ls *ls; @@ -419,16 +419,14 @@ static int new_lockspace(char *name, int namelen, void **lockspace, break; } ls->ls_create_count++; - module_put(THIS_MODULE); - error = 1; /* not an error, return 0 */ + *lockspace = ls; + error = 1; break; } spin_unlock(&lslist_lock); - if (error < 0) - goto out; if (error) - goto ret_zero; + goto out; error = -ENOMEM; @@ -583,7 +581,6 @@ static int new_lockspace(char *name, int namelen, void **lockspace, dlm_create_debug_file(ls); log_debug(ls, "join complete"); - ret_zero: *lockspace = ls; return 0; @@ -614,7 +611,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, return error; } -int dlm_new_lockspace(char *name, int namelen, void **lockspace, +int dlm_new_lockspace(const char *name, int namelen, void **lockspace, uint32_t flags, int lvblen) { int error = 0; @@ -628,7 +625,9 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, error = new_lockspace(name, namelen, lockspace, flags, lvblen); if (!error) ls_count++; - else if (!ls_count) + if (error > 0) + error = 0; + if (!ls_count) threads_stop(); out: mutex_unlock(&ls_lock); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 609108a83267..cdb580a9c7a2 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -309,6 +309,20 @@ static void lowcomms_state_change(struct sock *sk) lowcomms_write_space(sk); } +int dlm_lowcomms_connect_node(int nodeid) +{ + struct connection *con; + + if (nodeid == dlm_our_nodeid()) + return 0; + + con = nodeid2con(nodeid, GFP_NOFS); + if (!con) + return -ENOMEM; + lowcomms_connect_sock(con); + return 0; +} + /* Make a socket active */ static int add_sock(struct socket *sock, struct connection *con) { @@ -486,7 +500,7 @@ static void process_sctp_notification(struct connection *con, return; } - new_con = nodeid2con(nodeid, GFP_KERNEL); + new_con = nodeid2con(nodeid, GFP_NOFS); if (!new_con) return; @@ -722,7 +736,7 @@ static int tcp_accept_from_sock(struct connection *con) * the same time and the connections cross on the wire. * In this case we store the incoming one in "othercon" */ - newcon = nodeid2con(nodeid, GFP_KERNEL); + newcon = nodeid2con(nodeid, GFP_NOFS); if (!newcon) { result = -ENOMEM; goto accept_err; @@ -732,7 +746,7 @@ static int tcp_accept_from_sock(struct connection *con) struct connection *othercon = newcon->othercon; if (!othercon) { - othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); + othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); if (!othercon) { log_print("failed to allocate incoming socket"); mutex_unlock(&newcon->sock_mutex); @@ -1421,7 +1435,7 @@ static int work_start(void) static void stop_conn(struct connection *con) { con->flags |= 0x0F; - if (con->sock) + if (con->sock && con->sock->sk) con->sock->sk->sk_user_data = NULL; } diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index a9a9618c0d3f..1311e6426287 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -19,6 +19,7 @@ void dlm_lowcomms_stop(void); int dlm_lowcomms_close(int nodeid); void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); void dlm_lowcomms_commit_buffer(void *mh); +int dlm_lowcomms_connect_node(int nodeid); #endif /* __LOWCOMMS_DOT_H__ */ diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 26133f05ae3a..b128775913b2 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -17,6 +17,7 @@ #include "recover.h" #include "rcom.h" #include "config.h" +#include "lowcomms.h" static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) { @@ -45,9 +46,9 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) static int dlm_add_member(struct dlm_ls *ls, int nodeid) { struct dlm_member *memb; - int w; + int w, error; - memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); if (!memb) return -ENOMEM; @@ -57,6 +58,12 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid) return w; } + error = dlm_lowcomms_connect_node(nodeid); + if (error < 0) { + kfree(memb); + return error; + } + memb->nodeid = nodeid; memb->weight = w; add_ordered_member(ls, memb); @@ -136,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls) ls->ls_total_weight = total; - array = kmalloc(sizeof(int) * total, GFP_KERNEL); + array = kmalloc(sizeof(int) * total, ls->ls_allocation); if (!array) return; @@ -219,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) continue; log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); - memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); if (!memb) return -ENOMEM; memb->nodeid = rv->new[i]; @@ -334,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls) int *ids = NULL, *new = NULL; int error, ids_count = 0, new_count = 0; - rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); + rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation); if (!rv) return -ENOMEM; diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index daa4183fbb84..7a2307c08911 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) struct rq_entry *e; int length = ms->m_header.h_length - sizeof(struct dlm_message); - e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); + e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation); if (!e) { log_print("dlm_add_requestqueue: out of memory len %d", length); return; diff --git a/fs/drop_caches.c b/fs/drop_caches.c index b6a719a909f8..a2edb7913447 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -24,7 +24,7 @@ static void drop_pagecache_sb(struct super_block *sb) continue; __iget(inode); spin_unlock(&inode_lock); - __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); + invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; spin_lock(&inode_lock); diff --git a/fs/efs/dir.c b/fs/efs/dir.c index 49308a29798a..7ee6f7e3a608 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c @@ -5,12 +5,12 @@ */ #include <linux/buffer_head.h> -#include <linux/smp_lock.h> #include "efs.h" static int efs_readdir(struct file *, void *, filldir_t); const struct file_operations efs_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = efs_readdir, }; @@ -33,8 +33,6 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { if (inode->i_size & (EFS_DIRBSIZE-1)) printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); - lock_kernel(); - /* work out where this entry can be found */ block = filp->f_pos >> EFS_DIRBSIZE_BITS; @@ -107,7 +105,6 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; out: - unlock_kernel(); return 0; } diff --git a/fs/efs/namei.c b/fs/efs/namei.c index c3fb5f9c4a44..1511bf9e5f80 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -8,7 +8,6 @@ #include <linux/buffer_head.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/exportfs.h> #include "efs.h" @@ -63,16 +62,12 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei efs_ino_t inodenum; struct inode * inode = NULL; - lock_kernel(); inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); if (inodenum) { inode = efs_iget(dir->i_sb, inodenum); - if (IS_ERR(inode)) { - unlock_kernel(); + if (IS_ERR(inode)) return ERR_CAST(inode); - } } - unlock_kernel(); return d_splice_alias(inode, dentry); } @@ -115,11 +110,9 @@ struct dentry *efs_get_parent(struct dentry *child) struct dentry *parent = ERR_PTR(-ENOENT); efs_ino_t ino; - lock_kernel(); ino = efs_find_entry(child->d_inode, "..", 2); if (ino) parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino)); - unlock_kernel(); return parent; } diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index 41911ec83aaf..75117d0dac2b 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c @@ -9,7 +9,6 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> -#include <linux/smp_lock.h> #include "efs.h" static int efs_symlink_readpage(struct file *file, struct page *page) @@ -22,9 +21,8 @@ static int efs_symlink_readpage(struct file *file, struct page *page) err = -ENAMETOOLONG; if (size > 2 * EFS_BLOCKSIZE) - goto fail_notlocked; + goto fail; - lock_kernel(); /* read first 512 bytes of link target */ err = -EIO; bh = sb_bread(inode->i_sb, efs_bmap(inode, 0)); @@ -40,14 +38,11 @@ static int efs_symlink_readpage(struct file *file, struct page *page) brelse(bh); } link[size] = '\0'; - unlock_kernel(); SetPageUptodate(page); kunmap(page); unlock_page(page); return 0; fail: - unlock_kernel(); -fail_notlocked: SetPageError(page); kunmap(page); unlock_page(page); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index b2bbf45039e0..f2e5811936d0 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -27,7 +27,7 @@ struct ext2_inode_info { /* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to + * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index d81ef2fdb08e..e0c745451715 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -129,12 +129,15 @@ fail: static inline struct posix_acl * ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) { - struct posix_acl *acl = EXT3_ACL_NOT_CACHED; + struct posix_acl *acl = ACCESS_ONCE(*i_acl); - spin_lock(&inode->i_lock); - if (*i_acl != EXT3_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); + if (acl) { + spin_lock(&inode->i_lock); + acl = *i_acl; + if (acl != EXT3_ACL_NOT_CACHED) + acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); + } return acl; } diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 647e0d65a284..605aeed96d68 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -129,12 +129,15 @@ fail: static inline struct posix_acl * ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) { - struct posix_acl *acl = EXT4_ACL_NOT_CACHED; + struct posix_acl *acl = ACCESS_ONCE(*i_acl); - spin_lock(&inode->i_lock); - if (*i_acl != EXT4_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); + if (acl) { + spin_lock(&inode->i_lock); + acl = *i_acl; + if (acl != EXT4_ACL_NOT_CACHED) + acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); + } return acl; } diff --git a/fs/fat/cache.c b/fs/fat/cache.c index b42602298087..923990e4f16e 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -241,7 +241,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) while (*fclus < cluster) { /* prevent the infinite loop of cluster chain */ if (*fclus > limit) { - fat_fs_panic(sb, "%s: detected the cluster chain loop" + fat_fs_error(sb, "%s: detected the cluster chain loop" " (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); nr = -EIO; @@ -252,7 +252,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) if (nr < 0) goto out; else if (nr == FAT_ENT_FREE) { - fat_fs_panic(sb, "%s: invalid cluster chain" + fat_fs_error(sb, "%s: invalid cluster chain" " (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); nr = -EIO; @@ -285,7 +285,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) if (ret < 0) return ret; else if (ret == FAT_ENT_EOF) { - fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", + fat_fs_error(sb, "%s: request beyond EOF (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); return -EIO; } diff --git a/fs/fat/dir.c b/fs/fat/dir.c index f3500294eec5..38ff75a0fe22 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -22,6 +22,19 @@ #include <asm/uaccess.h> #include "fat.h" +/* + * Maximum buffer size of short name. + * [(MSDOS_NAME + '.') * max one char + nul] + * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul] + */ +#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1) +/* + * Maximum buffer size of unicode chars from slots. + * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)] + */ +#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) +#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) + static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, struct msdos_dir_entry *de) @@ -171,7 +184,8 @@ static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, unsigned char *buf, int size) { if (sbi->options.utf8) - return utf8_wcstombs(buf, uni, size); + return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS, + UTF16_HOST_ENDIAN, buf, size); else return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, sbi->nls_io); @@ -325,19 +339,6 @@ parse_long: } /* - * Maximum buffer size of short name. - * [(MSDOS_NAME + '.') * max one char + nul] - * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul] - */ -#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1) -/* - * Maximum buffer size of unicode chars from slots. - * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)] - */ -#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) -#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) - -/* * Return values: negative -> error, 0 -> not found, positive -> found, * value is the total amount of slots, including the shortname entry. */ @@ -1334,7 +1335,7 @@ found: goto error_remove; } if (dir->i_size & (sbi->cluster_size - 1)) { - fat_fs_panic(sb, "Odd directory size"); + fat_fs_error(sb, "Odd directory size"); dir->i_size = (dir->i_size + sbi->cluster_size - 1) & ~((loff_t)sbi->cluster_size - 1); } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e4d88527b5dd..adb0e72a176d 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -17,6 +17,10 @@ #define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ #define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ +#define FAT_ERRORS_CONT 1 /* ignore error and continue */ +#define FAT_ERRORS_PANIC 2 /* panic on error */ +#define FAT_ERRORS_RO 3 /* remount r/o on error */ + struct fat_mount_options { uid_t fs_uid; gid_t fs_gid; @@ -26,6 +30,7 @@ struct fat_mount_options { char *iocharset; /* Charset used for filename input/display */ unsigned short shortname; /* flags for shortname display/create rule */ unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned char errors; /* On error: continue, panic, remount-ro */ unsigned short allow_utime;/* permission for setting the [am]time */ unsigned quiet:1, /* set = fake successful chmods and chowns */ showexec:1, /* set = only set x bit for com/exe/bat */ @@ -316,7 +321,7 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); /* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...) +extern void fat_fs_error(struct super_block *s, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))) __cold; extern void fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 618f5305c2e4..a81037721a6f 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -348,7 +348,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry) if (entry < FAT_START_ENT || sbi->max_cluster <= entry) { fatent_brelse(fatent); - fat_fs_panic(sb, "invalid access to FAT (entry 0x%08x)", entry); + fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry); return -EIO; } @@ -560,7 +560,7 @@ int fat_free_clusters(struct inode *inode, int cluster) err = cluster; goto error; } else if (cluster == FAT_ENT_FREE) { - fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", + fat_fs_error(sb, "%s: deleting FAT entry beyond EOF", __func__); err = -EIO; goto error; diff --git a/fs/fat/file.c b/fs/fat/file.c index e955a56b4e5e..b28ea646ff60 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -18,106 +18,112 @@ #include <linux/security.h> #include "fat.h" -int fat_generic_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) { + u32 attr; + + mutex_lock(&inode->i_mutex); + attr = fat_make_attrs(inode); + mutex_unlock(&inode->i_mutex); + + return put_user(attr, user_attr); +} + +static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) +{ + struct inode *inode = file->f_path.dentry->d_inode; struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); - u32 __user *user_attr = (u32 __user *)arg; + int is_dir = S_ISDIR(inode->i_mode); + u32 attr, oldattr; + struct iattr ia; + int err; - switch (cmd) { - case FAT_IOCTL_GET_ATTRIBUTES: - { - u32 attr; + err = get_user(attr, user_attr); + if (err) + goto out; - mutex_lock(&inode->i_mutex); - attr = fat_make_attrs(inode); - mutex_unlock(&inode->i_mutex); + mutex_lock(&inode->i_mutex); + err = mnt_want_write(file->f_path.mnt); + if (err) + goto out_unlock_inode; - return put_user(attr, user_attr); + /* + * ATTR_VOLUME and ATTR_DIR cannot be changed; this also + * prevents the user from turning us into a VFAT + * longname entry. Also, we obviously can't set + * any of the NTFS attributes in the high 24 bits. + */ + attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR); + /* Merge in ATTR_VOLUME and ATTR_DIR */ + attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | + (is_dir ? ATTR_DIR : 0); + oldattr = fat_make_attrs(inode); + + /* Equivalent to a chmod() */ + ia.ia_valid = ATTR_MODE | ATTR_CTIME; + ia.ia_ctime = current_fs_time(inode->i_sb); + if (is_dir) + ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); + else { + ia.ia_mode = fat_make_mode(sbi, attr, + S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); } - case FAT_IOCTL_SET_ATTRIBUTES: - { - u32 attr, oldattr; - int err, is_dir = S_ISDIR(inode->i_mode); - struct iattr ia; - err = get_user(attr, user_attr); - if (err) - return err; + /* The root directory has no attributes */ + if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { + err = -EINVAL; + goto out_drop_write; + } - mutex_lock(&inode->i_mutex); - - err = mnt_want_write(filp->f_path.mnt); - if (err) - goto up_no_drop_write; - - /* - * ATTR_VOLUME and ATTR_DIR cannot be changed; this also - * prevents the user from turning us into a VFAT - * longname entry. Also, we obviously can't set - * any of the NTFS attributes in the high 24 bits. - */ - attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR); - /* Merge in ATTR_VOLUME and ATTR_DIR */ - attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | - (is_dir ? ATTR_DIR : 0); - oldattr = fat_make_attrs(inode); - - /* Equivalent to a chmod() */ - ia.ia_valid = ATTR_MODE | ATTR_CTIME; - ia.ia_ctime = current_fs_time(inode->i_sb); - if (is_dir) - ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); - else { - ia.ia_mode = fat_make_mode(sbi, attr, - S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); - } + if (sbi->options.sys_immutable && + ((attr | oldattr) & ATTR_SYS) && + !capable(CAP_LINUX_IMMUTABLE)) { + err = -EPERM; + goto out_drop_write; + } - /* The root directory has no attributes */ - if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { - err = -EINVAL; - goto up; - } + /* + * The security check is questionable... We single + * out the RO attribute for checking by the security + * module, just because it maps to a file mode. + */ + err = security_inode_setattr(file->f_path.dentry, &ia); + if (err) + goto out_drop_write; - if (sbi->options.sys_immutable) { - if ((attr | oldattr) & ATTR_SYS) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; - goto up; - } - } - } + /* This MUST be done before doing anything irreversible... */ + err = fat_setattr(file->f_path.dentry, &ia); + if (err) + goto out_drop_write; + + fsnotify_change(file->f_path.dentry, ia.ia_valid); + if (sbi->options.sys_immutable) { + if (attr & ATTR_SYS) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= S_IMMUTABLE; + } - /* - * The security check is questionable... We single - * out the RO attribute for checking by the security - * module, just because it maps to a file mode. - */ - err = security_inode_setattr(filp->f_path.dentry, &ia); - if (err) - goto up; - - /* This MUST be done before doing anything irreversible... */ - err = fat_setattr(filp->f_path.dentry, &ia); - if (err) - goto up; - - fsnotify_change(filp->f_path.dentry, ia.ia_valid); - if (sbi->options.sys_immutable) { - if (attr & ATTR_SYS) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= S_IMMUTABLE; - } + fat_save_attrs(inode, attr); + mark_inode_dirty(inode); +out_drop_write: + mnt_drop_write(file->f_path.mnt); +out_unlock_inode: + mutex_unlock(&inode->i_mutex); +out: + return err; +} - fat_save_attrs(inode, attr); - mark_inode_dirty(inode); -up: - mnt_drop_write(filp->f_path.mnt); -up_no_drop_write: - mutex_unlock(&inode->i_mutex); - return err; - } +int fat_generic_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + u32 __user *user_attr = (u32 __user *)arg; + + switch (cmd) { + case FAT_IOCTL_GET_ATTRIBUTES: + return fat_ioctl_get_attributes(inode, user_attr); + case FAT_IOCTL_SET_ATTRIBUTES: + return fat_ioctl_set_attributes(filp, user_attr); default: return -ENOTTY; /* Inappropriate ioctl for device */ } @@ -225,7 +231,7 @@ static int fat_free(struct inode *inode, int skip) fatent_brelse(&fatent); return 0; } else if (ret == FAT_ENT_FREE) { - fat_fs_panic(sb, + fat_fs_error(sb, "%s: invalid cluster chain (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); ret = -EIO; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 51a5ecf9000a..304b411cb8bc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -76,7 +76,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, return 0; if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { - fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", + fat_fs_error(sb, "corrupted file size (i_pos %lld, %lld)", MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); return -EIO; } @@ -856,6 +856,12 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",flush"); if (opts->tz_utc) seq_puts(m, ",tz=UTC"); + if (opts->errors == FAT_ERRORS_CONT) + seq_puts(m, ",errors=continue"); + else if (opts->errors == FAT_ERRORS_PANIC) + seq_puts(m, ",errors=panic"); + else + seq_puts(m, ",errors=remount-ro"); return 0; } @@ -868,7 +874,8 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, + Opt_err_panic, Opt_err_ro, Opt_err, }; static const match_table_t fat_tokens = { @@ -891,6 +898,11 @@ static const match_table_t fat_tokens = { {Opt_showexec, "showexec"}, {Opt_debug, "debug"}, {Opt_immutable, "sys_immutable"}, + {Opt_flush, "flush"}, + {Opt_tz_utc, "tz=UTC"}, + {Opt_err_cont, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_ro, "errors=remount-ro"}, {Opt_obsolate, "conv=binary"}, {Opt_obsolate, "conv=text"}, {Opt_obsolate, "conv=auto"}, @@ -902,8 +914,6 @@ static const match_table_t fat_tokens = { {Opt_obsolate, "cvf_format=%20s"}, {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, - {Opt_flush, "flush"}, - {Opt_tz_utc, "tz=UTC"}, {Opt_err, NULL}, }; static const match_table_t msdos_tokens = { @@ -973,6 +983,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->numtail = 1; opts->usefree = opts->nocase = 0; opts->tz_utc = 0; + opts->errors = FAT_ERRORS_RO; *debug = 0; if (!options) @@ -1065,6 +1076,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_tz_utc: opts->tz_utc = 1; break; + case Opt_err_cont: + opts->errors = FAT_ERRORS_CONT; + break; + case Opt_err_panic: + opts->errors = FAT_ERRORS_PANIC; + break; + case Opt_err_ro: + opts->errors = FAT_ERRORS_RO; + break; /* msdos specific */ case Opt_dots: diff --git a/fs/fat/misc.c b/fs/fat/misc.c index ac39ebcc1496..a6c20473dfd7 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -12,14 +12,19 @@ #include "fat.h" /* - * fat_fs_panic reports a severe file system problem and sets the file system - * read-only. The file system can be made writable again by remounting it. + * fat_fs_error reports a file system problem that might indicate fa data + * corruption/inconsistency. Depending on 'errors' mount option the + * panic() is called, or error message is printed FAT and nothing is done, + * or filesystem is remounted read-only (default behavior). + * In case the file system is remounted read-only, it can be made writable + * again by remounting it. */ -void fat_fs_panic(struct super_block *s, const char *fmt, ...) +void fat_fs_error(struct super_block *s, const char *fmt, ...) { + struct fat_mount_options *opts = &MSDOS_SB(s)->options; va_list args; - printk(KERN_ERR "FAT: Filesystem panic (dev %s)\n", s->s_id); + printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id); printk(KERN_ERR " "); va_start(args, fmt); @@ -27,13 +32,14 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...) va_end(args); printk("\n"); - if (!(s->s_flags & MS_RDONLY)) { + if (opts->errors == FAT_ERRORS_PANIC) + panic(" FAT fs panic from previous error\n"); + else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { s->s_flags |= MS_RDONLY; printk(KERN_ERR " File system has been set read-only\n"); } } - -EXPORT_SYMBOL_GPL(fat_fs_panic); +EXPORT_SYMBOL_GPL(fat_fs_error); /* Flushes the number of free clusters on FAT32 */ /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ @@ -124,7 +130,7 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) mark_inode_dirty(inode); } if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { - fat_fs_panic(sb, "clusters badly computed (%d != %llu)", + fat_fs_error(sb, "clusters badly computed (%d != %llu)", new_fclus, (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); fat_cache_inval_inode(inode); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 20f522861355..82f88733b681 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -608,7 +608,7 @@ error_inode: sinfo.bh = NULL; } if (corrupt < 0) { - fat_fs_panic(new_dir->i_sb, + fat_fs_error(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", __func__, sinfo.i_pos); } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index b50ecbe97f83..73471b7ecc8c 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -502,11 +502,11 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, if (utf8) { int name_len = strlen(name); - *outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX); + *outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname); /* * We stripped '.'s before and set len appropriately, - * but utf8_mbstowcs doesn't care about len + * but utf8s_to_utf16s doesn't care about len */ *outlen -= (name_len - len); @@ -1030,7 +1030,7 @@ error_inode: sinfo.bh = NULL; } if (corrupt < 0) { - fat_fs_panic(new_dir->i_sb, + fat_fs_error(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", __func__, sinfo.i_pos); } diff --git a/fs/fcntl.c b/fs/fcntl.c index 1ad703150dee..a040b764f8e3 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -198,15 +198,19 @@ static int setfl(int fd, struct file * filp, unsigned long arg) } static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, - uid_t uid, uid_t euid, int force) + int force) { write_lock_irq(&filp->f_owner.lock); if (force || !filp->f_owner.pid) { put_pid(filp->f_owner.pid); filp->f_owner.pid = get_pid(pid); filp->f_owner.pid_type = type; - filp->f_owner.uid = uid; - filp->f_owner.euid = euid; + + if (pid) { + const struct cred *cred = current_cred(); + filp->f_owner.uid = cred->uid; + filp->f_owner.euid = cred->euid; + } } write_unlock_irq(&filp->f_owner.lock); } @@ -214,14 +218,13 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { - const struct cred *cred = current_cred(); int err; - + err = security_file_set_fowner(filp); if (err) return err; - f_modown(filp, pid, type, cred->uid, cred->euid, force); + f_modown(filp, pid, type, force); return 0; } EXPORT_SYMBOL(__f_setown); @@ -247,7 +250,7 @@ EXPORT_SYMBOL(f_setown); void f_delown(struct file *filp) { - f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1); + f_modown(filp, NULL, PIDTYPE_PID, 1); } pid_t f_getown(struct file *filp) @@ -425,14 +428,20 @@ static inline int sigio_perm(struct task_struct *p, } static void send_sigio_to_task(struct task_struct *p, - struct fown_struct *fown, + struct fown_struct *fown, int fd, int reason) { - if (!sigio_perm(p, fown, fown->signum)) + /* + * F_SETSIG can change ->signum lockless in parallel, make + * sure we read it once and use the same value throughout. + */ + int signum = ACCESS_ONCE(fown->signum); + + if (!sigio_perm(p, fown, signum)) return; - switch (fown->signum) { + switch (signum) { siginfo_t si; default: /* Queue a rt signal with the appropriate fd as its @@ -441,7 +450,7 @@ static void send_sigio_to_task(struct task_struct *p, delivered even if we can't queue. Failure to queue in this case _should_ be reported; we fall back to SIGIO in that case. --sct */ - si.si_signo = fown->signum; + si.si_signo = signum; si.si_errno = 0; si.si_code = reason; /* Make sure we are called with one of the POLL_* @@ -453,7 +462,7 @@ static void send_sigio_to_task(struct task_struct *p, else si.si_band = band_table[reason - POLL_IN]; si.si_fd = fd; - if (!group_send_sig_info(fown->signum, &si, p)) + if (!group_send_sig_info(signum, &si, p)) break; /* fall-through: fall back on the old plain SIGIO signal */ case 0: diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 40308e98c6a4..caf049146ca2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -321,7 +321,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode_lock); inode->i_state &= ~I_SYNC; - if (!(inode->i_state & I_FREEING)) { + if (!(inode->i_state & (I_FREEING | I_CLEAR))) { if (!(inode->i_state & I_DIRTY) && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { /* @@ -492,7 +492,7 @@ void generic_sync_sb_inodes(struct super_block *sb, break; } - if (inode->i_state & I_NEW) { + if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); continue; } @@ -523,7 +523,7 @@ void generic_sync_sb_inodes(struct super_block *sb, if (current_is_pdflush() && !writeback_acquire(bdi)) break; - BUG_ON(inode->i_state & I_FREEING); + BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); __iget(inode); pages_skipped = wbc->pages_skipped; __writeback_single_inode(inode, wbc); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f0df55a52929..d8673ccf90b7 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -19,7 +19,6 @@ #include <linux/random.h> #include <linux/sched.h> #include <linux/exportfs.h> -#include <linux/smp_lock.h> MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -260,9 +259,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, static void fuse_umount_begin(struct super_block *sb) { - lock_kernel(); fuse_abort_conn(get_fuse_conn_super(sb)); - unlock_kernel(); } static void fuse_send_destroy(struct fuse_conn *fc) diff --git a/fs/inode.c b/fs/inode.c index a88baebf77cf..f643be565df8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(touch_atime); * for writeback. Note that this function is meant exclusively for * usage in the file write path of filesystems, and filesystems may * choose to explicitly ignore update via this function with the - * S_NOCTIME inode flag, e.g. for network filesystem where these + * S_NOCMTIME inode flag, e.g. for network filesystem where these * timestamps are handled by the server. */ diff --git a/fs/ioctl.c b/fs/ioctl.c index 286f38dfc6c0..001f8d3118f2 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -70,9 +70,7 @@ static int ioctl_fibmap(struct file *filp, int __user *p) res = get_user(block, p); if (res) return res; - lock_kernel(); res = mapping->a_ops->bmap(mapping, block); - unlock_kernel(); return put_user(res, p); } diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index 92c14b850e9c..a048de81c093 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c @@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) return (op - ascii); } -/* Convert big endian wide character string to utf8 */ -static int -wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen) -{ - const __u8 *ip; - __u8 *op; - int size; - __u16 c; - - op = s; - ip = pwcs; - while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) { - c = (*ip << 8) | ip[1]; - if (c > 0x7f) { - size = utf8_wctomb(op, c, maxlen); - if (size == -1) { - /* Ignore character and move on */ - maxlen--; - } else { - op += size; - maxlen -= size; - } - } else { - *op++ = (__u8) c; - } - ip += 2; - inlen--; - } - return (op - s); -} - int get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) { @@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; if (utf8) { - len = wcsntombs_be(outname, de->name, - de->name_len[0] >> 1, PAGE_SIZE); + len = utf16s_to_utf8s((const wchar_t *) de->name, + de->name_len[0] >> 1, UTF16_BIG_ENDIAN, + outname, PAGE_SIZE); } else { len = uni16_to_x8(outname, (__be16 *) de->name, de->name_len[0] >> 1, nls); diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index bbbd5f202e37..41d6045dbeb0 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -391,6 +391,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) } XADaddress(xp, xaddr); XADlength(xp, xlen); + XADoffset(xp, prev); /* * only preserve the abnr flag within the xad flags * of the returned hint. diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 3aebe322271a..6ac693faae49 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -12,13 +12,14 @@ /* bitmap.c contains the code that handles the inode and block bitmaps */ #include "minix.h" -#include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/bitops.h> #include <linux/sched.h> static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 }; +static DEFINE_SPINLOCK(bitmap_lock); + static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 numbits) { unsigned i, j, sum = 0; @@ -69,11 +70,11 @@ void minix_free_block(struct inode *inode, unsigned long block) return; } bh = sbi->s_zmap[zone]; - lock_kernel(); + spin_lock(&bitmap_lock); if (!minix_test_and_clear_bit(bit, bh->b_data)) printk("minix_free_block (%s:%lu): bit already cleared\n", sb->s_id, block); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); return; } @@ -88,18 +89,18 @@ int minix_new_block(struct inode * inode) struct buffer_head *bh = sbi->s_zmap[i]; int j; - lock_kernel(); + spin_lock(&bitmap_lock); j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); if (j < bits_per_zone) { minix_set_bit(j, bh->b_data); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); j += i * bits_per_zone + sbi->s_firstdatazone-1; if (j < sbi->s_firstdatazone || j >= sbi->s_nzones) break; return j; } - unlock_kernel(); + spin_unlock(&bitmap_lock); } return 0; } @@ -211,10 +212,10 @@ void minix_free_inode(struct inode * inode) minix_clear_inode(inode); /* clear on-disk copy */ bh = sbi->s_imap[ino]; - lock_kernel(); + spin_lock(&bitmap_lock); if (!minix_test_and_clear_bit(bit, bh->b_data)) printk("minix_free_inode: bit %lu already cleared\n", bit); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); out: clear_inode(inode); /* clear in-memory copy */ @@ -237,7 +238,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) j = bits_per_zone; bh = NULL; *error = -ENOSPC; - lock_kernel(); + spin_lock(&bitmap_lock); for (i = 0; i < sbi->s_imap_blocks; i++) { bh = sbi->s_imap[i]; j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); @@ -245,17 +246,17 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) break; } if (!bh || j >= bits_per_zone) { - unlock_kernel(); + spin_unlock(&bitmap_lock); iput(inode); return NULL; } if (minix_test_and_set_bit(j, bh->b_data)) { /* shouldn't happen */ - unlock_kernel(); + spin_unlock(&bitmap_lock); printk("minix_new_inode: bit already set\n"); iput(inode); return NULL; } - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); j += i * bits_per_zone; if (!j || j > sbi->s_ninodes) { diff --git a/fs/minix/dir.c b/fs/minix/dir.c index e5f206467e40..d407e7a0b6fe 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -11,7 +11,6 @@ #include "minix.h" #include <linux/buffer_head.h> #include <linux/highmem.h> -#include <linux/smp_lock.h> #include <linux/swap.h> typedef struct minix_dir_entry minix_dirent; @@ -20,6 +19,7 @@ typedef struct minix3_dir_entry minix3_dirent; static int minix_readdir(struct file *, void *, filldir_t); const struct file_operations minix_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = minix_readdir, .fsync = simple_fsync, @@ -102,8 +102,6 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) char *name; __u32 inumber; - lock_kernel(); - pos = (pos + chunk_size-1) & ~(chunk_size-1); if (pos >= inode->i_size) goto done; @@ -146,7 +144,6 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) done: filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; - unlock_kernel(); return 0; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index f91a23693597..74ea82d72164 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -35,8 +35,6 @@ static void minix_put_super(struct super_block *sb) int i; struct minix_sb_info *sbi = minix_sb(sb); - lock_kernel(); - if (!(sb->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ sbi->s_ms->s_state = sbi->s_mount_state; @@ -50,8 +48,6 @@ static void minix_put_super(struct super_block *sb) kfree(sbi->s_imap); sb->s_fs_info = NULL; kfree(sbi); - - unlock_kernel(); } static struct kmem_cache * minix_inode_cachep; diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 97645f112114..0ec6237a5970 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -1113,11 +1113,13 @@ ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen, if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { int k; + unicode_t u; - k = utf8_mbtowc(&ec, iname, iname_end - iname); - if (k < 0) + k = utf8_to_utf32(iname, iname_end - iname, &u); + if (k < 0 || u > MAX_WCHAR_T) return -EINVAL; iname += k; + ec = u; } else { if (*iname == NCP_ESC) { int k; @@ -1214,7 +1216,7 @@ ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen, if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { int k; - k = utf8_wctomb(iname, ec, iname_end - iname); + k = utf32_to_utf8(ec, iname, iname_end - iname); if (k < 0) { err = -ENAMETOOLONG; goto quit; diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index a2ab2529b5ca..ceda50aad73c 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -31,7 +31,7 @@ static inline void nfs_inc_server_stats(const struct nfs_server *server, cpu = get_cpu(); iostats = per_cpu_ptr(server->io_stats, cpu); iostats->events[stat]++; - put_cpu_no_resched(); + put_cpu(); } static inline void nfs_inc_stats(const struct inode *inode, @@ -50,7 +50,7 @@ static inline void nfs_add_server_stats(const struct nfs_server *server, cpu = get_cpu(); iostats = per_cpu_ptr(server->io_stats, cpu); iostats->bytes[stat] += addend; - put_cpu_no_resched(); + put_cpu(); } static inline void nfs_add_stats(const struct inode *inode, @@ -71,7 +71,7 @@ static inline void nfs_add_fscache_stats(struct inode *inode, cpu = get_cpu(); iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); iostats->fscache[stat] += addend; - put_cpu_no_resched(); + put_cpu(); } #endif diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 064279e33bbb..36df60b6d8a4 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -31,21 +31,26 @@ #include "dat.h" #include "alloc.h" +struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) +{ + return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); +} + int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, __u64 *ptrp) { - __u64 ptr; + sector_t blocknr; int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); if (ret < 0) goto out; - if (bmap->b_pops->bpop_translate != NULL) { - ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr); - if (ret < 0) - goto out; - *ptrp = ptr; + if (NILFS_BMAP_USE_VBN(bmap)) { + ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, + &blocknr); + if (!ret) + *ptrp = blocknr; } out: @@ -53,6 +58,16 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, return ret; } +int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, + unsigned maxblocks) +{ + int ret; + + down_read(&bmap->b_sem); + ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); + up_read(&bmap->b_sem); + return ret; +} /** * nilfs_bmap_lookup - find a record @@ -101,8 +116,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) if (n < 0) return n; ret = nilfs_btree_convert_and_insert( - bmap, key, ptr, keys, ptrs, n, - NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH); + bmap, key, ptr, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags |= NILFS_BMAP_LARGE; @@ -158,8 +172,7 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) if (n < 0) return n; ret = nilfs_direct_delete_and_convert( - bmap, key, keys, ptrs, n, - NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH); + bmap, key, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE; @@ -417,38 +430,6 @@ void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) mark_inode_dirty(bmap->b_inode); } -int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr, - struct buffer_head **bhp) -{ - return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, - ptr, 0, bhp, 0); -} - -void nilfs_bmap_put_block(const struct nilfs_bmap *bmap, - struct buffer_head *bh) -{ - brelse(bh); -} - -int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr, - struct buffer_head **bhp) -{ - int ret; - - ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, - ptr, 0, bhp, 1); - if (ret < 0) - return ret; - set_buffer_nilfs_volatile(*bhp); - return 0; -} - -void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap, - struct buffer_head *bh) -{ - nilfs_btnode_delete(bh); -} - __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { @@ -476,11 +457,6 @@ __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) return NILFS_BMAP_INVALID_PTR; } -static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) -{ - return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); -} - #define NILFS_BMAP_GROUP_DIV 8 __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) { @@ -493,64 +469,51 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) (entries_per_group / NILFS_BMAP_GROUP_DIV); } -static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); } -static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); } -static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); } -static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +int nilfs_bmap_start_v(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *req, + sector_t blocknr) { - return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req, - sector_t blocknr) -{ - nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req, - blocknr); -} + struct inode *dat = nilfs_bmap_get_dat(bmap); + int ret; -static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); + ret = nilfs_dat_prepare_start(dat, &req->bpr_req); + if (likely(!ret)) + nilfs_dat_commit_start(dat, &req->bpr_req, blocknr); + return ret; } -static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); } -static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0); -} - -static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { - nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1); + nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, + bmap->b_ptr_type == NILFS_BMAP_PTR_VS); } -static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); } @@ -566,128 +529,44 @@ int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr) return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr); } -int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) +int nilfs_bmap_prepare_update_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) { + struct inode *dat = nilfs_bmap_get_dat(bmap); int ret; - ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq); + ret = nilfs_dat_prepare_end(dat, &oldreq->bpr_req); if (ret < 0) return ret; - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq); + ret = nilfs_dat_prepare_alloc(dat, &newreq->bpr_req); if (ret < 0) - bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); + nilfs_dat_abort_end(dat, &oldreq->bpr_req); return ret; } -void nilfs_bmap_commit_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) +void nilfs_bmap_commit_update_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) { - bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq); - bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq); -} + struct inode *dat = nilfs_bmap_get_dat(bmap); -void nilfs_bmap_abort_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) -{ - bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); - bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq); + nilfs_dat_commit_end(dat, &oldreq->bpr_req, + bmap->b_ptr_type == NILFS_BMAP_PTR_VS); + nilfs_dat_commit_alloc(dat, &newreq->bpr_req); } -static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr, - __u64 *ptrp) +void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) { - sector_t blocknr; - int ret; - - ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr); - if (ret < 0) - return ret; - if (ptrp != NULL) - *ptrp = blocknr; - return 0; -} + struct inode *dat = nilfs_bmap_get_dat(bmap); -static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - /* ignore target ptr */ - req->bpr_ptr = bmap->b_last_allocated_ptr++; - return 0; + nilfs_dat_abort_end(dat, &oldreq->bpr_req); + nilfs_dat_abort_alloc(dat, &newreq->bpr_req); } -static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - /* do nothing */ -} - -static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - bmap->b_last_allocated_ptr--; -} - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, - .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, - .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, - .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, - .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, - .bpop_commit_end_ptr = nilfs_bmap_commit_end_v, - .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, - - .bpop_translate = nilfs_bmap_translate_v, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, - .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, - .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, - .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, - .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, - .bpop_commit_end_ptr = nilfs_bmap_commit_end_vmdt, - .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, - - .bpop_translate = nilfs_bmap_translate_v, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_p, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_p, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_p, - .bpop_prepare_start_ptr = NULL, - .bpop_commit_start_ptr = NULL, - .bpop_abort_start_ptr = NULL, - .bpop_prepare_end_ptr = NULL, - .bpop_commit_end_ptr = NULL, - .bpop_abort_end_ptr = NULL, - - .bpop_translate = NULL, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { - .bpop_prepare_alloc_ptr = NULL, - .bpop_commit_alloc_ptr = NULL, - .bpop_abort_alloc_ptr = NULL, - .bpop_prepare_start_ptr = NULL, - .bpop_commit_start_ptr = NULL, - .bpop_abort_start_ptr = NULL, - .bpop_prepare_end_ptr = NULL, - .bpop_commit_end_ptr = NULL, - .bpop_abort_end_ptr = NULL, - - .bpop_translate = NULL, -}; - static struct lock_class_key nilfs_bmap_dat_lock_key; /** @@ -714,31 +593,26 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; switch (bmap->b_inode->i_ino) { case NILFS_DAT_INO: - bmap->b_pops = &nilfs_bmap_ptr_ops_p; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_P; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); break; case NILFS_CPFILE_INO: case NILFS_SUFILE_INO: - bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_VS; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; break; default: - bmap->b_pops = &nilfs_bmap_ptr_ops_v; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_VM; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; break; } return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ? - nilfs_btree_init(bmap, - NILFS_BMAP_LARGE_LOW, - NILFS_BMAP_LARGE_HIGH) : - nilfs_direct_init(bmap, - NILFS_BMAP_SMALL_LOW, - NILFS_BMAP_SMALL_HIGH); + nilfs_btree_init(bmap) : nilfs_direct_init(bmap); } /** @@ -764,7 +638,7 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) memset(&bmap->b_u, 0, NILFS_BMAP_SIZE); init_rwsem(&bmap->b_sem); bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; - bmap->b_pops = &nilfs_bmap_ptr_ops_gc; + bmap->b_ptr_type = NILFS_BMAP_PTR_U; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; bmap->b_state = 0; diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 4f2708abb1ba..b2890cdcef12 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -64,6 +64,8 @@ struct nilfs_bmap_stats { */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); + int (*bop_lookup_contig)(const struct nilfs_bmap *, __u64, __u64 *, + unsigned); int (*bop_insert)(struct nilfs_bmap *, __u64, __u64); int (*bop_delete)(struct nilfs_bmap *, __u64); void (*bop_clear)(struct nilfs_bmap *); @@ -86,34 +88,6 @@ struct nilfs_bmap_operations { }; -/** - * struct nilfs_bmap_ptr_operations - bmap ptr operation table - */ -struct nilfs_bmap_ptr_operations { - int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - int (*bpop_prepare_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - sector_t); - void (*bpop_abort_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - int (*bpop_prepare_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_abort_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - - int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *); -}; - - #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) #define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) #define NILFS_BMAP_NEW_PTR_INIT \ @@ -131,11 +105,9 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) * @b_sem: semaphore * @b_inode: owner of bmap * @b_ops: bmap operation table - * @b_pops: bmap ptr operation table - * @b_low: low watermark of conversion - * @b_high: high watermark of conversion * @b_last_allocated_key: last allocated key for data block * @b_last_allocated_ptr: last allocated ptr for data block + * @b_ptr_type: pointer type * @b_state: state */ struct nilfs_bmap { @@ -146,14 +118,22 @@ struct nilfs_bmap { struct rw_semaphore b_sem; struct inode *b_inode; const struct nilfs_bmap_operations *b_ops; - const struct nilfs_bmap_ptr_operations *b_pops; - __u64 b_low; - __u64 b_high; __u64 b_last_allocated_key; __u64 b_last_allocated_ptr; + int b_ptr_type; int b_state; }; +/* pointer type */ +#define NILFS_BMAP_PTR_P 0 /* physical block number (i.e. LBN) */ +#define NILFS_BMAP_PTR_VS 1 /* virtual block number (single + version) */ +#define NILFS_BMAP_PTR_VM 2 /* virtual block number (has multiple + versions) */ +#define NILFS_BMAP_PTR_U (-1) /* never perform pointer operations */ + +#define NILFS_BMAP_USE_VBN(bmap) ((bmap)->b_ptr_type > 0) + /* state */ #define NILFS_BMAP_DIRTY 0x00000001 @@ -162,6 +142,7 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *); +int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); @@ -182,7 +163,67 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); /* * Internal use only */ +struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *); +int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); +static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + return nilfs_bmap_prepare_alloc_v(bmap, req); + /* ignore target ptr */ + req->bpr_ptr = bmap->b_last_allocated_ptr++; + return 0; +} + +static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_commit_alloc_v(bmap, req); +} + +static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_abort_alloc_v(bmap, req); + else + bmap->b_last_allocated_ptr--; +} + +int nilfs_bmap_prepare_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); +void nilfs_bmap_commit_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); +void nilfs_bmap_abort_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); + +static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + return NILFS_BMAP_USE_VBN(bmap) ? + nilfs_bmap_prepare_end_v(bmap, req) : 0; +} + +static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_commit_end_v(bmap, req); +} + +static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_abort_end_v(bmap, req); +} + +int nilfs_bmap_start_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *, + sector_t); int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t); int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64); @@ -193,28 +234,20 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); -int nilfs_bmap_prepare_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); -void nilfs_bmap_commit_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); -void nilfs_bmap_abort_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); +int nilfs_bmap_prepare_update_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_commit_update_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_abort_update_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); -int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64, - struct buffer_head **); -void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *); -int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64, - struct buffer_head **); -void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *); - - /* Assume that bmap semaphore is locked. */ static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) { diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 4cc07b2c30e0..7e0b61be212e 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -46,15 +46,18 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc) INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); } -static struct address_space_operations def_btnode_aops; +static struct address_space_operations def_btnode_aops = { + .sync_page = block_sync_page, +}; -void nilfs_btnode_cache_init(struct address_space *btnc) +void nilfs_btnode_cache_init(struct address_space *btnc, + struct backing_dev_info *bdi) { btnc->host = NULL; /* can safely set to host inode ? */ btnc->flags = 0; mapping_set_gfp_mask(btnc, GFP_NOFS); btnc->assoc_mapping = NULL; - btnc->backing_dev_info = &default_backing_dev_info; + btnc->backing_dev_info = bdi; btnc->a_ops = &def_btnode_aops; } diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 35faa86444a7..3e2275172ed6 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -38,7 +38,7 @@ struct nilfs_btnode_chkey_ctxt { }; void nilfs_btnode_cache_init_once(struct address_space *); -void nilfs_btnode_cache_init(struct address_space *); +void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); void nilfs_btnode_cache_clear(struct address_space *); int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, struct buffer_head **, int); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 6b37a2767293..aa412724b64e 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -29,6 +29,7 @@ #include "btnode.h" #include "btree.h" #include "alloc.h" +#include "dat.h" /** * struct nilfs_btree_path - A path on which B-tree operations are executed @@ -109,8 +110,7 @@ static void nilfs_btree_clear_path(const struct nilfs_btree *btree, level < NILFS_BTREE_LEVEL_MAX; level++) { if (path[level].bp_bh != NULL) { - nilfs_bmap_put_block(&btree->bt_bmap, - path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = NULL; } /* sib_bh is released or deleted by prepare or commit @@ -123,10 +123,29 @@ static void nilfs_btree_clear_path(const struct nilfs_btree *btree, } } - /* * B-tree node operations */ +static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr, + struct buffer_head **bhp) +{ + struct address_space *btnc = + &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; + return nilfs_btnode_get(btnc, ptr, 0, bhp, 0); +} + +static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, + __u64 ptr, struct buffer_head **bhp) +{ + struct address_space *btnc = + &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; + int ret; + + ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1); + if (!ret) + set_buffer_nilfs_volatile(*bhp); + return ret; +} static inline int nilfs_btree_node_get_flags(const struct nilfs_btree *btree, @@ -488,8 +507,7 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, path[level].bp_index = index; for (level--; level >= minlevel; level--) { - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, - &path[level].bp_bh); + ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(btree, path, level); @@ -535,8 +553,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, path[level].bp_index = index; for (level--; level > 0; level--) { - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, - &path[level].bp_bh); + ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(btree, path, level); @@ -579,6 +596,87 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, return ret; } +static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, + __u64 key, __u64 *ptrp, unsigned maxblocks) +{ + struct nilfs_btree *btree = (struct nilfs_btree *)bmap; + struct nilfs_btree_path *path; + struct nilfs_btree_node *node; + struct inode *dat = NULL; + __u64 ptr, ptr2; + sector_t blocknr; + int level = NILFS_BTREE_LEVEL_NODE_MIN; + int ret, cnt, index, maxlevel; + + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); + if (ret < 0) + goto out; + + if (NILFS_BMAP_USE_VBN(bmap)) { + dat = nilfs_bmap_get_dat(bmap); + ret = nilfs_dat_translate(dat, ptr, &blocknr); + if (ret < 0) + goto out; + ptr = blocknr; + } + cnt = 1; + if (cnt == maxblocks) + goto end; + + maxlevel = nilfs_btree_height(btree) - 1; + node = nilfs_btree_get_node(btree, path, level); + index = path[level].bp_index + 1; + for (;;) { + while (index < nilfs_btree_node_get_nchildren(btree, node)) { + if (nilfs_btree_node_get_key(btree, node, index) != + key + cnt) + goto end; + ptr2 = nilfs_btree_node_get_ptr(btree, node, index); + if (dat) { + ret = nilfs_dat_translate(dat, ptr2, &blocknr); + if (ret < 0) + goto out; + ptr2 = blocknr; + } + if (ptr2 != ptr + cnt || ++cnt == maxblocks) + goto end; + index++; + continue; + } + if (level == maxlevel) + break; + + /* look-up right sibling node */ + node = nilfs_btree_get_node(btree, path, level + 1); + index = path[level + 1].bp_index + 1; + if (index >= nilfs_btree_node_get_nchildren(btree, node) || + nilfs_btree_node_get_key(btree, node, index) != key + cnt) + break; + ptr2 = nilfs_btree_node_get_ptr(btree, node, index); + path[level + 1].bp_index = index; + + brelse(path[level].bp_bh); + path[level].bp_bh = NULL; + ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh); + if (ret < 0) + goto out; + node = nilfs_btree_get_nonroot_node(btree, path, level); + index = 0; + path[level].bp_index = index; + } + end: + *ptrp = ptr; + ret = cnt; + out: + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + return ret; +} + static void nilfs_btree_promote_key(struct nilfs_btree *btree, struct nilfs_btree_path *path, int level, __u64 key) @@ -669,13 +767,13 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, nilfs_btree_node_get_key(btree, node, 0)); if (move) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += lnchildren; path[level + 1].bp_index--; } else { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index -= n; } @@ -722,14 +820,14 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, path[level + 1].bp_index--; if (move) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index -= nilfs_btree_node_get_nchildren(btree, node); path[level + 1].bp_index++; } else { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } @@ -781,7 +879,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree, *keyp = nilfs_btree_node_get_key(btree, right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; } else { @@ -790,7 +888,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree, *keyp = nilfs_btree_node_get_key(btree, right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } @@ -897,12 +995,12 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, level = NILFS_BTREE_LEVEL_DATA; /* allocate a new ptr for data block */ - if (btree->bt_ops->btop_find_target != NULL) + if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) path[level].bp_newreq.bpr_ptr = - btree->bt_ops->btop_find_target(btree, path, key); + nilfs_btree_find_target_v(btree, path, key); - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); if (ret < 0) goto err_out_data; @@ -924,8 +1022,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, if (pindex > 0) { sibptr = nilfs_btree_node_get_ptr(btree, parent, pindex - 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; @@ -936,7 +1033,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, stats->bs_nblocks++; goto out; } else - nilfs_bmap_put_block(&btree->bt_bmap, bh); + brelse(bh); } /* right sibling */ @@ -944,8 +1041,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, nilfs_btree_node_get_nchildren(btree, parent) - 1) { sibptr = nilfs_btree_node_get_ptr(btree, parent, pindex + 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; @@ -956,19 +1052,19 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, stats->bs_nblocks++; goto out; } else - nilfs_bmap_put_block(&btree->bt_bmap, bh); + brelse(bh); } /* split */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); if (ret < 0) goto err_out_child_node; - ret = nilfs_bmap_get_new_block(&btree->bt_bmap, - path[level].bp_newreq.bpr_ptr, - &bh); + ret = nilfs_btree_get_new_block(btree, + path[level].bp_newreq.bpr_ptr, + &bh); if (ret < 0) goto err_out_curr_node; @@ -994,12 +1090,12 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* grow */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); if (ret < 0) goto err_out_child_node; - ret = nilfs_bmap_get_new_block(&btree->bt_bmap, - path[level].bp_newreq.bpr_ptr, &bh); + ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, + &bh); if (ret < 0) goto err_out_curr_node; @@ -1023,18 +1119,16 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* error */ err_out_curr_node: - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq); + nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); err_out_child_node: for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + nilfs_btnode_delete(path[level].bp_sib_bh); + nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); } - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq); + nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); err_out_data: *levelp = level; stats->bs_nblocks = 0; @@ -1049,14 +1143,12 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree, set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; - if (btree->bt_ops->btop_set_target != NULL) - btree->bt_ops->btop_set_target(btree, key, ptr); + if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) + nilfs_btree_set_target_v(btree, key, ptr); for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { - if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) { - btree->bt_bmap.b_pops->bpop_commit_alloc_ptr( - &btree->bt_bmap, &path[level - 1].bp_newreq); - } + nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap, + &path[level - 1].bp_newreq); path[level].bp_op(btree, path, level, &key, &ptr); } @@ -1153,7 +1245,7 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(btree, node, 0)); - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index += n; } @@ -1192,7 +1284,7 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, nilfs_btree_node_get_key(btree, right, 0)); path[level + 1].bp_index--; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } @@ -1221,7 +1313,7 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, unlock_buffer(path[level].bp_bh); unlock_buffer(path[level].bp_sib_bh); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); + nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left); @@ -1252,7 +1344,7 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, unlock_buffer(path[level].bp_bh); unlock_buffer(path[level].bp_sib_bh); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); + nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level + 1].bp_index++; } @@ -1276,7 +1368,7 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree, nilfs_btree_node_move_left(btree, root, child, n); unlock_buffer(path[level].bp_bh); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); + nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = NULL; } @@ -1300,12 +1392,10 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); - if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); - if (ret < 0) - goto err_out_child_node; - } + ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, + &path[level].bp_oldreq); + if (ret < 0) + goto err_out_child_node; if (nilfs_btree_node_get_nchildren(btree, node) > nilfs_btree_node_nchildren_min(btree, node)) { @@ -1321,8 +1411,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* left sibling */ sibptr = nilfs_btree_node_get_ptr(btree, parent, pindex - 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; @@ -1343,8 +1432,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* right sibling */ sibptr = nilfs_btree_node_get_ptr(btree, parent, pindex + 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; @@ -1381,12 +1469,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, node = nilfs_btree_get_root(btree); path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); - if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); - if (ret < 0) - goto err_out_child_node; - } + + ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, + &path[level].bp_oldreq); + if (ret < 0) + goto err_out_child_node; + /* child of the root node is deleted */ path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; @@ -1398,15 +1486,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* error */ err_out_curr_node: - if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_abort_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq); err_out_child_node: for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); - if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_abort_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + brelse(path[level].bp_sib_bh); + nilfs_bmap_abort_end_ptr(&btree->bt_bmap, + &path[level].bp_oldreq); } *levelp = level; stats->bs_nblocks = 0; @@ -1420,9 +1505,8 @@ static void nilfs_btree_commit_delete(struct nilfs_btree *btree, int level; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { - if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_commit_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + nilfs_bmap_commit_end_ptr(&btree->bt_bmap, + &path[level].bp_oldreq); path[level].bp_op(btree, path, level, NULL, NULL); } @@ -1501,7 +1585,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); - ret = nilfs_bmap_get_block(bmap, ptr, &bh); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; @@ -1515,9 +1599,9 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) nextmaxkey = (nchildren > 1) ? nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0; if (bh != NULL) - nilfs_bmap_put_block(bmap, bh); + brelse(bh); - return (maxkey == key) && (nextmaxkey < bmap->b_low); + return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); } static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, @@ -1542,7 +1626,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, nchildren = nilfs_btree_node_get_nchildren(btree, root); WARN_ON(nchildren > 1); ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); - ret = nilfs_bmap_get_block(bmap, ptr, &bh); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; @@ -1563,7 +1647,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, } if (bh != NULL) - nilfs_bmap_put_block(bmap, bh); + brelse(bh); return nitems; } @@ -1584,10 +1668,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* for data */ /* cannot find near ptr */ - if (btree->bt_ops->btop_find_target != NULL) - dreq->bpr_ptr - = btree->bt_ops->btop_find_target(btree, NULL, key); - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq); + if (NILFS_BMAP_USE_VBN(bmap)) + dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); + + ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq); if (ret < 0) return ret; @@ -1595,11 +1679,11 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, stats->bs_nblocks++; if (nreq != NULL) { nreq->bpr_ptr = dreq->bpr_ptr + 1; - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq); + ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq); if (ret < 0) goto err_out_dreq; - ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh); + ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); if (ret < 0) goto err_out_nreq; @@ -1612,9 +1696,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* error */ err_out_nreq: - bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq); + nilfs_bmap_abort_alloc_ptr(bmap, nreq); err_out_dreq: - bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq); + nilfs_bmap_abort_alloc_ptr(bmap, dreq); stats->bs_nblocks = 0; return ret; @@ -1624,7 +1708,7 @@ static void nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, - int n, __u64 low, __u64 high, + int n, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head *bh) @@ -1642,12 +1726,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, /* convert and insert */ btree = (struct nilfs_btree *)bmap; - nilfs_btree_init(bmap, low, high); + nilfs_btree_init(bmap); if (nreq != NULL) { - if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) { - bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); - bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq); - } + nilfs_bmap_commit_alloc_ptr(bmap, dreq); + nilfs_bmap_commit_alloc_ptr(bmap, nreq); /* create child node at level 1 */ lock_buffer(bh); @@ -1661,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_bmap_set_dirty(bmap); unlock_buffer(bh); - nilfs_bmap_put_block(bmap, bh); + brelse(bh); /* create root node at level 2 */ node = nilfs_btree_get_root(btree); @@ -1669,8 +1751,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, 2, 1, &keys[0], &tmpptr); } else { - if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) - bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); + nilfs_bmap_commit_alloc_ptr(bmap, dreq); /* create root node at level 1 */ node = nilfs_btree_get_root(btree); @@ -1682,8 +1763,8 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_bmap_set_dirty(bmap); } - if (btree->bt_ops->btop_set_target != NULL) - btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr); + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr); } /** @@ -1694,13 +1775,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, * @keys: * @ptrs: * @n: - * @low: - * @high: */ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr, - const __u64 *keys, const __u64 *ptrs, - int n, __u64 low, __u64 high) + const __u64 *keys, const __u64 *ptrs, int n) { struct buffer_head *bh; union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; @@ -1725,7 +1803,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, if (ret < 0) return ret; nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, - low, high, di, ni, bh); + di, ni, bh); nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); return 0; } @@ -1754,9 +1832,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, nilfs_btree_node_get_ptr(btree, parent, path[level + 1].bp_index); path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; - ret = nilfs_bmap_prepare_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); if (ret < 0) return ret; @@ -1768,9 +1846,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, &path[level].bp_ctxt); if (ret < 0) { - nilfs_bmap_abort_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_bmap_abort_update_v(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); return ret; } } @@ -1784,9 +1862,9 @@ static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, { struct nilfs_btree_node *parent; - nilfs_bmap_commit_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_bmap_commit_update_v(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( @@ -1805,9 +1883,9 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, struct nilfs_btree_path *path, int level) { - nilfs_bmap_abort_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_bmap_abort_update_v(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, @@ -1930,7 +2008,9 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, goto out; } - ret = btree->bt_ops->btop_propagate(btree, path, level, bh); + ret = NILFS_BMAP_USE_VBN(bmap) ? + nilfs_btree_propagate_v(btree, path, level, bh) : + nilfs_btree_propagate_p(btree, path, level, bh); out: nilfs_btree_clear_path(btree, path); @@ -2066,12 +2146,9 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, ptr = nilfs_btree_node_get_ptr(btree, parent, path[level + 1].bp_index); req.bpr_ptr = ptr; - ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap, - &req); - if (ret < 0) + ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr); + if (unlikely(ret < 0)) return ret; - btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap, - &req, blocknr); key = nilfs_btree_node_get_key(btree, parent, path[level + 1].bp_index); @@ -2114,8 +2191,9 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, goto out; } - ret = btree->bt_ops->btop_assign(btree, path, level, bh, - blocknr, binfo); + ret = NILFS_BMAP_USE_VBN(bmap) ? + nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : + nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); out: nilfs_btree_clear_path(btree, path); @@ -2171,7 +2249,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) WARN_ON(ret == -ENOENT); goto out; } - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; @@ -2179,7 +2257,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) if (!buffer_dirty(bh)) nilfs_btnode_mark_dirty(bh); - nilfs_bmap_put_block(&btree->bt_bmap, bh); + brelse(bh); if (!nilfs_bmap_dirty(&btree->bt_bmap)) nilfs_bmap_set_dirty(&btree->bt_bmap); @@ -2191,6 +2269,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) static const struct nilfs_bmap_operations nilfs_btree_ops = { .bop_lookup = nilfs_btree_lookup, + .bop_lookup_contig = nilfs_btree_lookup_contig, .bop_insert = nilfs_btree_insert, .bop_delete = nilfs_btree_delete, .bop_clear = NULL, @@ -2210,6 +2289,7 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = { static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_lookup = NULL, + .bop_lookup_contig = NULL, .bop_insert = NULL, .bop_delete = NULL, .bop_clear = NULL, @@ -2227,43 +2307,13 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_gather_data = NULL, }; -static const struct nilfs_btree_operations nilfs_btree_ops_v = { - .btop_find_target = nilfs_btree_find_target_v, - .btop_set_target = nilfs_btree_set_target_v, - .btop_propagate = nilfs_btree_propagate_v, - .btop_assign = nilfs_btree_assign_v, -}; - -static const struct nilfs_btree_operations nilfs_btree_ops_p = { - .btop_find_target = NULL, - .btop_set_target = NULL, - .btop_propagate = nilfs_btree_propagate_p, - .btop_assign = nilfs_btree_assign_p, -}; - -int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) +int nilfs_btree_init(struct nilfs_bmap *bmap) { - struct nilfs_btree *btree; - - btree = (struct nilfs_btree *)bmap; bmap->b_ops = &nilfs_btree_ops; - bmap->b_low = low; - bmap->b_high = high; - switch (bmap->b_inode->i_ino) { - case NILFS_DAT_INO: - btree->bt_ops = &nilfs_btree_ops_p; - break; - default: - btree->bt_ops = &nilfs_btree_ops_v; - break; - } - return 0; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) { - bmap->b_low = NILFS_BMAP_LARGE_LOW; - bmap->b_high = NILFS_BMAP_LARGE_HIGH; bmap->b_ops = &nilfs_btree_ops_gc; } diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 4766deb52fb1..0e72bbbc6b64 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -34,28 +34,6 @@ struct nilfs_btree; struct nilfs_btree_path; /** - * struct nilfs_btree_operations - B-tree operation table - */ -struct nilfs_btree_operations { - __u64 (*btop_find_target)(const struct nilfs_btree *, - const struct nilfs_btree_path *, __u64); - void (*btop_set_target)(struct nilfs_btree *, __u64, __u64); - - struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *); - - int (*btop_propagate)(struct nilfs_btree *, - struct nilfs_btree_path *, - int, - struct buffer_head *); - int (*btop_assign)(struct nilfs_btree *, - struct nilfs_btree_path *, - int, - struct buffer_head **, - sector_t, - union nilfs_binfo *); -}; - -/** * struct nilfs_btree_node - B-tree node * @bn_flags: flags * @bn_level: level @@ -80,13 +58,9 @@ struct nilfs_btree_node { /** * struct nilfs_btree - B-tree structure * @bt_bmap: bmap base structure - * @bt_ops: B-tree operation table */ struct nilfs_btree { struct nilfs_bmap bt_bmap; - - /* B-tree-specific members */ - const struct nilfs_btree_operations *bt_ops; }; @@ -108,10 +82,9 @@ struct nilfs_btree { int nilfs_btree_path_cache_init(void); void nilfs_btree_path_cache_destroy(void); -int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64); +int nilfs_btree_init(struct nilfs_bmap *); int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, - const __u64 *, const __u64 *, - int, __u64, __u64); + const __u64 *, const __u64 *, int); void nilfs_btree_init_gc(struct nilfs_bmap *); #endif /* _NILFS_BTREE_H */ diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index cadd36b14d07..7d49813f66d6 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -295,10 +295,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, return -EINVAL; } - /* cannot delete the latest checkpoint */ - if (start == nilfs_mdt_cno(cpfile) - 1) - return -EPERM; - down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); @@ -384,9 +380,10 @@ static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile, } static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { struct nilfs_checkpoint *cp; + struct nilfs_cpinfo *ci = buf; struct buffer_head *bh; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop; @@ -410,17 +407,22 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, kaddr = kmap_atomic(bh->b_page, KM_USER0); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { - if (!nilfs_checkpoint_invalid(cp)) - nilfs_cpfile_checkpoint_to_cpinfo( - cpfile, cp, &ci[n++]); + if (!nilfs_checkpoint_invalid(cp)) { + nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, + ci); + ci = (void *)ci + cisz; + n++; + } } kunmap_atomic(kaddr, KM_USER0); brelse(bh); } ret = n; - if (n > 0) - *cnop = ci[n - 1].ci_cno + 1; + if (n > 0) { + ci = (void *)ci - cisz; + *cnop = ci->ci_cno + 1; + } out: up_read(&NILFS_MDT(cpfile)->mi_sem); @@ -428,11 +430,12 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, } static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { struct buffer_head *bh; struct nilfs_cpfile_header *header; struct nilfs_checkpoint *cp; + struct nilfs_cpinfo *ci = buf; __u64 curr = *cnop, next; unsigned long curr_blkoff, next_blkoff; void *kaddr; @@ -472,7 +475,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, if (unlikely(nilfs_checkpoint_invalid(cp) || !nilfs_checkpoint_snapshot(cp))) break; - nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]); + nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, ci); + ci = (void *)ci + cisz; + n++; next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); if (next == 0) break; /* reach end of the snapshot list */ @@ -511,13 +516,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, */ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { switch (mode) { case NILFS_CHECKPOINT: - return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci); + return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, buf, cisz, nci); case NILFS_SNAPSHOT: - return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci); + return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, buf, cisz, nci); default: return -EINVAL; } @@ -533,20 +538,14 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) struct nilfs_cpinfo ci; __u64 tcno = cno; ssize_t nci; - int ret; - nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1); + nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, sizeof(ci), 1); if (nci < 0) return nci; else if (nci == 0 || ci.ci_cno != cno) return -ENOENT; - - /* cannot delete the latest checkpoint nor snapshots */ - ret = nilfs_cpinfo_snapshot(&ci); - if (ret < 0) - return ret; - else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1) - return -EPERM; + else if (nilfs_cpinfo_snapshot(&ci)) + return -EBUSY; return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); } diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index 1a8a1008c342..788a45950197 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -39,7 +39,7 @@ int nilfs_cpfile_delete_checkpoint(struct inode *, __u64); int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); int nilfs_cpfile_is_snapshot(struct inode *, __u64); int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); -ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, - struct nilfs_cpinfo *, size_t); +ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, + size_t); #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index bb8a5818e7f1..0b2710e2d565 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -92,21 +92,6 @@ void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) nilfs_palloc_abort_alloc_entry(dat, req); } -int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req) -{ - int ret; - - ret = nilfs_palloc_prepare_free_entry(dat, req); - if (ret < 0) - return ret; - ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - nilfs_palloc_abort_free_entry(dat, req); - return ret; - } - return 0; -} - void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; @@ -391,36 +376,37 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) return ret; } -ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo, +ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, size_t nvi) { struct buffer_head *entry_bh; struct nilfs_dat_entry *entry; + struct nilfs_vinfo *vinfo = buf; __u64 first, last; void *kaddr; unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; int i, j, n, ret; for (i = 0; i < nvi; i += n) { - ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr, + ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr, 0, &entry_bh); if (ret < 0) return ret; kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); /* last virtual block number in this block */ - first = vinfo[i].vi_vblocknr; + first = vinfo->vi_vblocknr; do_div(first, entries_per_block); first *= entries_per_block; last = first + entries_per_block - 1; for (j = i, n = 0; - j < nvi && vinfo[j].vi_vblocknr >= first && - vinfo[j].vi_vblocknr <= last; - j++, n++) { + j < nvi && vinfo->vi_vblocknr >= first && + vinfo->vi_vblocknr <= last; + j++, n++, vinfo = (void *)vinfo + visz) { entry = nilfs_palloc_block_get_entry( - dat, vinfo[j].vi_vblocknr, entry_bh, kaddr); - vinfo[j].vi_start = le64_to_cpu(entry->de_start); - vinfo[j].vi_end = le64_to_cpu(entry->de_end); - vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr); + dat, vinfo->vi_vblocknr, entry_bh, kaddr); + vinfo->vi_start = le64_to_cpu(entry->de_start); + vinfo->vi_end = le64_to_cpu(entry->de_end); + vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } kunmap_atomic(kaddr, KM_USER0); brelse(entry_bh); diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index d9560654a4b7..d328b81eead4 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -47,6 +47,6 @@ void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); int nilfs_dat_mark_dirty(struct inode *, __u64); int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); -ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t); +ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); #endif /* _NILFS_DAT_H */ diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index c6379e482781..342d9765df8d 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -25,6 +25,7 @@ #include "page.h" #include "direct.h" #include "alloc.h" +#include "dat.h" static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) { @@ -62,6 +63,47 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, return 0; } +static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, + __u64 key, __u64 *ptrp, + unsigned maxblocks) +{ + struct nilfs_direct *direct = (struct nilfs_direct *)bmap; + struct inode *dat = NULL; + __u64 ptr, ptr2; + sector_t blocknr; + int ret, cnt; + + if (key > NILFS_DIRECT_KEY_MAX || + (ptr = nilfs_direct_get_ptr(direct, key)) == + NILFS_BMAP_INVALID_PTR) + return -ENOENT; + + if (NILFS_BMAP_USE_VBN(bmap)) { + dat = nilfs_bmap_get_dat(bmap); + ret = nilfs_dat_translate(dat, ptr, &blocknr); + if (ret < 0) + return ret; + ptr = blocknr; + } + + maxblocks = min_t(unsigned, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1); + for (cnt = 1; cnt < maxblocks && + (ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) != + NILFS_BMAP_INVALID_PTR; + cnt++) { + if (dat) { + ret = nilfs_dat_translate(dat, ptr2, &blocknr); + if (ret < 0) + return ret; + ptr2 = blocknr; + } + if (ptr2 != ptr + cnt) + break; + } + *ptrp = ptr; + return cnt; +} + static __u64 nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) { @@ -90,10 +132,9 @@ static int nilfs_direct_prepare_insert(struct nilfs_direct *direct, { int ret; - if (direct->d_ops->dop_find_target != NULL) - req->bpr_ptr = direct->d_ops->dop_find_target(direct, key); - ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap, - req); + if (NILFS_BMAP_USE_VBN(&direct->d_bmap)) + req->bpr_ptr = nilfs_direct_find_target_v(direct, key); + ret = nilfs_bmap_prepare_alloc_ptr(&direct->d_bmap, req); if (ret < 0) return ret; @@ -111,16 +152,14 @@ static void nilfs_direct_commit_insert(struct nilfs_direct *direct, bh = (struct buffer_head *)((unsigned long)ptr); set_buffer_nilfs_volatile(bh); - if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL) - direct->d_bmap.b_pops->bpop_commit_alloc_ptr( - &direct->d_bmap, req); + nilfs_bmap_commit_alloc_ptr(&direct->d_bmap, req); nilfs_direct_set_ptr(direct, key, req->bpr_ptr); if (!nilfs_bmap_dirty(&direct->d_bmap)) nilfs_bmap_set_dirty(&direct->d_bmap); - if (direct->d_ops->dop_set_target != NULL) - direct->d_ops->dop_set_target(direct, key, req->bpr_ptr); + if (NILFS_BMAP_USE_VBN(&direct->d_bmap)) + nilfs_direct_set_target_v(direct, key, req->bpr_ptr); } static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) @@ -152,25 +191,18 @@ static int nilfs_direct_prepare_delete(struct nilfs_direct *direct, { int ret; - if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - req->bpr_ptr = nilfs_direct_get_ptr(direct, key); - ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr( - &direct->d_bmap, req); - if (ret < 0) - return ret; - } - - stats->bs_nblocks = 1; - return 0; + req->bpr_ptr = nilfs_direct_get_ptr(direct, key); + ret = nilfs_bmap_prepare_end_ptr(&direct->d_bmap, req); + if (!ret) + stats->bs_nblocks = 1; + return ret; } static void nilfs_direct_commit_delete(struct nilfs_direct *direct, union nilfs_bmap_ptr_req *req, __u64 key) { - if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL) - direct->d_bmap.b_pops->bpop_commit_end_ptr( - &direct->d_bmap, req); + nilfs_bmap_commit_end_ptr(&direct->d_bmap, req); nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); } @@ -244,8 +276,7 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, } int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, - __u64 key, __u64 *keys, __u64 *ptrs, - int n, __u64 low, __u64 high) + __u64 key, __u64 *keys, __u64 *ptrs, int n) { struct nilfs_direct *direct; __le64 *dptrs; @@ -275,8 +306,7 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, dptrs[i] = NILFS_BMAP_INVALID_PTR; } - nilfs_direct_init(bmap, low, high); - + nilfs_direct_init(bmap); return 0; } @@ -293,11 +323,11 @@ static int nilfs_direct_propagate_v(struct nilfs_direct *direct, if (!buffer_nilfs_volatile(bh)) { oldreq.bpr_ptr = ptr; newreq.bpr_ptr = ptr; - ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq, - &newreq); + ret = nilfs_bmap_prepare_update_v(&direct->d_bmap, &oldreq, + &newreq); if (ret < 0) return ret; - nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq); + nilfs_bmap_commit_update_v(&direct->d_bmap, &oldreq, &newreq); set_buffer_nilfs_volatile(bh); nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr); } else @@ -309,12 +339,10 @@ static int nilfs_direct_propagate_v(struct nilfs_direct *direct, static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, struct buffer_head *bh) { - struct nilfs_direct *direct; + struct nilfs_direct *direct = (struct nilfs_direct *)bmap; - direct = (struct nilfs_direct *)bmap; - return (direct->d_ops->dop_propagate != NULL) ? - direct->d_ops->dop_propagate(direct, bh) : - 0; + return NILFS_BMAP_USE_VBN(bmap) ? + nilfs_direct_propagate_v(direct, bh) : 0; } static int nilfs_direct_assign_v(struct nilfs_direct *direct, @@ -327,12 +355,9 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct, int ret; req.bpr_ptr = ptr; - ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr( - &direct->d_bmap, &req); - if (ret < 0) + ret = nilfs_bmap_start_v(&direct->d_bmap, &req, blocknr); + if (unlikely(ret < 0)) return ret; - direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap, - &req, blocknr); binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); @@ -377,12 +402,14 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, return -EINVAL; } - return direct->d_ops->dop_assign(direct, key, ptr, bh, - blocknr, binfo); + return NILFS_BMAP_USE_VBN(bmap) ? + nilfs_direct_assign_v(direct, key, ptr, bh, blocknr, binfo) : + nilfs_direct_assign_p(direct, key, ptr, bh, blocknr, binfo); } static const struct nilfs_bmap_operations nilfs_direct_ops = { .bop_lookup = nilfs_direct_lookup, + .bop_lookup_contig = nilfs_direct_lookup_contig, .bop_insert = nilfs_direct_insert, .bop_delete = nilfs_direct_delete, .bop_clear = NULL, @@ -401,36 +428,8 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = { }; -static const struct nilfs_direct_operations nilfs_direct_ops_v = { - .dop_find_target = nilfs_direct_find_target_v, - .dop_set_target = nilfs_direct_set_target_v, - .dop_propagate = nilfs_direct_propagate_v, - .dop_assign = nilfs_direct_assign_v, -}; - -static const struct nilfs_direct_operations nilfs_direct_ops_p = { - .dop_find_target = NULL, - .dop_set_target = NULL, - .dop_propagate = NULL, - .dop_assign = nilfs_direct_assign_p, -}; - -int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) +int nilfs_direct_init(struct nilfs_bmap *bmap) { - struct nilfs_direct *direct; - - direct = (struct nilfs_direct *)bmap; bmap->b_ops = &nilfs_direct_ops; - bmap->b_low = low; - bmap->b_high = high; - switch (bmap->b_inode->i_ino) { - case NILFS_DAT_INO: - direct->d_ops = &nilfs_direct_ops_p; - break; - default: - direct->d_ops = &nilfs_direct_ops_v; - break; - } - return 0; } diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index 45d2c5cda812..a5ffd66e25d0 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h @@ -31,18 +31,6 @@ struct nilfs_direct; /** - * struct nilfs_direct_operations - direct mapping operation table - */ -struct nilfs_direct_operations { - __u64 (*dop_find_target)(const struct nilfs_direct *, __u64); - void (*dop_set_target)(struct nilfs_direct *, __u64, __u64); - int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *); - int (*dop_assign)(struct nilfs_direct *, __u64, __u64, - struct buffer_head **, sector_t, - union nilfs_binfo *); -}; - -/** * struct nilfs_direct_node - direct node * @dn_flags: flags * @dn_pad: padding @@ -55,13 +43,9 @@ struct nilfs_direct_node { /** * struct nilfs_direct - direct mapping * @d_bmap: bmap structure - * @d_ops: direct mapping operation table */ struct nilfs_direct { struct nilfs_bmap d_bmap; - - /* direct-mapping-specific members */ - const struct nilfs_direct_operations *d_ops; }; @@ -70,9 +54,9 @@ struct nilfs_direct { #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) -int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64); +int nilfs_direct_init(struct nilfs_bmap *); int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *, - __u64 *, int, __u64, __u64); + __u64 *, int); #endif /* _NILFS_DIRECT_H */ diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 19d2102b6a69..1b3c2bb20da9 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -52,8 +52,9 @@ #include "dat.h" #include "ifile.h" -static struct address_space_operations def_gcinode_aops = {}; -/* XXX need def_gcinode_iops/fops? */ +static struct address_space_operations def_gcinode_aops = { + .sync_page = block_sync_page, +}; /* * nilfs_gccache_submit_read_data() - add data buffer and submit read request diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 49ab4a49bb4f..2696d6b513b7 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -43,22 +43,23 @@ * * This function does not issue actual read request of the specified data * block. It is done by VFS. - * Bulk read for direct-io is not supported yet. (should be supported) */ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) { struct nilfs_inode_info *ii = NILFS_I(inode); - unsigned long blknum = 0; + __u64 blknum = 0; int err = 0, ret; struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); + unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; - /* This exclusion control is a workaround; should be revised */ - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - if (ret == 0) { /* found */ + down_read(&NILFS_MDT(dat)->mi_sem); + ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); + up_read(&NILFS_MDT(dat)->mi_sem); + if (ret >= 0) { /* found */ map_bh(bh_result, inode->i_sb, blknum); + if (ret > 0) + bh_result->b_size = (ret << inode->i_blkbits); goto out; } /* data block was not found */ @@ -240,7 +241,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct address_space_operations nilfs_aops = { .writepage = nilfs_writepage, .readpage = nilfs_readpage, - /* .sync_page = nilfs_sync_page, */ + .sync_page = block_sync_page, .writepages = nilfs_writepages, .set_page_dirty = nilfs_set_page_dirty, .readpages = nilfs_readpages, @@ -249,6 +250,7 @@ struct address_space_operations nilfs_aops = { /* .releasepage = nilfs_releasepage, */ .invalidatepage = block_invalidatepage, .direct_IO = nilfs_direct_IO, + .is_partially_uptodate = block_is_partially_uptodate, }; struct inode *nilfs_new_inode(struct inode *dir, int mode) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index d6759b92006f..6ea5f872e2de 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -152,7 +152,7 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, - nmembs); + size, nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } @@ -182,7 +182,8 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, int ret; down_read(&nilfs->ns_segctor_sem); - ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); + ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, size, + nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } @@ -212,7 +213,7 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, int ret; down_read(&nilfs->ns_segctor_sem); - ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } @@ -435,24 +436,6 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, return nmembs; } -static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, - struct nilfs_argv *argv, void *buf) -{ - size_t nmembs = argv->v_nmembs; - struct nilfs_sb_info *sbi = nilfs->ns_writer; - int ret; - - if (unlikely(!sbi)) { - /* never happens because called for a writable mount */ - WARN_ON(1); - return -EROFS; - } - ret = nilfs_segctor_add_segments_to_be_freed( - NILFS_SC(sbi), buf, nmembs); - - return (ret < 0) ? ret : nmembs; -} - int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, struct nilfs_argv *argv, void **kbufs) { @@ -491,14 +474,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot mark copying blocks dirty"; goto failed; } - ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]); - if (ret < 0) { - /* - * can safely abort because this operation is atomic. - */ - msg = "cannot set segments to be freed"; - goto failed; - } return 0; failed: @@ -615,7 +590,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - if (argv.v_size != membsz) + if (argv.v_size < membsz) return -EINVAL; ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index bb78745a0e30..3d3ddb3f5177 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -430,6 +430,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) static struct address_space_operations def_mdt_aops = { .writepage = nilfs_mdt_write_page, + .sync_page = block_sync_page, }; static struct inode_operations def_mdt_iops; @@ -449,7 +450,7 @@ struct inode * nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, ino_t ino, gfp_t gfp_mask) { - struct inode *inode = nilfs_alloc_inode(sb); + struct inode *inode = nilfs_alloc_inode_common(nilfs); if (!inode) return NULL; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index da6fc0bba2e5..edf6a59d9f2a 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -263,6 +263,7 @@ extern void nilfs_dirty_inode(struct inode *); extern struct dentry *nilfs_get_parent(struct dentry *); /* super.c */ +extern struct inode *nilfs_alloc_inode_common(struct the_nilfs *); extern struct inode *nilfs_alloc_inode(struct super_block *); extern void nilfs_destroy_inode(struct inode *); extern void nilfs_error(struct super_block *, const char *, const char *, ...) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 57afa9d24061..d80cc71be749 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -28,7 +28,6 @@ #include "segment.h" #include "sufile.h" #include "page.h" -#include "seglist.h" #include "segbuf.h" /* @@ -395,6 +394,24 @@ static void dispose_recovery_list(struct list_head *head) } } +struct nilfs_segment_entry { + struct list_head list; + __u64 segnum; +}; + +static int nilfs_segment_list_add(struct list_head *head, __u64 segnum) +{ + struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); + + if (unlikely(!ent)) + return -ENOMEM; + + ent->segnum = segnum; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, head); + return 0; +} + void nilfs_dispose_segment_list(struct list_head *head) { while (!list_empty(head)) { @@ -402,7 +419,7 @@ void nilfs_dispose_segment_list(struct list_head *head) = list_entry(head->next, struct nilfs_segment_entry, list); list_del(&ent->list); - nilfs_free_segment_entry(ent); + kfree(ent); } } @@ -431,12 +448,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, if (unlikely(err)) goto failed; - err = -ENOMEM; for (i = 1; i < 4; i++) { - ent = nilfs_alloc_segment_entry(segnum[i]); - if (unlikely(!ent)) + err = nilfs_segment_list_add(head, segnum[i]); + if (unlikely(err)) goto failed; - list_add_tail(&ent->list, head); } /* @@ -450,7 +465,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, goto failed; } list_del(&ent->list); - nilfs_free_segment_entry(ent); + kfree(ent); } /* Allocate new segments for recovery */ @@ -791,7 +806,6 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, u64 seg_seq; __u64 segnum, nextnum = 0; __u64 cno; - struct nilfs_segment_entry *ent; LIST_HEAD(segments); int empty_seg = 0, scan_newer = 0; int ret; @@ -892,12 +906,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, if (empty_seg++) goto super_root_found; /* found a valid super root */ - ent = nilfs_alloc_segment_entry(segnum); - if (unlikely(!ent)) { - ret = -ENOMEM; + ret = nilfs_segment_list_add(&segments, segnum); + if (unlikely(ret)) goto failed; - } - list_add_tail(&ent->list, &segments); seg_seq++; segnum = nextnum; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 1e68821b4a9b..9e3fe17bb96b 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -26,7 +26,6 @@ #include <linux/crc32.h> #include "page.h" #include "segbuf.h" -#include "seglist.h" static struct kmem_cache *nilfs_segbuf_cachep; @@ -394,7 +393,7 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * Last BIO is always sent through the following * submission. */ - rw |= (1 << BIO_RW_SYNCIO); + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); res = nilfs_submit_seg_bio(wi, rw); if (unlikely(res)) goto failed_bio; diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h deleted file mode 100644 index d39df9144e99..000000000000 --- a/fs/nilfs2/seglist.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * seglist.h - expediential structure and routines to handle list of segments - * (would be removed in a future release) - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> - * - */ -#ifndef _NILFS_SEGLIST_H -#define _NILFS_SEGLIST_H - -#include <linux/fs.h> -#include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> -#include "sufile.h" - -struct nilfs_segment_entry { - __u64 segnum; - -#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisonally. - It must be cancelled if - construction aborted */ - - unsigned flags; - struct list_head list; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; -}; - - -void nilfs_dispose_segment_list(struct list_head *); - -static inline struct nilfs_segment_entry * -nilfs_alloc_segment_entry(__u64 segnum) -{ - struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); - - if (likely(ent)) { - ent->segnum = segnum; - ent->flags = 0; - ent->bh_su = NULL; - ent->raw_su = NULL; - INIT_LIST_HEAD(&ent->list); - } - return ent; -} - -static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent, - struct inode *sufile) -{ - return nilfs_sufile_get_segment_usage(sufile, ent->segnum, - &ent->raw_su, &ent->bh_su); -} - -static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent, - struct inode *sufile) -{ - if (!ent->bh_su) - return; - nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su); - ent->bh_su = NULL; - ent->raw_su = NULL; -} - -static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent) -{ - kfree(ent); -} - -#endif /* _NILFS_SEGLIST_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 22c7f65c2403..aa977549919e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -39,7 +39,6 @@ #include "sufile.h" #include "cpfile.h" #include "ifile.h" -#include "seglist.h" #include "segbuf.h" @@ -79,7 +78,8 @@ enum { /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ -#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED) +#define NILFS_CF_SUFREED 0x0004 /* segment usages has been freed */ +#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED) /* Operations depending on the construction mode and file type */ struct nilfs_sc_operations { @@ -810,7 +810,7 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci) { return list_empty(&sci->sc_dirty_files) && !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && - list_empty(&sci->sc_cleaning_segments) && + sci->sc_nfreesegs == 0 && (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); } @@ -1005,44 +1005,6 @@ static void nilfs_drop_collected_inodes(struct list_head *head) } } -static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci, - struct inode *sufile) - -{ - struct list_head *head = &sci->sc_cleaning_segments; - struct nilfs_segment_entry *ent; - int err; - - list_for_each_entry(ent, head, list) { - if (!(ent->flags & NILFS_SLH_FREED)) - break; - err = nilfs_sufile_cancel_free(sufile, ent->segnum); - WARN_ON(err); /* do not happen */ - ent->flags &= ~NILFS_SLH_FREED; - } -} - -static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci, - struct inode *sufile) -{ - struct list_head *head = &sci->sc_cleaning_segments; - struct nilfs_segment_entry *ent; - int err; - - list_for_each_entry(ent, head, list) { - err = nilfs_sufile_free(sufile, ent->segnum); - if (unlikely(err)) - return err; - ent->flags |= NILFS_SLH_FREED; - } - return 0; -} - -static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci) -{ - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); -} - static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, struct inode *inode, struct list_head *listp, @@ -1161,6 +1123,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) struct the_nilfs *nilfs = sbi->s_nilfs; struct list_head *head; struct nilfs_inode_info *ii; + size_t ndone; int err = 0; switch (sci->sc_stage.scnt) { @@ -1250,10 +1213,16 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) break; sci->sc_stage.scnt++; /* Fall through */ case NILFS_ST_SUFILE: - err = nilfs_segctor_prepare_free_segments(sci, - nilfs->ns_sufile); - if (unlikely(err)) + err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, + sci->sc_nfreesegs, &ndone); + if (unlikely(err)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, ndone, + NULL); break; + } + sci->sc_stage.flags |= NILFS_CF_SUFREED; + err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, &nilfs_sc_file_ops); if (unlikely(err)) @@ -1486,7 +1455,15 @@ static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, { if (unlikely(err)) { nilfs_segctor_free_incomplete_segments(sci, nilfs); - nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + int ret; + + ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(ret); /* do not happen */ + } } nilfs_segctor_clear_segment_buffers(sci); } @@ -1585,7 +1562,13 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) break; - nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(err); /* do not happen */ + } nilfs_segctor_clear_segment_buffers(sci); err = nilfs_segctor_extend_segments(sci, nilfs, nadd); @@ -2224,10 +2207,8 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_complete_write(sci); /* Commit segments */ - if (has_sr) { - nilfs_segctor_commit_free_segments(sci); + if (has_sr) nilfs_segctor_clear_metadata_dirty(sci); - } nilfs_segctor_end_construction(sci, nilfs, 0); @@ -2301,48 +2282,6 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino) /* assign bit 0 to data files */ } -int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, - __u64 *segnum, size_t nsegs) -{ - struct nilfs_segment_entry *ent; - struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; - struct inode *sufile = nilfs->ns_sufile; - LIST_HEAD(list); - __u64 *pnum; - size_t i; - int err; - - for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) { - ent = nilfs_alloc_segment_entry(*pnum); - if (unlikely(!ent)) { - err = -ENOMEM; - goto failed; - } - list_add_tail(&ent->list, &list); - - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - - if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su))) - printk(KERN_WARNING "NILFS: unused segment is " - "requested to be cleaned (segnum=%llu)\n", - (unsigned long long)ent->segnum); - nilfs_close_segment_entry(ent, sufile); - } - list_splice(&list, sci->sc_cleaning_segments.prev); - return 0; - - failed: - nilfs_dispose_segment_list(&list); - return err; -} - -void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci) -{ - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); -} - struct nilfs_segctor_wait_request { wait_queue_t wq; __u32 seq; @@ -2607,10 +2546,13 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, err = nilfs_init_gcdat_inode(nilfs); if (unlikely(err)) goto out_unlock; + err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); if (unlikely(err)) goto out_unlock; + sci->sc_freesegs = kbufs[4]; + sci->sc_nfreesegs = argv[4].v_nmembs; list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); for (;;) { @@ -2629,6 +2571,8 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, } out_unlock: + sci->sc_freesegs = NULL; + sci->sc_nfreesegs = 0; nilfs_clear_gcdat_inode(nilfs); nilfs_transaction_unlock(sbi); return err; @@ -2835,7 +2779,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_gc_inodes); - INIT_LIST_HEAD(&sci->sc_cleaning_segments); INIT_LIST_HEAD(&sci->sc_copied_buffers); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; @@ -2901,9 +2844,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); } - if (!list_empty(&sci->sc_cleaning_segments)) - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); - WARN_ON(!list_empty(&sci->sc_segbufs)); down_write(&sbi->s_nilfs->ns_segctor_sem); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 476bdd5df5be..0d2a475a741b 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -90,8 +90,9 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written - * @sc_cleaning_segments: List of segments to be freed through construction * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data + * @sc_freesegs: array of segment numbers to be freed + * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation * @sc_dsync_start: start byte offset of data pages * @sc_dsync_end: end byte offset of data pages (inclusive) @@ -131,9 +132,11 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; - struct list_head sc_cleaning_segments; struct list_head sc_copied_buffers; + __u64 *sc_freesegs; + size_t sc_nfreesegs; + struct nilfs_inode_info *sc_dsync_inode; loff_t sc_dsync_start; loff_t sc_dsync_end; @@ -225,10 +228,6 @@ extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); -extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, - __u64 *, size_t); -extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *); - extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); @@ -240,5 +239,6 @@ extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, extern int nilfs_recover_logical_segments(struct the_nilfs *, struct nilfs_sb_info *, struct nilfs_recovery_info *); +extern void nilfs_dispose_segment_list(struct list_head *); #endif /* _NILFS_SEGMENT_H */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 98e68677f045..37994d4a59cc 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Written by Koji Sato <koji@osrg.net>. + * Rivised by Ryusuke Konishi <ryusuke@osrg.net>. */ #include <linux/kernel.h> @@ -108,6 +109,102 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, nilfs_mdt_mark_buffer_dirty(header_bh); } +/** + * nilfs_sufile_updatev - modify multiple segment usages at a time + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @create: creation flag + * @ndone: place to store number of modified segments on @segnumv + * @dofunc: primitive operation for the update + * + * Description: nilfs_sufile_updatev() repeatedly calls @dofunc + * against the given array of segments. The @dofunc is called with + * buffers of a header block and the sufile block in which the target + * segment usage entry is contained. If @ndone is given, the number + * of successfully modified segments from the head is stored in the + * place @ndone points to. + * + * Return Value: On success, zero is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - Given segment usage is in hole block (may be returned if + * @create is zero) + * + * %-EINVAL - Invalid segment usage number + */ +int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, + int create, size_t *ndone, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)) +{ + struct buffer_head *header_bh, *bh; + unsigned long blkoff, prev_blkoff; + __u64 *seg; + size_t nerr = 0, n = 0; + int ret = 0; + + if (unlikely(nsegs == 0)) + goto out; + + down_write(&NILFS_MDT(sufile)->mi_sem); + for (seg = segnumv; seg < segnumv + nsegs; seg++) { + if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) { + printk(KERN_WARNING + "%s: invalid segment number: %llu\n", __func__, + (unsigned long long)*seg); + nerr++; + } + } + if (nerr > 0) { + ret = -EINVAL; + goto out_sem; + } + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + seg = segnumv; + blkoff = nilfs_sufile_get_blkoff(sufile, *seg); + ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); + if (ret < 0) + goto out_header; + + for (;;) { + dofunc(sufile, *seg, header_bh, bh); + + if (++seg >= segnumv + nsegs) + break; + prev_blkoff = blkoff; + blkoff = nilfs_sufile_get_blkoff(sufile, *seg); + if (blkoff == prev_blkoff) + continue; + + /* get different block */ + brelse(bh); + ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); + if (unlikely(ret < 0)) + goto out_header; + } + brelse(bh); + + out_header: + n = seg - segnumv; + brelse(header_bh); + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + out: + if (ndone) + *ndone = n; + return ret; +} + int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, void (*dofunc)(struct inode *, __u64, struct buffer_head *, @@ -490,7 +587,8 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, * nilfs_sufile_get_suinfo - * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @si: array of suinfo + * @buf: array of suinfo + * @sisz: byte size of suinfo * @nsi: size of suinfo array * * Description: @@ -502,11 +600,12 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, * * %-ENOMEM - Insufficient amount of memory available. */ -ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, - struct nilfs_suinfo *si, size_t nsi) +ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, + unsigned sisz, size_t nsi) { struct buffer_head *su_bh; struct nilfs_segment_usage *su; + struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; void *kaddr; @@ -531,20 +630,22 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, if (ret != -ENOENT) goto out; /* hole */ - memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n); + memset(si, 0, sisz * n); + si = (void *)si + sisz * n; continue; } kaddr = kmap_atomic(su_bh->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); - for (j = 0; j < n; j++, su = (void *)su + susz) { - si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); - si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); - si[i + j].sui_flags = le32_to_cpu(su->su_flags) & + for (j = 0; j < n; + j++, su = (void *)su + susz, si = (void *)si + sisz) { + si->sui_lastmod = le64_to_cpu(su->su_lastmod); + si->sui_nblocks = le32_to_cpu(su->su_nblocks); + si->sui_flags = le32_to_cpu(su->su_flags) & ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); if (nilfs_segment_is_active(nilfs, segnum + j)) - si[i + j].sui_flags |= + si->sui_flags |= (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } kunmap_atomic(kaddr, KM_USER0); diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index a2e2efd4ade1..a2c4d76c3366 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -43,43 +43,27 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64, struct buffer_head *); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); -ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, +ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, size_t); +int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)); int nilfs_sufile_update(struct inode *, __u64, int, void (*dofunc)(struct inode *, __u64, struct buffer_head *, struct buffer_head *)); -void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, - struct buffer_head *); void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *, struct buffer_head *); void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); /** - * nilfs_sufile_cancel_free - - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) -{ - return nilfs_sufile_update(sufile, segnum, 0, - nilfs_sufile_do_cancel_free); -} - -/** * nilfs_sufile_scrap - make a segment garbage * @sufile: inode of segment usage file * @segnum: segment number to be freed @@ -100,6 +84,38 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) } /** + * nilfs_sufile_freev - free segments + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @ndone: place to store the number of freed segments + */ +static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, + size_t nsegs, size_t *ndone) +{ + return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone, + nilfs_sufile_do_free); +} + +/** + * nilfs_sufile_cancel_freev - reallocate freeing segments + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @ndone: place to store the number of cancelled segments + * + * Return Value: On success, 0 is returned. On error, a negative error codes + * is returned. + */ +static inline int nilfs_sufile_cancel_freev(struct inode *sufile, + __u64 *segnumv, size_t nsegs, + size_t *ndone) +{ + return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone, + nilfs_sufile_do_cancel_free); +} + +/** * nilfs_sufile_set_error - mark a segment as erroneous * @sufile: inode of segment usage file * @segnum: segment number diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1777a3467bd2..ab785f85aa50 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -133,7 +133,7 @@ void nilfs_warning(struct super_block *sb, const char *function, static struct kmem_cache *nilfs_inode_cachep; -struct inode *nilfs_alloc_inode(struct super_block *sb) +struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) { struct nilfs_inode_info *ii; @@ -143,10 +143,15 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_bh = NULL; ii->i_state = 0; ii->vfs_inode.i_version = 1; - nilfs_btnode_cache_init(&ii->i_btnode_cache); + nilfs_btnode_cache_init(&ii->i_btnode_cache, nilfs->ns_bdi); return &ii->vfs_inode; } +struct inode *nilfs_alloc_inode(struct super_block *sb) +{ + return nilfs_alloc_inode_common(NILFS_SB(sb)->s_nilfs); +} + void nilfs_destroy_inode(struct inode *inode) { kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index e4e5c78bcc93..8b8889825716 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -32,7 +32,6 @@ #include "cpfile.h" #include "sufile.h" #include "dat.h" -#include "seglist.h" #include "segbuf.h" diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 9b0efdad8910..477d37d83b31 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -15,6 +15,7 @@ #include <linux/errno.h> #include <linux/kmod.h> #include <linux/spinlock.h> +#include <asm/byteorder.h> static struct nls_table default_table; static struct nls_table *tables = &default_table; @@ -43,10 +44,17 @@ static const struct utf8_table utf8_table[] = {0, /* end of table */} }; -int -utf8_mbtowc(wchar_t *p, const __u8 *s, int n) +#define UNICODE_MAX 0x0010ffff +#define PLANE_SIZE 0x00010000 + +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 +#define SURROGATE_LOW 0x00000400 +#define SURROGATE_BITS 0x000003ff + +int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) { - long l; + unsigned long l; int c0, c, nc; const struct utf8_table *t; @@ -57,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n) nc++; if ((c0 & t->cmask) == t->cval) { l &= t->lmask; - if (l < t->lval) + if (l < t->lval || l > UNICODE_MAX || + (l & SURROGATE_MASK) == SURROGATE_PAIR) return -1; - *p = l; + *pu = (unicode_t) l; return nc; } - if (n <= nc) + if (len <= nc) return -1; s++; c = (*s ^ 0x80) & 0xFF; @@ -72,90 +81,133 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n) } return -1; } +EXPORT_SYMBOL(utf8_to_utf32); -int -utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n) +int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) { - __u16 *op; - const __u8 *ip; - int size; - - op = pwcs; - ip = s; - while (*ip && n > 0) { - if (*ip & 0x80) { - size = utf8_mbtowc(op, ip, n); - if (size == -1) { - /* Ignore character and move on */ - ip++; - n--; - } else { - op++; - ip += size; - n -= size; - } - } else { - *op++ = *ip++; - n--; - } - } - return (op - pwcs); -} - -int -utf8_wctomb(__u8 *s, wchar_t wc, int maxlen) -{ - long l; + unsigned long l; int c, nc; const struct utf8_table *t; - + if (!s) return 0; - - l = wc; + + l = u; + if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) + return -1; + nc = 0; for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { nc++; if (l <= t->lmask) { c = t->shift; - *s = t->cval | (l >> c); + *s = (u8) (t->cval | (l >> c)); while (c > 0) { c -= 6; s++; - *s = 0x80 | ((l >> c) & 0x3F); + *s = (u8) (0x80 | ((l >> c) & 0x3F)); } return nc; } } return -1; } +EXPORT_SYMBOL(utf32_to_utf8); -int -utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen) +int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) { - const __u16 *ip; - __u8 *op; + u16 *op; int size; + unicode_t u; + + op = pwcs; + while (*s && len > 0) { + if (*s & 0x80) { + size = utf8_to_utf32(s, len, &u); + if (size < 0) { + /* Ignore character and move on */ + size = 1; + } else if (u >= PLANE_SIZE) { + u -= PLANE_SIZE; + *op++ = (wchar_t) (SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS)); + *op++ = (wchar_t) (SURROGATE_PAIR | + SURROGATE_LOW | + (u & SURROGATE_BITS)); + } else { + *op++ = (wchar_t) u; + } + s += size; + len -= size; + } else { + *op++ = *s++; + len--; + } + } + return op - pwcs; +} +EXPORT_SYMBOL(utf8s_to_utf16s); + +static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + return c; + case UTF16_LITTLE_ENDIAN: + return __le16_to_cpu(c); + case UTF16_BIG_ENDIAN: + return __be16_to_cpu(c); + } +} + +int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, + u8 *s, int maxlen) +{ + u8 *op; + int size; + unsigned long u, v; op = s; - ip = pwcs; - while (*ip && maxlen > 0) { - if (*ip > 0x7f) { - size = utf8_wctomb(op, *ip, maxlen); + while (len > 0 && maxlen > 0) { + u = get_utf16(*pwcs, endian); + if (!u) + break; + pwcs++; + len--; + if (u > 0x7f) { + if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { + if (u & SURROGATE_LOW) { + /* Ignore character and move on */ + continue; + } + if (len <= 0) + break; + v = get_utf16(*pwcs, endian); + if ((v & SURROGATE_MASK) != SURROGATE_PAIR || + !(v & SURROGATE_LOW)) { + /* Ignore character and move on */ + continue; + } + u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) + + (v & SURROGATE_BITS); + pwcs++; + len--; + } + size = utf32_to_utf8(u, op, maxlen); if (size == -1) { /* Ignore character and move on */ - maxlen--; } else { op += size; maxlen -= size; } } else { - *op++ = (__u8) *ip; + *op++ = (u8) u; + maxlen--; } - ip++; } - return (op - s); + return op - s; } +EXPORT_SYMBOL(utf16s_to_utf8s); int register_nls(struct nls_table * nls) { @@ -467,9 +519,5 @@ EXPORT_SYMBOL(unregister_nls); EXPORT_SYMBOL(unload_nls); EXPORT_SYMBOL(load_nls); EXPORT_SYMBOL(load_nls_default); -EXPORT_SYMBOL(utf8_mbtowc); -EXPORT_SYMBOL(utf8_mbstowcs); -EXPORT_SYMBOL(utf8_wctomb); -EXPORT_SYMBOL(utf8_wcstombs); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c index aa2c42fdd977..0d60a44acacd 100644 --- a/fs/nls/nls_utf8.c +++ b/fs/nls/nls_utf8.c @@ -15,7 +15,11 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { int n; - if ( (n = utf8_wctomb(out, uni, boundlen)) == -1) { + if (boundlen <= 0) + return -ENAMETOOLONG; + + n = utf32_to_utf8(uni, out, boundlen); + if (n < 0) { *out = '?'; return -EINVAL; } @@ -25,11 +29,14 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { int n; + unicode_t u; - if ( (n = utf8_mbtowc(uni, rawstring, boundlen)) == -1) { + n = utf8_to_utf32(rawstring, boundlen, &u); + if (n < 0 || u > MAX_WCHAR_T) { *uni = 0x003f; /* ? */ - n = -EINVAL; + return -EINVAL; } + *uni = (wchar_t) u; return n; } diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 82c5085559c6..9938034762cc 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -27,6 +27,7 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/slab.h> +#include <linux/log2.h> #include "aops.h" #include "attrib.h" @@ -1570,7 +1571,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) ntfs_debug("Index collation rule is 0x%x.", le32_to_cpu(ir->collation_rule)); ni->itype.index.block_size = le32_to_cpu(ir->index_block_size); - if (ni->itype.index.block_size & (ni->itype.index.block_size - 1)) { + if (!is_power_of_2(ni->itype.index.block_size)) { ntfs_error(vi->i_sb, "Index block size (%u) is not a power of " "two.", ni->itype.index.block_size); goto unm_err_out; diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index d7932e95b1fd..89b02985c054 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -26,6 +26,7 @@ #include <linux/highmem.h> #include <linux/buffer_head.h> #include <linux/bitops.h> +#include <linux/log2.h> #include "attrib.h" #include "aops.h" @@ -65,7 +66,7 @@ static bool ntfs_check_restart_page_header(struct inode *vi, logfile_log_page_size < NTFS_BLOCK_SIZE || logfile_system_page_size & (logfile_system_page_size - 1) || - logfile_log_page_size & (logfile_log_page_size - 1)) { + !is_power_of_2(logfile_log_page_size)) { ntfs_error(vi->i_sb, "$LogFile uses unsupported page size."); return false; } diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 678a067d9251..9edcde4974aa 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -475,6 +475,12 @@ struct ocfs2_path { #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) #define path_num_items(_path) ((_path)->p_tree_depth + 1) +static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, + u32 cpos); +static void ocfs2_adjust_rightmost_records(struct inode *inode, + handle_t *handle, + struct ocfs2_path *path, + struct ocfs2_extent_rec *insert_rec); /* * Reset the actual path elements so that we can re-use the structure * to build another path. Generally, this involves freeing the buffer @@ -1013,6 +1019,54 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) } /* + * Change range of the branches in the right most path according to the leaf + * extent block's rightmost record. + */ +static int ocfs2_adjust_rightmost_branch(handle_t *handle, + struct inode *inode, + struct ocfs2_extent_tree *et) +{ + int status; + struct ocfs2_path *path = NULL; + struct ocfs2_extent_list *el; + struct ocfs2_extent_rec *rec; + + path = ocfs2_new_path_from_et(et); + if (!path) { + status = -ENOMEM; + return status; + } + + status = ocfs2_find_path(inode, path, UINT_MAX); + if (status < 0) { + mlog_errno(status); + goto out; + } + + status = ocfs2_extend_trans(handle, path_num_items(path) + + handle->h_buffer_credits); + if (status < 0) { + mlog_errno(status); + goto out; + } + + status = ocfs2_journal_access_path(inode, handle, path); + if (status < 0) { + mlog_errno(status); + goto out; + } + + el = path_leaf_el(path); + rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; + + ocfs2_adjust_rightmost_records(inode, handle, path, rec); + +out: + ocfs2_free_path(path); + return status; +} + +/* * Add an entire tree branch to our inode. eb_bh is the extent block * to start at, if we don't want to start the branch at the dinode * structure. @@ -1038,7 +1092,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, struct ocfs2_extent_block *eb; struct ocfs2_extent_list *eb_el; struct ocfs2_extent_list *el; - u32 new_cpos; + u32 new_cpos, root_end; mlog_entry_void(); @@ -1055,6 +1109,27 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, new_blocks = le16_to_cpu(el->l_tree_depth); + eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; + new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); + root_end = ocfs2_sum_rightmost_rec(et->et_root_el); + + /* + * If there is a gap before the root end and the real end + * of the righmost leaf block, we need to remove the gap + * between new_cpos and root_end first so that the tree + * is consistent after we add a new branch(it will start + * from new_cpos). + */ + if (root_end > new_cpos) { + mlog(0, "adjust the cluster end from %u to %u\n", + root_end, new_cpos); + status = ocfs2_adjust_rightmost_branch(handle, inode, et); + if (status) { + mlog_errno(status); + goto bail; + } + } + /* allocate the number of new eb blocks we need */ new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), GFP_KERNEL); @@ -1071,9 +1146,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, goto bail; } - eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; - new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); - /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be * linked with the rest of the tree. * conversly, new_eb_bhs[0] is the new bottommost leaf. diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 2a947c44e594..a1163b8b417c 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -22,6 +22,9 @@ #include <linux/crc32.h> #include <linux/buffer_head.h> #include <linux/bitops.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/fs.h> #include <asm/byteorder.h> #include <cluster/masklog.h> @@ -222,6 +225,155 @@ void ocfs2_hamming_fix_block(void *data, unsigned int blocksize, ocfs2_hamming_fix(data, blocksize * 8, 0, fix); } + +/* + * Debugfs handling. + */ + +#ifdef CONFIG_DEBUG_FS + +static int blockcheck_u64_get(void *data, u64 *val) +{ + *val = *(u64 *)data; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n"); + +static struct dentry *blockcheck_debugfs_create(const char *name, + struct dentry *parent, + u64 *value) +{ + return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value, + &blockcheck_fops); +} + +static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) +{ + if (stats) { + debugfs_remove(stats->b_debug_check); + stats->b_debug_check = NULL; + debugfs_remove(stats->b_debug_failure); + stats->b_debug_failure = NULL; + debugfs_remove(stats->b_debug_recover); + stats->b_debug_recover = NULL; + debugfs_remove(stats->b_debug_dir); + stats->b_debug_dir = NULL; + } +} + +static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) +{ + int rc = -EINVAL; + + if (!stats) + goto out; + + stats->b_debug_dir = debugfs_create_dir("blockcheck", parent); + if (!stats->b_debug_dir) + goto out; + + stats->b_debug_check = + blockcheck_debugfs_create("blocks_checked", + stats->b_debug_dir, + &stats->b_check_count); + + stats->b_debug_failure = + blockcheck_debugfs_create("checksums_failed", + stats->b_debug_dir, + &stats->b_failure_count); + + stats->b_debug_recover = + blockcheck_debugfs_create("ecc_recoveries", + stats->b_debug_dir, + &stats->b_recover_count); + if (stats->b_debug_check && stats->b_debug_failure && + stats->b_debug_recover) + rc = 0; + +out: + if (rc) + ocfs2_blockcheck_debug_remove(stats); + return rc; +} +#else +static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) +{ + return 0; +} + +static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +/* Always-called wrappers for starting and stopping the debugfs files */ +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) +{ + return ocfs2_blockcheck_debug_install(stats, parent); +} + +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats) +{ + ocfs2_blockcheck_debug_remove(stats); +} + +static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats) +{ + u64 new_count; + + if (!stats) + return; + + spin_lock(&stats->b_lock); + stats->b_check_count++; + new_count = stats->b_check_count; + spin_unlock(&stats->b_lock); + + if (!new_count) + mlog(ML_NOTICE, "Block check count has wrapped\n"); +} + +static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats) +{ + u64 new_count; + + if (!stats) + return; + + spin_lock(&stats->b_lock); + stats->b_failure_count++; + new_count = stats->b_failure_count; + spin_unlock(&stats->b_lock); + + if (!new_count) + mlog(ML_NOTICE, "Checksum failure count has wrapped\n"); +} + +static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats) +{ + u64 new_count; + + if (!stats) + return; + + spin_lock(&stats->b_lock); + stats->b_recover_count++; + new_count = stats->b_recover_count; + spin_unlock(&stats->b_lock); + + if (!new_count) + mlog(ML_NOTICE, "ECC recovery count has wrapped\n"); +} + + + +/* + * These are the low-level APIs for using the ocfs2_block_check structure. + */ + /* * This function generates check information for a block. * data is the block to be checked. bc is a pointer to the @@ -266,12 +418,15 @@ void ocfs2_block_check_compute(void *data, size_t blocksize, * Again, the data passed in should be the on-disk endian. */ int ocfs2_block_check_validate(void *data, size_t blocksize, - struct ocfs2_block_check *bc) + struct ocfs2_block_check *bc, + struct ocfs2_blockcheck_stats *stats) { int rc = 0; struct ocfs2_block_check check; u32 crc, ecc; + ocfs2_blockcheck_inc_check(stats); + check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); check.bc_ecc = le16_to_cpu(bc->bc_ecc); @@ -282,6 +437,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, if (crc == check.bc_crc32e) goto out; + ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -292,8 +448,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, /* And check the crc32 again */ crc = crc32_le(~0, data, blocksize); - if (crc == check.bc_crc32e) + if (crc == check.bc_crc32e) { + ocfs2_blockcheck_inc_recover(stats); goto out; + } mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -366,7 +524,8 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, * Again, the data passed in should be the on-disk endian. */ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, - struct ocfs2_block_check *bc) + struct ocfs2_block_check *bc, + struct ocfs2_blockcheck_stats *stats) { int i, rc = 0; struct ocfs2_block_check check; @@ -377,6 +536,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, if (!nr) return 0; + ocfs2_blockcheck_inc_check(stats); + check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); check.bc_ecc = le16_to_cpu(bc->bc_ecc); @@ -388,6 +549,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, if (crc == check.bc_crc32e) goto out; + ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -416,8 +578,10 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, /* And check the crc32 again */ for (i = 0, crc = ~0; i < nr; i++) crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); - if (crc == check.bc_crc32e) + if (crc == check.bc_crc32e) { + ocfs2_blockcheck_inc_recover(stats); goto out; + } mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -448,9 +612,11 @@ int ocfs2_validate_meta_ecc(struct super_block *sb, void *data, struct ocfs2_block_check *bc) { int rc = 0; + struct ocfs2_super *osb = OCFS2_SB(sb); - if (ocfs2_meta_ecc(OCFS2_SB(sb))) - rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc); + if (ocfs2_meta_ecc(osb)) + rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc, + &osb->osb_ecc_stats); return rc; } @@ -468,9 +634,11 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb, struct ocfs2_block_check *bc) { int rc = 0; + struct ocfs2_super *osb = OCFS2_SB(sb); - if (ocfs2_meta_ecc(OCFS2_SB(sb))) - rc = ocfs2_block_check_validate_bhs(bhs, nr, bc); + if (ocfs2_meta_ecc(osb)) + rc = ocfs2_block_check_validate_bhs(bhs, nr, bc, + &osb->osb_ecc_stats); return rc; } diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h index 70ec3feda32f..d4b69febf70a 100644 --- a/fs/ocfs2/blockcheck.h +++ b/fs/ocfs2/blockcheck.h @@ -21,6 +21,24 @@ #define OCFS2_BLOCKCHECK_H +/* Count errors and error correction from blockcheck.c */ +struct ocfs2_blockcheck_stats { + spinlock_t b_lock; + u64 b_check_count; /* Number of blocks we've checked */ + u64 b_failure_count; /* Number of failed checksums */ + u64 b_recover_count; /* Number of blocks fixed by ecc */ + + /* + * debugfs entries, used if this is passed to + * ocfs2_blockcheck_stats_debugfs_install() + */ + struct dentry *b_debug_dir; /* Parent of the debugfs files */ + struct dentry *b_debug_check; /* Exposes b_check_count */ + struct dentry *b_debug_failure; /* Exposes b_failure_count */ + struct dentry *b_debug_recover; /* Exposes b_recover_count */ +}; + + /* High level block API */ void ocfs2_compute_meta_ecc(struct super_block *sb, void *data, struct ocfs2_block_check *bc); @@ -37,11 +55,18 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb, void ocfs2_block_check_compute(void *data, size_t blocksize, struct ocfs2_block_check *bc); int ocfs2_block_check_validate(void *data, size_t blocksize, - struct ocfs2_block_check *bc); + struct ocfs2_block_check *bc, + struct ocfs2_blockcheck_stats *stats); void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, struct ocfs2_block_check *bc); int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, - struct ocfs2_block_check *bc); + struct ocfs2_block_check *bc, + struct ocfs2_blockcheck_stats *stats); + +/* Debug Initialization */ +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent); +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats); /* * Hamming code functions diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 7e72a81bc2d4..696c32e50716 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -48,34 +48,33 @@ * only emit the appropriage printk() when the caller passes in a constant * mask, as is almost always the case. * - * All this bitmask nonsense is hidden from the /proc interface so that Joel - * doesn't have an aneurism. Reading the file gives a straight forward - * indication of which bits are on or off: - * ENTRY off - * EXIT off + * All this bitmask nonsense is managed from the files under + * /sys/fs/o2cb/logmask/. Reading the files gives a straightforward + * indication of which bits are allowed (allow) or denied (off/deny). + * ENTRY deny + * EXIT deny * TCP off * MSG off * SOCKET off - * ERROR off - * NOTICE on + * ERROR allow + * NOTICE allow * * Writing changes the state of a given bit and requires a strictly formatted * single write() call: * - * write(fd, "ENTRY on", 8); + * write(fd, "allow", 5); * - * would turn the entry bit on. "1" is also accepted in the place of "on", and - * "off" and "0" behave as expected. + * Echoing allow/deny/off string into the logmask files can flip the bits + * on or off as expected; here is the bash script for example: * - * Some trivial shell can flip all the bits on or off: + * log_mask="/sys/fs/o2cb/log_mask" + * for node in ENTRY EXIT TCP MSG SOCKET ERROR NOTICE; do + * echo allow >"$log_mask"/"$node" + * done * - * log_mask="/proc/fs/ocfs2_nodemanager/log_mask" - * cat $log_mask | ( - * while read bit status; do - * # $1 is "on" or "off", say - * echo "$bit $1" > $log_mask - * done - * ) + * The debugfs.ocfs2 tool can also flip the bits with the -l option: + * + * debugfs.ocfs2 -l TCP allow */ /* for task_struct */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9fbe849f6344..334f231a422c 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -974,7 +974,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, size_t caller_veclen, u8 target_node, int *status) { - int ret, error = 0; + int ret; struct o2net_msg *msg = NULL; size_t veclen, caller_bytes = 0; struct kvec *vec = NULL; @@ -1015,10 +1015,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, o2net_set_nst_sock_time(&nst); - ret = wait_event_interruptible(nn->nn_sc_wq, - o2net_tx_can_proceed(nn, &sc, &error)); - if (!ret && error) - ret = error; + wait_event(nn->nn_sc_wq, o2net_tx_can_proceed(nn, &sc, &ret)); if (ret) goto out; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c5752305627c..b358f3bf896d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2900,6 +2900,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, alloc = ocfs2_clusters_for_bytes(sb, bytes); dx_alloc = 0; + down_write(&oi->ip_alloc_sem); + if (ocfs2_supports_indexed_dirs(osb)) { credits += ocfs2_add_dir_index_credits(sb); @@ -2940,8 +2942,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, goto out; } - down_write(&oi->ip_alloc_sem); - /* * Prepare for worst case allocation scenario of two separate * extents in the unindexed tree. @@ -2953,7 +2953,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); - goto out_sem; + goto out; } if (vfs_dq_alloc_space_nodirty(dir, @@ -3172,10 +3172,8 @@ out_commit: ocfs2_commit_trans(osb, handle); -out_sem: - up_write(&oi->ip_alloc_sem); - out: + up_write(&oi->ip_alloc_sem); if (data_ac) ocfs2_free_alloc_context(data_ac); if (meta_ac) @@ -3322,11 +3320,15 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, brelse(new_bh); new_bh = NULL; + down_write(&OCFS2_I(dir)->ip_alloc_sem); + drop_alloc_sem = 1; dir_i_size = i_size_read(dir); credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; goto do_extend; } + down_write(&OCFS2_I(dir)->ip_alloc_sem); + drop_alloc_sem = 1; dir_i_size = i_size_read(dir); mlog(0, "extending dir %llu (i_size = %lld)\n", (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); @@ -3370,9 +3372,6 @@ do_extend: credits++; /* For attaching the new dirent block to the * dx_root */ - down_write(&OCFS2_I(dir)->ip_alloc_sem); - drop_alloc_sem = 1; - handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); @@ -3435,10 +3434,10 @@ bail_bh: *new_de_bh = new_bh; get_bh(*new_de_bh); bail: - if (drop_alloc_sem) - up_write(&OCFS2_I(dir)->ip_alloc_sem); if (handle) ocfs2_commit_trans(osb, handle); + if (drop_alloc_sem) + up_write(&OCFS2_I(dir)->ip_alloc_sem); if (data_ac) ocfs2_free_alloc_context(data_ac); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index e15fc7d50827..6cdeaa76f27f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -248,6 +248,10 @@ static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { .flags = 0, }; +static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { + .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, +}; + static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { .get_osb = ocfs2_get_dentry_osb, .post_unlock = ocfs2_dentry_post_unlock, @@ -637,6 +641,19 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, &ocfs2_nfs_sync_lops, osb); } +static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, + struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan_lvb *lvb; + + ocfs2_lock_res_init_once(res); + ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); + ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, + &ocfs2_orphan_scan_lops, osb); + lvb = ocfs2_dlm_lvb(&res->l_lksb); + lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; +} + void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, struct ocfs2_file_private *fp) { @@ -2352,6 +2369,37 @@ void ocfs2_inode_unlock(struct inode *inode, mlog_exit_void(); } +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) +{ + struct ocfs2_lock_res *lockres; + struct ocfs2_orphan_scan_lvb *lvb; + int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; + int status = 0; + + lockres = &osb->osb_orphan_scan.os_lockres; + status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); + if (status < 0) + return status; + + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) + *seqno = be32_to_cpu(lvb->lvb_os_seqno); + return status; +} + +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) +{ + struct ocfs2_lock_res *lockres; + struct ocfs2_orphan_scan_lvb *lvb; + int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; + + lockres = &osb->osb_orphan_scan.os_lockres; + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; + lvb->lvb_os_seqno = cpu_to_be32(seqno); + ocfs2_cluster_unlock(osb, lockres, level); +} + int ocfs2_super_lock(struct ocfs2_super *osb, int ex) { @@ -2842,6 +2890,7 @@ local: ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); + ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); osb->cconn = conn; @@ -2878,6 +2927,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, ocfs2_lock_res_free(&osb->osb_super_lockres); ocfs2_lock_res_free(&osb->osb_rename_lockres); ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); + ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); ocfs2_cluster_disconnect(osb->cconn, hangup_pending); osb->cconn = NULL; @@ -3061,6 +3111,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); + ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); } int ocfs2_drop_inode_locks(struct inode *inode) diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index e1fd5721cd7f..31b90d7b8f51 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -62,6 +62,14 @@ struct ocfs2_qinfo_lvb { __be32 lvb_free_entry; }; +#define OCFS2_ORPHAN_LVB_VERSION 1 + +struct ocfs2_orphan_scan_lvb { + __u8 lvb_version; + __u8 lvb_reserved[3]; + __be32 lvb_os_seqno; +}; + /* ocfs2_inode_lock_full() 'arg_flags' flags */ /* don't wait on recovery. */ #define OCFS2_META_LOCK_RECOVERY (0x01) @@ -113,6 +121,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb, int ex); void ocfs2_super_unlock(struct ocfs2_super *osb, int ex); +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); + int ocfs2_rename_lock(struct ocfs2_super *osb); void ocfs2_rename_unlock(struct ocfs2_super *osb); int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c2a87c885b73..07267e0da909 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -187,6 +187,9 @@ static int ocfs2_sync_file(struct file *file, if (err) goto bail; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto bail; + journal = osb->journal->j_journal; err = jbd2_journal_force_commit(journal); @@ -894,9 +897,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) struct ocfs2_super *osb = OCFS2_SB(sb); struct buffer_head *bh = NULL; handle_t *handle = NULL; - int locked[MAXQUOTAS] = {0, 0}; - int credits, qtype; - struct ocfs2_mem_dqinfo *oinfo; + int qtype; + struct dquot *transfer_from[MAXQUOTAS] = { }; + struct dquot *transfer_to[MAXQUOTAS] = { }; mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); @@ -969,30 +972,37 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - credits = OCFS2_INODE_UPDATE_CREDITS; + /* + * Gather pointers to quota structures so that allocation / + * freeing of quota structures happens here and not inside + * vfs_dq_transfer() where we have problems with lock ordering + */ if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { - oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv; - status = ocfs2_lock_global_qf(oinfo, 1); - if (status < 0) + transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, + USRQUOTA); + transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, + USRQUOTA); + if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) { + status = -ESRCH; goto bail_unlock; - credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) + - ocfs2_calc_qdel_credits(sb, USRQUOTA); - locked[USRQUOTA] = 1; + } } if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { - oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv; - status = ocfs2_lock_global_qf(oinfo, 1); - if (status < 0) + transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, + GRPQUOTA); + transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, + GRPQUOTA); + if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) { + status = -ESRCH; goto bail_unlock; - credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) + - ocfs2_calc_qdel_credits(sb, GRPQUOTA); - locked[GRPQUOTA] = 1; + } } - handle = ocfs2_start_trans(osb, credits); + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS + + 2 * ocfs2_quota_trans_credits(sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -1030,12 +1040,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: - for (qtype = 0; qtype < MAXQUOTAS; qtype++) { - if (!locked[qtype]) - continue; - oinfo = sb_dqinfo(sb, qtype)->dqi_priv; - ocfs2_unlock_global_qf(oinfo, 1); - } ocfs2_inode_unlock(inode, 1); bail_unlock_rw: if (size_change) @@ -1043,6 +1047,12 @@ bail_unlock_rw: bail: brelse(bh); + /* Release quota pointers in case we acquired them */ + for (qtype = 0; qtype < MAXQUOTAS; qtype++) { + dqput(transfer_to[qtype]); + dqput(transfer_from[qtype]); + } + if (!status && attr->ia_valid & ATTR_MODE) { status = ocfs2_acl_chmod(inode); if (status < 0) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a20a0f1e37fd..4a3b9e6b31ad 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -28,6 +28,8 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/kthread.h> +#include <linux/time.h> +#include <linux/random.h> #define MLOG_MASK_PREFIX ML_JOURNAL #include <cluster/masklog.h> @@ -52,6 +54,8 @@ DEFINE_SPINLOCK(trans_inc_lock); +#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000 + static int ocfs2_force_read_journal(struct inode *inode); static int ocfs2_recover_node(struct ocfs2_super *osb, int node_num, int slot_num); @@ -1841,6 +1845,113 @@ bail: return status; } +/* + * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some + * randomness to the timeout to minimize multple nodes firing the timer at the + * same time. + */ +static inline unsigned long ocfs2_orphan_scan_timeout(void) +{ + unsigned long time; + + get_random_bytes(&time, sizeof(time)); + time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000); + return msecs_to_jiffies(time); +} + +/* + * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for + * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This + * is done to catch any orphans that are left over in orphan directories. + * + * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT + * seconds. It gets an EX lock on os_lockres and checks sequence number + * stored in LVB. If the sequence number has changed, it means some other + * node has done the scan. This node skips the scan and tracks the + * sequence number. If the sequence number didn't change, it means a scan + * hasn't happened. The node queues a scan and increments the + * sequence number in the LVB. + */ +void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan *os; + int status, i; + u32 seqno = 0; + + os = &osb->osb_orphan_scan; + + status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); + if (status < 0) { + if (status != -EAGAIN) + mlog_errno(status); + goto out; + } + + if (os->os_seqno != seqno) { + os->os_seqno = seqno; + goto unlock; + } + + for (i = 0; i < osb->max_slots; i++) + ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, + NULL); + /* + * We queued a recovery on orphan slots, increment the sequence + * number and update LVB so other node will skip the scan for a while + */ + seqno++; + os->os_count++; + os->os_scantime = CURRENT_TIME; +unlock: + ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); +out: + return; +} + +/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */ +void ocfs2_orphan_scan_work(struct work_struct *work) +{ + struct ocfs2_orphan_scan *os; + struct ocfs2_super *osb; + + os = container_of(work, struct ocfs2_orphan_scan, + os_orphan_scan_work.work); + osb = os->os_osb; + + mutex_lock(&os->os_lock); + ocfs2_queue_orphan_scan(osb); + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); + mutex_unlock(&os->os_lock); +} + +void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan *os; + + os = &osb->osb_orphan_scan; + mutex_lock(&os->os_lock); + cancel_delayed_work(&os->os_orphan_scan_work); + mutex_unlock(&os->os_lock); +} + +int ocfs2_orphan_scan_init(struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan *os; + + os = &osb->osb_orphan_scan; + os->os_osb = osb; + os->os_count = 0; + os->os_scantime = CURRENT_TIME; + mutex_init(&os->os_lock); + + INIT_DELAYED_WORK(&os->os_orphan_scan_work, + ocfs2_orphan_scan_work); + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); + return 0; +} + struct ocfs2_orphan_filldir_priv { struct inode *head; struct ocfs2_super *osb; diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index eb7b76331eb7..61045eeb3f6e 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -144,6 +144,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, } /* Exported only for the journal struct init code in super.c. Do not call. */ +int ocfs2_orphan_scan_init(struct ocfs2_super *osb); +void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); +void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); + void ocfs2_complete_recovery(struct work_struct *work); void ocfs2_wait_for_recovery(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1386281950db..18c1d9ec1c93 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -47,6 +47,9 @@ #include "ocfs2_fs.h" #include "ocfs2_lockid.h" +/* For struct ocfs2_blockcheck_stats */ +#include "blockcheck.h" + /* Most user visible OCFS2 inodes will have very few pieces of * metadata, but larger files (including bitmaps, etc) must be taken * into account when designing an access scheme. We allow a small @@ -151,6 +154,16 @@ struct ocfs2_lock_res { #endif }; +struct ocfs2_orphan_scan { + struct mutex os_lock; + struct ocfs2_super *os_osb; + struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ + struct delayed_work os_orphan_scan_work; + struct timespec os_scantime; /* time this node ran the scan */ + u32 os_count; /* tracks node specific scans */ + u32 os_seqno; /* tracks cluster wide scans */ +}; + struct ocfs2_dlm_debug { struct kref d_refcnt; struct dentry *d_locking_state; @@ -295,6 +308,7 @@ struct ocfs2_super struct ocfs2_dinode *local_alloc_copy; struct ocfs2_quota_recovery *quota_rec; + struct ocfs2_blockcheck_stats osb_ecc_stats; struct ocfs2_alloc_stats alloc_stats; char dev_str[20]; /* "major,minor" of the device */ @@ -341,6 +355,8 @@ struct ocfs2_super unsigned int *osb_orphan_wipes; wait_queue_head_t osb_wipe_event; + struct ocfs2_orphan_scan osb_orphan_scan; + /* used to protect metaecc calculation check of xattr. */ spinlock_t osb_xattr_lock; diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index a53ce87481bf..fcdba091af3d 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -48,6 +48,7 @@ enum ocfs2_lock_type { OCFS2_LOCK_TYPE_FLOCK, OCFS2_LOCK_TYPE_QINFO, OCFS2_LOCK_TYPE_NFS_SYNC, + OCFS2_LOCK_TYPE_ORPHAN_SCAN, OCFS2_NUM_LOCK_TYPES }; @@ -85,6 +86,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) case OCFS2_LOCK_TYPE_NFS_SYNC: c = 'Y'; break; + case OCFS2_LOCK_TYPE_ORPHAN_SCAN: + c = 'P'; + break; default: c = '\0'; } @@ -104,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = { [OCFS2_LOCK_TYPE_OPEN] = "Open", [OCFS2_LOCK_TYPE_FLOCK] = "Flock", [OCFS2_LOCK_TYPE_QINFO] = "Quota", + [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", }; static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 1ed0f7c86869..edfa60cd155c 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -421,6 +421,7 @@ int ocfs2_global_read_dquot(struct dquot *dquot) OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; if (!dquot->dq_off) { /* No real quota entry? */ /* Upgrade to exclusive lock for allocation */ + ocfs2_qinfo_unlock(info, 0); err = ocfs2_qinfo_lock(info, 1); if (err < 0) goto out_qlock; @@ -435,7 +436,8 @@ int ocfs2_global_read_dquot(struct dquot *dquot) out_qlock: if (ex) ocfs2_qinfo_unlock(info, 1); - ocfs2_qinfo_unlock(info, 0); + else + ocfs2_qinfo_unlock(info, 0); out: if (err < 0) mlog_errno(err); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 07deec5e9721..5a460fa82553 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -444,10 +444,6 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type); - status = ocfs2_lock_global_qf(oinfo, 1); - if (status < 0) - goto out; - list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) { chunk = rchunk->rc_chunk; hbh = NULL; @@ -480,12 +476,18 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, type); goto out_put_bh; } + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) { + mlog_errno(status); + goto out_put_dquot; + } + handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QSYNC_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); - goto out_put_dquot; + goto out_drop_lock; } mutex_lock(&sb_dqopt(sb)->dqio_mutex); spin_lock(&dq_data_lock); @@ -523,6 +525,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, out_commit: mutex_unlock(&sb_dqopt(sb)->dqio_mutex); ocfs2_commit_trans(OCFS2_SB(sb), handle); +out_drop_lock: + ocfs2_unlock_global_qf(oinfo, 1); out_put_dquot: dqput(dquot); out_put_bh: @@ -537,8 +541,6 @@ out_put_bh: if (status < 0) break; } - ocfs2_unlock_global_qf(oinfo, 1); -out: if (status < 0) free_recovery_list(&(rec->r_list[type])); mlog_exit(status); @@ -655,6 +657,9 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) struct ocfs2_quota_recovery *rec; int locked = 0; + /* We don't need the lock and we have to acquire quota file locks + * which will later depend on this lock */ + mutex_unlock(&sb_dqopt(sb)->dqio_mutex); info->dqi_maxblimit = 0x7fffffffffffffffLL; info->dqi_maxilimit = 0x7fffffffffffffffLL; oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); @@ -733,6 +738,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) goto out_err; } + mutex_lock(&sb_dqopt(sb)->dqio_mutex); return 0; out_err: if (oinfo) { @@ -746,6 +752,7 @@ out_err: kfree(oinfo); } brelse(bh); + mutex_lock(&sb_dqopt(sb)->dqio_mutex); return -1; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 201b40a441fe..d33767f17ba3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -119,10 +119,12 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb); static int ocfs2_check_volume(struct ocfs2_super *osb); static int ocfs2_verify_volume(struct ocfs2_dinode *di, struct buffer_head *bh, - u32 sectsize); + u32 sectsize, + struct ocfs2_blockcheck_stats *stats); static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, - int sector_size); + int sector_size, + struct ocfs2_blockcheck_stats *stats); static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, @@ -207,6 +209,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) int i; struct ocfs2_cluster_connection *cconn = osb->cconn; struct ocfs2_recovery_map *rm = osb->recovery_map; + struct ocfs2_orphan_scan *os; out += snprintf(buf + out, len - out, "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", @@ -308,6 +311,13 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) i, osb->slot_recovery_generations[i]); } + os = &osb->osb_orphan_scan; + out += snprintf(buf + out, len - out, "Orphan Scan=> "); + out += snprintf(buf + out, len - out, "Local: %u Global: %u ", + os->os_count, os->os_seqno); + out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n", + (get_seconds() - os->os_scantime.tv_sec)); + return out; } @@ -693,7 +703,8 @@ out: static int ocfs2_sb_probe(struct super_block *sb, struct buffer_head **bh, - int *sector_size) + int *sector_size, + struct ocfs2_blockcheck_stats *stats) { int status, tmpstat; struct ocfs1_vol_disk_hdr *hdr; @@ -759,7 +770,8 @@ static int ocfs2_sb_probe(struct super_block *sb, goto bail; } di = (struct ocfs2_dinode *) (*bh)->b_data; - status = ocfs2_verify_volume(di, *bh, blksize); + memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); + status = ocfs2_verify_volume(di, *bh, blksize, stats); if (status >= 0) goto bail; brelse(*bh); @@ -965,6 +977,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; char nodestr[8]; + struct ocfs2_blockcheck_stats stats; mlog_entry("%p, %p, %i", sb, data, silent); @@ -974,13 +987,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } /* probe for superblock */ - status = ocfs2_sb_probe(sb, &bh, §or_size); + status = ocfs2_sb_probe(sb, &bh, §or_size, &stats); if (status < 0) { mlog(ML_ERROR, "superblock probe failed!\n"); goto read_super_error; } - status = ocfs2_initialize_super(sb, bh, sector_size); + status = ocfs2_initialize_super(sb, bh, sector_size, &stats); osb = OCFS2_SB(sb); if (status < 0) { mlog_errno(status); @@ -1090,6 +1103,18 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } + if (ocfs2_meta_ecc(osb)) { + status = ocfs2_blockcheck_stats_debugfs_install( + &osb->osb_ecc_stats, + osb->osb_debug_root); + if (status) { + mlog(ML_ERROR, + "Unable to create blockcheck statistics " + "files\n"); + goto read_super_error; + } + } + status = ocfs2_mount_volume(sb); if (osb->root_inode) inode = igrab(osb->root_inode); @@ -1760,13 +1785,8 @@ static int ocfs2_mount_volume(struct super_block *sb) } status = ocfs2_truncate_log_init(osb); - if (status < 0) { + if (status < 0) mlog_errno(status); - goto leave; - } - - if (ocfs2_mount_local(osb)) - goto leave; leave: if (unlock_super) @@ -1796,6 +1816,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_truncate_log_shutdown(osb); + ocfs2_orphan_scan_stop(osb); + /* This will disable recovery and flush any recovery work. */ ocfs2_recovery_exit(osb); @@ -1833,6 +1855,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) if (osb->cconn) ocfs2_dlm_shutdown(osb, hangup_needed); + ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); debugfs_remove(osb->osb_debug_root); if (hangup_needed) @@ -1880,7 +1903,8 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, - int sector_size) + int sector_size, + struct ocfs2_blockcheck_stats *stats) { int status; int i, cbits, bbits; @@ -1939,6 +1963,9 @@ static int ocfs2_initialize_super(struct super_block *sb, atomic_set(&osb->alloc_stats.bg_allocs, 0); atomic_set(&osb->alloc_stats.bg_extends, 0); + /* Copy the blockcheck stats from the superblock probe */ + osb->osb_ecc_stats = *stats; + ocfs2_init_node_maps(osb); snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", @@ -1951,6 +1978,13 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } + status = ocfs2_orphan_scan_init(osb); + if (status) { + mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n"); + mlog_errno(status); + goto bail; + } + init_waitqueue_head(&osb->checkpoint_event); atomic_set(&osb->needs_checkpoint, 0); @@ -2169,7 +2203,8 @@ bail: */ static int ocfs2_verify_volume(struct ocfs2_dinode *di, struct buffer_head *bh, - u32 blksz) + u32 blksz, + struct ocfs2_blockcheck_stats *stats) { int status = -EAGAIN; @@ -2182,7 +2217,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, OCFS2_FEATURE_INCOMPAT_META_ECC) { status = ocfs2_block_check_validate(bh->b_data, bh->b_size, - &di->i_check); + &di->i_check, + stats); if (status) goto out; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 15631019dc63..ba320e250747 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3154,7 +3154,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); if (func) { ret = func(inode, bucket, para); - if (ret) + if (ret && ret != -ERANGE) mlog_errno(ret); /* Fall through to bucket_relse() */ } @@ -3261,7 +3261,8 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode, ocfs2_list_xattr_bucket, &xl); if (ret) { - mlog_errno(ret); + if (ret != -ERANGE) + mlog_errno(ret); goto out; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 1539e630c47d..3ce5ae9e3d2d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1006,7 +1006,12 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, if (!task) return -ESRCH; - oom_adjust = task->oomkilladj; + task_lock(task); + if (task->mm) + oom_adjust = task->mm->oom_adj; + else + oom_adjust = OOM_DISABLE; + task_unlock(task); put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); @@ -1035,11 +1040,19 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, task = get_proc_task(file->f_path.dentry->d_inode); if (!task) return -ESRCH; - if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { + task_lock(task); + if (!task->mm) { + task_unlock(task); + put_task_struct(task); + return -EINVAL; + } + if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { + task_unlock(task); put_task_struct(task); return -EACCES; } - task->oomkilladj = oom_adjust; + task->mm->oom_adj = oom_adjust; + task_unlock(task); put_task_struct(task); if (end - buffer == 0) return -EIO; diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index c6b0302af4c4..d5c410d47fae 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -64,10 +64,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Inactive(anon): %8lu kB\n" "Active(file): %8lu kB\n" "Inactive(file): %8lu kB\n" -#ifdef CONFIG_UNEVICTABLE_LRU "Unevictable: %8lu kB\n" "Mlocked: %8lu kB\n" -#endif #ifdef CONFIG_HIGHMEM "HighTotal: %8lu kB\n" "HighFree: %8lu kB\n" @@ -109,10 +107,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(pages[LRU_INACTIVE_ANON]), K(pages[LRU_ACTIVE_FILE]), K(pages[LRU_INACTIVE_FILE]), -#ifdef CONFIG_UNEVICTABLE_LRU K(pages[LRU_UNEVICTABLE]), K(global_page_state(NR_MLOCK)), -#endif #ifdef CONFIG_HIGHMEM K(i.totalhigh), K(i.freehigh), diff --git a/fs/proc/page.c b/fs/proc/page.c index e9983837d08d..2707c6c7a20f 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -6,11 +6,13 @@ #include <linux/mmzone.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/hugetlb.h> #include <asm/uaccess.h> #include "internal.h" #define KPMSIZE sizeof(u64) #define KPMMASK (KPMSIZE - 1) + /* /proc/kpagecount - an array exposing page counts * * Each entry is a u64 representing the corresponding @@ -32,20 +34,22 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - ppage = NULL; if (pfn_valid(pfn)) ppage = pfn_to_page(pfn); - pfn++; + else + ppage = NULL; if (!ppage) pcount = 0; else pcount = page_mapcount(ppage); - if (put_user(pcount, out++)) { + if (put_user(pcount, out)) { ret = -EFAULT; break; } + pfn++; + out++; count -= KPMSIZE; } @@ -68,19 +72,122 @@ static const struct file_operations proc_kpagecount_operations = { /* These macros are used to decouple internal flags from exported ones */ -#define KPF_LOCKED 0 -#define KPF_ERROR 1 -#define KPF_REFERENCED 2 -#define KPF_UPTODATE 3 -#define KPF_DIRTY 4 -#define KPF_LRU 5 -#define KPF_ACTIVE 6 -#define KPF_SLAB 7 -#define KPF_WRITEBACK 8 -#define KPF_RECLAIM 9 -#define KPF_BUDDY 10 +#define KPF_LOCKED 0 +#define KPF_ERROR 1 +#define KPF_REFERENCED 2 +#define KPF_UPTODATE 3 +#define KPF_DIRTY 4 +#define KPF_LRU 5 +#define KPF_ACTIVE 6 +#define KPF_SLAB 7 +#define KPF_WRITEBACK 8 +#define KPF_RECLAIM 9 +#define KPF_BUDDY 10 + +/* 11-20: new additions in 2.6.31 */ +#define KPF_MMAP 11 +#define KPF_ANON 12 +#define KPF_SWAPCACHE 13 +#define KPF_SWAPBACKED 14 +#define KPF_COMPOUND_HEAD 15 +#define KPF_COMPOUND_TAIL 16 +#define KPF_HUGE 17 +#define KPF_UNEVICTABLE 18 +#define KPF_NOPAGE 20 + +/* kernel hacking assistances + * WARNING: subject to change, never rely on them! + */ +#define KPF_RESERVED 32 +#define KPF_MLOCKED 33 +#define KPF_MAPPEDTODISK 34 +#define KPF_PRIVATE 35 +#define KPF_PRIVATE_2 36 +#define KPF_OWNER_PRIVATE 37 +#define KPF_ARCH 38 +#define KPF_UNCACHED 39 + +static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) +{ + return ((kflags >> kbit) & 1) << ubit; +} -#define kpf_copy_bit(flags, dstpos, srcpos) (((flags >> srcpos) & 1) << dstpos) +static u64 get_uflags(struct page *page) +{ + u64 k; + u64 u; + + /* + * pseudo flag: KPF_NOPAGE + * it differentiates a memory hole from a page with no flags + */ + if (!page) + return 1 << KPF_NOPAGE; + + k = page->flags; + u = 0; + + /* + * pseudo flags for the well known (anonymous) memory mapped pages + * + * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the + * simple test in page_mapped() is not enough. + */ + if (!PageSlab(page) && page_mapped(page)) + u |= 1 << KPF_MMAP; + if (PageAnon(page)) + u |= 1 << KPF_ANON; + + /* + * compound pages: export both head/tail info + * they together define a compound page's start/end pos and order + */ + if (PageHead(page)) + u |= 1 << KPF_COMPOUND_HEAD; + if (PageTail(page)) + u |= 1 << KPF_COMPOUND_TAIL; + if (PageHuge(page)) + u |= 1 << KPF_HUGE; + + u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); + + /* + * Caveats on high order pages: + * PG_buddy will only be set on the head page; SLUB/SLQB do the same + * for PG_slab; SLOB won't set PG_slab at all on compound pages. + */ + u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); + u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy); + + u |= kpf_copy_bit(k, KPF_ERROR, PG_error); + u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); + u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); + u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback); + + u |= kpf_copy_bit(k, KPF_LRU, PG_lru); + u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced); + u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); + u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); + + u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache); + u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); + + u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); + u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); + +#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR + u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); +#endif + + u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); + u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk); + u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private); + u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); + u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); + u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); + + return u; +}; static ssize_t kpageflags_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -90,7 +197,6 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, unsigned long src = *ppos; unsigned long pfn; ssize_t ret = 0; - u64 kflags, uflags; pfn = src / KPMSIZE; count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); @@ -98,32 +204,18 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - ppage = NULL; if (pfn_valid(pfn)) ppage = pfn_to_page(pfn); - pfn++; - if (!ppage) - kflags = 0; else - kflags = ppage->flags; - - uflags = kpf_copy_bit(kflags, KPF_LOCKED, PG_locked) | - kpf_copy_bit(kflags, KPF_ERROR, PG_error) | - kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | - kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | - kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) | - kpf_copy_bit(kflags, KPF_LRU, PG_lru) | - kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) | - kpf_copy_bit(kflags, KPF_SLAB, PG_slab) | - kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) | - kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) | - kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy); - - if (put_user(uflags, out++)) { + ppage = NULL; + + if (put_user(get_uflags(ppage), out)) { ret = -EFAULT; break; } + pfn++; + out++; count -= KPMSIZE; } diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 3a6b193d8444..0ff7566c767c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -202,9 +202,12 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts) return -EINVAL; opts->mode = option & S_IALLUGO; break; - default: - printk(KERN_ERR "ramfs: bad mount option: %s\n", p); - return -EINVAL; + /* + * We might like to report bad mount options here; + * but traditionally ramfs has ignored all mount options, + * and as it is used as a !CONFIG_SHMEM simple substitute + * for tmpfs, better continue to ignore other mount options. + */ } } diff --git a/fs/select.c b/fs/select.c index 0fe0e1469df3..d870237e42c7 100644 --- a/fs/select.c +++ b/fs/select.c @@ -168,7 +168,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) return table->entry++; } -static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct poll_wqueues *pwq = wait->private; DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); @@ -194,6 +194,16 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) return default_wake_function(&dummy_wait, mode, sync, key); } +static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct poll_table_entry *entry; + + entry = container_of(wait, struct poll_table_entry, wait); + if (key && !((unsigned long)key & entry->key)) + return 0; + return __pollwake(wait, mode, sync, key); +} + /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) @@ -205,6 +215,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, get_file(filp); entry->filp = filp; entry->wait_address = wait_address; + entry->key = p->key; init_waitqueue_func_entry(&entry->wait, pollwake); entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); @@ -362,6 +373,18 @@ get_max: #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) #define POLLEX_SET (POLLPRI) +static inline void wait_key_set(poll_table *wait, unsigned long in, + unsigned long out, unsigned long bit) +{ + if (wait) { + wait->key = POLLEX_SET; + if (in & bit) + wait->key |= POLLIN_SET; + if (out & bit) + wait->key |= POLLOUT_SET; + } +} + int do_select(int n, fd_set_bits *fds, struct timespec *end_time) { ktime_t expire, *to = NULL; @@ -418,20 +441,25 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) if (file) { f_op = file->f_op; mask = DEFAULT_POLLMASK; - if (f_op && f_op->poll) - mask = (*f_op->poll)(file, retval ? NULL : wait); + if (f_op && f_op->poll) { + wait_key_set(wait, in, out, bit); + mask = (*f_op->poll)(file, wait); + } fput_light(file, fput_needed); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; + wait = NULL; } if ((mask & POLLOUT_SET) && (out & bit)) { res_out |= bit; retval++; + wait = NULL; } if ((mask & POLLEX_SET) && (ex & bit)) { res_ex |= bit; retval++; + wait = NULL; } } } @@ -685,8 +713,12 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) mask = POLLNVAL; if (file != NULL) { mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) + if (file->f_op && file->f_op->poll) { + if (pwait) + pwait->key = pollfd->events | + POLLERR | POLLHUP; mask = file->f_op->poll(file, pwait); + } /* Mask out unneeded events. */ mask &= pollfd->events | POLLERR | POLLHUP; fput_light(file, fput_needed); diff --git a/fs/super.c b/fs/super.c index 83b47416d006..d40d53a22fb5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -545,24 +545,18 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { if (force) mark_files_ro(sb); - else if (!fs_may_remount_ro(sb)) { - unlock_kernel(); + else if (!fs_may_remount_ro(sb)) return -EBUSY; - } retval = vfs_dq_off(sb, 1); - if (retval < 0 && retval != -ENOSYS) { - unlock_kernel(); + if (retval < 0 && retval != -ENOSYS) return -EBUSY; - } } remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); - if (retval) { - unlock_kernel(); + if (retval) return retval; - } } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); if (remount_rw) diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index a3ba217fbe74..1d897ad808e0 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -192,8 +192,11 @@ static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) { int error = -ENOMEM; unsigned long page = get_zeroed_page(GFP_KERNEL); - if (page) + if (page) { error = sysfs_getlink(dentry, (char *) page); + if (error < 0) + free_page((unsigned long)page); + } nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); return NULL; } diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index c7798079e644..4e50286a4cc3 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -15,13 +15,13 @@ #include <linux/pagemap.h> #include <linux/highmem.h> -#include <linux/smp_lock.h> #include <linux/swap.h> #include "sysv.h" static int sysv_readdir(struct file *, void *, filldir_t); const struct file_operations sysv_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = sysv_readdir, .fsync = simple_fsync, @@ -74,8 +74,6 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) unsigned long n = pos >> PAGE_CACHE_SHIFT; unsigned long npages = dir_pages(inode); - lock_kernel(); - pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1); if (pos >= inode->i_size) goto done; @@ -113,7 +111,6 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) done: filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset; - unlock_kernel(); return 0; } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 479923456a54..9824743832a7 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -21,7 +21,6 @@ * the superblock. */ -#include <linux/smp_lock.h> #include <linux/highuid.h> #include <linux/slab.h> #include <linux/init.h> @@ -37,7 +36,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) unsigned long time = get_seconds(), old_time; lock_super(sb); - lock_kernel(); /* * If we are going to write out the super block, @@ -52,7 +50,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) mark_buffer_dirty(sbi->s_bh2); } - unlock_kernel(); unlock_super(sb); return 0; @@ -82,8 +79,6 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); - lock_kernel(); - if (sb->s_dirt) sysv_write_super(sb); @@ -99,8 +94,6 @@ static void sysv_put_super(struct super_block *sb) brelse(sbi->s_bh2); kfree(sbi); - - unlock_kernel(); } static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -275,7 +268,6 @@ int sysv_write_inode(struct inode *inode, int wait) return -EIO; } - lock_kernel(); raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); @@ -291,7 +283,6 @@ int sysv_write_inode(struct inode *inode, int wait) for (block = 0; block < 10+1+1+1; block++) write3byte(sbi, (u8 *)&si->i_data[block], &raw_inode->i_data[3*block]); - unlock_kernel(); mark_buffer_dirty(bh); if (wait) { sync_dirty_buffer(bh); @@ -315,9 +306,7 @@ static void sysv_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; sysv_truncate(inode); - lock_kernel(); sysv_free_inode(inode); - unlock_kernel(); } static struct kmem_cache *sysv_inode_cachep; diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index af1914462f02..eaf6d891d46f 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -91,7 +91,6 @@ static int shrink_liability(struct ubifs_info *c, int nr_to_write) return nr_written; } - /** * run_gc - run garbage collector. * @c: UBIFS file-system description object @@ -628,7 +627,7 @@ void ubifs_convert_page_budget(struct ubifs_info *c) * * This function releases budget corresponding to a dirty inode. It is usually * called when after the inode has been written to the media and marked as - * clean. + * clean. It also causes the "no space" flags to be cleared. */ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, struct ubifs_inode *ui) @@ -636,6 +635,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, struct ubifs_budget_req req; memset(&req, 0, sizeof(struct ubifs_budget_req)); + /* The "no space" flags will be cleared because dd_growth is > 0 */ req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); ubifs_release_budget(c, &req); } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index f55d523c52bb..552fb0111fff 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -528,6 +528,25 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); + + /* + * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing + * otherwise has the potential to corrupt the orphan inode list. + * + * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and + * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not + * lock 'dirA->i_mutex', so this is possible. Both of the functions + * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes + * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this + * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA' + * to the list of orphans. After this, 'vfs_link()' will link + * 'dirB/fileB' to 'inodeA'. This is a problem because, for example, + * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode + * to the list of orphans. + */ + if (inode->i_nlink == 0) + return -ENOENT; + err = dbg_check_synced_i_size(inode); if (err) return err; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index e8e632a1dcdf..bc5857199ec2 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -293,13 +293,14 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) * * This function is called when the write-buffer timer expires. */ -static void wbuf_timer_callback_nolock(unsigned long data) +static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) { - struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data; + struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); wbuf->need_sync = 1; wbuf->c->need_wbuf_sync = 1; ubifs_wake_up_bgt(wbuf->c); + return HRTIMER_NORESTART; } /** @@ -308,13 +309,12 @@ static void wbuf_timer_callback_nolock(unsigned long data) */ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) { - ubifs_assert(!timer_pending(&wbuf->timer)); + ubifs_assert(!hrtimer_active(&wbuf->timer)); - if (!wbuf->timeout) + if (!ktime_to_ns(wbuf->softlimit)) return; - - wbuf->timer.expires = jiffies + wbuf->timeout; - add_timer(&wbuf->timer); + hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, + HRTIMER_MODE_REL); } /** @@ -329,7 +329,7 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) * should be canceled. */ wbuf->need_sync = 0; - del_timer(&wbuf->timer); + hrtimer_cancel(&wbuf->timer); } /** @@ -825,6 +825,7 @@ out: int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) { size_t size; + ktime_t hardlimit; wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); if (!wbuf->buf) @@ -845,14 +846,21 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) wbuf->sync_callback = NULL; mutex_init(&wbuf->io_mutex); spin_lock_init(&wbuf->lock); - wbuf->c = c; - init_timer(&wbuf->timer); - wbuf->timer.function = wbuf_timer_callback_nolock; - wbuf->timer.data = (unsigned long)wbuf; - wbuf->timeout = DEFAULT_WBUF_TIMEOUT; wbuf->next_ino = 0; + hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + wbuf->timer.function = wbuf_timer_callback_nolock; + /* + * Make write-buffer soft limit to be 20% of the hard limit. The + * write-buffer timer is allowed to expire any time between the soft + * and hard limits. + */ + hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0); + wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10; + wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta); + hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit, + wbuf->delta); return 0; } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 10662975d2ef..805605250f12 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -343,33 +343,15 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) * * This function returns %1 if @offs was in the last write to the LEB whose data * is in @buf, otherwise %0 is returned. The determination is made by checking - * for subsequent empty space starting from the next min_io_size boundary (or a - * bit less than the common header size if min_io_size is one). + * for subsequent empty space starting from the next @c->min_io_size boundary. */ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) { - int empty_offs; - int check_len; + int empty_offs, check_len; uint8_t *p; - if (c->min_io_size == 1) { - check_len = c->leb_size - offs; - p = buf + check_len; - for (; check_len > 0; check_len--) - if (*--p != 0xff) - break; - /* - * 'check_len' is the size of the corruption which cannot be - * more than the size of 1 node if it was caused by an unclean - * unmount. - */ - if (check_len > UBIFS_MAX_NODE_SZ) - return 0; - return 1; - } - /* - * Round up to the next c->min_io_size boundary i.e. 'offs' is in the + * Round up to the next @c->min_io_size boundary i.e. @offs is in the * last wbuf written. After that should be empty space. */ empty_offs = ALIGN(offs + 1, c->min_io_size); @@ -392,7 +374,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) * * This function pads up to the next min_io_size boundary (if there is one) and * sets empty space to all 0xff. @buf, @offs and @len are updated to the next - * min_io_size boundary (if there is one). + * @c->min_io_size boundary. */ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, int *offs, int *len) @@ -402,11 +384,6 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, lnum = lnum; dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); - if (c->min_io_size == 1) { - memset(*buf, 0xff, c->leb_size - *offs); - return; - } - ubifs_assert(!(*offs & 7)); empty_offs = ALIGN(*offs, c->min_io_size); pad_len = empty_offs - *offs; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3589eab02a2f..79fad43f3c57 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -361,6 +361,11 @@ static void ubifs_delete_inode(struct inode *inode) out: if (ui->dirty) ubifs_release_dirty_inode_budget(c, ui); + else { + /* We've deleted something - clean the "no space" flags */ + c->nospace = c->nospace_rp = 0; + smp_wmb(); + } clear_inode(inode); } @@ -792,7 +797,7 @@ static int alloc_wbufs(struct ubifs_info *c) * does not need to be synchronized by timer. */ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; - c->jheads[GCHD].wbuf.timeout = 0; + c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0); return 0; } @@ -933,6 +938,27 @@ static const match_table_t tokens = { }; /** + * parse_standard_option - parse a standard mount option. + * @option: the option to parse + * + * Normally, standard mount options like "sync" are passed to file-systems as + * flags. However, when a "rootflags=" kernel boot parameter is used, they may + * be present in the options string. This function tries to deal with this + * situation and parse standard options. Returns 0 if the option was not + * recognized, and the corresponding integer flag if it was. + * + * UBIFS is only interested in the "sync" option, so do not check for anything + * else. + */ +static int parse_standard_option(const char *option) +{ + ubifs_msg("parse %s", option); + if (!strcmp(option, "sync")) + return MS_SYNCHRONOUS; + return 0; +} + +/** * ubifs_parse_options - parse mount parameters. * @c: UBIFS file-system description object * @options: parameters to parse @@ -1008,9 +1034,19 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, break; } default: - ubifs_err("unrecognized mount option \"%s\" " - "or missing value", p); - return -EINVAL; + { + unsigned long flag; + struct super_block *sb = c->vfs_sb; + + flag = parse_standard_option(p); + if (!flag) { + ubifs_err("unrecognized mount option \"%s\" " + "or missing value", p); + return -EINVAL; + } + sb->s_flags |= flag; + break; + } } } @@ -1180,6 +1216,7 @@ static int mount_ubifs(struct ubifs_info *c) if (!ubifs_compr_present(c->default_compr)) { ubifs_err("'compressor \"%s\" is not compiled in", ubifs_compr_name(c->default_compr)); + err = -ENOTSUPP; goto out_free; } @@ -1656,7 +1693,7 @@ static void ubifs_remount_ro(struct ubifs_info *c) for (i = 0; i < c->jhead_cnt; i++) { ubifs_wbuf_sync(&c->jheads[i].wbuf); - del_timer_sync(&c->jheads[i].wbuf.timer); + hrtimer_cancel(&c->jheads[i].wbuf.timer); } c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); @@ -1719,7 +1756,7 @@ static void ubifs_put_super(struct super_block *sb) if (c->jheads) for (i = 0; i < c->jhead_cnt; i++) { ubifs_wbuf_sync(&c->jheads[i].wbuf); - del_timer_sync(&c->jheads[i].wbuf.timer); + hrtimer_cancel(&c->jheads[i].wbuf.timer); } /* @@ -1911,6 +1948,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&c->orph_list); INIT_LIST_HEAD(&c->orph_new); + c->vfs_sb = sb; c->highest_inum = UBIFS_FIRST_INO; c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; @@ -1937,18 +1975,18 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) err = bdi_init(&c->bdi); if (err) goto out_close; + err = bdi_register(&c->bdi, NULL, "ubifs"); + if (err) + goto out_bdi; err = ubifs_parse_options(c, data, 0); if (err) goto out_bdi; - c->vfs_sb = sb; - sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; sb->s_blocksize = UBIFS_BLOCK_SIZE; sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; - sb->s_dev = c->vi.cdev; sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); if (c->max_inode_sz > MAX_LFS_FILESIZE) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; @@ -1993,16 +2031,9 @@ out_free: static int sb_test(struct super_block *sb, void *data) { dev_t *dev = data; + struct ubifs_info *c = sb->s_fs_info; - return sb->s_dev == *dev; -} - -static int sb_set(struct super_block *sb, void *data) -{ - dev_t *dev = data; - - sb->s_dev = *dev; - return 0; + return c->vi.cdev == *dev; } static int ubifs_get_sb(struct file_system_type *fs_type, int flags, @@ -2030,7 +2061,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); - sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); + sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); if (IS_ERR(sb)) { err = PTR_ERR(sb); goto out_close; @@ -2070,16 +2101,11 @@ out_close: return err; } -static void ubifs_kill_sb(struct super_block *sb) -{ - generic_shutdown_super(sb); -} - static struct file_system_type ubifs_fs_type = { .name = "ubifs", .owner = THIS_MODULE, .get_sb = ubifs_get_sb, - .kill_sb = ubifs_kill_sb + .kill_sb = kill_anon_super, }; /* diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 0a8341e14088..1bf01d820066 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -95,8 +95,8 @@ */ #define BGT_NAME_PATTERN "ubifs_bgt%d_%d" -/* Default write-buffer synchronization timeout (5 secs) */ -#define DEFAULT_WBUF_TIMEOUT (5 * HZ) +/* Default write-buffer synchronization timeout in seconds */ +#define DEFAULT_WBUF_TIMEOUT_SECS 5 /* Maximum possible inode number (only 32-bit inodes are supported now) */ #define MAX_INUM 0xFFFFFFFF @@ -650,8 +650,10 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @io_mutex: serializes write-buffer I/O * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes * fields + * @softlimit: soft write-buffer timeout interval + * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit + * and @softlimit + @delta) * @timer: write-buffer timer - * @timeout: timer expire interval in jiffies * @need_sync: it is set if its timer expired and needs sync * @next_ino: points to the next position of the following inode number * @inodes: stores the inode numbers of the nodes which are in wbuf @@ -678,8 +680,9 @@ struct ubifs_wbuf { int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); struct mutex io_mutex; spinlock_t lock; - struct timer_list timer; - int timeout; + ktime_t softlimit; + unsigned long long delta; + struct hrtimer timer; int need_sync; int next_ino; ino_t *inodes; diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 29228f5899cd..480f28127f09 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -39,6 +39,7 @@ config XFS_QUOTA config XFS_POSIX_ACL bool "XFS POSIX ACL support" depends on XFS_FS + select FS_POSIX_ACL help POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 60f107e47fe9..7a59daed1782 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o endif xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o +xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o @@ -88,8 +88,7 @@ xfs-y += xfs_alloc.o \ xfs_utils.o \ xfs_vnodeops.o \ xfs_rw.o \ - xfs_dmops.o \ - xfs_qmops.o + xfs_dmops.o xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ xfs_dir2_trace.o diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c new file mode 100644 index 000000000000..1e9d1246eebc --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -0,0 +1,523 @@ +/* + * Copyright (c) 2008, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_vnodeops.h" +#include <linux/xattr.h> +#include <linux/posix_acl_xattr.h> + + +#define XFS_ACL_NOT_CACHED ((void *)-1) + +/* + * Locking scheme: + * - all ACL updates are protected by inode->i_mutex, which is taken before + * calling into this file. + * - access and updates to the ip->i_acl and ip->i_default_acl pointers are + * protected by inode->i_lock. + */ + +STATIC struct posix_acl * +xfs_acl_from_disk(struct xfs_acl *aclp) +{ + struct posix_acl_entry *acl_e; + struct posix_acl *acl; + struct xfs_acl_entry *ace; + int count, i; + + count = be32_to_cpu(aclp->acl_cnt); + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + acl_e = &acl->a_entries[i]; + ace = &aclp->acl_entry[i]; + + /* + * The tag is 32 bits on disk and 16 bits in core. + * + * Because every access to it goes through the core + * format first this is not a problem. + */ + acl_e->e_tag = be32_to_cpu(ace->ae_tag); + acl_e->e_perm = be16_to_cpu(ace->ae_perm); + + switch (acl_e->e_tag) { + case ACL_USER: + case ACL_GROUP: + acl_e->e_id = be32_to_cpu(ace->ae_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + acl_e->e_id = ACL_UNDEFINED_ID; + break; + default: + goto fail; + } + } + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +STATIC void +xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) +{ + const struct posix_acl_entry *acl_e; + struct xfs_acl_entry *ace; + int i; + + aclp->acl_cnt = cpu_to_be32(acl->a_count); + for (i = 0; i < acl->a_count; i++) { + ace = &aclp->acl_entry[i]; + acl_e = &acl->a_entries[i]; + + ace->ae_tag = cpu_to_be32(acl_e->e_tag); + ace->ae_id = cpu_to_be32(acl_e->e_id); + ace->ae_perm = cpu_to_be16(acl_e->e_perm); + } +} + +/* + * Update the cached ACL pointer in the inode. + * + * Because we don't hold any locks while reading/writing the attribute + * from/to disk another thread could have raced and updated the cached + * ACL value before us. In that case we release the previous cached value + * and update it with our new value. + */ +STATIC void +xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED) + posix_acl_release(*p_acl); + *p_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +struct posix_acl * +xfs_get_acl(struct inode *inode, int type) +{ + struct xfs_inode *ip = XFS_I(inode); + struct posix_acl *acl = NULL, **p_acl; + struct xfs_acl *xfs_acl; + int len = sizeof(struct xfs_acl); + char *ea_name; + int error; + + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = SGI_ACL_FILE; + p_acl = &ip->i_acl; + break; + case ACL_TYPE_DEFAULT: + ea_name = SGI_ACL_DEFAULT; + p_acl = &ip->i_default_acl; + break; + default: + return ERR_PTR(-EINVAL); + } + + spin_lock(&inode->i_lock); + if (*p_acl != XFS_ACL_NOT_CACHED) + acl = posix_acl_dup(*p_acl); + spin_unlock(&inode->i_lock); + + /* + * If we have a cached ACLs value just return it, not need to + * go out to the disk. + */ + if (acl) + return acl; + + xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + if (!xfs_acl) + return ERR_PTR(-ENOMEM); + + error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT); + if (error) { + /* + * If the attribute doesn't exist make sure we have a negative + * cache entry, for any other error assume it is transient and + * leave the cache entry as XFS_ACL_NOT_CACHED. + */ + if (error == -ENOATTR) { + acl = NULL; + goto out_update_cache; + } + goto out; + } + + acl = xfs_acl_from_disk(xfs_acl); + if (IS_ERR(acl)) + goto out; + + out_update_cache: + xfs_update_cached_acl(inode, p_acl, acl); + out: + kfree(xfs_acl); + return acl; +} + +STATIC int +xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct xfs_inode *ip = XFS_I(inode); + struct posix_acl **p_acl; + char *ea_name; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = SGI_ACL_FILE; + p_acl = &ip->i_acl; + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + ea_name = SGI_ACL_DEFAULT; + p_acl = &ip->i_default_acl; + break; + default: + return -EINVAL; + } + + if (acl) { + struct xfs_acl *xfs_acl; + int len; + + xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + if (!xfs_acl) + return -ENOMEM; + + xfs_acl_to_disk(xfs_acl, acl); + len = sizeof(struct xfs_acl) - + (sizeof(struct xfs_acl_entry) * + (XFS_ACL_MAX_ENTRIES - acl->a_count)); + + error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl, + len, ATTR_ROOT); + + kfree(xfs_acl); + } else { + /* + * A NULL ACL argument means we want to remove the ACL. + */ + error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); + + /* + * If the attribute didn't exist to start with that's fine. + */ + if (error == -ENOATTR) + error = 0; + } + + if (!error) + xfs_update_cached_acl(inode, p_acl, acl); + return error; +} + +int +xfs_check_acl(struct inode *inode, int mask) +{ + struct xfs_inode *ip = XFS_I(inode); + struct posix_acl *acl; + int error = -EAGAIN; + + xfs_itrace_entry(ip); + + /* + * If there is no attribute fork no ACL exists on this inode and + * we can skip the whole exercise. + */ + if (!XFS_IFORK_Q(ip)) + return -EAGAIN; + + acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + } + + return error; +} + +static int +xfs_set_mode(struct inode *inode, mode_t mode) +{ + int error = 0; + + if (mode != inode->i_mode) { + struct iattr iattr; + + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = mode; + + error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + } + + return error; +} + +static int +xfs_acl_exists(struct inode *inode, char *name) +{ + int len = sizeof(struct xfs_acl); + + return (xfs_attr_get(XFS_I(inode), name, NULL, &len, + ATTR_ROOT|ATTR_KERNOVAL) == 0); +} + +int +posix_acl_access_exists(struct inode *inode) +{ + return xfs_acl_exists(inode, SGI_ACL_FILE); +} + +int +posix_acl_default_exists(struct inode *inode) +{ + if (!S_ISDIR(inode->i_mode)) + return 0; + return xfs_acl_exists(inode, SGI_ACL_DEFAULT); +} + +/* + * No need for i_mutex because the inode is not yet exposed to the VFS. + */ +int +xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl) +{ + struct posix_acl *clone; + mode_t mode; + int error = 0, inherit = 0; + + if (S_ISDIR(inode->i_mode)) { + error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl); + if (error) + return error; + } + + clone = posix_acl_clone(default_acl, GFP_KERNEL); + if (!clone) + return -ENOMEM; + + mode = inode->i_mode; + error = posix_acl_create_masq(clone, &mode); + if (error < 0) + goto out_release_clone; + + /* + * If posix_acl_create_masq returns a positive value we need to + * inherit a permission that can't be represented using the Unix + * mode bits and we actually need to set an ACL. + */ + if (error > 0) + inherit = 1; + + error = xfs_set_mode(inode, mode); + if (error) + goto out_release_clone; + + if (inherit) + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + + out_release_clone: + posix_acl_release(clone); + return error; +} + +int +xfs_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + + posix_acl_release(clone); + return error; +} + +void +xfs_inode_init_acls(struct xfs_inode *ip) +{ + /* + * No need for locking, inode is not live yet. + */ + ip->i_acl = XFS_ACL_NOT_CACHED; + ip->i_default_acl = XFS_ACL_NOT_CACHED; +} + +void +xfs_inode_clear_acls(struct xfs_inode *ip) +{ + /* + * No need for locking here, the inode is not live anymore + * and just about to be freed. + */ + if (ip->i_acl != XFS_ACL_NOT_CACHED) + posix_acl_release(ip->i_acl); + if (ip->i_default_acl != XFS_ACL_NOT_CACHED) + posix_acl_release(ip->i_default_acl); +} + + +/* + * System xattr handlers. + * + * Currently Posix ACLs are the only system namespace extended attribute + * handlers supported by XFS, so we just implement the handlers here. + * If we ever support other system extended attributes this will need + * some refactoring. + */ + +static int +xfs_decode_acl(const char *name) +{ + if (strcmp(name, "posix_acl_access") == 0) + return ACL_TYPE_ACCESS; + else if (strcmp(name, "posix_acl_default") == 0) + return ACL_TYPE_DEFAULT; + return -EINVAL; +} + +static int +xfs_xattr_system_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + struct posix_acl *acl; + int type, error; + + type = xfs_decode_acl(name); + if (type < 0) + return type; + + acl = xfs_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + error = posix_acl_to_xattr(acl, value, size); + posix_acl_release(acl); + + return error; +} + +static int +xfs_xattr_system_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + struct posix_acl *acl = NULL; + int error = 0, type; + + type = xfs_decode_acl(name); + if (type < 0) + return type; + if (flags & XATTR_CREATE) + return -EINVAL; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return value ? -EACCES : 0; + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (!value) + goto set_acl; + + acl = posix_acl_from_xattr(value, size); + if (!acl) { + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + goto out; + } + if (IS_ERR(acl)) { + error = PTR_ERR(acl); + goto out; + } + + error = posix_acl_valid(acl); + if (error) + goto out_release; + + error = -EINVAL; + if (acl->a_count > XFS_ACL_MAX_ENTRIES) + goto out_release; + + if (type == ACL_TYPE_ACCESS) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + + if (error <= 0) { + posix_acl_release(acl); + acl = NULL; + + if (error < 0) + return error; + } + + error = xfs_set_mode(inode, mode); + if (error) + goto out_release; + } + + set_acl: + error = xfs_set_acl(inode, type, acl); + out_release: + posix_acl_release(acl); + out: + return error; +} + +struct xattr_handler xfs_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .get = xfs_xattr_system_get, + .set = xfs_xattr_system_set, +}; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 34eaab608e6e..5bb523d7f37e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -41,7 +41,6 @@ #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" @@ -899,7 +898,8 @@ xfs_ioctl_setattr( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; unsigned int lock_flags = 0; - struct xfs_dquot *udqp = NULL, *gdqp = NULL; + struct xfs_dquot *udqp = NULL; + struct xfs_dquot *gdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; @@ -919,7 +919,7 @@ xfs_ioctl_setattr( * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { - code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid, + code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, &udqp, &gdqp); if (code) @@ -954,10 +954,11 @@ xfs_ioctl_setattr( * Do a quota reservation only if projid is actually going to change. */ if (mask & FSX_PROJID) { - if (XFS_IS_PQUOTA_ON(mp) && + if (XFS_IS_QUOTA_RUNNING(mp) && + XFS_IS_PQUOTA_ON(mp) && ip->i_d.di_projid != fa->fsx_projid) { ASSERT(tp); - code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ @@ -1059,8 +1060,8 @@ xfs_ioctl_setattr( * in the transaction. */ if (ip->i_d.di_projid != fa->fsx_projid) { - if (XFS_IS_PQUOTA_ON(mp)) { - olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip, + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { + olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_projid = fa->fsx_projid; @@ -1106,9 +1107,9 @@ xfs_ioctl_setattr( /* * Release any dquot(s) the inode had kept before chown. */ - XFS_QM_DQRELE(mp, olddquot); - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(olddquot); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); if (code) return code; @@ -1122,8 +1123,8 @@ xfs_ioctl_setattr( return 0; error_return: - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); xfs_trans_cancel(tp, 0); if (lock_flags) xfs_iunlock(ip, lock_flags); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 6075382336d7..58973bb46038 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -17,6 +17,7 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -51,6 +52,7 @@ #include <linux/capability.h> #include <linux/xattr.h> #include <linux/namei.h> +#include <linux/posix_acl.h> #include <linux/security.h> #include <linux/falloc.h> #include <linux/fiemap.h> @@ -202,9 +204,8 @@ xfs_vn_mknod( { struct inode *inode; struct xfs_inode *ip = NULL; - xfs_acl_t *default_acl = NULL; + struct posix_acl *default_acl = NULL; struct xfs_name name; - int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS; int error; /* @@ -219,18 +220,14 @@ xfs_vn_mknod( rdev = 0; } - if (test_default_acl && test_default_acl(dir)) { - if (!_ACL_ALLOC(default_acl)) { - return -ENOMEM; - } - if (!_ACL_GET_DEFAULT(dir, default_acl)) { - _ACL_FREE(default_acl); - default_acl = NULL; - } - } + if (IS_POSIXACL(dir)) { + default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(default_acl)) + return -PTR_ERR(default_acl); - if (IS_POSIXACL(dir) && !default_acl) - mode &= ~current_umask(); + if (!default_acl) + mode &= ~current_umask(); + } xfs_dentry_to_name(&name, dentry); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); @@ -244,10 +241,10 @@ xfs_vn_mknod( goto out_cleanup_inode; if (default_acl) { - error = _ACL_INHERIT(inode, mode, default_acl); + error = -xfs_inherit_acl(inode, default_acl); if (unlikely(error)) goto out_cleanup_inode; - _ACL_FREE(default_acl); + posix_acl_release(default_acl); } @@ -257,8 +254,7 @@ xfs_vn_mknod( out_cleanup_inode: xfs_cleanup_inode(dir, inode, dentry); out_free_acl: - if (default_acl) - _ACL_FREE(default_acl); + posix_acl_release(default_acl); return -error; } @@ -488,26 +484,6 @@ xfs_vn_put_link( kfree(s); } -#ifdef CONFIG_XFS_POSIX_ACL -STATIC int -xfs_check_acl( - struct inode *inode, - int mask) -{ - struct xfs_inode *ip = XFS_I(inode); - int error; - - xfs_itrace_entry(ip); - - if (XFS_IFORK_Q(ip)) { - error = xfs_acl_iaccess(ip, mask, NULL); - if (error != -1) - return -error; - } - - return -EAGAIN; -} - STATIC int xfs_vn_permission( struct inode *inode, @@ -515,9 +491,6 @@ xfs_vn_permission( { return generic_permission(inode, mask, xfs_check_acl); } -#else -#define xfs_vn_permission NULL -#endif STATIC int xfs_vn_getattr( diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 9142192ccbe6..7078974a6eee 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_inode_item.h" #include "xfs_buf_item.h" diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 94d9a633d3d9..cb6e2cca214f 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -50,9 +50,11 @@ xfs_fs_quota_sync( { struct xfs_mount *mp = XFS_M(sb); + if (sb->s_flags & MS_RDONLY) + return -EROFS; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return -xfs_sync_inodes(mp, SYNC_DELWRI); + return -xfs_sync_data(mp, 0); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 08d6bd9a3947..2e09efbca8db 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -43,7 +43,6 @@ #include "xfs_itable.h" #include "xfs_fsops.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -405,6 +404,14 @@ xfs_parseargs( return EINVAL; } +#ifndef CONFIG_XFS_QUOTA + if (XFS_IS_QUOTA_RUNNING(mp)) { + cmn_err(CE_WARN, + "XFS: quota support not available in this kernel."); + return EINVAL; + } +#endif + if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { cmn_err(CE_WARN, @@ -1063,7 +1070,18 @@ xfs_fs_put_super( int unmount_event_flags = 0; xfs_syncd_stop(mp); - xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); + + if (!(sb->s_flags & MS_RDONLY)) { + /* + * XXX(hch): this should be SYNC_WAIT. + * + * Or more likely not needed at all because the VFS is already + * calling ->sync_fs after shutting down all filestem + * operations and just before calling ->put_super. + */ + xfs_sync_data(mp, 0); + xfs_sync_attr(mp, 0); + } #ifdef HAVE_DMAPI if (mp->m_flags & XFS_MOUNT_DMAPI) { @@ -1098,7 +1116,6 @@ xfs_fs_put_super( xfs_freesb(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); - xfs_qmops_put(mp); xfs_dmops_put(mp); xfs_free_fsname(mp); kfree(mp); @@ -1158,6 +1175,7 @@ xfs_fs_statfs( { struct xfs_mount *mp = XFS_M(dentry->d_sb); xfs_sb_t *sbp = &mp->m_sb; + struct xfs_inode *ip = XFS_I(dentry->d_inode); __uint64_t fakeinos, id; xfs_extlen_t lsize; @@ -1186,7 +1204,10 @@ xfs_fs_statfs( statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); spin_unlock(&mp->m_sb_lock); - XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp); + if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || + ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + xfs_qm_statvfs(ip, statp); return 0; } @@ -1394,16 +1415,13 @@ xfs_fs_fill_super( error = xfs_dmops_get(mp); if (error) goto out_free_fsname; - error = xfs_qmops_get(mp); - if (error) - goto out_put_dmops; if (silent) flags |= XFS_MFSI_QUIET; error = xfs_open_devices(mp); if (error) - goto out_put_qmops; + goto out_put_dmops; if (xfs_icsb_init_counters(mp)) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; @@ -1471,8 +1489,6 @@ xfs_fs_fill_super( out_destroy_counters: xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); - out_put_qmops: - xfs_qmops_put(mp); out_put_dmops: xfs_dmops_put(mp); out_free_fsname: @@ -1706,18 +1722,8 @@ xfs_init_zones(void) if (!xfs_ili_zone) goto out_destroy_inode_zone; -#ifdef CONFIG_XFS_POSIX_ACL - xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl"); - if (!xfs_acl_zone) - goto out_destroy_ili_zone; -#endif - return 0; -#ifdef CONFIG_XFS_POSIX_ACL - out_destroy_ili_zone: -#endif - kmem_zone_destroy(xfs_ili_zone); out_destroy_inode_zone: kmem_zone_destroy(xfs_inode_zone); out_destroy_efi_zone: @@ -1751,9 +1757,6 @@ xfs_init_zones(void) STATIC void xfs_destroy_zones(void) { -#ifdef CONFIG_XFS_POSIX_ACL - kmem_zone_destroy(xfs_acl_zone); -#endif kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index f7ba76633c29..b619d6b8ca43 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -43,166 +43,267 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_rw.h" +#include "xfs_quota.h" #include <linux/kthread.h> #include <linux/freezer.h> -/* - * Sync all the inodes in the given AG according to the - * direction given by the flags. - */ -STATIC int -xfs_sync_inodes_ag( - xfs_mount_t *mp, - int ag, - int flags) -{ - xfs_perag_t *pag = &mp->m_perag[ag]; - int nr_found; - uint32_t first_index = 0; - int error = 0; - int last_error = 0; - do { - struct inode *inode; - xfs_inode_t *ip = NULL; - int lock_flags = XFS_ILOCK_SHARED; +STATIC xfs_inode_t * +xfs_inode_ag_lookup( + struct xfs_mount *mp, + struct xfs_perag *pag, + uint32_t *first_index, + int tag) +{ + int nr_found; + struct xfs_inode *ip; - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - read_lock(&pag->pag_ici_lock); + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + if (tag == XFS_ICI_NO_TAG) { nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void**)&ip, first_index, 1); + (void **)&ip, *first_index, 1); + } else { + nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, + (void **)&ip, *first_index, 1, tag); + } + if (!nr_found) + goto unlock; - if (!nr_found) { - read_unlock(&pag->pag_ici_lock); - break; - } + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + goto unlock; - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { - read_unlock(&pag->pag_ici_lock); - break; - } + return ip; - /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(mp)) { - read_unlock(&pag->pag_ici_lock); - return 0; - } +unlock: + read_unlock(&pag->pag_ici_lock); + return NULL; +} - /* - * If we can't get a reference on the inode, it must be - * in reclaim. Leave it for the reclaim code to flush. - */ - inode = VFS_I(ip); - if (!igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - read_unlock(&pag->pag_ici_lock); +STATIC int +xfs_inode_ag_walk( + struct xfs_mount *mp, + xfs_agnumber_t ag, + int (*execute)(struct xfs_inode *ip, + struct xfs_perag *pag, int flags), + int flags, + int tag) +{ + struct xfs_perag *pag = &mp->m_perag[ag]; + uint32_t first_index; + int last_error = 0; + int skipped; - /* avoid new or bad inodes */ - if (is_bad_inode(inode) || - xfs_iflags_test(ip, XFS_INEW)) { - IRELE(ip); - continue; - } +restart: + skipped = 0; + first_index = 0; + do { + int error = 0; + xfs_inode_t *ip; - /* - * If we have to flush data or wait for I/O completion - * we need to hold the iolock. - */ - if (flags & SYNC_DELWRI) { - if (VN_DIRTY(inode)) { - if (flags & SYNC_TRYLOCK) { - if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) - lock_flags |= XFS_IOLOCK_SHARED; - } else { - xfs_ilock(ip, XFS_IOLOCK_SHARED); - lock_flags |= XFS_IOLOCK_SHARED; - } - if (lock_flags & XFS_IOLOCK_SHARED) { - error = xfs_flush_pages(ip, 0, -1, - (flags & SYNC_WAIT) ? 0 - : XFS_B_ASYNC, - FI_NONE); - } - } - if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) - xfs_ioend_wait(ip); - } - xfs_ilock(ip, XFS_ILOCK_SHARED); - - if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { - if (flags & SYNC_WAIT) { - xfs_iflock(ip); - if (!xfs_inode_clean(ip)) - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - else - xfs_ifunlock(ip); - } else if (xfs_iflock_nowait(ip)) { - if (!xfs_inode_clean(ip)) - error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); - else - xfs_ifunlock(ip); - } - } - xfs_iput(ip, lock_flags); + ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); + if (!ip) + break; + error = execute(ip, pag, flags); + if (error == EAGAIN) { + skipped++; + continue; + } if (error) last_error = error; /* * bail out if the filesystem is corrupted. */ if (error == EFSCORRUPTED) - return XFS_ERROR(error); + break; - } while (nr_found); + } while (1); + + if (skipped) { + delay(1); + goto restart; + } + xfs_put_perag(mp, pag); return last_error; } int -xfs_sync_inodes( - xfs_mount_t *mp, - int flags) +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, + struct xfs_perag *pag, int flags), + int flags, + int tag) { - int error; - int last_error; - int i; - int lflags = XFS_LOG_FORCE; + int error = 0; + int last_error = 0; + xfs_agnumber_t ag; - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; - error = 0; - last_error = 0; + for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { + if (!mp->m_perag[ag].pag_ici_init) + continue; + error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); + if (error) { + last_error = error; + if (error == EFSCORRUPTED) + break; + } + } + return XFS_ERROR(last_error); +} + +/* must be called with pag_ici_lock held and releases it */ +int +xfs_sync_inode_valid( + struct xfs_inode *ip, + struct xfs_perag *pag) +{ + struct inode *inode = VFS_I(ip); + + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + read_unlock(&pag->pag_ici_lock); + return EFSCORRUPTED; + } + + /* + * If we can't get a reference on the inode, it must be in reclaim. + * Leave it for the reclaim code to flush. Also avoid inodes that + * haven't been fully initialised. + */ + if (!igrab(inode)) { + read_unlock(&pag->pag_ici_lock); + return ENOENT; + } + read_unlock(&pag->pag_ici_lock); + + if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); + return ENOENT; + } + + return 0; +} + +STATIC int +xfs_sync_inode_data( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + struct inode *inode = VFS_I(ip); + struct address_space *mapping = inode->i_mapping; + int error = 0; + + error = xfs_sync_inode_valid(ip, pag); + if (error) + return error; + + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) + goto out_wait; + + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + if (flags & SYNC_TRYLOCK) + goto out_wait; + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } + + error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? + 0 : XFS_B_ASYNC, FI_NONE); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + out_wait: if (flags & SYNC_WAIT) - lflags |= XFS_LOG_SYNC; + xfs_ioend_wait(ip); + IRELE(ip); + return error; +} - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - if (!mp->m_perag[i].pag_ici_init) - continue; - error = xfs_sync_inodes_ag(mp, i, flags); - if (error) - last_error = error; - if (error == EFSCORRUPTED) - break; +STATIC int +xfs_sync_inode_attr( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + int error = 0; + + error = xfs_sync_inode_valid(ip, pag); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_inode_clean(ip)) + goto out_unlock; + if (!xfs_iflock_nowait(ip)) { + if (!(flags & SYNC_WAIT)) + goto out_unlock; + xfs_iflock(ip); } - if (flags & SYNC_DELWRI) - xfs_log_force(mp, 0, lflags); - return XFS_ERROR(last_error); + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + goto out_unlock; + } + + error = xfs_iflush(ip, (flags & SYNC_WAIT) ? + XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI); + + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_SHARED); + IRELE(ip); + return error; +} + +/* + * Write out pagecache data for the whole filesystem. + */ +int +xfs_sync_data( + struct xfs_mount *mp, + int flags) +{ + int error; + + ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); + + error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, + XFS_ICI_NO_TAG); + if (error) + return XFS_ERROR(error); + + xfs_log_force(mp, 0, + (flags & SYNC_WAIT) ? + XFS_LOG_FORCE | XFS_LOG_SYNC : + XFS_LOG_FORCE); + return 0; +} + +/* + * Write out inode metadata (attributes) for the whole filesystem. + */ +int +xfs_sync_attr( + struct xfs_mount *mp, + int flags) +{ + ASSERT((flags & ~SYNC_WAIT) == 0); + + return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, + XFS_ICI_NO_TAG); } STATIC int @@ -252,7 +353,7 @@ xfs_sync_fsdata( * If this is xfssyncd() then only sync the superblock if we can * lock it without sleeping and it is not pinned. */ - if (flags & SYNC_BDFLUSH) { + if (flags & SYNC_TRYLOCK) { ASSERT(!(flags & SYNC_WAIT)); bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); @@ -316,13 +417,13 @@ xfs_quiesce_data( int error; /* push non-blocking */ - xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH); - XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + xfs_sync_data(mp, 0); + xfs_qm_sync(mp, SYNC_TRYLOCK); xfs_filestream_flush(mp); /* push and block */ - xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); - XFS_QM_DQSYNC(mp, SYNC_WAIT); + xfs_sync_data(mp, SYNC_WAIT); + xfs_qm_sync(mp, SYNC_WAIT); /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, 0); @@ -341,7 +442,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop @@ -350,7 +451,7 @@ xfs_quiesce_fs( * logged before we can write the unmount record. */ do { - xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); + xfs_sync_attr(mp, SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); @@ -433,8 +534,8 @@ xfs_flush_inodes_work( void *arg) { struct inode *inode = arg; - xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); - xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); + xfs_sync_data(mp, SYNC_TRYLOCK); + xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); iput(inode); } @@ -465,10 +566,10 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ - error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); - error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); + error = xfs_qm_sync(mp, SYNC_TRYLOCK); + error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); if (xfs_log_need_covered(mp)) error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); } @@ -569,7 +670,7 @@ xfs_reclaim_inode( xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); } - return 1; + return -EAGAIN; } __xfs_iflags_set(ip, XFS_IRECLAIM); spin_unlock(&ip->i_flags_lock); @@ -654,101 +755,27 @@ xfs_inode_clear_reclaim_tag( xfs_put_perag(mp, pag); } - -STATIC void -xfs_reclaim_inodes_ag( - xfs_mount_t *mp, - int ag, - int noblock, - int mode) +STATIC int +xfs_reclaim_inode_now( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) { - xfs_inode_t *ip = NULL; - xfs_perag_t *pag = &mp->m_perag[ag]; - int nr_found; - uint32_t first_index; - int skipped; - -restart: - first_index = 0; - skipped = 0; - do { - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - read_lock(&pag->pag_ici_lock); - nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, - (void**)&ip, first_index, 1, - XFS_ICI_RECLAIM_TAG); - - if (!nr_found) { - read_unlock(&pag->pag_ici_lock); - break; - } - - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { - read_unlock(&pag->pag_ici_lock); - break; - } - - /* ignore if already under reclaim */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - - if (noblock) { - if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - if (xfs_ipincount(ip) || - !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - read_unlock(&pag->pag_ici_lock); - continue; - } - } + /* ignore if already under reclaim */ + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { read_unlock(&pag->pag_ici_lock); - - /* - * hmmm - this is an inode already in reclaim. Do - * we even bother catching it here? - */ - if (xfs_reclaim_inode(ip, noblock, mode)) - skipped++; - } while (nr_found); - - if (skipped) { - delay(1); - goto restart; + return 0; } - return; + read_unlock(&pag->pag_ici_lock); + return xfs_reclaim_inode(ip, 0, flags); } int xfs_reclaim_inodes( xfs_mount_t *mp, - int noblock, int mode) { - int i; - - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - if (!mp->m_perag[i].pag_ici_init) - continue; - xfs_reclaim_inodes_ag(mp, i, noblock, mode); - } - return 0; + return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, + XFS_ICI_RECLAIM_TAG); } - - diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 308d5bf6dfbd..2a10301c99c7 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -29,17 +29,14 @@ typedef struct xfs_sync_work { struct completion *w_completion; } xfs_sync_work_t; -#define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_DELWRI 0x0002 /* look at delayed writes */ -#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ -#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ -#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ -#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ +#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ +#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); -int xfs_sync_inodes(struct xfs_mount *mp, int flags); +int xfs_sync_attr(struct xfs_mount *mp, int flags); +int xfs_sync_data(struct xfs_mount *mp, int flags); int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); @@ -48,10 +45,16 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); -int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); +int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); + +int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); +int xfs_inode_ag_iterator(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), + int flags, int tag); + #endif diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index 964621fde6ed..497c7fb75cc1 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c @@ -29,67 +29,6 @@ #include <linux/xattr.h> -/* - * ACL handling. Should eventually be moved into xfs_acl.c - */ - -static int -xfs_decode_acl(const char *name) -{ - if (strcmp(name, "posix_acl_access") == 0) - return _ACL_TYPE_ACCESS; - else if (strcmp(name, "posix_acl_default") == 0) - return _ACL_TYPE_DEFAULT; - return -EINVAL; -} - -/* - * Get system extended attributes which at the moment only - * includes Posix ACLs. - */ -static int -xfs_xattr_system_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - int acl; - - acl = xfs_decode_acl(name); - if (acl < 0) - return acl; - - return xfs_acl_vget(inode, buffer, size, acl); -} - -static int -xfs_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - int acl; - - acl = xfs_decode_acl(name); - if (acl < 0) - return acl; - if (flags & XATTR_CREATE) - return -EINVAL; - - if (!value) - return xfs_acl_vremove(inode, acl); - - return xfs_acl_vset(inode, (void *)value, size, acl); -} - -static struct xattr_handler xfs_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = xfs_xattr_system_get, - .set = xfs_xattr_system_set, -}; - - -/* - * Real xattr handling. The only difference between the namespaces is - * a flag passed to the low-level attr code. - */ - static int __xfs_xattr_get(struct inode *inode, const char *name, void *value, size_t size, int xflags) @@ -199,7 +138,9 @@ struct xattr_handler *xfs_xattr_handlers[] = { &xfs_xattr_user_handler, &xfs_xattr_trusted_handler, &xfs_xattr_security_handler, +#ifdef CONFIG_XFS_POSIX_ACL &xfs_xattr_system_handler, +#endif NULL }; @@ -310,7 +251,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) /* * Then add the two synthetic ACL attributes. */ - if (xfs_acl_vhasacl_access(inode)) { + if (posix_acl_access_exists(inode)) { error = list_one_attr(POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS) + 1, data, size, &context.count); @@ -318,7 +259,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) return error; } - if (xfs_acl_vhasacl_default(inode)) { + if (posix_acl_default_exists(inode)) { error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, strlen(POSIX_ACL_XATTR_DEFAULT) + 1, data, size, &context.count); diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index e4babcc63423..2f3f2229eaaf 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -1194,7 +1193,9 @@ void xfs_qm_dqrele( xfs_dquot_t *dqp) { - ASSERT(dqp); + if (!dqp) + return; + xfs_dqtrace_entry(dqp, "DQRELE"); xfs_dqlock(dqp); diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index de0f402ddb4c..6533ead9b889 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -181,7 +181,6 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, xfs_dqid_t, uint, uint, xfs_dquot_t **); extern void xfs_qm_dqput(xfs_dquot_t *); -extern void xfs_qm_dqrele(xfs_dquot_t *); extern void xfs_dqlock(xfs_dquot_t *); extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); extern void xfs_dqunlock(xfs_dquot_t *); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 1728f6a7c4f5..d0d4a9a0bbd7 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 5b6695049e00..45b1bfef7388 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_bmap.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -287,11 +286,13 @@ xfs_qm_rele_quotafs_ref( * Just destroy the quotainfo structure. */ void -xfs_qm_unmount_quotadestroy( - xfs_mount_t *mp) +xfs_qm_unmount( + struct xfs_mount *mp) { - if (mp->m_quotainfo) + if (mp->m_quotainfo) { + xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); xfs_qm_destroy_quotainfo(mp); + } } @@ -385,8 +386,13 @@ xfs_qm_mount_quotas( if (error) { xfs_fs_cmn_err(CE_WARN, mp, "Failed to initialize disk quotas."); + return; } - return; + +#ifdef QUOTADEBUG + if (XFS_IS_QUOTA_ON(mp)) + xfs_qm_internalqcheck(mp); +#endif } /* @@ -774,12 +780,11 @@ xfs_qm_dqattach_grouphint( * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON * into account. * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. - * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL. * Inode may get unlocked and relocked in here, and the caller must deal with * the consequences. */ int -xfs_qm_dqattach( +xfs_qm_dqattach_locked( xfs_inode_t *ip, uint flags) { @@ -787,17 +792,14 @@ xfs_qm_dqattach( uint nquotas = 0; int error = 0; - if ((! XFS_IS_QUOTA_ON(mp)) || - (! XFS_NOT_DQATTACHED(mp, ip)) || - (ip->i_ino == mp->m_sb.sb_uquotino) || - (ip->i_ino == mp->m_sb.sb_gquotino)) + if (!XFS_IS_QUOTA_RUNNING(mp) || + !XFS_IS_QUOTA_ON(mp) || + !XFS_NOT_DQATTACHED(mp, ip) || + ip->i_ino == mp->m_sb.sb_uquotino || + ip->i_ino == mp->m_sb.sb_gquotino) return 0; - ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || - xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (! (flags & XFS_QMOPT_ILOCKED)) - xfs_ilock(ip, XFS_ILOCK_EXCL); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (XFS_IS_UQUOTA_ON(mp)) { error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, @@ -849,8 +851,7 @@ xfs_qm_dqattach( xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); } - done: - + done: #ifdef QUOTADEBUG if (! error) { if (XFS_IS_UQUOTA_ON(mp)) @@ -858,15 +859,22 @@ xfs_qm_dqattach( if (XFS_IS_OQUOTA_ON(mp)) ASSERT(ip->i_gdquot); } + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); #endif + return error; +} - if (! (flags & XFS_QMOPT_ILOCKED)) - xfs_iunlock(ip, XFS_ILOCK_EXCL); +int +xfs_qm_dqattach( + struct xfs_inode *ip, + uint flags) +{ + int error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_qm_dqattach_locked(ip, flags); + xfs_iunlock(ip, XFS_ILOCK_EXCL); -#ifdef QUOTADEBUG - else - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -#endif return error; } @@ -896,11 +904,6 @@ xfs_qm_dqdetach( } } -/* - * This is called to sync quotas. We can be told to use non-blocking - * semantics by either the SYNC_BDFLUSH flag or the absence of the - * SYNC_WAIT flag. - */ int xfs_qm_sync( xfs_mount_t *mp, @@ -909,17 +912,13 @@ xfs_qm_sync( int recl, restarts; xfs_dquot_t *dqp; uint flush_flags; - boolean_t nowait; int error; - if (! XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; + flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI; restarts = 0; - /* - * We won't block unless we are asked to. - */ - nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0); again: xfs_qm_mplist_lock(mp); @@ -939,18 +938,10 @@ xfs_qm_sync( * don't 'seem' to be dirty. ie. don't acquire dqlock. * This is very similar to what xfs_sync does with inodes. */ - if (flags & SYNC_BDFLUSH) { - if (! XFS_DQ_IS_DIRTY(dqp)) + if (flags & SYNC_TRYLOCK) { + if (!XFS_DQ_IS_DIRTY(dqp)) continue; - } - - if (nowait) { - /* - * Try to acquire the dquot lock. We are NOT out of - * lock order, but we just don't want to wait for this - * lock, unless somebody wanted us to. - */ - if (! xfs_qm_dqlock_nowait(dqp)) + if (!xfs_qm_dqlock_nowait(dqp)) continue; } else { xfs_dqlock(dqp); @@ -967,7 +958,7 @@ xfs_qm_sync( /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); if (!xfs_dqflock_nowait(dqp)) { - if (nowait) { + if (flags & SYNC_TRYLOCK) { xfs_dqunlock(dqp); continue; } @@ -985,7 +976,6 @@ xfs_qm_sync( * Let go of the mplist lock. We don't want to hold it * across a disk write */ - flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC; xfs_qm_mplist_unlock(mp); xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH"); error = xfs_qm_dqflush(dqp, flush_flags); @@ -2319,20 +2309,20 @@ xfs_qm_write_sb_changes( */ int xfs_qm_vop_dqalloc( - xfs_mount_t *mp, - xfs_inode_t *ip, - uid_t uid, - gid_t gid, - prid_t prid, - uint flags, - xfs_dquot_t **O_udqpp, - xfs_dquot_t **O_gdqpp) + struct xfs_inode *ip, + uid_t uid, + gid_t gid, + prid_t prid, + uint flags, + struct xfs_dquot **O_udqpp, + struct xfs_dquot **O_gdqpp) { - int error; - xfs_dquot_t *uq, *gq; - uint lockflags; + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *uq, *gq; + int error; + uint lockflags; - if (!XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; lockflags = XFS_ILOCK_EXCL; @@ -2346,8 +2336,8 @@ xfs_qm_vop_dqalloc( * if necessary. The dquot(s) will not be locked. */ if (XFS_NOT_DQATTACHED(mp, ip)) { - if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC | - XFS_QMOPT_ILOCKED))) { + error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); + if (error) { xfs_iunlock(ip, lockflags); return error; } @@ -2469,6 +2459,7 @@ xfs_qm_vop_chown( uint bfield = XFS_IS_REALTIME_INODE(ip) ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); @@ -2508,13 +2499,13 @@ xfs_qm_vop_chown_reserve( xfs_dquot_t *gdqp, uint flags) { - int error; - xfs_mount_t *mp; + xfs_mount_t *mp = ip->i_mount; uint delblks, blkflags, prjflags = 0; xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; + int error; + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - mp = ip->i_mount; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); delblks = ip->i_delayed_blks; @@ -2582,28 +2573,23 @@ xfs_qm_vop_chown_reserve( int xfs_qm_vop_rename_dqattach( - xfs_inode_t **i_tab) + struct xfs_inode **i_tab) { - xfs_inode_t *ip; - int i; - int error; + struct xfs_mount *mp = i_tab[0]->i_mount; + int i; - ip = i_tab[0]; - - if (! XFS_IS_QUOTA_ON(ip->i_mount)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; - if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; - } - for (i = 1; (i < 4 && i_tab[i]); i++) { + for (i = 0; (i < 4 && i_tab[i]); i++) { + struct xfs_inode *ip = i_tab[i]; + int error; + /* * Watch out for duplicate entries in the table. */ - if ((ip = i_tab[i]) != i_tab[i-1]) { - if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { + if (i == 0 || ip != i_tab[i-1]) { + if (XFS_NOT_DQATTACHED(mp, ip)) { error = xfs_qm_dqattach(ip, 0); if (error) return error; @@ -2614,17 +2600,19 @@ xfs_qm_vop_rename_dqattach( } void -xfs_qm_vop_dqattach_and_dqmod_newinode( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dquot_t *udqp, - xfs_dquot_t *gdqp) +xfs_qm_vop_create_dqattach( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot *udqp, + struct xfs_dquot *gdqp) { - if (!XFS_IS_QUOTA_ON(tp->t_mountp)) + struct xfs_mount *mp = tp->t_mountp; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); if (udqp) { xfs_dqlock(udqp); @@ -2632,7 +2620,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode( xfs_dqunlock(udqp); ASSERT(ip->i_udquot == NULL); ip->i_udquot = udqp; - ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp)); + ASSERT(XFS_IS_UQUOTA_ON(mp)); ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } @@ -2642,8 +2630,8 @@ xfs_qm_vop_dqattach_and_dqmod_newinode( xfs_dqunlock(gdqp); ASSERT(ip->i_gdquot == NULL); ip->i_gdquot = gdqp; - ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp)); - ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ? + ASSERT(XFS_IS_OQUOTA_ON(mp)); + ASSERT((XFS_IS_GQUOTA_ON(mp) ? ip->i_d.di_gid : ip->i_d.di_projid) == be32_to_cpu(gdqp->q_core.d_id)); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index a371954cae1b..495564b8af38 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -127,8 +127,6 @@ typedef struct xfs_quotainfo { } xfs_quotainfo_t; -extern xfs_dqtrxops_t xfs_trans_dquot_ops; - extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, xfs_dquot_t *, xfs_dquot_t *, long, long, uint); @@ -159,17 +157,11 @@ typedef struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); -extern void xfs_qm_mount_quotas(xfs_mount_t *); extern int xfs_qm_quotacheck(xfs_mount_t *); -extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); -extern void xfs_qm_unmount_quotas(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); -extern int xfs_qm_sync(xfs_mount_t *, int); /* dquot stuff */ extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); -extern int xfs_qm_dqattach(xfs_inode_t *, uint); -extern void xfs_qm_dqdetach(xfs_inode_t *); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); @@ -183,19 +175,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); -/* vop stuff */ -extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, - uid_t, gid_t, prid_t, uint, - xfs_dquot_t **, xfs_dquot_t **); -extern void xfs_qm_vop_dqattach_and_dqmod_newinode( - xfs_trans_t *, xfs_inode_t *, - xfs_dquot_t *, xfs_dquot_t *); -extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **); -extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *, - xfs_dquot_t **, xfs_dquot_t *); -extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *, - xfs_dquot_t *, xfs_dquot_t *, uint); - /* list stuff */ extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); extern void xfs_qm_freelist_unlink(xfs_dquot_t *); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 63037c689a4b..a5346630dfae 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -42,7 +42,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" @@ -84,7 +83,7 @@ xfs_fill_statvfs_from_dquot( * return a statvfs of the project, not the entire filesystem. * This makes such trees appear as if they are filesystems in themselves. */ -STATIC void +void xfs_qm_statvfs( xfs_inode_t *ip, struct kstatfs *statp) @@ -92,20 +91,13 @@ xfs_qm_statvfs( xfs_mount_t *mp = ip->i_mount; xfs_dquot_t *dqp; - if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || - !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == - (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) - return; - if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { - xfs_disk_dquot_t *dp = &dqp->q_core; - - xfs_fill_statvfs_from_dquot(statp, dp); + xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); xfs_qm_dqput(dqp); } } -STATIC int +int xfs_qm_newmount( xfs_mount_t *mp, uint *needquotamount, @@ -114,9 +106,6 @@ xfs_qm_newmount( uint quotaondisk; uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; - *quotaflags = 0; - *needquotamount = B_FALSE; - quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); @@ -179,66 +168,6 @@ xfs_qm_newmount( return 0; } -STATIC int -xfs_qm_endmount( - xfs_mount_t *mp, - uint needquotamount, - uint quotaflags) -{ - if (needquotamount) { - ASSERT(mp->m_qflags == 0); - mp->m_qflags = quotaflags; - xfs_qm_mount_quotas(mp); - } - -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - if (! (XFS_IS_QUOTA_ON(mp))) - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); - else - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); -#endif - -#ifdef QUOTADEBUG - if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp)) - cmn_err(CE_WARN, "XFS: mount internalqcheck failed"); -#endif - - return 0; -} - -STATIC void -xfs_qm_dqrele_null( - xfs_dquot_t *dq) -{ - /* - * Called from XFS, where we always check first for a NULL dquot. - */ - if (!dq) - return; - xfs_qm_dqrele(dq); -} - - -struct xfs_qmops xfs_qmcore_xfs = { - .xfs_qminit = xfs_qm_newmount, - .xfs_qmdone = xfs_qm_unmount_quotadestroy, - .xfs_qmmount = xfs_qm_endmount, - .xfs_qmunmount = xfs_qm_unmount_quotas, - .xfs_dqrele = xfs_qm_dqrele_null, - .xfs_dqattach = xfs_qm_dqattach, - .xfs_dqdetach = xfs_qm_dqdetach, - .xfs_dqpurgeall = xfs_qm_dqpurge_all, - .xfs_dqvopalloc = xfs_qm_vop_dqalloc, - .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode, - .xfs_dqvoprename = xfs_qm_vop_rename_dqattach, - .xfs_dqvopchown = xfs_qm_vop_chown, - .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve, - .xfs_dqstatvfs = xfs_qm_statvfs, - .xfs_dqsync = xfs_qm_sync, - .xfs_dqtrxops = &xfs_trans_dquot_ops, -}; -EXPORT_SYMBOL(xfs_qmcore_xfs); - void __init xfs_qm_init(void) { diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 709f5f545cf5..21b08c0396a1 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c @@ -42,7 +42,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index c7b66f6506ce..4e4276b956e8 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -45,7 +45,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -847,105 +846,55 @@ xfs_qm_export_flags( } -/* - * Release all the dquots on the inodes in an AG. - */ -STATIC void -xfs_qm_dqrele_inodes_ag( - xfs_mount_t *mp, - int ag, - uint flags) +STATIC int +xfs_dqrele_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) { - xfs_inode_t *ip = NULL; - xfs_perag_t *pag = &mp->m_perag[ag]; - int first_index = 0; - int nr_found; - - do { - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - read_lock(&pag->pag_ici_lock); - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void**)&ip, first_index, 1); - - if (!nr_found) { - read_unlock(&pag->pag_ici_lock); - break; - } - - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { - read_unlock(&pag->pag_ici_lock); - break; - } - - /* skip quota inodes */ - if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { - ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_gdquot == NULL); - read_unlock(&pag->pag_ici_lock); - continue; - } + int error; - /* - * If we can't get a reference on the inode, it must be - * in reclaim. Leave it for the reclaim code to flush. - */ - if (!igrab(VFS_I(ip))) { - read_unlock(&pag->pag_ici_lock); - continue; - } + /* skip quota inodes */ + if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) { + ASSERT(ip->i_udquot == NULL); + ASSERT(ip->i_gdquot == NULL); read_unlock(&pag->pag_ici_lock); + return 0; + } - /* avoid new inodes though we shouldn't find any here */ - if (xfs_iflags_test(ip, XFS_INEW)) { - IRELE(ip); - continue; - } + error = xfs_sync_inode_valid(ip, pag); + if (error) + return error; - xfs_ilock(ip, XFS_ILOCK_EXCL); - if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && - ip->i_gdquot) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } - xfs_iput(ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL); + if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } + xfs_iput(ip, XFS_ILOCK_EXCL); + IRELE(ip); - } while (nr_found); + return 0; } + /* * Go thru all the inodes in the file system, releasing their dquots. + * * Note that the mount structure gets modified to indicate that quotas are off - * AFTER this, in the case of quotaoff. This also gets called from - * xfs_rootumount. + * AFTER this, in the case of quotaoff. */ void xfs_qm_dqrele_all_inodes( struct xfs_mount *mp, uint flags) { - int i; - ASSERT(mp->m_quotainfo); - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - if (!mp->m_perag[i].pag_ici_init) - continue; - xfs_qm_dqrele_inodes_ag(mp, i, flags); - } + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); } /*------------------------------------------------------------------------*/ diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 447173bcf96d..97ac9640be98 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -42,7 +42,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -111,7 +110,7 @@ xfs_trans_log_dquot( * Carry forward whatever is left of the quota blk reservation to * the spanky new transaction */ -STATIC void +void xfs_trans_dup_dqinfo( xfs_trans_t *otp, xfs_trans_t *ntp) @@ -167,19 +166,17 @@ xfs_trans_dup_dqinfo( /* * Wrap around mod_dquot to account for both user and group quotas. */ -STATIC void +void xfs_trans_mod_dquot_byino( xfs_trans_t *tp, xfs_inode_t *ip, uint field, long delta) { - xfs_mount_t *mp; - - ASSERT(tp); - mp = tp->t_mountp; + xfs_mount_t *mp = tp->t_mountp; - if (!XFS_IS_QUOTA_ON(mp) || + if (!XFS_IS_QUOTA_RUNNING(mp) || + !XFS_IS_QUOTA_ON(mp) || ip->i_ino == mp->m_sb.sb_uquotino || ip->i_ino == mp->m_sb.sb_gquotino) return; @@ -229,6 +226,7 @@ xfs_trans_mod_dquot( xfs_dqtrx_t *qtrx; ASSERT(tp); + ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); qtrx = NULL; if (tp->t_dqinfo == NULL) @@ -346,7 +344,7 @@ xfs_trans_dqlockedjoin( * Unreserve just the reservations done by this transaction. * dquot is still left locked at exit. */ -STATIC void +void xfs_trans_apply_dquot_deltas( xfs_trans_t *tp) { @@ -357,7 +355,7 @@ xfs_trans_apply_dquot_deltas( long totalbdelta; long totalrtbdelta; - if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) return; ASSERT(tp->t_dqinfo); @@ -531,7 +529,7 @@ xfs_trans_apply_dquot_deltas( * we simply throw those away, since that's the expected behavior * when a transaction is curtailed without a commit. */ -STATIC void +void xfs_trans_unreserve_and_mod_dquots( xfs_trans_t *tp) { @@ -768,7 +766,7 @@ xfs_trans_reserve_quota_bydquots( { int resvd = 0, error; - if (!XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; if (tp && tp->t_dqinfo == NULL) @@ -811,18 +809,17 @@ xfs_trans_reserve_quota_bydquots( * This doesn't change the actual usage, just the reservation. * The inode sent in is locked. */ -STATIC int +int xfs_trans_reserve_quota_nblks( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_inode_t *ip, - long nblks, - long ninos, - uint flags) + struct xfs_trans *tp, + struct xfs_inode *ip, + long nblks, + long ninos, + uint flags) { - int error; + struct xfs_mount *mp = ip->i_mount; - if (!XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; if (XFS_IS_PQUOTA_ON(mp)) flags |= XFS_QMOPT_ENOSPC; @@ -831,7 +828,6 @@ xfs_trans_reserve_quota_nblks( ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == XFS_TRANS_DQ_RES_RTBLKS || (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == @@ -840,11 +836,9 @@ xfs_trans_reserve_quota_nblks( /* * Reserve nblks against these dquots, with trans as the mediator. */ - error = xfs_trans_reserve_quota_bydquots(tp, mp, - ip->i_udquot, ip->i_gdquot, - nblks, ninos, - flags); - return error; + return xfs_trans_reserve_quota_bydquots(tp, mp, + ip->i_udquot, ip->i_gdquot, + nblks, ninos, flags); } /* @@ -895,25 +889,15 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); + tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); } -STATIC void +void xfs_trans_free_dqinfo( xfs_trans_t *tp) { if (!tp->t_dqinfo) return; - kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo); - (tp)->t_dqinfo = NULL; + kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); + tp->t_dqinfo = NULL; } - -xfs_dqtrxops_t xfs_trans_dquot_ops = { - .qo_dup_dqinfo = xfs_trans_dup_dqinfo, - .qo_free_dqinfo = xfs_trans_free_dqinfo, - .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino, - .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas, - .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks, - .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots, - .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots, -}; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c deleted file mode 100644 index a8cdd73999a4..000000000000 --- a/fs/xfs/xfs_acl.c +++ /dev/null @@ -1,874 +0,0 @@ -/* - * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_inum.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_vnodeops.h" - -#include <linux/capability.h> -#include <linux/posix_acl_xattr.h> - -STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *); -STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); -STATIC void xfs_acl_get_endian(xfs_acl_t *); -STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); -STATIC int xfs_acl_invalid(xfs_acl_t *); -STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); -STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *); -STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *); -STATIC int xfs_acl_allow_set(struct inode *, int); - -kmem_zone_t *xfs_acl_zone; - - -/* - * Test for existence of access ACL attribute as efficiently as possible. - */ -int -xfs_acl_vhasacl_access( - struct inode *vp) -{ - int error; - - xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error); - return (error == 0); -} - -/* - * Test for existence of default ACL attribute as efficiently as possible. - */ -int -xfs_acl_vhasacl_default( - struct inode *vp) -{ - int error; - - if (!S_ISDIR(vp->i_mode)) - return 0; - xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); - return (error == 0); -} - -/* - * Convert from extended attribute representation to in-memory for XFS. - */ -STATIC int -posix_acl_xattr_to_xfs( - posix_acl_xattr_header *src, - size_t size, - xfs_acl_t *dest) -{ - posix_acl_xattr_entry *src_entry; - xfs_acl_entry_t *dest_entry; - int n; - - if (!src || !dest) - return EINVAL; - - if (size < sizeof(posix_acl_xattr_header)) - return EINVAL; - - if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) - return EOPNOTSUPP; - - memset(dest, 0, sizeof(xfs_acl_t)); - dest->acl_cnt = posix_acl_xattr_count(size); - if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES) - return EINVAL; - - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. - */ - if (!dest->acl_cnt) - return 0; - - src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src)); - dest_entry = &dest->acl_entry[0]; - - for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) { - dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm); - if (_ACL_PERM_INVALID(dest_entry->ae_perm)) - return EINVAL; - dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag); - switch(dest_entry->ae_tag) { - case ACL_USER: - case ACL_GROUP: - dest_entry->ae_id = le32_to_cpu(src_entry->e_id); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - dest_entry->ae_id = ACL_UNDEFINED_ID; - break; - default: - return EINVAL; - } - } - if (xfs_acl_invalid(dest)) - return EINVAL; - - return 0; -} - -/* - * Comparison function called from xfs_sort(). - * Primary key is ae_tag, secondary key is ae_id. - */ -STATIC int -xfs_acl_entry_compare( - const void *va, - const void *vb) -{ - xfs_acl_entry_t *a = (xfs_acl_entry_t *)va, - *b = (xfs_acl_entry_t *)vb; - - if (a->ae_tag == b->ae_tag) - return (a->ae_id - b->ae_id); - return (a->ae_tag - b->ae_tag); -} - -/* - * Convert from in-memory XFS to extended attribute representation. - */ -STATIC int -posix_acl_xfs_to_xattr( - xfs_acl_t *src, - posix_acl_xattr_header *dest, - size_t size) -{ - int n; - size_t new_size = posix_acl_xattr_size(src->acl_cnt); - posix_acl_xattr_entry *dest_entry; - xfs_acl_entry_t *src_entry; - - if (size < new_size) - return -ERANGE; - - /* Need to sort src XFS ACL by <ae_tag,ae_id> */ - xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]), - xfs_acl_entry_compare); - - dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); - dest_entry = &dest->a_entries[0]; - src_entry = &src->acl_entry[0]; - for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) { - dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm); - if (_ACL_PERM_INVALID(src_entry->ae_perm)) - return -EINVAL; - dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag); - switch (src_entry->ae_tag) { - case ACL_USER: - case ACL_GROUP: - dest_entry->e_id = cpu_to_le32(src_entry->ae_id); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); - break; - default: - return -EINVAL; - } - } - return new_size; -} - -int -xfs_acl_vget( - struct inode *vp, - void *acl, - size_t size, - int kind) -{ - int error; - xfs_acl_t *xfs_acl = NULL; - posix_acl_xattr_header *ext_acl = acl; - int flags = 0; - - if(size) { - if (!(_ACL_ALLOC(xfs_acl))) { - error = ENOMEM; - goto out; - } - memset(xfs_acl, 0, sizeof(xfs_acl_t)); - } else - flags = ATTR_KERNOVAL; - - xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error); - if (error) - goto out; - - if (!size) { - error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES); - } else { - if (xfs_acl_invalid(xfs_acl)) { - error = EINVAL; - goto out; - } - if (kind == _ACL_TYPE_ACCESS) - xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl); - error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); - } -out: - if(xfs_acl) - _ACL_FREE(xfs_acl); - return -error; -} - -int -xfs_acl_vremove( - struct inode *vp, - int kind) -{ - int error; - - error = xfs_acl_allow_set(vp, kind); - if (!error) { - error = xfs_attr_remove(XFS_I(vp), - kind == _ACL_TYPE_DEFAULT? - SGI_ACL_DEFAULT: SGI_ACL_FILE, - ATTR_ROOT); - if (error == ENOATTR) - error = 0; /* 'scool */ - } - return -error; -} - -int -xfs_acl_vset( - struct inode *vp, - void *acl, - size_t size, - int kind) -{ - posix_acl_xattr_header *ext_acl = acl; - xfs_acl_t *xfs_acl; - int error; - int basicperms = 0; /* more than std unix perms? */ - - if (!acl) - return -EINVAL; - - if (!(_ACL_ALLOC(xfs_acl))) - return -ENOMEM; - - error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl); - if (error) { - _ACL_FREE(xfs_acl); - return -error; - } - if (!xfs_acl->acl_cnt) { - _ACL_FREE(xfs_acl); - return 0; - } - - error = xfs_acl_allow_set(vp, kind); - - /* Incoming ACL exists, set file mode based on its value */ - if (!error && kind == _ACL_TYPE_ACCESS) - error = xfs_acl_setmode(vp, xfs_acl, &basicperms); - - if (error) - goto out; - - /* - * If we have more than std unix permissions, set up the actual attr. - * Otherwise, delete any existing attr. This prevents us from - * having actual attrs for permissions that can be stored in the - * standard permission bits. - */ - if (!basicperms) { - xfs_acl_set_attr(vp, xfs_acl, kind, &error); - } else { - error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); - } - -out: - _ACL_FREE(xfs_acl); - return -error; -} - -int -xfs_acl_iaccess( - xfs_inode_t *ip, - mode_t mode, - cred_t *cr) -{ - xfs_acl_t *acl; - int rval; - struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE}; - - if (!(_ACL_ALLOC(acl))) - return -1; - - /* If the file has no ACL return -1. */ - rval = sizeof(xfs_acl_t); - if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) { - _ACL_FREE(acl); - return -1; - } - xfs_acl_get_endian(acl); - - /* If the file has an empty ACL return -1. */ - if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) { - _ACL_FREE(acl); - return -1; - } - - /* Synchronize ACL with mode bits */ - xfs_acl_sync_mode(ip->i_d.di_mode, acl); - - rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr); - _ACL_FREE(acl); - return rval; -} - -STATIC int -xfs_acl_allow_set( - struct inode *vp, - int kind) -{ - if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) - return EPERM; - if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode)) - return ENOTDIR; - if (vp->i_sb->s_flags & MS_RDONLY) - return EROFS; - if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER)) - return EPERM; - return 0; -} - -/* - * Note: cr is only used here for the capability check if the ACL test fails. - * It is not used to find out the credentials uid or groups etc, as was - * done in IRIX. It is assumed that the uid and groups for the current - * thread are taken from "current" instead of the cr parameter. - */ -STATIC int -xfs_acl_access( - uid_t fuid, - gid_t fgid, - xfs_acl_t *fap, - mode_t md, - cred_t *cr) -{ - xfs_acl_entry_t matched; - int i, allows; - int maskallows = -1; /* true, but not 1, either */ - int seen_userobj = 0; - - matched.ae_tag = 0; /* Invalid type */ - matched.ae_perm = 0; - - for (i = 0; i < fap->acl_cnt; i++) { - /* - * Break out if we've got a user_obj entry or - * a user entry and the mask (and have processed USER_OBJ) - */ - if (matched.ae_tag == ACL_USER_OBJ) - break; - if (matched.ae_tag == ACL_USER) { - if (maskallows != -1 && seen_userobj) - break; - if (fap->acl_entry[i].ae_tag != ACL_MASK && - fap->acl_entry[i].ae_tag != ACL_USER_OBJ) - continue; - } - /* True if this entry allows the requested access */ - allows = ((fap->acl_entry[i].ae_perm & md) == md); - - switch (fap->acl_entry[i].ae_tag) { - case ACL_USER_OBJ: - seen_userobj = 1; - if (fuid != current_fsuid()) - continue; - matched.ae_tag = ACL_USER_OBJ; - matched.ae_perm = allows; - break; - case ACL_USER: - if (fap->acl_entry[i].ae_id != current_fsuid()) - continue; - matched.ae_tag = ACL_USER; - matched.ae_perm = allows; - break; - case ACL_GROUP_OBJ: - if ((matched.ae_tag == ACL_GROUP_OBJ || - matched.ae_tag == ACL_GROUP) && !allows) - continue; - if (!in_group_p(fgid)) - continue; - matched.ae_tag = ACL_GROUP_OBJ; - matched.ae_perm = allows; - break; - case ACL_GROUP: - if ((matched.ae_tag == ACL_GROUP_OBJ || - matched.ae_tag == ACL_GROUP) && !allows) - continue; - if (!in_group_p(fap->acl_entry[i].ae_id)) - continue; - matched.ae_tag = ACL_GROUP; - matched.ae_perm = allows; - break; - case ACL_MASK: - maskallows = allows; - break; - case ACL_OTHER: - if (matched.ae_tag != 0) - continue; - matched.ae_tag = ACL_OTHER; - matched.ae_perm = allows; - break; - } - } - /* - * First possibility is that no matched entry allows access. - * The capability to override DAC may exist, so check for it. - */ - switch (matched.ae_tag) { - case ACL_OTHER: - case ACL_USER_OBJ: - if (matched.ae_perm) - return 0; - break; - case ACL_USER: - case ACL_GROUP_OBJ: - case ACL_GROUP: - if (maskallows && matched.ae_perm) - return 0; - break; - case 0: - break; - } - - /* EACCES tells generic_permission to check for capability overrides */ - return EACCES; -} - -/* - * ACL validity checker. - * This acl validation routine checks each ACL entry read in makes sense. - */ -STATIC int -xfs_acl_invalid( - xfs_acl_t *aclp) -{ - xfs_acl_entry_t *entry, *e; - int user = 0, group = 0, other = 0, mask = 0; - int mask_required = 0; - int i, j; - - if (!aclp) - goto acl_invalid; - - if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES) - goto acl_invalid; - - for (i = 0; i < aclp->acl_cnt; i++) { - entry = &aclp->acl_entry[i]; - switch (entry->ae_tag) { - case ACL_USER_OBJ: - if (user++) - goto acl_invalid; - break; - case ACL_GROUP_OBJ: - if (group++) - goto acl_invalid; - break; - case ACL_OTHER: - if (other++) - goto acl_invalid; - break; - case ACL_USER: - case ACL_GROUP: - for (j = i + 1; j < aclp->acl_cnt; j++) { - e = &aclp->acl_entry[j]; - if (e->ae_id == entry->ae_id && - e->ae_tag == entry->ae_tag) - goto acl_invalid; - } - mask_required++; - break; - case ACL_MASK: - if (mask++) - goto acl_invalid; - break; - default: - goto acl_invalid; - } - } - if (!user || !group || !other || (mask_required && !mask)) - goto acl_invalid; - else - return 0; -acl_invalid: - return EINVAL; -} - -/* - * Do ACL endian conversion. - */ -STATIC void -xfs_acl_get_endian( - xfs_acl_t *aclp) -{ - xfs_acl_entry_t *ace, *end; - - INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); - end = &aclp->acl_entry[0]+aclp->acl_cnt; - for (ace = &aclp->acl_entry[0]; ace < end; ace++) { - INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag); - INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id); - INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm); - } -} - -/* - * Get the ACL from the EA and do endian conversion. - */ -STATIC void -xfs_acl_get_attr( - struct inode *vp, - xfs_acl_t *aclp, - int kind, - int flags, - int *error) -{ - int len = sizeof(xfs_acl_t); - - ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); - flags |= ATTR_ROOT; - *error = xfs_attr_get(XFS_I(vp), - kind == _ACL_TYPE_ACCESS ? - SGI_ACL_FILE : SGI_ACL_DEFAULT, - (char *)aclp, &len, flags); - if (*error || (flags & ATTR_KERNOVAL)) - return; - xfs_acl_get_endian(aclp); -} - -/* - * Set the EA with the ACL and do endian conversion. - */ -STATIC void -xfs_acl_set_attr( - struct inode *vp, - xfs_acl_t *aclp, - int kind, - int *error) -{ - xfs_acl_entry_t *ace, *newace, *end; - xfs_acl_t *newacl; - int len; - - if (!(_ACL_ALLOC(newacl))) { - *error = ENOMEM; - return; - } - - len = sizeof(xfs_acl_t) - - (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt)); - end = &aclp->acl_entry[0]+aclp->acl_cnt; - for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0]; - ace < end; - ace++, newace++) { - INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag); - INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id); - INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); - } - INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); - *error = xfs_attr_set(XFS_I(vp), - kind == _ACL_TYPE_ACCESS ? - SGI_ACL_FILE: SGI_ACL_DEFAULT, - (char *)newacl, len, ATTR_ROOT); - _ACL_FREE(newacl); -} - -int -xfs_acl_vtoacl( - struct inode *vp, - xfs_acl_t *access_acl, - xfs_acl_t *default_acl) -{ - int error = 0; - - if (access_acl) { - /* - * Get the Access ACL and the mode. If either cannot - * be obtained for some reason, invalidate the access ACL. - */ - xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); - if (error) - access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; - else /* We have a good ACL and the file mode, synchronize. */ - xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl); - } - - if (default_acl) { - xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error); - if (error) - default_acl->acl_cnt = XFS_ACL_NOT_PRESENT; - } - return error; -} - -/* - * This function retrieves the parent directory's acl, processes it - * and lets the child inherit the acl(s) that it should. - */ -int -xfs_acl_inherit( - struct inode *vp, - mode_t mode, - xfs_acl_t *pdaclp) -{ - xfs_acl_t *cacl; - int error = 0; - int basicperms = 0; - - /* - * If the parent does not have a default ACL, or it's an - * invalid ACL, we're done. - */ - if (!vp) - return 0; - if (!pdaclp || xfs_acl_invalid(pdaclp)) - return 0; - - /* - * Copy the default ACL of the containing directory to - * the access ACL of the new file and use the mode that - * was passed in to set up the correct initial values for - * the u::,g::[m::], and o:: entries. This is what makes - * umask() "work" with ACL's. - */ - - if (!(_ACL_ALLOC(cacl))) - return ENOMEM; - - memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); - xfs_acl_filter_mode(mode, cacl); - error = xfs_acl_setmode(vp, cacl, &basicperms); - if (error) - goto out_error; - - /* - * Set the Default and Access ACL on the file. The mode is already - * set on the file, so we don't need to worry about that. - * - * If the new file is a directory, its default ACL is a copy of - * the containing directory's default ACL. - */ - if (S_ISDIR(vp->i_mode)) - xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); - if (!error && !basicperms) - xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); -out_error: - _ACL_FREE(cacl); - return error; -} - -/* - * Set up the correct mode on the file based on the supplied ACL. This - * makes sure that the mode on the file reflects the state of the - * u::,g::[m::], and o:: entries in the ACL. Since the mode is where - * the ACL is going to get the permissions for these entries, we must - * synchronize the mode whenever we set the ACL on a file. - */ -STATIC int -xfs_acl_setmode( - struct inode *vp, - xfs_acl_t *acl, - int *basicperms) -{ - struct iattr iattr; - xfs_acl_entry_t *ap; - xfs_acl_entry_t *gap = NULL; - int i, nomask = 1; - - *basicperms = 1; - - if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) - return 0; - - /* - * Copy the u::, g::, o::, and m:: bits from the ACL into the - * mode. The m:: bits take precedence over the g:: bits. - */ - iattr.ia_valid = ATTR_MODE; - iattr.ia_mode = XFS_I(vp)->i_d.di_mode; - iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); - ap = acl->acl_entry; - for (i = 0; i < acl->acl_cnt; ++i) { - switch (ap->ae_tag) { - case ACL_USER_OBJ: - iattr.ia_mode |= ap->ae_perm << 6; - break; - case ACL_GROUP_OBJ: - gap = ap; - break; - case ACL_MASK: /* more than just standard modes */ - nomask = 0; - iattr.ia_mode |= ap->ae_perm << 3; - *basicperms = 0; - break; - case ACL_OTHER: - iattr.ia_mode |= ap->ae_perm; - break; - default: /* more than just standard modes */ - *basicperms = 0; - break; - } - ap++; - } - - /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ - if (gap && nomask) - iattr.ia_mode |= gap->ae_perm << 3; - - return xfs_setattr(XFS_I(vp), &iattr, 0); -} - -/* - * The permissions for the special ACL entries (u::, g::[m::], o::) are - * actually stored in the file mode (if there is both a group and a mask, - * the group is stored in the ACL entry and the mask is stored on the file). - * This allows the mode to remain automatically in sync with the ACL without - * the need for a call-back to the ACL system at every point where the mode - * could change. This function takes the permissions from the specified mode - * and places it in the supplied ACL. - * - * This implementation draws its validity from the fact that, when the ACL - * was assigned, the mode was copied from the ACL. - * If the mode did not change, therefore, the mode remains exactly what was - * taken from the special ACL entries at assignment. - * If a subsequent chmod() was done, the POSIX spec says that the change in - * mode must cause an update to the ACL seen at user level and used for - * access checks. Before and after a mode change, therefore, the file mode - * most accurately reflects what the special ACL entries should permit/deny. - * - * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly, - * the existing mode bits will override whatever is in the - * ACL. Similarly, if there is a pre-existing ACL that was - * never in sync with its mode (owing to a bug in 6.5 and - * before), it will now magically (or mystically) be - * synchronized. This could cause slight astonishment, but - * it is better than inconsistent permissions. - * - * The supplied ACL is a template that may contain any combination - * of special entries. These are treated as place holders when we fill - * out the ACL. This routine does not add or remove special entries, it - * simply unites each special entry with its associated set of permissions. - */ -STATIC void -xfs_acl_sync_mode( - mode_t mode, - xfs_acl_t *acl) -{ - int i, nomask = 1; - xfs_acl_entry_t *ap; - xfs_acl_entry_t *gap = NULL; - - /* - * Set ACL entries. POSIX1003.1eD16 requires that the MASK - * be set instead of the GROUP entry, if there is a MASK. - */ - for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { - switch (ap->ae_tag) { - case ACL_USER_OBJ: - ap->ae_perm = (mode >> 6) & 0x7; - break; - case ACL_GROUP_OBJ: - gap = ap; - break; - case ACL_MASK: - nomask = 0; - ap->ae_perm = (mode >> 3) & 0x7; - break; - case ACL_OTHER: - ap->ae_perm = mode & 0x7; - break; - default: - break; - } - } - /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ - if (gap && nomask) - gap->ae_perm = (mode >> 3) & 0x7; -} - -/* - * When inheriting an Access ACL from a directory Default ACL, - * the ACL bits are set to the intersection of the ACL default - * permission bits and the file permission bits in mode. If there - * are no permission bits on the file then we must not give them - * the ACL. This is what what makes umask() work with ACLs. - */ -STATIC void -xfs_acl_filter_mode( - mode_t mode, - xfs_acl_t *acl) -{ - int i, nomask = 1; - xfs_acl_entry_t *ap; - xfs_acl_entry_t *gap = NULL; - - /* - * Set ACL entries. POSIX1003.1eD16 requires that the MASK - * be merged with GROUP entry, if there is a MASK. - */ - for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { - switch (ap->ae_tag) { - case ACL_USER_OBJ: - ap->ae_perm &= (mode >> 6) & 0x7; - break; - case ACL_GROUP_OBJ: - gap = ap; - break; - case ACL_MASK: - nomask = 0; - ap->ae_perm &= (mode >> 3) & 0x7; - break; - case ACL_OTHER: - ap->ae_perm &= mode & 0x7; - break; - default: - break; - } - } - /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ - if (gap && nomask) - gap->ae_perm &= (mode >> 3) & 0x7; -} diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 642f1db4def4..63dc1f2efad5 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -18,81 +18,48 @@ #ifndef __XFS_ACL_H__ #define __XFS_ACL_H__ -/* - * Access Control Lists - */ -typedef __uint16_t xfs_acl_perm_t; -typedef __int32_t xfs_acl_tag_t; -typedef __int32_t xfs_acl_id_t; +struct inode; +struct posix_acl; +struct xfs_inode; #define XFS_ACL_MAX_ENTRIES 25 #define XFS_ACL_NOT_PRESENT (-1) -typedef struct xfs_acl_entry { - xfs_acl_tag_t ae_tag; - xfs_acl_id_t ae_id; - xfs_acl_perm_t ae_perm; -} xfs_acl_entry_t; - -typedef struct xfs_acl { - __int32_t acl_cnt; - xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES]; -} xfs_acl_t; +/* On-disk XFS access control list structure */ +struct xfs_acl { + __be32 acl_cnt; + struct xfs_acl_entry { + __be32 ae_tag; + __be32 ae_id; + __be16 ae_perm; + } acl_entry[XFS_ACL_MAX_ENTRIES]; +}; /* On-disk XFS extended attribute names */ -#define SGI_ACL_FILE "SGI_ACL_FILE" -#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" +#define SGI_ACL_FILE "SGI_ACL_FILE" +#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) -#define _ACL_TYPE_ACCESS 1 -#define _ACL_TYPE_DEFAULT 2 - #ifdef CONFIG_XFS_POSIX_ACL +extern int xfs_check_acl(struct inode *inode, int mask); +extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); +extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); +extern int xfs_acl_chmod(struct inode *inode); +extern void xfs_inode_init_acls(struct xfs_inode *ip); +extern void xfs_inode_clear_acls(struct xfs_inode *ip); +extern int posix_acl_access_exists(struct inode *inode); +extern int posix_acl_default_exists(struct inode *inode); -struct vattr; -struct xfs_inode; - -extern struct kmem_zone *xfs_acl_zone; -#define xfs_acl_zone_init(zone, name) \ - (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) -#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) - -extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *); -extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); -extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *); -extern int xfs_acl_vhasacl_access(struct inode *); -extern int xfs_acl_vhasacl_default(struct inode *); -extern int xfs_acl_vset(struct inode *, void *, size_t, int); -extern int xfs_acl_vget(struct inode *, void *, size_t, int); -extern int xfs_acl_vremove(struct inode *, int); - -#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) - -#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d)) -#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0) -#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0) -#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access -#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default - -#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP)) -#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0) - +extern struct xattr_handler xfs_xattr_system_handler; #else -#define xfs_acl_zone_init(zone,name) -#define xfs_acl_zone_destroy(zone) -#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP) -#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP) -#define xfs_acl_vremove(v,t) (-EOPNOTSUPP) -#define xfs_acl_vhasacl_access(v) (0) -#define xfs_acl_vhasacl_default(v) (0) -#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */ -#define _ACL_FREE(a) ((void)0) -#define _ACL_INHERIT(c,m,d) (0) -#define _ACL_GET_ACCESS(pv,pa) (0) -#define _ACL_GET_DEFAULT(pv,pd) (0) -#define _ACL_ACCESS_EXISTS (NULL) -#define _ACL_DEFAULT_EXISTS (NULL) -#endif - +# define xfs_check_acl NULL +# define xfs_get_acl(inode, type) NULL +# define xfs_inherit_acl(inode, default_acl) 0 +# define xfs_acl_chmod(inode) 0 +# define xfs_inode_init_acls(ip) +# define xfs_inode_clear_acls(ip) +# define posix_acl_access_exists(inode) 0 +# define posix_acl_default_exists(inode) 0 +#endif /* CONFIG_XFS_POSIX_ACL */ #endif /* __XFS_ACL_H__ */ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index c8641f713caa..f24b50b68d03 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -212,6 +212,8 @@ typedef struct xfs_perag /* * tags for inode radix tree */ +#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup + in xfs_inode_ag_iterator */ #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index 53d5e70d1360..0902249354a0 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -73,28 +73,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b) #endif /* __KERNEL__ */ -/* do we need conversion? */ -#define ARCH_NOCONVERT 1 -#ifdef XFS_NATIVE_HOST -# define ARCH_CONVERT ARCH_NOCONVERT -#else -# define ARCH_CONVERT 0 -#endif - -/* generic swapping macros */ - -#ifndef HAVE_SWABMACROS -#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var)))) -#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var)))) -#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var)))) -#endif - -#define INT_SWAP(type, var) \ - ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \ - ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \ - ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \ - (var)))) - /* * get and set integers from potentially unaligned locations */ @@ -107,16 +85,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b) ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ } -/* does not return a value */ -#define INT_SET(reference,arch,valueref) \ - (__builtin_constant_p(valueref) ? \ - (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \ - (void)( \ - ((reference) = (valueref)), \ - ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \ - ) \ - ) - /* * In directories inode numbers are stored as unaligned arrays of unsigned * 8bit integers on disk. diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 5fde1654b430..db15feb906ff 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -45,7 +45,6 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" -#include "xfs_acl.h" #include "xfs_rw.h" #include "xfs_vnodeops.h" @@ -249,8 +248,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, /* * Attach the dquots to the inode. */ - if ((error = XFS_QM_DQATTACH(mp, dp, 0))) - return (error); + error = xfs_qm_dqattach(dp, 0); + if (error) + return error; /* * If the inode doesn't have an attribute fork, add one. @@ -311,7 +311,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, } xfs_ilock(dp, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, + error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { @@ -501,8 +501,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) /* * Attach the dquots to the inode. */ - if ((error = XFS_QM_DQATTACH(mp, dp, 0))) - return (error); + error = xfs_qm_dqattach(dp, 0); + if (error) + return error; /* * Start our first transaction of the day. diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index ca7c6005a487..7928b9983c1d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc( * Adjust the disk quota also. This was reserved * earlier. */ - XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); } else { @@ -2995,7 +2995,7 @@ xfs_bmap_btalloc( * Adjust the disk quota also. This was reserved * earlier. */ - XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, (long) args.len); @@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents( return error; xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); ip->i_d.di_nblocks--; - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); if (cur->bc_bufs[0] == cbp) cur->bc_bufs[0] = NULL; @@ -3386,7 +3386,7 @@ xfs_bmap_del_extent( * Adjust quota data. */ if (qfield) - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks); + xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); /* * Account for change in delayed indirect blocks. @@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree( *firstblock = cur->bc_private.b.firstblock = args.fsbno; cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); /* * Fill in the child block. @@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents( XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, 1); ip->i_d.di_nblocks = 1; - XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip, + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); flags |= xfs_ilog_fext(whichfork); } else { @@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork( XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT))) goto error0; xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ? + error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { @@ -4983,10 +4983,11 @@ xfs_bmapi( * adjusted later. We return if we haven't * allocated blocks already inside this loop. */ - if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS( - mp, NULL, ip, (long)alen, 0, + error = xfs_trans_reserve_quota_nblks( + NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : - XFS_QMOPT_RES_REGBLKS))) { + XFS_QMOPT_RES_REGBLKS); + if (error) { if (n == 0) { *nmap = 0; ASSERT(cur == NULL); @@ -5035,8 +5036,8 @@ xfs_bmapi( if (XFS_IS_QUOTA_ON(mp)) /* unreserve the blocks now */ (void) - XFS_TRANS_UNRESERVE_QUOTA_NBLKS( - mp, NULL, ip, + xfs_trans_unreserve_quota_nblks( + NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); @@ -5691,14 +5692,14 @@ xfs_bunmapi( do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, (int64_t)rtexts, rsvd); - (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, - NULL, ip, -((long)del.br_blockcount), 0, + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)del.br_blockcount, rsvd); - (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, - NULL, ip, -((long)del.br_blockcount), 0, + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); } ip->i_delayed_blks -= del.br_blockcount; @@ -6085,6 +6086,7 @@ xfs_getbmap( break; } + kmem_free(out); return error; } diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 0760d352586f..5c1ade06578e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -590,7 +590,7 @@ xfs_bmbt_alloc_block( cur->bc_private.b.allocated++; cur->bc_private.b.ip->i_d.di_nblocks++; xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, + xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, XFS_TRANS_DQ_BCOUNT, 1L); new->l = cpu_to_be64(args.fsbno); @@ -618,7 +618,7 @@ xfs_bmbt_free_block( ip->i_d.di_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, bp); return 0; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 6c87c8f304ef..edf8bdf4141f 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -542,10 +542,8 @@ xfs_filestream_associate( * waiting for the lock because someone else is waiting on the lock we * hold and we cannot drop that as we are in a transaction here. * - * Lucky for us, this inversion is rarely a problem because it's a - * directory inode that we are trying to lock here and that means the - * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is - * used. i.e. freeze, remount-ro, quotasync or unmount. + * Lucky for us, this inversion is not a problem because it's a + * directory inode that we are trying to lock here. * * So, if we can't get the iolock without sleeping then just give up */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index f7c06fac8229..c4ea51b55dce 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -239,10 +239,13 @@ typedef struct xfs_fsop_resblks { * Minimum and maximum sizes need for growth checks */ #define XFS_MIN_AG_BLOCKS 64 -#define XFS_MIN_LOG_BLOCKS 512 -#define XFS_MAX_LOG_BLOCKS (64 * 1024) -#define XFS_MIN_LOG_BYTES (256 * 1024) -#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024) +#define XFS_MIN_LOG_BLOCKS 512ULL +#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) +#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) + +/* keep the maximum size under 2^31 by a small amount */ +#define XFS_MAX_LOG_BYTES \ + ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 89b81eedce6a..76c540f719e4 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -18,6 +18,7 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" +#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -82,6 +83,7 @@ xfs_inode_alloc( memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; ip->i_new_size = 0; + xfs_inode_init_acls(ip); /* * Initialize inode's trace buffers. @@ -500,10 +502,7 @@ xfs_ireclaim( * ilock one but will still hold the iolock. */ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - /* - * Release dquots (and their references) if any. - */ - XFS_QM_DQDETACH(ip->i_mount, ip); + xfs_qm_dqdetach(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); switch (ip->i_d.di_mode & S_IFMT) { @@ -561,6 +560,7 @@ xfs_ireclaim( ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); + xfs_inode_clear_acls(ip); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 123b20c8cbf2..1f22d65fed0a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -49,7 +49,6 @@ #include "xfs_utils.h" #include "xfs_dir2_trace.h" #include "xfs_quota.h" -#include "xfs_acl.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f879c1bc4b96..77016702938b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -18,6 +18,7 @@ #ifndef __XFS_INODE_H__ #define __XFS_INODE_H__ +struct posix_acl; struct xfs_dinode; struct xfs_inode; @@ -272,6 +273,11 @@ typedef struct xfs_inode { /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ +#ifdef CONFIG_XFS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + /* Trace buffers per inode. */ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 5aaa2d7ec155..67ae5555a30a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -385,7 +384,7 @@ xfs_iomap_write_direct( * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ - error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED); + error = xfs_qm_dqattach_locked(ip, 0); if (error) return XFS_ERROR(error); @@ -444,8 +443,7 @@ xfs_iomap_write_direct( if (error) goto error_out; - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, - qblocks, 0, quota_flag); + error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) goto error1; @@ -495,7 +493,7 @@ xfs_iomap_write_direct( error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); + xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -582,7 +580,7 @@ xfs_iomap_write_delay( * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ - error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); + error = xfs_qm_dqattach_locked(ip, 0); if (error) return XFS_ERROR(error); @@ -684,7 +682,8 @@ xfs_iomap_write_allocate( /* * Make sure that the dquots are there. */ - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7ba450116d4f..47da2fb45377 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1975,16 +1975,30 @@ xlog_recover_do_reg_buffer( error = 0; if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { + if (item->ri_buf[i].i_addr == NULL) { + cmn_err(CE_ALERT, + "XFS: NULL dquot in %s.", __func__); + goto next; + } + if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) { + cmn_err(CE_ALERT, + "XFS: dquot too small (%d) in %s.", + item->ri_buf[i].i_len, __func__); + goto next; + } error = xfs_qm_dqcheck((xfs_disk_dquot_t *) item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); + if (error) + goto next; } - if (!error) - memcpy(xfs_buf_offset(bp, - (uint)bit << XFS_BLI_SHIFT), /* dest */ - item->ri_buf[i].i_addr, /* source */ - nbits<<XFS_BLI_SHIFT); /* length */ + + memcpy(xfs_buf_offset(bp, + (uint)bit << XFS_BLI_SHIFT), /* dest */ + item->ri_buf[i].i_addr, /* source */ + nbits<<XFS_BLI_SHIFT); /* length */ + next: i++; bit += nbits; } @@ -2615,7 +2629,19 @@ xlog_recover_do_dquot_trans( return (0); recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; - ASSERT(recddq); + + if (item->ri_buf[1].i_addr == NULL) { + cmn_err(CE_ALERT, + "XFS: NULL dquot in %s.", __func__); + return XFS_ERROR(EIO); + } + if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) { + cmn_err(CE_ALERT, + "XFS: dquot too small (%d) in %s.", + item->ri_buf[1].i_len, __func__); + return XFS_ERROR(EIO); + } + /* * This type of quotas was turned off, so ignore this record. */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 65a99725d0cc..5c6f092659c1 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -960,6 +960,53 @@ xfs_check_sizes(xfs_mount_t *mp) } /* + * Clear the quotaflags in memory and in the superblock. + */ +int +xfs_mount_reset_sbqflags( + struct xfs_mount *mp) +{ + int error; + struct xfs_trans *tp; + + mp->m_qflags = 0; + + /* + * It is OK to look at sb_qflags here in mount path, + * without m_sb_lock. + */ + if (mp->m_sb.sb_qflags == 0) + return 0; + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = 0; + spin_unlock(&mp->m_sb_lock); + + /* + * If the fs is readonly, let the incore superblock run + * with quotas off but don't flush the update out to disk + */ + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + +#ifdef QUOTADEBUG + xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); +#endif + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_fs_cmn_err(CE_ALERT, mp, + "xfs_mount_reset_sbqflags: Superblock update failed!"); + return error; + } + + xfs_mod_sb(tp, XFS_SB_QFLAGS); + return xfs_trans_commit(tp, 0); +} + +/* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct * - if we're a 32-bit kernel, do a size check on the superblock @@ -976,7 +1023,8 @@ xfs_mountfs( xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; __uint64_t resblks; - uint quotamount, quotaflags; + uint quotamount = 0; + uint quotaflags = 0; int error = 0; xfs_mount_common(mp, sbp); @@ -1210,9 +1258,28 @@ xfs_mountfs( /* * Initialise the XFS quota management subsystem for this mount */ - error = XFS_QM_INIT(mp, "amount, "aflags); - if (error) - goto out_rtunmount; + if (XFS_IS_QUOTA_RUNNING(mp)) { + error = xfs_qm_newmount(mp, "amount, "aflags); + if (error) + goto out_rtunmount; + } else { + ASSERT(!XFS_IS_QUOTA_ON(mp)); + + /* + * If a file system had quotas running earlier, but decided to + * mount without -o uquota/pquota/gquota options, revoke the + * quotachecked license. + */ + if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { + cmn_err(CE_NOTE, + "XFS: resetting qflags for filesystem %s", + mp->m_fsname); + + error = xfs_mount_reset_sbqflags(mp); + if (error) + return error; + } + } /* * Finish recovering the file system. This part needed to be @@ -1228,9 +1295,19 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - error = XFS_QM_MOUNT(mp, quotamount, quotaflags); - if (error) - goto out_rtunmount; + if (quotamount) { + ASSERT(mp->m_qflags == 0); + mp->m_qflags = quotaflags; + + xfs_qm_mount_quotas(mp); + } + +#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) + if (XFS_IS_QUOTA_ON(mp)) + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); + else + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); +#endif /* * Now we are mounted, reserve a small amount of unused space for @@ -1279,12 +1356,7 @@ xfs_unmountfs( __uint64_t resblks; int error; - /* - * Release dquot that rootinode, rbmino and rsumino might be holding, - * and release the quota inodes. - */ - XFS_QM_UNMOUNT(mp); - + xfs_qm_unmount_quotas(mp); xfs_rtunmount_inodes(mp); IRELE(mp->m_rootip); @@ -1299,12 +1371,9 @@ xfs_unmountfs( * need to force the log first. */ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC); - - XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); + xfs_qm_unmount(mp); /* * Flush out the log synchronously so that we know for sure diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d6a64392f983..a5122382afde 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -64,6 +64,8 @@ struct xfs_swapext; struct xfs_mru_cache; struct xfs_nameops; struct xfs_ail; +struct xfs_quotainfo; + /* * Prototypes and functions for the Data Migration subsystem. @@ -107,86 +109,6 @@ typedef struct xfs_dmops { (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) -/* - * Prototypes and functions for the Quota Management subsystem. - */ - -struct xfs_dquot; -struct xfs_dqtrxops; -struct xfs_quotainfo; - -typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); -typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); -typedef void (*xfs_qmunmount_t)(struct xfs_mount *); -typedef void (*xfs_qmdone_t)(struct xfs_mount *); -typedef void (*xfs_dqrele_t)(struct xfs_dquot *); -typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); -typedef void (*xfs_dqdetach_t)(struct xfs_inode *); -typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint); -typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *, - struct xfs_inode *, uid_t, gid_t, prid_t, uint, - struct xfs_dquot **, struct xfs_dquot **); -typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *); -typedef int (*xfs_dqvoprename_t)(struct xfs_inode **); -typedef struct xfs_dquot * (*xfs_dqvopchown_t)( - struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot **, struct xfs_dquot *); -typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *, uint); -typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); -typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); - -typedef struct xfs_qmops { - xfs_qminit_t xfs_qminit; - xfs_qmdone_t xfs_qmdone; - xfs_qmmount_t xfs_qmmount; - xfs_qmunmount_t xfs_qmunmount; - xfs_dqrele_t xfs_dqrele; - xfs_dqattach_t xfs_dqattach; - xfs_dqdetach_t xfs_dqdetach; - xfs_dqpurgeall_t xfs_dqpurgeall; - xfs_dqvopalloc_t xfs_dqvopalloc; - xfs_dqvopcreate_t xfs_dqvopcreate; - xfs_dqvoprename_t xfs_dqvoprename; - xfs_dqvopchown_t xfs_dqvopchown; - xfs_dqvopchownresv_t xfs_dqvopchownresv; - xfs_dqstatvfs_t xfs_dqstatvfs; - xfs_dqsync_t xfs_dqsync; - struct xfs_dqtrxops *xfs_dqtrxops; -} xfs_qmops_t; - -#define XFS_QM_INIT(mp, mnt, fl) \ - (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) -#define XFS_QM_MOUNT(mp, mnt, fl) \ - (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl) -#define XFS_QM_UNMOUNT(mp) \ - (*(mp)->m_qm_ops->xfs_qmunmount)(mp) -#define XFS_QM_DONE(mp) \ - (*(mp)->m_qm_ops->xfs_qmdone)(mp) -#define XFS_QM_DQRELE(mp, dq) \ - (*(mp)->m_qm_ops->xfs_dqrele)(dq) -#define XFS_QM_DQATTACH(mp, ip, fl) \ - (*(mp)->m_qm_ops->xfs_dqattach)(ip, fl) -#define XFS_QM_DQDETACH(mp, ip) \ - (*(mp)->m_qm_ops->xfs_dqdetach)(ip) -#define XFS_QM_DQPURGEALL(mp, fl) \ - (*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl) -#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \ - (*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2) -#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \ - (*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2) -#define XFS_QM_DQVOPRENAME(mp, ip) \ - (*(mp)->m_qm_ops->xfs_dqvoprename)(ip) -#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \ - (*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq) -#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \ - (*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl) -#define XFS_QM_DQSTATVFS(ip, statp) \ - (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp) -#define XFS_QM_DQSYNC(mp, flags) \ - (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags) - #ifdef HAVE_PERCPU_SB /* @@ -510,8 +432,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dmops_get(struct xfs_mount *); extern void xfs_dmops_put(struct xfs_mount *); -extern int xfs_qmops_get(struct xfs_mount *); -extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c deleted file mode 100644 index e101790ea8e7..000000000000 --- a/fs/xfs/xfs_qmops.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" -#include "xfs_quota.h" -#include "xfs_error.h" - - -STATIC struct xfs_dquot * -xfs_dqvopchown_default( - struct xfs_trans *tp, - struct xfs_inode *ip, - struct xfs_dquot **dqp, - struct xfs_dquot *dq) -{ - return NULL; -} - -/* - * Clear the quotaflags in memory and in the superblock. - */ -int -xfs_mount_reset_sbqflags(xfs_mount_t *mp) -{ - int error; - xfs_trans_t *tp; - - mp->m_qflags = 0; - /* - * It is OK to look at sb_qflags here in mount path, - * without m_sb_lock. - */ - if (mp->m_sb.sb_qflags == 0) - return 0; - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = 0; - spin_unlock(&mp->m_sb_lock); - - /* - * if the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; -#ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); -#endif - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); - return error; - } - xfs_mod_sb(tp, XFS_SB_QFLAGS); - error = xfs_trans_commit(tp, 0); - return error; -} - -STATIC int -xfs_noquota_init( - xfs_mount_t *mp, - uint *needquotamount, - uint *quotaflags) -{ - int error = 0; - - *quotaflags = 0; - *needquotamount = B_FALSE; - - ASSERT(!XFS_IS_QUOTA_ON(mp)); - - /* - * If a file system had quotas running earlier, but decided to - * mount without -o uquota/pquota/gquota options, revoke the - * quotachecked license. - */ - if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { - cmn_err(CE_NOTE, - "XFS resetting qflags for filesystem %s", - mp->m_fsname); - - error = xfs_mount_reset_sbqflags(mp); - } - return error; -} - -static struct xfs_qmops xfs_qmcore_stub = { - .xfs_qminit = (xfs_qminit_t) xfs_noquota_init, - .xfs_qmdone = (xfs_qmdone_t) fs_noerr, - .xfs_qmmount = (xfs_qmmount_t) fs_noerr, - .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, - .xfs_dqrele = (xfs_dqrele_t) fs_noerr, - .xfs_dqattach = (xfs_dqattach_t) fs_noerr, - .xfs_dqdetach = (xfs_dqdetach_t) fs_noerr, - .xfs_dqpurgeall = (xfs_dqpurgeall_t) fs_noerr, - .xfs_dqvopalloc = (xfs_dqvopalloc_t) fs_noerr, - .xfs_dqvopcreate = (xfs_dqvopcreate_t) fs_noerr, - .xfs_dqvoprename = (xfs_dqvoprename_t) fs_noerr, - .xfs_dqvopchown = xfs_dqvopchown_default, - .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr, - .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval, - .xfs_dqsync = (xfs_dqsync_t) fs_noerr, -}; - -int -xfs_qmops_get(struct xfs_mount *mp) -{ - if (XFS_IS_QUOTA_RUNNING(mp)) { -#ifdef CONFIG_XFS_QUOTA - mp->m_qm_ops = &xfs_qmcore_xfs; -#else - cmn_err(CE_WARN, - "XFS: qouta support not available in this kernel."); - return EINVAL; -#endif - } else { - mp->m_qm_ops = &xfs_qmcore_stub; - } - - return 0; -} - -void -xfs_qmops_put(struct xfs_mount *mp) -{ -} diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index f5d1202dde25..3ec91ac74c2a 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -197,7 +197,6 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ #define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ -#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ @@ -302,69 +301,79 @@ typedef struct xfs_dqtrx { long qt_delrtb_delta; /* delayed RT blk count changes */ } xfs_dqtrx_t; -/* - * Dquot transaction functions, used if quota is enabled. - */ -typedef void (*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *); -typedef void (*qo_mod_dquot_byino_t)(struct xfs_trans *, - struct xfs_inode *, uint, long); -typedef void (*qo_free_dqinfo_t)(struct xfs_trans *); -typedef void (*qo_apply_dquot_deltas_t)(struct xfs_trans *); -typedef void (*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *); -typedef int (*qo_reserve_quota_nblks_t)( - struct xfs_trans *, struct xfs_mount *, - struct xfs_inode *, long, long, uint); -typedef int (*qo_reserve_quota_bydquots_t)( - struct xfs_trans *, struct xfs_mount *, - struct xfs_dquot *, struct xfs_dquot *, - long, long, uint); -typedef struct xfs_dqtrxops { - qo_dup_dqinfo_t qo_dup_dqinfo; - qo_free_dqinfo_t qo_free_dqinfo; - qo_mod_dquot_byino_t qo_mod_dquot_byino; - qo_apply_dquot_deltas_t qo_apply_dquot_deltas; - qo_reserve_quota_nblks_t qo_reserve_quota_nblks; - qo_reserve_quota_bydquots_t qo_reserve_quota_bydquots; - qo_unreserve_and_mod_dquots_t qo_unreserve_and_mod_dquots; -} xfs_dqtrxops_t; - -#define XFS_DQTRXOP(mp, tp, op, args...) \ - ((mp)->m_qm_ops->xfs_dqtrxops ? \ - ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0) - -#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \ - ((mp)->m_qm_ops->xfs_dqtrxops ? \ - ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0) - -#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \ - XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp) -#define XFS_TRANS_FREE_DQINFO(mp, tp) \ - XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo) -#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \ - XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta) -#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \ - XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas) -#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \ - XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl) -#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \ - XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl) -#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \ - XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots) - -#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \ - XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags) -#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ - XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \ - f | XFS_QMOPT_RES_REGBLKS) -#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ - XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \ +#ifdef CONFIG_XFS_QUOTA +extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); +extern void xfs_trans_free_dqinfo(struct xfs_trans *); +extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, + uint, long); +extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); +extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); +extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, + struct xfs_inode *, long, long, uint); +extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, + struct xfs_mount *, struct xfs_dquot *, + struct xfs_dquot *, long, long, uint); + +extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, + struct xfs_dquot **, struct xfs_dquot **); +extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot *, struct xfs_dquot *); +extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); +extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, + struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); +extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot *, struct xfs_dquot *, uint); +extern int xfs_qm_dqattach(struct xfs_inode *, uint); +extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); +extern void xfs_qm_dqdetach(struct xfs_inode *); +extern void xfs_qm_dqrele(struct xfs_dquot *); +extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); +extern int xfs_qm_sync(struct xfs_mount *, int); +extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); +extern void xfs_qm_mount_quotas(struct xfs_mount *); +extern void xfs_qm_unmount(struct xfs_mount *); +extern void xfs_qm_unmount_quotas(struct xfs_mount *); + +#else +static inline int +xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, + uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) +{ + *udqp = NULL; + *gdqp = NULL; + return 0; +} +#define xfs_trans_dup_dqinfo(tp, tp2) +#define xfs_trans_free_dqinfo(tp) +#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) +#define xfs_trans_apply_dquot_deltas(tp) +#define xfs_trans_unreserve_and_mod_dquots(tp) +#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) +#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) +#define xfs_qm_vop_create_dqattach(tp, ip, u, g) +#define xfs_qm_vop_rename_dqattach(it) (0) +#define xfs_qm_vop_chown(tp, ip, old, new) (NULL) +#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) +#define xfs_qm_dqattach(ip, fl) (0) +#define xfs_qm_dqattach_locked(ip, fl) (0) +#define xfs_qm_dqdetach(ip) +#define xfs_qm_dqrele(d) +#define xfs_qm_statvfs(ip, s) +#define xfs_qm_sync(mp, fl) (0) +#define xfs_qm_newmount(mp, a, b) (0) +#define xfs_qm_mount_quotas(mp) +#define xfs_qm_unmount(mp) +#define xfs_qm_unmount_quotas(mp) (0) +#endif /* CONFIG_XFS_QUOTA */ + +#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ + xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) +#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ + xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); extern int xfs_mount_reset_sbqflags(struct xfs_mount *); -extern struct xfs_qmops xfs_qmcore_xfs; - #endif /* __KERNEL__ */ - #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 58f85e9cd11d..b81deea0ce19 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -166,7 +166,8 @@ xfs_rename( /* * Attach the dquots to the inodes */ - if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { + error = xfs_qm_vop_rename_dqattach(inodes); + if (error) { xfs_trans_cancel(tp, cancel_flags); goto std_return; } diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 36f3a21c54d2..fea68615ed23 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -41,7 +41,6 @@ #include "xfs_ialloc.h" #include "xfs_attr.h" #include "xfs_bmap.h" -#include "xfs_acl.h" #include "xfs_error.h" #include "xfs_buf_item.h" #include "xfs_rw.h" diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index bcc39d358ad3..66b849358e62 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -297,7 +297,7 @@ xfs_trans_dup( tp->t_rtx_res = tp->t_rtx_res_used; ntp->t_pflags = tp->t_pflags; - XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp); + xfs_trans_dup_dqinfo(tp, ntp); atomic_inc(&tp->t_mountp->m_active_trans); return ntp; @@ -829,7 +829,7 @@ shut_us_down: * means is that we have some (non-persistent) quota * reservations that need to be unreserved. */ - XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp); + xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); @@ -848,10 +848,9 @@ shut_us_down: /* * If we need to update the superblock, then do it now. */ - if (tp->t_flags & XFS_TRANS_SB_DIRTY) { + if (tp->t_flags & XFS_TRANS_SB_DIRTY) xfs_trans_apply_sb_deltas(tp); - } - XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp); + xfs_trans_apply_dquot_deltas(tp); /* * Ask each log item how many log_vector entries it will @@ -1056,7 +1055,7 @@ xfs_trans_uncommit( } xfs_trans_unreserve_and_mod_sb(tp); - XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp); + xfs_trans_unreserve_and_mod_dquots(tp); xfs_trans_free_items(tp, flags); xfs_trans_free_busy(tp); @@ -1181,7 +1180,7 @@ xfs_trans_cancel( } #endif xfs_trans_unreserve_and_mod_sb(tp); - XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp); + xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { if (flags & XFS_TRANS_RELEASE_LOG_RES) { @@ -1211,7 +1210,7 @@ xfs_trans_free( xfs_trans_t *tp) { atomic_dec(&tp->t_mountp->m_active_trans); - XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp); + xfs_trans_free_dqinfo(tp); kmem_zone_free(xfs_trans_zone, tp); } diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 79b9e5ea5359..4d88616bde91 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -166,7 +166,7 @@ xfs_dir_ialloc( xfs_buf_relse(ialloc_context); if (dqinfo) { tp->t_dqinfo = dqinfo; - XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp); + xfs_trans_free_dqinfo(tp); } *tpp = ntp; *ipp = NULL; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 19cf90a9c762..c4eca5ed5dab 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -42,6 +42,7 @@ #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_bmap.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_rw.h" #include "xfs_error.h" @@ -118,7 +119,7 @@ xfs_setattr( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid, + code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, qflags, &udqp, &gdqp); if (code) return code; @@ -180,10 +181,11 @@ xfs_setattr( * Do a quota reservation only if uid/gid is actually * going to change. */ - if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { + if (XFS_IS_QUOTA_RUNNING(mp) && + ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { ASSERT(tp); - code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ @@ -217,7 +219,7 @@ xfs_setattr( /* * Make sure that the dquots are attached to the inode. */ - code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); + code = xfs_qm_dqattach_locked(ip, 0); if (code) goto error_return; @@ -351,21 +353,21 @@ xfs_setattr( * in the transaction. */ if (iuid != uid) { - if (XFS_IS_UQUOTA_ON(mp)) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & ATTR_UID); ASSERT(udqp); - olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, + olddquot1 = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } ip->i_d.di_uid = uid; inode->i_uid = uid; } if (igid != gid) { - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { ASSERT(!XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); - olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, + olddquot2 = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_gid = gid; @@ -461,13 +463,25 @@ xfs_setattr( /* * Release any dquot(s) the inode had kept before chown. */ - XFS_QM_DQRELE(mp, olddquot1); - XFS_QM_DQRELE(mp, olddquot2); - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(olddquot1); + xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); - if (code) { + if (code) return code; + + /* + * XXX(hch): Updating the ACL entries is not atomic vs the i_mode + * update. We could avoid this with linked transactions + * and passing down the transaction pointer all the way + * to attr_set. No previous user of the generic + * Posix ACL code seems to care about this issue either. + */ + if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + code = -xfs_acl_chmod(inode); + if (code) + return XFS_ERROR(code); } if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && @@ -482,8 +496,8 @@ xfs_setattr( commit_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ error_return: - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); if (tp) { xfs_trans_cancel(tp, commit_flags); } @@ -739,7 +753,8 @@ xfs_free_eofblocks( /* * Attach the dquots to the inode up front. */ - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return error; /* @@ -1181,7 +1196,8 @@ xfs_inactive( ASSERT(ip->i_d.di_nlink == 0); - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return VN_INACTIVE_CACHE; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); @@ -1307,7 +1323,7 @@ xfs_inactive( /* * Credit the quota account(s). The inode is gone. */ - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); /* * Just ignore errors at this point. There is nothing we can @@ -1323,11 +1339,11 @@ xfs_inactive( xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " "xfs_trans_commit() returned error %d", error); } + /* * Release the dquots held by inode, if any. */ - XFS_QM_DQDETACH(mp, ip); - + xfs_qm_dqdetach(ip); xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); out: @@ -1427,8 +1443,7 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(), current_fsgid(), prid, + error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -1489,7 +1504,7 @@ xfs_create( /* * Reserve disk quota and the inode. */ - error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); + error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); if (error) goto out_trans_cancel; @@ -1561,7 +1576,7 @@ xfs_create( * These ids of the inode couldn't have changed since the new * inode has been locked ever since it was created. */ - XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); + xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); /* * xfs_trans_commit normally decrements the vnode ref count @@ -1580,8 +1595,8 @@ xfs_create( goto out_dqrele; } - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); *ipp = ip; @@ -1602,8 +1617,8 @@ xfs_create( out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); out_dqrele: - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -1837,11 +1852,11 @@ xfs_remove( return error; } - error = XFS_QM_DQATTACH(mp, dp, 0); + error = xfs_qm_dqattach(dp, 0); if (error) goto std_return; - error = XFS_QM_DQATTACH(mp, ip, 0); + error = xfs_qm_dqattach(ip, 0); if (error) goto std_return; @@ -2028,11 +2043,11 @@ xfs_link( /* Return through std_return after this point. */ - error = XFS_QM_DQATTACH(mp, sip, 0); + error = xfs_qm_dqattach(sip, 0); if (error) goto std_return; - error = XFS_QM_DQATTACH(mp, tdp, 0); + error = xfs_qm_dqattach(tdp, 0); if (error) goto std_return; @@ -2205,8 +2220,7 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(), current_fsgid(), prid, + error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -2248,7 +2262,7 @@ xfs_symlink( /* * Reserve disk quota : blocks and inode. */ - error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); + error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); if (error) goto error_return; @@ -2288,7 +2302,7 @@ xfs_symlink( /* * Also attach the dquot(s) to it, if applicable. */ - XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); + xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); if (resblks) resblks -= XFS_IALLOC_SPACE_RES(mp); @@ -2376,8 +2390,8 @@ xfs_symlink( goto error2; } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ @@ -2401,8 +2415,8 @@ std_return: cancel_flags |= XFS_TRANS_ABORT; error_return: xfs_trans_cancel(tp, cancel_flags); - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -2541,7 +2555,8 @@ xfs_alloc_file_space( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return error; if (len <= 0) @@ -2628,8 +2643,8 @@ retry: break; } xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, - qblocks, 0, quota_flag); + error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, + 0, quota_flag); if (error) goto error1; @@ -2688,7 +2703,7 @@ dmapi_enospc_check: error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); + xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -2827,7 +2842,8 @@ xfs_free_file_space( xfs_itrace_entry(ip); - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return error; error = 0; @@ -2953,9 +2969,9 @@ xfs_free_file_space( break; } xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA(mp, tp, - ip->i_udquot, ip->i_gdquot, resblks, 0, - XFS_QMOPT_RES_REGBLKS); + error = xfs_trans_reserve_quota(tp, mp, + ip->i_udquot, ip->i_gdquot, + resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto error1; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 04373c6c61ff..a9e102de71a1 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ +#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_fsync(struct xfs_inode *ip); |