diff options
Diffstat (limited to 'fs')
61 files changed, 902 insertions, 490 deletions
diff --git a/fs/Makefile b/fs/Makefile index 34a1b9dea6dd..da0bbb456d3f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ +obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ @@ -165,6 +165,15 @@ static struct vfsmount *aio_mnt; static const struct file_operations aio_ring_fops; static const struct address_space_operations aio_ctx_aops; +/* Backing dev info for aio fs. + * -no dirty page accounting or writeback happens + */ +static struct backing_dev_info aio_fs_backing_dev_info = { + .name = "aiofs", + .state = 0, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY, +}; + static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { struct qstr this = QSTR_INIT("[aio]", 5); @@ -176,6 +185,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) inode->i_mapping->a_ops = &aio_ctx_aops; inode->i_mapping->private_data = ctx; + inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info; inode->i_size = PAGE_SIZE * nr_pages; path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); @@ -220,6 +230,9 @@ static int __init aio_setup(void) if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); + if (bdi_init(&aio_fs_backing_dev_info)) + panic("Failed to init aio fs backing dev info."); + kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -281,11 +294,6 @@ static const struct file_operations aio_ring_fops = { .mmap = aio_ring_mmap, }; -static int aio_set_page_dirty(struct page *page) -{ - return 0; -} - #if IS_ENABLED(CONFIG_MIGRATION) static int aio_migratepage(struct address_space *mapping, struct page *new, struct page *old, enum migrate_mode mode) @@ -357,7 +365,7 @@ out: #endif static const struct address_space_operations aio_ctx_aops = { - .set_page_dirty = aio_set_page_dirty, + .set_page_dirty = __set_page_dirty_no_writeback, #if IS_ENABLED(CONFIG_MIGRATION) .migratepage = aio_migratepage, #endif @@ -412,7 +420,6 @@ static int aio_setup_ring(struct kioctx *ctx) pr_debug("pid(%d) page[%d]->count=%d\n", current->pid, i, page_count(page)); SetPageUptodate(page); - SetPageDirty(page); unlock_page(page); ctx->ring_pages[i] = page; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d3220d31d3cb..dcd9be32ac57 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1011,8 +1011,6 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); - if (*pg_index == (vcnt - 1) && *pg_offset == 0) - memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); kunmap_atomic(kaddr); flush_dcache_page(page_out); @@ -1054,3 +1052,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, return 1; } + +/* + * When uncompressing data, we need to make sure and zero any parts of + * the biovec that were not filled in by the decompression code. pg_index + * and pg_offset indicate the last page and the last offset of that page + * that have been filled in. This will zero everything remaining in the + * biovec. + */ +void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, + unsigned long pg_index, + unsigned long pg_offset) +{ + while (pg_index < vcnt) { + struct page *page = bvec[pg_index].bv_page; + unsigned long off = bvec[pg_index].bv_offset; + unsigned long len = bvec[pg_index].bv_len; + + if (pg_offset < off) + pg_offset = off; + if (pg_offset < off + len) { + unsigned long bytes = off + len - pg_offset; + char *kaddr; + + kaddr = kmap_atomic(page); + memset(kaddr + pg_offset, 0, bytes); + kunmap_atomic(kaddr); + } + pg_index++; + pg_offset = 0; + } +} diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 0c803b4fbf93..d181f70caae0 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -45,7 +45,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long nr_pages); int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); - +void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, + unsigned long pg_index, + unsigned long pg_offset); struct btrfs_compress_op { struct list_head *(*alloc_workspace)(void); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19bc6162fb8e..150822ee0a0b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, { int i; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* lockdep really cares that we take all of these spinlocks - * in the right order. If any of the locks in the path are not - * currently blocking, it is going to complain. So, make really - * really sure by forcing the path to blocking before we clear - * the path blocking. - */ if (held) { btrfs_set_lock_blocking_rw(held, held_rw); if (held_rw == BTRFS_WRITE_LOCK) @@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, held_rw = BTRFS_READ_LOCK_BLOCKING; } btrfs_set_path_blocking(p); -#endif for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { if (p->nodes[i] && p->locks[i]) { @@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, } } -#ifdef CONFIG_DEBUG_LOCK_ALLOC if (held) btrfs_clear_lock_blocking_rw(held, held_rw); -#endif } /* this also releases the path */ @@ -2893,7 +2883,7 @@ cow_done: } p->locks[level] = BTRFS_WRITE_LOCK; } else { - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); @@ -3025,7 +3015,7 @@ again: } level = btrfs_header_level(b); - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 783a94355efd..84a2d1868271 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -413,7 +413,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, ret = 0; fail: while (ret < 0 && !list_empty(&tmplist)) { - sums = list_entry(&tmplist, struct btrfs_ordered_sum, list); + sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); list_del(&sums->list); kfree(sums); } diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 5665d2149249..f8229ef1b46d 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -128,6 +128,26 @@ again: } /* + * take a spinning read lock. + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers + */ +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) +{ + if (atomic_read(&eb->blocking_writers)) + return 0; + + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + return 0; + } + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); + return 1; +} + +/* * returns 1 if we get the read lock and 0 if we don't * this won't wait for blocking writers */ @@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) atomic_read(&eb->blocking_readers)) return 0; - if (!write_trylock(&eb->lock)) - return 0; - + write_lock(&eb->lock); if (atomic_read(&eb->blocking_writers) || atomic_read(&eb->blocking_readers)) { write_unlock(&eb->lock); diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index b81e0e9a4894..c44a9d5f5362 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); void btrfs_assert_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_read_lock(struct extent_buffer *eb); int btrfs_try_tree_write_lock(struct extent_buffer *eb); +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb); + static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) { diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 78285f30909e..617553cdb7d3 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -373,6 +373,8 @@ cont: } done: kunmap(pages_in[page_in_index]); + if (!ret) + btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); return ret; } @@ -410,10 +412,23 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, goto out; } + /* + * the caller is already checking against PAGE_SIZE, but lets + * move this check closer to the memcpy/memset + */ + destlen = min_t(unsigned long, destlen, PAGE_SIZE); bytes = min_t(unsigned long, destlen, out_len - start_byte); kaddr = kmap_atomic(dest_page); memcpy(kaddr, workspace->buf + start_byte, bytes); + + /* + * btrfs_getblock is doing a zero on the tail of the page too, + * but this will cover anything missing from the decompressed + * data. + */ + if (bytes < destlen) + memset(kaddr+bytes, 0, destlen-bytes); kunmap_atomic(kaddr); out: return ret; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 759fa4e2de8f..fb22fd8d8fb8 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -299,6 +299,8 @@ done: zlib_inflateEnd(&workspace->strm); if (data_in) kunmap(pages_in[page_in_index]); + if (!ret) + btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); return ret; } @@ -310,10 +312,14 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; int wbits = MAX_WBITS; - unsigned long bytes_left = destlen; + unsigned long bytes_left; unsigned long total_out = 0; + unsigned long pg_offset = 0; char *kaddr; + destlen = min_t(unsigned long, destlen, PAGE_SIZE); + bytes_left = destlen; + workspace->strm.next_in = data_in; workspace->strm.avail_in = srclen; workspace->strm.total_in = 0; @@ -341,7 +347,6 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; - unsigned long pg_offset = 0; ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); if (ret != Z_OK && ret != Z_STREAM_END) @@ -384,6 +389,17 @@ next: ret = 0; zlib_inflateEnd(&workspace->strm); + + /* + * this should only happen if zlib returned fewer bytes than we + * expected. btrfs_get_block is responsible for zeroing from the + * end of the inline extent (destlen) to the end of the page + */ + if (pg_offset < destlen) { + kaddr = kmap_atomic(dest_page); + memset(kaddr + pg_offset, 0, destlen - pg_offset); + kunmap_atomic(kaddr); + } return ret; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 659f2ea9e6f7..cefca661464b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, for (i = 0; i < CEPH_CAP_BITS; i++) if ((dirty & (1 << i)) && - flush_tid == ci->i_cap_flush_tid[i]) + (u16)flush_tid == ci->i_cap_flush_tid[i]) cleaned |= 1 << i; dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," diff --git a/fs/dcache.c b/fs/dcache.c index 3ffef7f4e5cd..5bc72b07fde2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -778,6 +778,7 @@ restart: struct dentry *parent = lock_parent(dentry); if (likely(!dentry->d_lockref.count)) { __dentry_kill(dentry); + dput(parent); goto restart; } if (parent) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 0a48886e069c..6dad1176ec52 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -236,6 +236,7 @@ static void efivarfs_kill_sb(struct super_block *sb) } static struct file_system_type efivarfs_type = { + .owner = THIS_MODULE, .name = "efivarfs", .mount = efivarfs_mount, .kill_sb = efivarfs_kill_sb, @@ -244,17 +245,23 @@ static struct file_system_type efivarfs_type = { static __init int efivarfs_init(void) { if (!efi_enabled(EFI_RUNTIME_SERVICES)) - return 0; + return -ENODEV; if (!efivars_kobject()) - return 0; + return -ENODEV; return register_filesystem(&efivarfs_type); } +static __exit void efivarfs_exit(void) +{ + unregister_filesystem(&efivarfs_type); +} + MODULE_AUTHOR("Matthew Garrett, Jeremy Kerr"); MODULE_DESCRIPTION("EFI Variable Filesystem"); MODULE_LICENSE("GPL"); MODULE_ALIAS_FS("efivarfs"); module_init(efivarfs_init); +module_exit(efivarfs_exit); diff --git a/fs/exec.c b/fs/exec.c index 7302b75a9820..01aebe300200 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,6 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) goto err; mm->stack_vm = mm->total_vm = 1; + arch_bprm_mm_init(mm, vma); up_write(&mm->mmap_sem); bprm->p = vma->vm_end - sizeof(void *); return 0; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6df8d3d885e5..b8b92c2f9683 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -736,7 +736,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, } alias = d_find_alias(inode); - if (alias && !vfat_d_anon_disconn(alias)) { + /* + * Checking "alias->d_parent == dentry->d_parent" to make sure + * FS is not corrupted (especially double linked dir). + */ + if (alias && alias->d_parent == dentry->d_parent && + !vfat_d_anon_disconn(alias)) { /* * This inode has non anonymous-DCACHE_DISCONNECTED * dentry. This means, the user did ->lookup() by an @@ -755,12 +760,9 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: mutex_unlock(&MSDOS_SB(sb)->s_lock); - dentry->d_time = dentry->d_parent->d_inode->i_version; - dentry = d_splice_alias(inode, dentry); - if (dentry) - dentry->d_time = dentry->d_parent->d_inode->i_version; - return dentry; - + if (!inode) + dentry->d_time = dir->i_version; + return d_splice_alias(inode, dentry); error: mutex_unlock(&MSDOS_SB(sb)->s_lock); return ERR_PTR(err); @@ -793,7 +795,6 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. */ - dentry->d_time = dentry->d_parent->d_inode->i_version; d_instantiate(dentry, inode); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -824,6 +825,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); + dentry->d_time = dir->i_version; out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -849,6 +851,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); + dentry->d_time = dir->i_version; out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -889,7 +892,6 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. */ - dentry->d_time = dentry->d_parent->d_inode->i_version; d_instantiate(dentry, inode); mutex_unlock(&MSDOS_SB(sb)->s_lock); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index fe839b915116..d67a16f2a45d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -174,27 +174,6 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(struct qstr *qstr, int ms) -{ - const char *name; - int len; - - len = qstr->len; - name = qstr->name; - if (ms) { - while (len && name[len-1] == '.') - len--; - } - - qstr->hash = full_name_hash(name, len); - - return 0; -} - -/* - * Compute the hash for the isofs name corresponding to the dentry. - */ -static int isofs_hashi_common(struct qstr *qstr, int ms) { const char *name; @@ -263,6 +242,27 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, } #ifdef CONFIG_JOLIET +/* + * Compute the hash for the isofs name corresponding to the dentry. + */ +static int +isofs_hash_common(struct qstr *qstr, int ms) +{ + const char *name; + int len; + + len = qstr->len; + name = qstr->name; + if (ms) { + while (len && name[len-1] == '.') + len--; + } + + qstr->hash = full_name_hash(name, len); + + return 0; +} + static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e4dc74713a43..1df94fabe4eb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1853,13 +1853,12 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, journal->j_chksum_driver = NULL; return 0; } - } - /* Precompute checksum seed for all metadata */ - if (jbd2_journal_has_csum_v2or3(journal)) + /* Precompute checksum seed for all metadata */ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); + } } /* If enabling v1 checksums, downgrade superblock */ diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 13db95f54176..56598742dde4 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -53,7 +53,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops; static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); -#ifdef LOCKD_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) { /* diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 5228f201d3d5..77fec6a55f57 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -378,7 +378,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) loff_t offset = header->args.offset; size_t count = header->args.count; struct page **pages = header->args.pages; - int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; unsigned int pg_len; struct blk_plug plug; int i; @@ -812,7 +812,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) /* Optimize common case that writes from 0 to end of file */ end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); - if (end != NFS_I(inode)->npages) { + if (end != inode->i_mapping->nrpages) { rcu_read_lock(); end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); rcu_read_unlock(); diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index e966c023b1b7..acbf9ca4018c 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -65,17 +65,18 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); + mutex_lock(&nn->bl_mutex); bl_pipe_msg.bl_wq = &nn->bl_wq; b->simple.len += 4; /* single volume */ if (b->simple.len > PAGE_SIZE) - return -EIO; + goto out_unlock; memset(msg, 0, sizeof(*msg)); msg->len = sizeof(*bl_msg) + b->simple.len; msg->data = kzalloc(msg->len, gfp_mask); if (!msg->data) - goto out; + goto out_free_data; bl_msg = msg->data; bl_msg->type = BL_DEVICE_MOUNT, @@ -87,7 +88,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, rc = rpc_queue_upcall(nn->bl_device_pipe, msg); if (rc < 0) { remove_wait_queue(&nn->bl_wq, &wq); - goto out; + goto out_free_data; } set_current_state(TASK_UNINTERRUPTIBLE); @@ -97,12 +98,14 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, if (reply->status != BL_DEVICE_REQUEST_PROC) { printk(KERN_WARNING "%s failed to decode device: %d\n", __func__, reply->status); - goto out; + goto out_free_data; } dev = MKDEV(reply->major, reply->minor); -out: +out_free_data: kfree(msg->data); +out_unlock: + mutex_unlock(&nn->bl_mutex); return dev; } @@ -232,6 +235,7 @@ static int nfs4blocklayout_net_init(struct net *net) struct nfs_net *nn = net_generic(net, nfs_net_id); struct dentry *dentry; + mutex_init(&nn->bl_mutex); init_waitqueue_head(&nn->bl_wq); nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); if (IS_ERR(nn->bl_device_pipe)) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 73466b934090..e36a9d78ea49 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->npages != 0) + if (nfsi->nrequests != 0) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5853f53db732..7f3f60641344 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -125,6 +125,8 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (!nfs4_stateid_match(&state->stateid, stateid)) continue; get_nfs_open_context(ctx); @@ -193,7 +195,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * { int res = 0; - res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); + if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + res = nfs4_proc_delegreturn(inode, + delegation->cred, + &delegation->stateid, + issync); nfs_free_delegation(delegation); return res; } @@ -380,11 +386,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - int err; + int err = 0; if (delegation == NULL) return 0; do { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + break; err = nfs_delegation_claim_opens(inode, &delegation->stateid); if (!issync || err != -EAGAIN) break; @@ -605,10 +613,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } +static void nfs_revoke_delegation(struct inode *inode) +{ + struct nfs_delegation *delegation; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) { + set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + nfs_mark_return_delegation(NFS_SERVER(inode), delegation); + } + rcu_read_unlock(); +} + void nfs_remove_bad_delegation(struct inode *inode) { struct nfs_delegation *delegation; + nfs_revoke_delegation(inode); delegation = nfs_inode_detach_delegation(inode); if (delegation) { nfs_inode_find_state_and_recover(inode, &delegation->stateid); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5c1cce39297f..e3c20a3ccc93 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -31,6 +31,7 @@ enum { NFS_DELEGATION_RETURN_IF_CLOSED, NFS_DELEGATION_REFERENCED, NFS_DELEGATION_RETURNING, + NFS_DELEGATION_REVOKED, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 06e8cfcbb670..6e62155abf26 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1527,6 +1527,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, case -ENOENT: d_drop(dentry); d_add(dentry, NULL); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; case -EISDIR: case -ENOTDIR: diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 20cffc830468..10bf07280f4a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); if (dreq->l_ctx != NULL) nfs_put_lock_context(dreq->l_ctx); if (dreq->ctx != NULL) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 46fab1cb455a..7afb52f6a25a 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -145,9 +145,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 9bb806a76d99..bfecac781f19 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -204,8 +204,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds) ifdebug(FACILITY) print_ds(ds); - if (ds->ds_clp) - nfs_put_client(ds->ds_clp); + nfs_put_client(ds->ds_clp); while (!list_empty(&ds->ds_addrs)) { da = list_first_entry(&ds->ds_addrs, diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 3ef01f0ba0bc..d63bea8bbfbb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -269,8 +269,8 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) if (!fscache_maybe_release_page(cookie, page, gfp)) return 0; - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } return 1; @@ -293,8 +293,8 @@ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) BUG_ON(!PageLocked(page)); fscache_uncache_page(cookie, page); - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } /* @@ -343,19 +343,19 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, case 0: /* read BIO submitted (page in fscache) */ dfprintk(FSCACHE, "NFS: readpage_from_fscache: BIO submitted\n"); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); return ret; case -ENOBUFS: /* inode not in cache */ case -ENODATA: /* page not in cache */ - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); return 1; default: dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); } return ret; } @@ -429,11 +429,11 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) if (ret != 0) { fscache_uncache_page(nfs_i_fscache(inode), page); - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED); } else { - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK); } } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6388a59f2add..4bffe637ea32 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -192,6 +192,7 @@ void nfs_zap_caches(struct inode *inode) nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_caches); void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) { @@ -626,7 +627,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; - int err; + int err = 0; trace_nfs_getattr_enter(inode); /* Flush out writes to the server in order to update c/mtime. */ @@ -1149,7 +1150,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) { + && nfsi->nrequests == 0) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1192,7 +1193,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->npages == 0) + if (cur_size != new_isize && nfsi->nrequests == 0) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } @@ -1619,7 +1620,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0) || new_isize > cur_isize) { + if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; invalid &= ~NFS_INO_REVAL_PAGECACHE; @@ -1784,7 +1785,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->npages = 0; + nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); atomic_set(&nfsi->silly_count, 1); diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index c5832487c456..0cb806fbd4c4 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -55,6 +55,11 @@ static inline void nfs_add_fscache_stats(struct inode *inode, { this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); } +static inline void nfs_inc_fscache_stats(struct inode *inode, + enum nfs_stat_fscachecounters stat) +{ + this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]); +} #endif static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index ef221fb8a183..f0e06e4acbef 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -19,6 +19,7 @@ struct nfs_net { struct rpc_pipe *bl_device_pipe; struct bl_dev_msg bl_mount_reply; wait_queue_head_t bl_wq; + struct mutex bl_mutex; struct list_head nfs_client_list; struct list_head nfs_volume_list; #if IS_ENABLED(CONFIG_NFS_V4) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index d10333a197bf..7afb8947dfdf 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -6,6 +6,8 @@ #define __LINUX_FS_NFS_NFS4_2_H /* nfs4.2proc.c */ +int nfs42_proc_allocate(struct file *, loff_t, loff_t); +int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); /* nfs4.2xdr.h */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0886f1db5917..cb170722769c 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -32,6 +32,81 @@ static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, return ret; } +static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + struct nfs42_falloc_args args = { + .falloc_fh = NFS_FH(inode), + .falloc_offset = offset, + .falloc_length = len, + }; + struct nfs42_falloc_res res; + struct nfs_server *server = NFS_SERVER(inode); + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE); + if (status) + return status; + + return nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); +} + +static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_fallocate(msg, filep, offset, len); + if (err == -ENOTSUPP) + return -EOPNOTSUPP; + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE; + return err; +} + +int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DEALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; + return err; +} + loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct inode *inode = file_inode(filep); @@ -50,7 +125,7 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) struct nfs_server *server = NFS_SERVER(inode); int status; - if (!(server->caps & NFS_CAP_SEEK)) + if (!nfs_server_capable(inode, NFS_CAP_SEEK)) return -ENOTSUPP; status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c90469b604b8..038a7e1521fa 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,15 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#define encode_fallocate_maxsz (encode_stateid_maxsz + \ + 2 /* offset */ + \ + 2 /* length */) +#define encode_allocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_allocate_maxsz (op_decode_hdr_maxsz) +#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_deallocate_maxsz (op_decode_hdr_maxsz) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -14,6 +23,18 @@ 2 /* offset */ + \ 2 /* length */) +#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_allocate_maxsz) +#define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_allocate_maxsz) +#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_deallocate_maxsz) +#define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_deallocate_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_seek_maxsz) @@ -22,6 +43,30 @@ decode_seek_maxsz) +static void encode_fallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + encode_nfs4_stateid(xdr, &args->falloc_stateid); + encode_uint64(xdr, args->falloc_offset); + encode_uint64(xdr, args->falloc_length); +} + +static void encode_allocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + +static void encode_deallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + static void encode_seek(struct xdr_stream *xdr, struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -33,6 +78,42 @@ static void encode_seek(struct xdr_stream *xdr, } /* + * Encode ALLOCATE request + */ +static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_allocate(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* + * Encode DEALLOCATE request + */ +static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_deallocate(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* * Encode SEEK request */ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, @@ -50,6 +131,16 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_ALLOCATE); +} + +static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_DEALLOCATE); +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -73,6 +164,54 @@ out_overflow: } /* + * Decode ALLOCATE request + */ +static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_allocate(xdr, res); +out: + return status; +} + +/* + * Decode DEALLOCATE request + */ +static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_deallocate(xdr, res); +out: + return status; +} + +/* * Decode SEEK request */ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index be6cac37ea10..a08178764cf9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -226,6 +226,7 @@ int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); /* nfs4proc.c */ +extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ffdb28d86cf8..03311259b0c4 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -241,28 +241,25 @@ void nfs4_free_client(struct nfs_client *clp) */ static int nfs4_init_callback(struct nfs_client *clp) { + struct rpc_xprt *xprt; int error; - if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, - NFS41_BC_MIN_CALLBACKS); - if (error < 0) - return error; - } - - error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); - if (error < 0) { - dprintk("%s: failed to start callback. Error = %d\n", - __func__, error); + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); + if (error < 0) return error; - } - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); } + + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + if (error < 0) { + dprintk("%s: failed to start callback. Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + return 0; } @@ -498,8 +495,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -517,8 +513,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); @@ -549,8 +544,7 @@ int nfs40_walk_client_list(struct nfs_client *new, /* No match found. The server lost our clientid */ out: - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); dprintk("NFS: <-- %s status = %d\n", __func__, status); return status; } @@ -641,8 +635,7 @@ int nfs41_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -675,8 +668,7 @@ int nfs41_walk_client_list(struct nfs_client *new, /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c51fb4db9bfe..8b46389c4c5b 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -3,6 +3,8 @@ * * Copyright (C) 1992 Rick Sladkey */ +#include <linux/fs.h> +#include <linux/falloc.h> #include <linux/nfs_fs.h> #include "internal.h" #include "fscache.h" @@ -134,6 +136,32 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) return nfs_file_llseek(filep, offset, whence); } } + +static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + long ret; + + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))) + return -EOPNOTSUPP; + + ret = inode_newsize_ok(inode, offset + len); + if (ret < 0) + return ret; + + mutex_lock(&inode->i_mutex); + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = nfs42_proc_deallocate(filep, offset, len); + else + ret = nfs42_proc_allocate(filep, offset, len); + mutex_unlock(&inode->i_mutex); + + nfs_zap_caches(inode); + return ret; +} #endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { @@ -155,6 +183,9 @@ const struct file_operations nfs4_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, +#ifdef CONFIG_NFS_V4_2 + .fallocate = nfs42_fallocate, +#endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 405bd95c1f58..e7f8d5ff2581 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -158,8 +158,6 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; - case -NFS4ERR_ACCESS: - return -EACCES; case -NFS4ERR_FILE_OPEN: return -EBUSY; default: @@ -344,7 +342,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; @@ -370,11 +368,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { - nfs_remove_bad_delegation(inode); - exception->retry = 1; - break; - } if (state == NULL) break; ret = nfs4_schedule_stateid_recovery(server, state); @@ -1654,7 +1647,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs_inode_find_state_and_recover(state->inode, stateid); nfs4_schedule_stateid_recovery(server, state); - return 0; + return -EAGAIN; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: set_bit(NFS_DELEGATED_STATE, &state->flags); @@ -2109,46 +2102,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) +{ + nfs_remove_bad_delegation(state->inode); + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); + write_sequnlock(&state->seqlock); + clear_bit(NFS_DELEGATED_STATE, &state->flags); +} + +static void nfs40_clear_delegation_stateid(struct nfs4_state *state) +{ + if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) + nfs_finish_clear_delegation_stateid(state); +} + +static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + /* NFSv4.0 doesn't allow for delegation recovery on open expire */ + nfs40_clear_delegation_stateid(state); + return nfs4_open_expired(sp, state); +} + #if defined(CONFIG_NFS_V4_1) -static void nfs41_clear_delegation_stateid(struct nfs4_state *state) +static void nfs41_check_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); - nfs4_stateid *stateid = &state->stateid; + nfs4_stateid stateid; struct nfs_delegation *delegation; - struct rpc_cred *cred = NULL; - int status = -NFS4ERR_BAD_STATEID; - - /* If a state reset has been done, test_stateid is unneeded */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - return; + struct rpc_cred *cred; + int status; /* Get the delegation credential for use by test/free_stateid */ rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && - nfs4_stateid_match(&delegation->stateid, stateid)) { - cred = get_rpccred(delegation->cred); - rcu_read_unlock(); - status = nfs41_test_stateid(server, stateid, cred); - trace_nfs4_test_delegation_stateid(state, NULL, status); - } else + if (delegation == NULL) { rcu_read_unlock(); + return; + } + + nfs4_stateid_copy(&stateid, &delegation->stateid); + cred = get_rpccred(delegation->cred); + rcu_read_unlock(); + status = nfs41_test_stateid(server, &stateid, cred); + trace_nfs4_test_delegation_stateid(state, NULL, status); if (status != NFS_OK) { /* Free the stateid unless the server explicitly * informs us the stateid is unrecognized. */ if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid, cred); - nfs_remove_bad_delegation(state->inode); - - write_seqlock(&state->seqlock); - nfs4_stateid_copy(&state->stateid, &state->open_stateid); - write_sequnlock(&state->seqlock); - clear_bit(NFS_DELEGATED_STATE, &state->flags); + nfs41_free_stateid(server, &stateid, cred); + nfs_finish_clear_delegation_stateid(state); } - if (cred != NULL) - put_rpccred(cred); + put_rpccred(cred); } /** @@ -2192,7 +2199,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st { int status; - nfs41_clear_delegation_stateid(state); + nfs41_check_delegation_stateid(state); status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); @@ -2231,19 +2238,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); ret = _nfs4_proc_open(opendata); - if (ret != 0) { - if (ret == -ENOENT) { - dentry = opendata->dentry; - if (dentry->d_inode) - d_delete(dentry); - else if (d_unhashed(dentry)) - d_add(dentry, NULL); - - nfs_set_verifier(dentry, - nfs_save_change_attribute(opendata->dir->d_inode)); - } + if (ret != 0) goto out; - } state = nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); @@ -4841,9 +4837,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; @@ -7709,6 +7702,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) dprintk("--> %s\n", __func__); + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); @@ -7721,9 +7717,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.seq_res.sr_slot = NULL; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); @@ -8341,7 +8334,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = nfs4_open_expired, + .recover_open = nfs40_open_expired, .recover_lock = nfs4_lock_expired, .establish_clid = nfs4_init_clientid, }; @@ -8408,8 +8401,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1 - | NFS_CAP_SEEK, + | NFS_CAP_ATOMIC_OPEN_V1, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, @@ -8431,7 +8423,10 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1, + | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_ALLOCATE + | NFS_CAP_DEALLOCATE + | NFS_CAP_SEEK, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 206c08a60c7f..cb4376b78ed9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -141,13 +141,15 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ 1 /* sc_prog */ + \ - XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ - XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ 1) /* sc_cb_ident */ #define decode_setclientid_maxsz \ (op_decode_hdr_maxsz + \ - 2 + \ - 1024) /* large value for CLID_INUSE */ + 2 /* clientid */ + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) #define encode_setclientid_confirm_maxsz \ (op_encode_hdr_maxsz + \ 3 + (NFS4_VERIFIER_SIZE >> 2)) @@ -7394,6 +7396,8 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 PROC(SEEK, enc_seek, dec_seek), + PROC(ALLOCATE, enc_allocate, dec_allocate), + PROC(DEALLOCATE, enc_deallocate, dec_deallocate), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ed0db61f8543..2b5e769beb16 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) static inline void nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) { + struct inode *inode; WARN_ON_ONCE(prev == req); if (!prev) { @@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) * nfs_page_group_destroy is called */ kref_get(&req->wb_head->wb_kref); - /* grab extra ref if head request has extra ref from - * the write/commit path to handle handoff between write - * and commit lists */ + /* grab extra ref and bump the request count if head request + * has extra ref from the write/commit path to handle handoff + * between write and commit lists. */ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); + spin_lock(&inode->i_lock); + NFS_I(inode)->nrequests++; + spin_unlock(&inode->i_lock); } } } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index beff2769c5c5..c91a4799c562 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -269,7 +269,7 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - nfs_add_stats(inode, NFSIOS_READPAGES, 1); + nfs_inc_stats(inode, NFSIOS_READPAGES); /* * Try to flush any pending writes to the file.. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 12493846a2d3..af3af685a9e3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -575,7 +575,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + nfs_inc_stats(inode, NFSIOS_WRITEPAGES); nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); @@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (!nfsi->nrequests && + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; /* * Swap-space should not get truncated. Hence no need to plug the race @@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->npages++; + nfsi->nrequests++; /* this a head request for a page group - mark it as having an - * extra reference so sub groups can follow suit */ + * extra reference so sub groups can follow suit. + * This flag also informs pgio layer when to bump nrequests when + * adding subrequests. */ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -709,14 +712,16 @@ static void nfs_inode_remove_request(struct nfs_page *req) wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->npages--; + nfsi->nrequests--; + spin_unlock(&inode->i_lock); + } else { + spin_lock(&inode->i_lock); + nfsi->nrequests--; spin_unlock(&inode->i_lock); } if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) nfs_release_request(req); - else - WARN_ON_ONCE(1); } static void @@ -1737,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ - if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) + if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1)) goto out_mark_dirty; /* don't wait for the COMMIT response */ diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ed2b1151b171..7cbdf1b2e4ab 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -774,8 +774,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) { if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); - dprintk("%s slot is busy\n", __func__); - return false; + /* Race breaker */ + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + dprintk("%s slot is busy\n", __func__); + return false; + } + rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } return true; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 747f3b95bd11..33a46a8dfaf7 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -335,12 +335,15 @@ void nfsd_lockd_shutdown(void); (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ - (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL #else -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#define NFSD4_2_SECURITY_ATTRS 0 #endif +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ + NFSD4_2_SECURITY_ATTRS) + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9d3e9c50066a..89326acd4561 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, &fsnotify_mark_srcu); } + /* + * We need to merge inode & vfsmount mark lists so that inode mark + * ignore masks are properly reflected for mount mark notifications. + * That's why this traversal is so complicated... + */ while (inode_node || vfsmount_node) { - inode_group = vfsmount_group = NULL; + inode_group = NULL; + inode_mark = NULL; + vfsmount_group = NULL; + vfsmount_mark = NULL; if (inode_node) { inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), @@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, vfsmount_group = vfsmount_mark->group; } - if (inode_group > vfsmount_group) { - /* handle inode */ - ret = send_to_group(to_tell, inode_mark, NULL, mask, - data, data_is, cookie, file_name); - /* we didn't use the vfsmount_mark */ - vfsmount_group = NULL; - } else if (vfsmount_group > inode_group) { - ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, - data, data_is, cookie, file_name); - inode_group = NULL; - } else { - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, - mask, data, data_is, cookie, - file_name); + if (inode_group && vfsmount_group) { + int cmp = fsnotify_compare_groups(inode_group, + vfsmount_group); + if (cmp > 0) { + inode_group = NULL; + inode_mark = NULL; + } else if (cmp < 0) { + vfsmount_group = NULL; + vfsmount_mark = NULL; + } } + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, + data, data_is, cookie, file_name); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 9c0898c4cfe1..3b68b0ae0a97 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; +/* compare two groups for sorting of marks lists */ +extern int fsnotify_compare_groups(struct fsnotify_group *a, + struct fsnotify_group *b); + extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, __u32 mask); /* add a mark to an inode */ diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index e8497144b323..dfbf5447eea4 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, { struct fsnotify_mark *lmark, *last = NULL; int ret = 0; + int cmp; mark->flags |= FSNOTIFY_MARK_FLAG_INODE; @@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group->priority < lmark->group->priority) - continue; - - if ((mark->group->priority == lmark->group->priority) && - (mark->group < lmark->group)) + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp < 0) continue; hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index daf76652fe58..283aa312d745 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -227,14 +227,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, struct fsnotify_event *kevent; char __user *start; int ret; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); start = buf; group = file->private_data; + add_wait_queue(&group->notification_waitq, &wait); while (1) { - prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&group->notification_mutex); kevent = get_one_event(group, count); mutex_unlock(&group->notification_mutex); @@ -264,10 +263,10 @@ static ssize_t inotify_read(struct file *file, char __user *buf, if (start != buf) break; - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } + remove_wait_queue(&group->notification_waitq, &wait); - finish_wait(&group->notification_waitq, &wait); if (start != buf && ret != -EFAULT) ret = buf - start; return ret; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d90deaa08e78..34c38fabf514 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas } /* + * Sorting function for lists of fsnotify marks. + * + * Fanotify supports different notification classes (reflected as priority of + * notification group). Events shall be passed to notification groups in + * decreasing priority order. To achieve this marks in notification lists for + * inodes and vfsmounts are sorted so that priorities of corresponding groups + * are descending. + * + * Furthermore correct handling of the ignore mask requires processing inode + * and vfsmount marks of each group together. Using the group address as + * further sort criterion provides a unique sorting order and thus we can + * merge inode and vfsmount lists of marks in linear time and find groups + * present in both lists. + * + * A return value of 1 signifies that b has priority over a. + * A return value of 0 signifies that the two marks have to be handled together. + * A return value of -1 signifies that a has priority over b. + */ +int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) +{ + if (a == b) + return 0; + if (!a) + return 1; + if (!b) + return -1; + if (a->priority < b->priority) + return 1; + if (a->priority > b->priority) + return -1; + if (a < b) + return 1; + return -1; +} + +/* * Attach an initialized mark to a given group and fs object. * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index ac851e8376b1..faefa72a11eb 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, struct mount *m = real_mount(mnt); struct fsnotify_mark *lmark, *last = NULL; int ret = 0; + int cmp; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; @@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group->priority < lmark->group->priority) - continue; - - if ((mark->group->priority == lmark->group->priority) && - (mark->group < lmark->group)) + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp < 0) continue; hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 97de0fbd9f78..a96044004064 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -925,7 +925,7 @@ static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, size_t veclen, size_t total) { int ret; - struct msghdr msg; + struct msghdr msg = {.msg_flags = 0,}; if (sock == NULL) { ret = -EINVAL; diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index e60125976873..34355818a2e0 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,4 +1,4 @@ -config OVERLAYFS_FS +config OVERLAY_FS tristate "Overlay filesystem support" help An overlay filesystem combines two filesystems - an 'upper' filesystem diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 8f91889480d0..900daed3e91d 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -2,6 +2,6 @@ # Makefile for the overlay filesystem. # -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o +obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o inode.o dir.o readdir.o copy_up.o diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 15cd91ad9940..8ffc4b980f1b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -284,8 +284,7 @@ out: return ERR_PTR(err); } -static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, - enum ovl_path_type type) +static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) { int err; struct dentry *ret = NULL; @@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, err = ovl_check_empty_dir(dentry, &list); if (err) ret = ERR_PTR(err); - else if (type == OVL_PATH_MERGE) - ret = ovl_clear_empty(dentry, &list); + else { + /* + * If no upperdentry then skip clearing whiteouts. + * + * Can race with copy-up, since we don't hold the upperdir + * mutex. Doesn't matter, since copy-up can't create a + * non-empty directory from an empty one. + */ + if (ovl_dentry_upper(dentry)) + ret = ovl_clear_empty(dentry, &list); + } ovl_cache_free(&list); @@ -487,8 +495,7 @@ out: return err; } -static int ovl_remove_and_whiteout(struct dentry *dentry, - enum ovl_path_type type, bool is_dir) +static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) { struct dentry *workdir = ovl_workdir(dentry); struct inode *wdir = workdir->d_inode; @@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, int err; if (is_dir) { - opaquedir = ovl_check_empty_and_clear(dentry, type); + opaquedir = ovl_check_empty_and_clear(dentry); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, if (IS_ERR(whiteout)) goto out_unlock; - if (type == OVL_PATH_LOWER) { + upper = ovl_dentry_upper(dentry); + if (!upper) { upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto kill_whiteout; @@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, } else { int flags = 0; - upper = ovl_dentry_upper(dentry); if (opaquedir) upper = opaquedir; err = -ESTALE; @@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) cap_raise(override_cred->cap_effective, CAP_CHOWN); old_cred = override_creds(override_cred); - err = ovl_remove_and_whiteout(dentry, type, is_dir); + err = ovl_remove_and_whiteout(dentry, is_dir); revert_creds(old_cred); put_cred(override_cred); @@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, } if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { - opaquedir = ovl_check_empty_and_clear(new, new_type); + opaquedir = ovl_check_empty_and_clear(new); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { opaquedir = NULL; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index af2d18c9fcee..07d74b24913b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -235,26 +235,36 @@ out: return err; } +static bool ovl_need_xattr_filter(struct dentry *dentry, + enum ovl_path_type type) +{ + return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); +} + ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); + + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) return -ENODATA; - return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); + return vfs_getxattr(realpath.dentry, name, value, size); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); ssize_t res; int off; - res = vfs_listxattr(ovl_dentry_real(dentry), list, size); + res = vfs_listxattr(realpath.dentry, list, size); if (res <= 0 || size == 0) return res; - if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) + if (!ovl_need_xattr_filter(dentry, type)) return res; /* filter out private xattrs */ @@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name) { int err; struct path realpath; - enum ovl_path_type type; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); err = ovl_want_write(dentry); if (err) goto out; - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + err = -ENODATA; + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) goto out_drop_write; - type = ovl_path_real(dentry, &realpath); if (type == OVL_PATH_LOWER) { err = vfs_getxattr(realpath.dentry, name, NULL, 0); if (err < 0) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 4e9d7c1fea52..ab1e3dcbed95 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -168,7 +168,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) { struct ovl_dir_cache *cache = od->cache; - list_del(&od->cursor.l_node); + list_del_init(&od->cursor.l_node); WARN_ON(cache->refcount <= 0); cache->refcount--; if (!cache->refcount) { @@ -274,11 +274,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, return 0; } -static inline int ovl_dir_read_merged(struct path *upperpath, - struct path *lowerpath, - struct list_head *list) +static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) { int err; + struct path lowerpath; + struct path upperpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, .list = list, @@ -286,25 +286,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath, .is_merge = false, }; - if (upperpath->dentry) { - err = ovl_dir_read(upperpath, &rdd); + ovl_path_lower(dentry, &lowerpath); + ovl_path_upper(dentry, &upperpath); + + if (upperpath.dentry) { + err = ovl_dir_read(&upperpath, &rdd); if (err) goto out; - if (lowerpath->dentry) { - err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); + if (lowerpath.dentry) { + err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); if (err) goto out; } } - if (lowerpath->dentry) { + if (lowerpath.dentry) { /* * Insert lowerpath entries before upperpath ones, this allows * offsets to be reasonably constant */ list_add(&rdd.middle, rdd.list); rdd.is_merge = true; - err = ovl_dir_read(lowerpath, &rdd); + err = ovl_dir_read(&lowerpath, &rdd); list_del(&rdd.middle); } out: @@ -329,8 +332,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) { int res; - struct path lowerpath; - struct path upperpath; struct ovl_dir_cache *cache; cache = ovl_dir_cache(dentry); @@ -347,10 +348,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) cache->refcount = 1; INIT_LIST_HEAD(&cache->entries); - ovl_path_lower(dentry, &lowerpath); - ovl_path_upper(dentry, &upperpath); - - res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); + res = ovl_dir_read_merged(dentry, &cache->entries); if (res) { ovl_cache_free(&cache->entries); kfree(cache); @@ -452,10 +450,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { + if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { struct inode *inode = file_inode(file); - realfile =lockless_dereference(od->upperfile); + realfile = lockless_dereference(od->upperfile); if (!realfile) { struct path upperpath; @@ -538,14 +536,9 @@ const struct file_operations ovl_dir_operations = { int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) { int err; - struct path lowerpath; - struct path upperpath; struct ovl_cache_entry *p; - ovl_path_upper(dentry, &upperpath); - ovl_path_lower(dentry, &lowerpath); - - err = ovl_dir_read_merged(&upperpath, &lowerpath, list); + err = ovl_dir_read_merged(dentry, list); if (err) return err; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 08b704cebfc4..f16d318b71f8 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); -#define OVERLAYFS_SUPER_MAGIC 0x794c764f +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 struct ovl_config { char *lowerdir; @@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) { - struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); - /* - * Make sure to order reads to upperdentry wrt ovl_dentry_update() - */ - smp_read_barrier_depends(); - return upperdentry; + return lockless_dereference(oe->__upperdentry); } void ovl_path_upper(struct dentry *dentry, struct path *path) @@ -462,11 +457,34 @@ static const match_table_t ovl_tokens = { {OPT_ERR, NULL} }; +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; - while ((p = strsep(&opt, ",")) != NULL) { + while ((p = ovl_next_opt(&opt)) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; @@ -554,15 +572,34 @@ out_dput: goto out_unlock; } +static void ovl_unescape(char *s) +{ + char *d = s; + + for (;; s++, d++) { + if (*s == '\\') + s++; + *d = *s; + if (!*s) + break; + } +} + static int ovl_mount_dir(const char *name, struct path *path) { int err; + char *tmp = kstrdup(name, GFP_KERNEL); + + if (!tmp) + return -ENOMEM; - err = kern_path(name, LOOKUP_FOLLOW, path); + ovl_unescape(tmp); + err = kern_path(tmp, LOOKUP_FOLLOW, path); if (err) { - pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); + pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); err = -EINVAL; } + kfree(tmp); return err; } @@ -776,11 +813,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, static struct file_system_type ovl_fs_type = { .owner = THIS_MODULE, - .name = "overlayfs", + .name = "overlay", .mount = ovl_mount, .kill_sb = kill_anon_super, }; -MODULE_ALIAS_FS("overlayfs"); +MODULE_ALIAS_FS("overlay"); static int __init ovl_init(void) { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4e0388cffe3d..f6734c6b66a6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -552,6 +552,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_DENYWRITE)] = "dw", +#ifdef CONFIG_X86_INTEL_MPX + [ilog2(VM_MPX)] = "mp", +#endif [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 92e8f99a5857..281002689d64 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1338,7 +1338,10 @@ xfs_free_file_space( goto out; } - +/* + * Preallocate and zero a range of a file. This mechanism has the allocation + * semantics of fallocate and in addition converts data in the range to zeroes. + */ int xfs_zero_file_space( struct xfs_inode *ip, @@ -1346,65 +1349,30 @@ xfs_zero_file_space( xfs_off_t len) { struct xfs_mount *mp = ip->i_mount; - uint granularity; - xfs_off_t start_boundary; - xfs_off_t end_boundary; + uint blksize; int error; trace_xfs_zero_file_space(ip); - granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); + blksize = 1 << mp->m_sb.sb_blocklog; /* - * Round the range of extents we are going to convert inwards. If the - * offset is aligned, then it doesn't get changed so we zero from the - * start of the block offset points to. + * Punch a hole and prealloc the range. We use hole punch rather than + * unwritten extent conversion for two reasons: + * + * 1.) Hole punch handles partial block zeroing for us. + * + * 2.) If prealloc returns ENOSPC, the file range is still zero-valued + * by virtue of the hole punch. */ - start_boundary = round_up(offset, granularity); - end_boundary = round_down(offset + len, granularity); - - ASSERT(start_boundary >= offset); - ASSERT(end_boundary <= offset + len); - - if (start_boundary < end_boundary - 1) { - /* - * Writeback the range to ensure any inode size updates due to - * appending writes make it to disk (otherwise we could just - * punch out the delalloc blocks). - */ - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - start_boundary, end_boundary - 1); - if (error) - goto out; - truncate_pagecache_range(VFS_I(ip), start_boundary, - end_boundary - 1); - - /* convert the blocks */ - error = xfs_alloc_file_space(ip, start_boundary, - end_boundary - start_boundary - 1, - XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT); - if (error) - goto out; - - /* We've handled the interior of the range, now for the edges */ - if (start_boundary != offset) { - error = xfs_iozero(ip, offset, start_boundary - offset); - if (error) - goto out; - } - - if (end_boundary != offset + len) - error = xfs_iozero(ip, end_boundary, - offset + len - end_boundary); - - } else { - /* - * It's either a sub-granularity range or the range spanned lies - * partially across two adjacent blocks. - */ - error = xfs_iozero(ip, offset, len); - } + error = xfs_free_file_space(ip, offset, len); + if (error) + goto out; + error = xfs_alloc_file_space(ip, round_down(offset, blksize), + round_up(offset + len, blksize) - + round_down(offset, blksize), + XFS_BMAPI_PREALLOC); out: return error; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f1deb961a296..894924a5129b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk( XFS_WANT_CORRUPTED_RETURN(stat == 1); /* Check if the record contains the inode in request */ - if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) - return -EINVAL; + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { + *icount = 0; + return 0; + } idx = agino - irec->ir_startino + 1; if (idx < XFS_INODES_PER_CHUNK && @@ -262,75 +264,76 @@ xfs_bulkstat_grab_ichunk( #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) +struct xfs_bulkstat_agichunk { + char __user **ac_ubuffer;/* pointer into user's buffer */ + int ac_ubleft; /* bytes left in user's buffer */ + int ac_ubelem; /* spaces used in user's buffer */ +}; + /* * Process inodes in chunk with a pointer to a formatter function * that will iget the inode and fill in the appropriate structure. */ -int +static int xfs_bulkstat_ag_ichunk( struct xfs_mount *mp, xfs_agnumber_t agno, struct xfs_inobt_rec_incore *irbp, bulkstat_one_pf formatter, size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp) + struct xfs_bulkstat_agichunk *acp, + xfs_agino_t *last_agino) { - xfs_ino_t lastino = acp->ac_lastino; char __user **ubufp = acp->ac_ubuffer; - int ubleft = acp->ac_ubleft; - int ubelem = acp->ac_ubelem; - int chunkidx, clustidx; + int chunkidx; int error = 0; - xfs_agino_t agino; + xfs_agino_t agino = irbp->ir_startino; - for (agino = irbp->ir_startino, chunkidx = clustidx = 0; - XFS_BULKSTAT_UBLEFT(ubleft) && - irbp->ir_freecount < XFS_INODES_PER_CHUNK; - chunkidx++, clustidx++, agino++) { - int fmterror; /* bulkstat formatter result */ + for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; + chunkidx++, agino++) { + int fmterror; int ubused; - xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino); - ASSERT(chunkidx < XFS_INODES_PER_CHUNK); + /* inode won't fit in buffer, we are done */ + if (acp->ac_ubleft < statstruct_size) + break; /* Skip if this inode is free */ - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { - lastino = ino; + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) continue; - } - - /* - * Count used inodes as free so we can tell when the - * chunk is used up. - */ - irbp->ir_freecount++; /* Get the inode and fill in a single buffer */ ubused = statstruct_size; - error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror); - if (fmterror == BULKSTAT_RV_NOTHING) { - if (error && error != -ENOENT && error != -EINVAL) { - ubleft = 0; - break; - } - lastino = ino; - continue; - } - if (fmterror == BULKSTAT_RV_GIVEUP) { - ubleft = 0; + error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), + *ubufp, acp->ac_ubleft, &ubused, &fmterror); + + if (fmterror == BULKSTAT_RV_GIVEUP || + (error && error != -ENOENT && error != -EINVAL)) { + acp->ac_ubleft = 0; ASSERT(error); break; } - if (*ubufp) - *ubufp += ubused; - ubleft -= ubused; - ubelem++; - lastino = ino; + + /* be careful not to leak error if at end of chunk */ + if (fmterror == BULKSTAT_RV_NOTHING || error) { + error = 0; + continue; + } + + *ubufp += ubused; + acp->ac_ubleft -= ubused; + acp->ac_ubelem++; } - acp->ac_lastino = lastino; - acp->ac_ubleft = ubleft; - acp->ac_ubelem = ubelem; + /* + * Post-update *last_agino. At this point, agino will always point one + * inode past the last inode we processed successfully. Hence we + * substract that inode when setting the *last_agino cursor so that we + * return the correct cookie to userspace. On the next bulkstat call, + * the inode under the lastino cookie will be skipped as we have already + * processed it here. + */ + *last_agino = agino - 1; return error; } @@ -353,45 +356,33 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - int end_of_ag; /* set if we've seen the ag end */ - int error; /* error code */ - int fmterror;/* bulkstat formatter result */ - int i; /* loop index */ - int icount; /* count of inodes good in irbuf */ size_t irbsize; /* size of irec buffer in bytes */ - xfs_ino_t ino; /* inode number (filesystem) */ - xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ - xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ - xfs_ino_t lastino; /* last inode number returned */ int nirbuf; /* size of irbuf */ - int rval; /* return value error code */ - int tmp; /* result value from btree calls */ int ubcount; /* size of user's buffer */ - int ubleft; /* bytes left in user's buffer */ - char __user *ubufp; /* pointer into user's buffer */ - int ubelem; /* spaces used in user's buffer */ + struct xfs_bulkstat_agichunk ac; + int error = 0; /* * Get the last inode value, see if there's nothing to do. */ - ino = (xfs_ino_t)*lastinop; - lastino = ino; - agno = XFS_INO_TO_AGNO(mp, ino); - agino = XFS_INO_TO_AGINO(mp, ino); + agno = XFS_INO_TO_AGNO(mp, *lastinop); + agino = XFS_INO_TO_AGINO(mp, *lastinop); if (agno >= mp->m_sb.sb_agcount || - ino != XFS_AGINO_TO_INO(mp, agno, agino)) { + *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { *done = 1; *ubcountp = 0; return 0; } ubcount = *ubcountp; /* statstruct's */ - ubleft = ubcount * statstruct_size; /* bytes */ - *ubcountp = ubelem = 0; + ac.ac_ubuffer = &ubuffer; + ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; + ac.ac_ubelem = 0; + + *ubcountp = 0; *done = 0; - fmterror = 0; - ubufp = ubuffer; + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); if (!irbuf) return -ENOMEM; @@ -402,9 +393,13 @@ xfs_bulkstat( * Loop over the allocation groups, starting from the last * inode returned; 0 means start of the allocation group. */ - rval = 0; - while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { - cond_resched(); + while (agno < mp->m_sb.sb_agcount) { + struct xfs_inobt_rec_incore *irbp = irbuf; + struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; + bool end_of_ag = false; + int icount = 0; + int stat; + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); if (error) break; @@ -414,10 +409,6 @@ xfs_bulkstat( */ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO); - irbp = irbuf; - irbufend = irbuf + nirbuf; - end_of_ag = 0; - icount = 0; if (agino > 0) { /* * In the middle of an allocation group, we need to get @@ -427,22 +418,23 @@ xfs_bulkstat( error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); if (error) - break; + goto del_cursor; if (icount) { irbp->ir_startino = r.ir_startino; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; irbp++; - agino = r.ir_startino + XFS_INODES_PER_CHUNK; } /* Increment to the next record */ - error = xfs_btree_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &stat); } else { /* Start of ag. Lookup the first inode chunk */ - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); + } + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; } - if (error) - break; /* * Loop through inode btree records in this ag, @@ -451,10 +443,10 @@ xfs_bulkstat( while (irbp < irbufend && icount < ubcount) { struct xfs_inobt_rec_incore r; - error = xfs_inobt_get_rec(cur, &r, &i); - if (error || i == 0) { - end_of_ag = 1; - break; + error = xfs_inobt_get_rec(cur, &r, &stat); + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; } /* @@ -469,77 +461,79 @@ xfs_bulkstat( irbp++; icount += XFS_INODES_PER_CHUNK - r.ir_freecount; } - /* - * Set agino to after this chunk and bump the cursor. - */ - agino = r.ir_startino + XFS_INODES_PER_CHUNK; - error = xfs_btree_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &stat); + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; + } cond_resched(); } + /* - * Drop the btree buffers and the agi buffer. - * We can't hold any of the locks these represent - * when calling iget. + * Drop the btree buffers and the agi buffer as we can't hold any + * of the locks these represent when calling iget. If there is a + * pending error, then we are done. */ +del_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_buf_relse(agbp); + if (error) + break; /* - * Now format all the good inodes into the user's buffer. + * Now format all the good inodes into the user's buffer. The + * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer + * for the next loop iteration. */ irbufend = irbp; for (irbp = irbuf; - irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { - struct xfs_bulkstat_agichunk ac; - - ac.ac_lastino = lastino; - ac.ac_ubuffer = &ubuffer; - ac.ac_ubleft = ubleft; - ac.ac_ubelem = ubelem; + irbp < irbufend && ac.ac_ubleft >= statstruct_size; + irbp++) { error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, - formatter, statstruct_size, &ac); + formatter, statstruct_size, &ac, + &agino); if (error) - rval = error; - - lastino = ac.ac_lastino; - ubleft = ac.ac_ubleft; - ubelem = ac.ac_ubelem; + break; cond_resched(); } + /* - * Set up for the next loop iteration. + * If we've run out of space or had a formatting error, we + * are now done */ - if (XFS_BULKSTAT_UBLEFT(ubleft)) { - if (end_of_ag) { - agno++; - agino = 0; - } else - agino = XFS_INO_TO_AGINO(mp, lastino); - } else + if (ac.ac_ubleft < statstruct_size || error) break; + + if (end_of_ag) { + agno++; + agino = 0; + } } /* * Done, we're either out of filesystem or space to put the data. */ kmem_free(irbuf); - *ubcountp = ubelem; + *ubcountp = ac.ac_ubelem; + /* - * Found some inodes, return them now and return the error next time. + * We found some inodes, so clear the error status and return them. + * The lastino pointer will point directly at the inode that triggered + * any error that occurred, so on the next call the error will be + * triggered again and propagated to userspace as there will be no + * formatted inodes in the buffer. */ - if (ubelem) - rval = 0; - if (agno >= mp->m_sb.sb_agcount) { - /* - * If we ran out of filesystem, mark lastino as off - * the end of the filesystem, so the next call - * will return immediately. - */ - *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0); + if (ac.ac_ubelem) + error = 0; + + /* + * If we ran out of filesystem, lastino will point off the end of + * the filesystem so the next call will return immediately. + */ + *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); + if (agno >= mp->m_sb.sb_agcount) *done = 1; - } else - *lastinop = (xfs_ino_t)lastino; - return rval; + return error; } int diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index aaed08022eb9..6ea8b3912fa4 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, int *ubused, int *stat); -struct xfs_bulkstat_agichunk { - xfs_ino_t ac_lastino; /* last inode returned */ - char __user **ac_ubuffer;/* pointer into user's buffer */ - int ac_ubleft; /* bytes left in user's buffer */ - int ac_ubelem; /* spaces used in user's buffer */ -}; - -int -xfs_bulkstat_ag_ichunk( - struct xfs_mount *mp, - xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *irbp, - bulkstat_one_pf formatter, - size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp); - /* * Values for stat return value. */ |