diff options
Diffstat (limited to 'fs')
257 files changed, 10095 insertions, 6673 deletions
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f039b104a98e..b03dd23feda8 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -43,23 +43,6 @@ #include "fid.h" /** - * v9fs_dentry_delete - called when dentry refcount equals 0 - * @dentry: dentry in question - * - * By returning 1 here we should remove cacheing of unused - * dentry components. - * - */ - -static int v9fs_dentry_delete(const struct dentry *dentry) -{ - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); - - return 1; -} - -/** * v9fs_cached_dentry_delete - called when dentry refcount equals 0 * @dentry: dentry in question * @@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { }; const struct dentry_operations v9fs_dentry_operations = { - .d_delete = v9fs_dentry_delete, + .d_delete = always_delete_dentry, .d_release = v9fs_dentry_release, }; @@ -80,6 +80,8 @@ struct kioctx { struct percpu_ref users; atomic_t dead; + struct percpu_ref reqs; + unsigned long user_id; struct __percpu kioctx_cpu *cpu; @@ -107,7 +109,6 @@ struct kioctx { struct page **ring_pages; long nr_pages; - struct rcu_head rcu_head; struct work_struct free_work; struct { @@ -163,8 +164,8 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) struct file *file; struct path path; struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); - if (!inode) - return ERR_PTR(-ENOMEM); + if (IS_ERR(inode)) + return ERR_CAST(inode); inode->i_mapping->a_ops = &aio_ctx_aops; inode->i_mapping->private_data = ctx; @@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx) put_aio_ring_file(ctx); - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) + if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { kfree(ctx->ring_pages); + ctx->ring_pages = NULL; + } } static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) @@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) return cancel(kiocb); } -static void free_ioctx_rcu(struct rcu_head *head) +static void free_ioctx(struct work_struct *work) { - struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); + struct kioctx *ctx = container_of(work, struct kioctx, free_work); + + pr_debug("freeing %p\n", ctx); + aio_free_ring(ctx); free_percpu(ctx->cpu); kmem_cache_free(kioctx_cachep, ctx); } +static void free_ioctx_reqs(struct percpu_ref *ref) +{ + struct kioctx *ctx = container_of(ref, struct kioctx, reqs); + + INIT_WORK(&ctx->free_work, free_ioctx); + schedule_work(&ctx->free_work); +} + /* * When this function runs, the kioctx has been removed from the "hash table" * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ -static void free_ioctx(struct work_struct *work) +static void free_ioctx_users(struct percpu_ref *ref) { - struct kioctx *ctx = container_of(work, struct kioctx, free_work); - struct aio_ring *ring; + struct kioctx *ctx = container_of(ref, struct kioctx, users); struct kiocb *req; - unsigned cpu, avail; - DEFINE_WAIT(wait); spin_lock_irq(&ctx->ctx_lock); @@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work) spin_unlock_irq(&ctx->ctx_lock); - for_each_possible_cpu(cpu) { - struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); - - atomic_add(kcpu->reqs_available, &ctx->reqs_available); - kcpu->reqs_available = 0; - } - - while (1) { - prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE); - - ring = kmap_atomic(ctx->ring_pages[0]); - avail = (ring->head <= ring->tail) - ? ring->tail - ring->head - : ctx->nr_events - ring->head + ring->tail; - - atomic_add(avail, &ctx->reqs_available); - ring->head = ring->tail; - kunmap_atomic(ring); - - if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1) - break; - - schedule(); - } - finish_wait(&ctx->wait, &wait); - - WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1); - - aio_free_ring(ctx); - - pr_debug("freeing %p\n", ctx); - - /* - * Here the call_rcu() is between the wait_event() for reqs_active to - * hit 0, and freeing the ioctx. - * - * aio_complete() decrements reqs_active, but it has to touch the ioctx - * after to issue a wakeup so we use rcu. - */ - call_rcu(&ctx->rcu_head, free_ioctx_rcu); -} - -static void free_ioctx_ref(struct percpu_ref *ref) -{ - struct kioctx *ctx = container_of(ref, struct kioctx, users); - - INIT_WORK(&ctx->free_work, free_ioctx); - schedule_work(&ctx->free_work); + percpu_ref_kill(&ctx->reqs); + percpu_ref_put(&ctx->reqs); } static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) @@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) } } +static void aio_nr_sub(unsigned nr) +{ + spin_lock(&aio_nr_lock); + if (WARN_ON(aio_nr - nr > aio_nr)) + aio_nr = 0; + else + aio_nr -= nr; + spin_unlock(&aio_nr_lock); +} + /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ @@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->max_reqs = nr_events; - if (percpu_ref_init(&ctx->users, free_ioctx_ref)) - goto out_freectx; + if (percpu_ref_init(&ctx->users, free_ioctx_users)) + goto err; + + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) + goto err; spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->completion_lock); @@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->cpu = alloc_percpu(struct kioctx_cpu); if (!ctx->cpu) - goto out_freeref; + goto err; if (aio_setup_ring(ctx) < 0) - goto out_freepcpu; + goto err; atomic_set(&ctx->reqs_available, ctx->nr_events - 1); ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); @@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) if (aio_nr + nr_events > (aio_max_nr * 2UL) || aio_nr + nr_events < aio_nr) { spin_unlock(&aio_nr_lock); - goto out_cleanup; + err = -EAGAIN; + goto err; } aio_nr += ctx->max_reqs; spin_unlock(&aio_nr_lock); @@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err = ioctx_add_table(ctx, mm); if (err) - goto out_cleanup_put; + goto err_cleanup; pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, mm, ctx->nr_events); return ctx; -out_cleanup_put: - percpu_ref_put(&ctx->users); -out_cleanup: - err = -EAGAIN; - aio_free_ring(ctx); -out_freepcpu: +err_cleanup: + aio_nr_sub(ctx->max_reqs); +err: free_percpu(ctx->cpu); -out_freeref: + free_percpu(ctx->reqs.pcpu_count); free_percpu(ctx->users.pcpu_count); -out_freectx: - put_aio_ring_file(ctx); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); @@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) * -EAGAIN with no ioctxs actually in use (as far as userspace * could tell). */ - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - ctx->max_reqs > aio_nr); - aio_nr -= ctx->max_reqs; - spin_unlock(&aio_nr_lock); + aio_nr_sub(ctx->max_reqs); if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); @@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) if (unlikely(!req)) goto out_put; + percpu_ref_get(&ctx->reqs); + req->ki_ctx = ctx; return req; out_put: @@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) return; } - /* - * Take rcu_read_lock() in case the kioctx is being destroyed, as we - * need to issue a wakeup after incrementing reqs_available. - */ - rcu_read_lock(); - if (iocb->ki_list.next) { unsigned long flags; @@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - rcu_read_unlock(); + percpu_ref_put(&ctx->reqs); } EXPORT_SYMBOL(aio_complete); @@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, return 0; out_put_req: put_reqs_available(ctx, 1); + percpu_ref_put(&ctx->reqs); kiocb_free(req); return ret; } @@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, - unsigned short max_sectors) + unsigned int max_sectors) { int retried_segments = 0; struct bio_vec *bvec; @@ -1805,6 +1805,52 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) EXPORT_SYMBOL(bio_split); /** + * bio_trim - trim a bio + * @bio: bio to trim + * @offset: number of sectors to trim from the front of @bio + * @size: size we want to trim @bio to, in sectors + */ +void bio_trim(struct bio *bio, int offset, int size) +{ + /* 'bio' is a cloned bio which we need to trim to match + * the given offset and size. + * This requires adjusting bi_sector, bi_size, and bi_io_vec + */ + int i; + struct bio_vec *bvec; + int sofar = 0; + + size <<= 9; + if (offset == 0 && size == bio->bi_size) + return; + + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + + bio_advance(bio, offset << 9); + + bio->bi_size = size; + + /* avoid any complications with bi_idx being non-zero*/ + if (bio->bi_idx) { + memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, + (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); + bio->bi_vcnt -= bio->bi_idx; + bio->bi_idx = 0; + } + /* Make sure vcnt and last bv are not too big */ + bio_for_each_segment(bvec, bio, i) { + if (sofar + bvec->bv_len > size) + bvec->bv_len = size - sofar; + if (bvec->bv_len == 0) { + bio->bi_vcnt = i; + break; + } + sofar += bvec->bv_len; + } +} +EXPORT_SYMBOL_GPL(bio_trim); + +/** * bio_sector_offset - Find hardware sector offset in bio * @bio: bio to inspect * @index: bio_vec index diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 398cbd517be2..aa976eced2d2 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -9,12 +9,17 @@ config BTRFS_FS select XOR_BLOCKS help - Btrfs is a new filesystem with extents, writable snapshotting, - support for multiple devices and many more features. + Btrfs is a general purpose copy-on-write filesystem with extents, + writable snapshotting, support for multiple devices and many more + features focused on fault tolerance, repair and easy administration. - Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET - FINALIZED. You should say N here unless you are interested in - testing Btrfs with non-critical data. + The filesystem disk format is no longer unstable, and it's not + expected to change unless there are strong reasons to do so. If there + is a format change, file systems with a unchanged format will + continue to be mountable and usable by newer kernels. + + For more information, please see the web pages at + http://btrfs.wiki.kernel.org. To compile this file system support as a module, choose M here. The module will be called btrfs. @@ -59,7 +64,8 @@ config BTRFS_FS_RUN_SANITY_TESTS help This will run some basic sanity tests on the free space cache code to make sure it is acting as it should. These are mostly - regression tests and are only really interesting to btrfs devlopers. + regression tests and are only really interesting to btrfs + developers. If unsure, say N. diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a91a6a355cc5..1a44e42d602a 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -14,4 +14,6 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o -btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o +btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \ + tests/extent-buffer-tests.o tests/btrfs-tests.o \ + tests/extent-io-tests.o tests/inode-tests.o diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index e15d2b0d8d3b..0890c83643e9 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, if (ret > 0) { /* we need an acl */ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); - } else { + } else if (ret < 0) { cache_no_acl(inode); } } else { diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 08cc08f037a6..c1e0b0caf9cc 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -262,7 +262,7 @@ static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, struct btrfs_work *work = NULL; struct list_head *cur = NULL; - if(!list_empty(prio_head)) + if (!list_empty(prio_head)) cur = prio_head->next; smp_mb(); @@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) spin_lock_irq(&workers->lock); if (workers->stopping) { spin_unlock_irq(&workers->lock); + ret = -EINVAL; goto fail_kthread; } list_add_tail(&worker->worker_list, &workers->idle_list); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 0552a599b28f..3775947429b2 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -185,6 +185,9 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, { struct __prelim_ref *ref; + if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID) + return 0; + ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; @@ -323,8 +326,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, eb = path->nodes[level]; while (!eb) { - if (!level) { - WARN_ON(1); + if (WARN_ON(!level)) { ret = 1; goto out; } @@ -1619,7 +1621,7 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); - item = btrfs_item_nr(eb, slot); + item = btrfs_item_nr(slot); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 71f074e1870b..ac0b39db27d1 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -19,6 +19,7 @@ #ifndef __BTRFS_I__ #define __BTRFS_I__ +#include <linux/hash.h> #include "extent_map.h" #include "extent_io.h" #include "ordered-data.h" @@ -179,6 +180,25 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode) return container_of(inode, struct btrfs_inode, vfs_inode); } +static inline unsigned long btrfs_inode_hash(u64 objectid, + const struct btrfs_root *root) +{ + u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME); + +#if BITS_PER_LONG == 32 + h = (h >> 32) ^ (h & 0xffffffff); +#endif + + return (unsigned long)h; +} + +static inline void btrfs_insert_inode_hash(struct inode *inode) +{ + unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root); + + __insert_inode_hash(inode, h); +} + static inline u64 btrfs_ino(struct inode *inode) { u64 ino = BTRFS_I(inode)->location.objectid; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 1c47be187240..b50764bef141 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -77,6 +77,15 @@ * the integrity of (super)-block write requests, do not * enable the config option BTRFS_FS_CHECK_INTEGRITY to * include and compile the integrity check tool. + * + * Expect millions of lines of information in the kernel log with an + * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the + * kernel config to at least 26 (which is 64MB). Usually the value is + * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be + * changed like this before LOG_BUF_SHIFT can be set to a high value: + * config LOG_BUF_SHIFT + * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" + * range 12 30 */ #include <linux/sched.h> @@ -124,6 +133,7 @@ #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 +#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000 struct btrfsic_dev_state; struct btrfsic_state; @@ -1038,7 +1048,7 @@ leaf_item_out_of_bounce_error: disk_item_offset, sizeof(struct btrfs_item)); item_offset = btrfs_stack_item_offset(&disk_item); - item_size = btrfs_stack_item_offset(&disk_item); + item_size = btrfs_stack_item_size(&disk_item); disk_key = &disk_item.key; type = btrfs_disk_key_type(disk_key); @@ -1900,7 +1910,9 @@ again: dev_state, dev_bytenr); } - if (block->logical_bytenr != bytenr) { + if (block->logical_bytenr != bytenr && + !(!block->is_metadata && + block->logical_bytenr == 0)) printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c," @@ -1910,15 +1922,14 @@ again: block->mirror_num, btrfsic_get_block_type(state, block), block->logical_bytenr); - block->logical_bytenr = bytenr; - } else if (state->print_mask & - BTRFSIC_PRINT_MASK_VERBOSE) + else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); + block->logical_bytenr = bytenr; } else { if (num_pages * PAGE_CACHE_SIZE < state->datablock_size) { @@ -2463,10 +2474,8 @@ static int btrfsic_process_written_superblock( } } - if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { - WARN_ON(1); + if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0))) btrfsic_dump_tree(state); - } return 0; } @@ -2906,7 +2915,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, btrfsic_release_block_ctx(&block_ctx); } - if (!match) { + if (WARN_ON(!match)) { printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," " buffer->log_bytenr=%llu, submit_bio(bdev=%s," " phys_bytenr=%llu)!\n", @@ -2923,7 +2932,6 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, bytenr, block_ctx.dev->name, block_ctx.dev_bytenr, mirror_num); } - WARN_ON(1); } } @@ -3017,6 +3025,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) (rw & WRITE) && NULL != bio->bi_io_vec) { unsigned int i; u64 dev_bytenr; + u64 cur_bytenr; int bio_is_patched; char **mapped_datav; @@ -3035,6 +3044,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) GFP_NOFS); if (!mapped_datav) goto leave; + cur_bytenr = dev_bytenr; for (i = 0; i < bio->bi_vcnt; i++) { BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); @@ -3046,16 +3056,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio) kfree(mapped_datav); goto leave; } - if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | - BTRFSIC_PRINT_MASK_VERBOSE) == - (dev_state->state->print_mask & - (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | - BTRFSIC_PRINT_MASK_VERBOSE))) + if (dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) printk(KERN_INFO - "#%u: page=%p, len=%u, offset=%u\n", - i, bio->bi_io_vec[i].bv_page, - bio->bi_io_vec[i].bv_len, + "#%u: bytenr=%llu, len=%u, offset=%u\n", + i, cur_bytenr, bio->bi_io_vec[i].bv_len, bio->bi_io_vec[i].bv_offset); + cur_bytenr += bio->bi_io_vec[i].bv_len; } btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, bio->bi_vcnt, diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h deleted file mode 100644 index 7c4503ef6efd..000000000000 --- a/fs/btrfs/compat.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _COMPAT_H_ -#define _COMPAT_H_ - -#define btrfs_drop_nlink(inode) drop_nlink(inode) -#define btrfs_inc_nlink(inode) inc_nlink(inode) - -#endif /* _COMPAT_H_ */ diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 6aad98cb343f..1499b27b4186 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -32,7 +32,6 @@ #include <linux/writeback.h> #include <linux/bit_spinlock.h> #include <linux/slab.h> -#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -360,7 +359,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); - if(!bio) { + if (!bio) { kfree(cb); return -ENOMEM; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 61b5bcd57b7e..316136bd6dd7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -274,7 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); @@ -996,7 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); @@ -1285,11 +1285,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); blocksize = btrfs_level_size(root, old_root->level); old = read_tree_block(root, logical, blocksize, 0); - if (!old || !extent_buffer_uptodate(old)) { + if (WARN_ON(!old || !extent_buffer_uptodate(old))) { free_extent_buffer(old); pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", logical); - WARN_ON(1); } else { eb = btrfs_clone_extent_buffer(old); free_extent_buffer(old); @@ -2758,7 +2757,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, int level; int lowest_unlock = 1; u8 lowest_level = 0; - int prev_cmp; + int prev_cmp = -1; lowest_level = p->lowest_level; WARN_ON(p->nodes[0] != NULL); @@ -2769,7 +2768,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, } again: - prev_cmp = -1; b = get_old_root(root, time_seq); level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; @@ -2787,6 +2785,11 @@ again: */ btrfs_unlock_up_safe(p, level + 1); + /* + * Since we can unwind eb's we want to do a real search every + * time. + */ + prev_cmp = -1; ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { @@ -3148,7 +3151,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c), + write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, @@ -3287,7 +3290,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, - btrfs_header_fsid(split), BTRFS_FSID_SIZE); + btrfs_header_fsid(), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); @@ -3337,8 +3340,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) if (!nr) return 0; btrfs_init_map_token(&token); - start_item = btrfs_item_nr(l, start); - end_item = btrfs_item_nr(l, end); + start_item = btrfs_item_nr(start); + end_item = btrfs_item_nr(end); data_len = btrfs_token_item_offset(l, start_item, &token) + btrfs_token_item_size(l, start_item, &token); data_len = data_len - btrfs_token_item_offset(l, end_item, &token); @@ -3406,7 +3409,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, slot = path->slots[1]; i = left_nritems - 1; while (i >= nr) { - item = btrfs_item_nr(left, i); + item = btrfs_item_nr(i); if (!empty && push_items > 0) { if (path->slots[0] > i) @@ -3470,7 +3473,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - item = btrfs_item_nr(right, i); + item = btrfs_item_nr(i); push_space -= btrfs_token_item_size(right, item, &token); btrfs_set_token_item_offset(right, item, push_space, &token); } @@ -3612,7 +3615,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, nr = min(right_nritems - 1, max_slot); for (i = 0; i < nr; i++) { - item = btrfs_item_nr(right, i); + item = btrfs_item_nr(i); if (!empty && push_items > 0) { if (path->slots[0] < i) @@ -3639,8 +3642,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, ret = 1; goto out; } - if (!empty && push_items == btrfs_header_nritems(right)) - WARN_ON(1); + WARN_ON(!empty && push_items == btrfs_header_nritems(right)); /* push data from right to left */ copy_extent_buffer(left, right, @@ -3663,7 +3665,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; - item = btrfs_item_nr(left, i); + item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(left, item, &token); btrfs_set_token_item_offset(left, item, @@ -3694,7 +3696,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - item = btrfs_item_nr(right, i); + item = btrfs_item_nr(i); push_space = push_space - btrfs_token_item_size(right, item, &token); @@ -3835,7 +3837,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_item_end_nr(l, mid); for (i = 0; i < nritems; i++) { - struct btrfs_item *item = btrfs_item_nr(right, i); + struct btrfs_item *item = btrfs_item_nr(i); u32 ioff; ioff = btrfs_token_item_offset(right, item, &token); @@ -4016,7 +4018,7 @@ again: data_size > BTRFS_LEAF_DATA_SIZE(root)) { if (data_size && !tried_avoid_double) goto push_for_double; - split = 2 ; + split = 2; } } } @@ -4042,7 +4044,7 @@ again: btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); write_extent_buffer(right, root->fs_info->fsid, - btrfs_header_fsid(right), BTRFS_FSID_SIZE); + btrfs_header_fsid(), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(right), @@ -4177,7 +4179,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans, btrfs_set_path_blocking(path); - item = btrfs_item_nr(leaf, path->slots[0]); + item = btrfs_item_nr(path->slots[0]); orig_offset = btrfs_item_offset(leaf, item); item_size = btrfs_item_size(leaf, item); @@ -4200,7 +4202,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans, btrfs_cpu_key_to_disk(&disk_key, new_key); btrfs_set_item_key(leaf, &disk_key, slot); - new_item = btrfs_item_nr(leaf, slot); + new_item = btrfs_item_nr(slot); btrfs_set_item_offset(leaf, new_item, orig_offset); btrfs_set_item_size(leaf, new_item, item_size - split_offset); @@ -4339,7 +4341,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path, /* first correct the data pointers */ for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(leaf, i); + item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, @@ -4387,7 +4389,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path, fixup_low_keys(root, path, &disk_key, 1); } - item = btrfs_item_nr(leaf, slot); + item = btrfs_item_nr(slot); btrfs_set_item_size(leaf, item, new_size); btrfs_mark_buffer_dirty(leaf); @@ -4441,7 +4443,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path, /* first correct the data pointers */ for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(leaf, i); + item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, @@ -4455,7 +4457,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path, data_end = old_data; old_size = btrfs_item_size_nr(leaf, slot); - item = btrfs_item_nr(leaf, slot); + item = btrfs_item_nr(slot); btrfs_set_item_size(leaf, item, old_size + data_size); btrfs_mark_buffer_dirty(leaf); @@ -4514,7 +4516,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(leaf, i); + item = btrfs_item_nr( i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, ioff - total_data, &token); @@ -4535,7 +4537,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; i < nr; i++) { btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); btrfs_set_item_key(leaf, &disk_key, slot + i); - item = btrfs_item_nr(leaf, slot + i); + item = btrfs_item_nr(slot + i); btrfs_set_token_item_offset(leaf, item, data_end - data_size[i], &token); data_end -= data_size[i]; @@ -4730,7 +4732,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, for (i = slot + nr; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(leaf, i); + item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, ioff + dsize, &token); @@ -4823,14 +4825,18 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) btrfs_item_key_to_cpu(path->nodes[0], &key, 0); - if (key.offset > 0) + if (key.offset > 0) { key.offset--; - else if (key.type > 0) + } else if (key.type > 0) { key.type--; - else if (key.objectid > 0) + key.offset = (u64)-1; + } else if (key.objectid > 0) { key.objectid--; - else + key.type = (u8)-1; + key.offset = (u64)-1; + } else { return 1; + } btrfs_release_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -4866,7 +4872,6 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) * was nothing in the tree that matched the search criteria. */ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, - struct btrfs_key *max_key, struct btrfs_path *path, u64 min_trans) { @@ -4911,10 +4916,8 @@ again: * If it is too old, old, skip to the next one. */ while (slot < nritems) { - u64 blockptr; u64 gen; - blockptr = btrfs_node_blockptr(cur, slot); gen = btrfs_node_ptr_generation(cur, slot); if (gen < min_trans) { slot++; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0506f40ede83..54ab86127f7a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -47,6 +47,12 @@ extern struct kmem_cache *btrfs_path_cachep; extern struct kmem_cache *btrfs_free_space_cachep; struct btrfs_ordered_sum; +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +#define STATIC noinline +#else +#define STATIC static noinline +#endif + #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ #define BTRFS_MAX_MIRRORS 3 @@ -1580,7 +1586,6 @@ struct btrfs_fs_info { atomic_t scrubs_paused; atomic_t scrub_cancel_req; wait_queue_head_t scrub_pause_wait; - struct rw_semaphore scrub_super_lock; int scrub_workers_refcnt; struct btrfs_workers scrub_workers; struct btrfs_workers scrub_wr_completion_workers; @@ -1724,7 +1729,9 @@ struct btrfs_root { int ref_cows; int track_dirty; int in_radix; - +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + int dummy_root; +#endif u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; @@ -2461,8 +2468,7 @@ static inline unsigned long btrfs_item_nr_offset(int nr) sizeof(struct btrfs_item) * nr; } -static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, - int nr) +static inline struct btrfs_item *btrfs_item_nr(int nr) { return (struct btrfs_item *)btrfs_item_nr_offset(nr); } @@ -2475,30 +2481,30 @@ static inline u32 btrfs_item_end(struct extent_buffer *eb, static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) { - return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); + return btrfs_item_end(eb, btrfs_item_nr(nr)); } static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) { - return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); + return btrfs_item_offset(eb, btrfs_item_nr(nr)); } static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) { - return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); + return btrfs_item_size(eb, btrfs_item_nr(nr)); } static inline void btrfs_item_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { - struct btrfs_item *item = btrfs_item_nr(eb, nr); + struct btrfs_item *item = btrfs_item_nr(nr); read_eb_member(eb, item, struct btrfs_item, key, disk_key); } static inline void btrfs_set_item_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { - struct btrfs_item *item = btrfs_item_nr(eb, nr); + struct btrfs_item *item = btrfs_item_nr(nr); write_eb_member(eb, item, struct btrfs_item, key, disk_key); } @@ -2666,7 +2672,7 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, btrfs_set_header_flags(eb, flags); } -static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb) +static inline unsigned long btrfs_header_fsid(void) { return offsetof(struct btrfs_header, fsid); } @@ -3105,11 +3111,6 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) -static inline struct dentry *fdentry(struct file *file) -{ - return file->f_path.dentry; -} - static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) { return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && @@ -3308,7 +3309,6 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int lowest_level, u64 min_trans); int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, - struct btrfs_key *max_key, struct btrfs_path *path, u64 min_trans); enum btrfs_compare_tree_result { @@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig); -int btrfs_csum_truncate(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 isize); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); /* inode.c */ @@ -3675,8 +3672,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u32 min_type); int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); -int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, - int delay_iput); +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, struct extent_state **cached_state); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, @@ -3745,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); -int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, - u64 start, u64 end, int skip_pinned, - int modified); extern const struct file_operations btrfs_file_operations; int __btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, @@ -3944,9 +3937,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress, int readonly, int is_dev_replace); void btrfs_scrub_pause(struct btrfs_root *root); -void btrfs_scrub_pause_super(struct btrfs_root *root); void btrfs_scrub_continue(struct btrfs_root *root); -void btrfs_scrub_continue_super(struct btrfs_root *root); int btrfs_scrub_cancel(struct btrfs_fs_info *info); int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, struct btrfs_device *dev); @@ -4028,5 +4019,9 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) return signal_pending(current); } +/* Sanity test specific functions */ +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_destroy_inode(struct inode *inode); +#endif #endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index cbd9523ad09c..8d292fbae659 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -108,8 +108,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) return node; } btrfs_inode->delayed_node = node; - atomic_inc(&node->refs); /* can be accessed */ - atomic_inc(&node->refs); /* cached in the inode */ + /* can be accessed and cached in the inode */ + atomic_add(2, &node->refs); spin_unlock(&root->inode_lock); return node; } @@ -138,8 +138,8 @@ again: return ERR_PTR(-ENOMEM); btrfs_init_delayed_node(node, root, ino); - atomic_inc(&node->refs); /* cached in the btrfs inode */ - atomic_inc(&node->refs); /* can be accessed */ + /* cached in the btrfs inode and can be accessed */ + atomic_add(2, &node->refs); ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); if (ret) { @@ -649,14 +649,13 @@ static int btrfs_delayed_inode_reserve_metadata( goto out; ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); - if (!ret) + if (!WARN_ON(ret)) goto out; /* * Ok this is a problem, let's just steal from the global rsv * since this really shouldn't happen that often. */ - WARN_ON(1); ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv, dst_rsv, num_bytes); goto out; @@ -771,13 +770,13 @@ static int btrfs_batch_insert_items(struct btrfs_root *root, */ btrfs_set_path_blocking(path); - keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS); + keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS); if (!keys) { ret = -ENOMEM; goto out; } - data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS); + data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS); if (!data_size) { ret = -ENOMEM; goto error; @@ -1174,8 +1173,10 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, mutex_unlock(&delayed_node->mutex); path = btrfs_alloc_path(); - if (!path) + if (!path) { + btrfs_release_delayed_node(delayed_node); return -ENOMEM; + } path->leave_spinning = 1; block_rsv = trans->block_rsv; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 9efb94e95858..2cfc3dfff64f 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -26,7 +26,6 @@ #include <linux/kthread.h> #include <linux/math64.h> #include <asm/div64.h> -#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -38,7 +37,6 @@ #include "rcu-string.h" #include "dev-replace.h" -static u64 btrfs_get_seconds_since_1970(void); static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, int scrub_ret); static void btrfs_dev_replace_update_device_in_mapping_tree( @@ -296,13 +294,6 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) dev_replace->cursor_left_last_write_of_item; } -static u64 btrfs_get_seconds_since_1970(void) -{ - struct timespec t = CURRENT_TIME_SEC; - - return t.tv_sec; -} - int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_ioctl_dev_replace_args *args) { @@ -375,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, dev_replace->tgtdev = tgt_device; printk_in_rcu(KERN_INFO - "btrfs: dev_replace from %s (devid %llu) to %s) started\n", + "btrfs: dev_replace from %s (devid %llu) to %s started\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, @@ -390,7 +381,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, * go to the tgtdev as well (refer to btrfs_map_block()). */ dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; - dev_replace->time_started = btrfs_get_seconds_since_1970(); + dev_replace->time_started = get_seconds(); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; @@ -400,7 +391,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; btrfs_dev_replace_unlock(dev_replace); - btrfs_wait_all_ordered_extents(root->fs_info); + btrfs_wait_ordered_roots(root->fs_info, -1); /* force writing the updated state information to disk */ trans = btrfs_start_transaction(root, 0); @@ -470,12 +461,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, * flush all outstanding I/O and inode extent mappings before the * copy operation is declared as being finished */ - ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0); + ret = btrfs_start_delalloc_roots(root->fs_info, 0); if (ret) { mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return ret; } - btrfs_wait_all_ordered_extents(root->fs_info); + btrfs_wait_ordered_roots(root->fs_info, -1); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { @@ -493,7 +484,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; dev_replace->tgtdev = NULL; dev_replace->srcdev = NULL; - dev_replace->time_stopped = btrfs_get_seconds_since_1970(); + dev_replace->time_stopped = get_seconds(); dev_replace->item_needs_writeback = 1; if (scrub_ret) { @@ -650,6 +641,9 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) u64 result; int ret; + if (fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + mutex_lock(&dev_replace->lock_finishing_cancel_unmount); btrfs_dev_replace_lock(dev_replace); switch (dev_replace->replace_state) { @@ -668,7 +662,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) break; } dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; - dev_replace->time_stopped = btrfs_get_seconds_since_1970(); + dev_replace->time_stopped = get_seconds(); dev_replace->item_needs_writeback = 1; btrfs_dev_replace_unlock(dev_replace); btrfs_scrub_cancel(fs_info); @@ -703,7 +697,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; - dev_replace->time_stopped = btrfs_get_seconds_since_1970(); + dev_replace->time_stopped = get_seconds(); dev_replace->item_needs_writeback = 1; pr_info("btrfs: suspending dev_replace for unmount\n"); break; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 79e594e341c7..c031ea3fd70f 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -58,7 +58,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return ERR_PTR(ret); WARN_ON(ret > 0); leaf = path->nodes[0]; - item = btrfs_item_nr(leaf, path->slots[0]); + item = btrfs_item_nr(path->slots[0]); ptr = btrfs_item_ptr(leaf, path->slots[0], char); BUG_ON(data_size > btrfs_item_size(leaf, item)); ptr += btrfs_item_size(leaf, item) - data_size; @@ -474,8 +474,10 @@ int verify_dir_item(struct btrfs_root *root, } /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ - if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) { - printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n", + if ((btrfs_dir_data_len(leaf, dir_item) + + btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) { + printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n", + (unsigned)btrfs_dir_name_len(leaf, dir_item), (unsigned)btrfs_dir_data_len(leaf, dir_item)); return 1; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62176ad89846..8072cfa8a3b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -33,7 +33,6 @@ #include <linux/uuid.h> #include <linux/semaphore.h> #include <asm/unaligned.h> -#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -64,7 +63,6 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); -static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t); static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); static int btrfs_destroy_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, @@ -477,14 +475,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (page != eb->pages[0]) return 0; found_start = btrfs_header_bytenr(eb); - if (found_start != start) { - WARN_ON(1); + if (WARN_ON(found_start != start || !PageUptodate(page))) return 0; - } - if (!PageUptodate(page)) { - WARN_ON(1); - return 0; - } csum_tree_block(root, eb, 0); return 0; } @@ -496,7 +488,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, u8 fsid[BTRFS_UUID_SIZE]; int ret = 1; - read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE); + read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); while (fs_devices) { if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { ret = 0; @@ -1105,8 +1097,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize); + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr); return eb; } @@ -1229,14 +1220,18 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->refs, 1); root->log_transid = 0; root->last_log_commit = 0; - extent_io_tree_init(&root->dirty_log_pages, - fs_info->btree_inode->i_mapping); + if (fs_info) + extent_io_tree_init(&root->dirty_log_pages, + fs_info->btree_inode->i_mapping); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); - root->defrag_trans_start = fs_info->generation; + if (fs_info) + root->defrag_trans_start = fs_info->generation; + else + root->defrag_trans_start = 0; init_completion(&root->kobj_unregister); root->defrag_running = 0; root->root_key.objectid = objectid; @@ -1253,6 +1248,22 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) return root; } +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +/* Should only be used by the testing infrastructure */ +struct btrfs_root *btrfs_alloc_dummy_root(void) +{ + struct btrfs_root *root; + + root = btrfs_alloc_root(NULL); + if (!root) + return ERR_PTR(-ENOMEM); + __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); + root->dummy_root = 1; + + return root; +} +#endif + struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 objectid) @@ -1292,7 +1303,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_header_owner(leaf, objectid); root->node = leaf; - write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf), + write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); write_extent_buffer(leaf, fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(leaf), @@ -1379,7 +1390,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->node = leaf; write_extent_buffer(root->node, root->fs_info->fsid, - btrfs_header_fsid(root->node), BTRFS_FSID_SIZE); + btrfs_header_fsid(), BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; @@ -1780,6 +1791,9 @@ sleep: wake_up_process(root->fs_info->cleaner_kthread); mutex_unlock(&root->fs_info->transaction_kthread_mutex); + if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, + &root->fs_info->fs_state))) + btrfs_cleanup_transaction(root); if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && @@ -2013,50 +2027,28 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->qgroup_rescan_workers); } +static void free_root_extent_buffers(struct btrfs_root *root) +{ + if (root) { + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + root->node = NULL; + root->commit_root = NULL; + } +} + /* helper to cleanup tree roots */ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) { - free_extent_buffer(info->tree_root->node); - free_extent_buffer(info->tree_root->commit_root); - info->tree_root->node = NULL; - info->tree_root->commit_root = NULL; - - if (info->dev_root) { - free_extent_buffer(info->dev_root->node); - free_extent_buffer(info->dev_root->commit_root); - info->dev_root->node = NULL; - info->dev_root->commit_root = NULL; - } - if (info->extent_root) { - free_extent_buffer(info->extent_root->node); - free_extent_buffer(info->extent_root->commit_root); - info->extent_root->node = NULL; - info->extent_root->commit_root = NULL; - } - if (info->csum_root) { - free_extent_buffer(info->csum_root->node); - free_extent_buffer(info->csum_root->commit_root); - info->csum_root->node = NULL; - info->csum_root->commit_root = NULL; - } - if (info->quota_root) { - free_extent_buffer(info->quota_root->node); - free_extent_buffer(info->quota_root->commit_root); - info->quota_root->node = NULL; - info->quota_root->commit_root = NULL; - } - if (info->uuid_root) { - free_extent_buffer(info->uuid_root->node); - free_extent_buffer(info->uuid_root->commit_root); - info->uuid_root->node = NULL; - info->uuid_root->commit_root = NULL; - } - if (chunk_root) { - free_extent_buffer(info->chunk_root->node); - free_extent_buffer(info->chunk_root->commit_root); - info->chunk_root->node = NULL; - info->chunk_root->commit_root = NULL; - } + free_root_extent_buffers(info->tree_root); + + free_root_extent_buffers(info->dev_root); + free_root_extent_buffers(info->extent_root); + free_root_extent_buffers(info->csum_root); + free_root_extent_buffers(info->quota_root); + free_root_extent_buffers(info->uuid_root); + if (chunk_root) + free_root_extent_buffers(info->chunk_root); } static void del_fs_roots(struct btrfs_fs_info *fs_info) @@ -2230,7 +2222,6 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->scrubs_paused, 0); atomic_set(&fs_info->scrub_cancel_req, 0); init_waitqueue_head(&fs_info->scrub_pause_wait); - init_rwsem(&fs_info->scrub_super_lock); fs_info->scrub_workers_refcnt = 0; #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY fs_info->check_integrity_print_mask = 0; @@ -2272,7 +2263,7 @@ int open_ctree(struct super_block *sb, sizeof(struct btrfs_key)); set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(fs_info->btree_inode)->runtime_flags); - insert_inode_hash(fs_info->btree_inode); + btrfs_insert_inode_hash(fs_info->btree_inode); spin_lock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree = RB_ROOT; @@ -2670,6 +2661,7 @@ retry_root_backup: btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); + btrfs_set_root_refs(&tree_root->root_item, 1); location.objectid = BTRFS_EXTENT_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; @@ -3448,10 +3440,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors) { - int ret; - - ret = write_all_supers(root, max_mirrors); - return ret; + return write_all_supers(root, max_mirrors); } /* Drop a fs root from the radix tree and free it. */ @@ -3528,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) int btrfs_commit_super(struct btrfs_root *root) { struct btrfs_trans_handle *trans; - int ret; mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); @@ -3542,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - /* run commit again to drop the original snapshot */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - ret = btrfs_write_and_wait_transaction(NULL, root); - if (ret) { - btrfs_error(root->fs_info, ret, - "Failed to sync btree inode to disk."); - return ret; - } - - ret = write_ctree_super(NULL, root, 0); - return ret; + return btrfs_commit_transaction(trans, root); } int close_ctree(struct btrfs_root *root) @@ -3614,12 +3584,12 @@ int close_ctree(struct btrfs_root *root) percpu_counter_sum(&fs_info->delalloc_bytes)); } + del_fs_roots(fs_info); + btrfs_free_block_groups(fs_info); btrfs_stop_all_workers(fs_info); - del_fs_roots(fs_info); - free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); @@ -3669,10 +3639,20 @@ int btrfs_set_buffer_uptodate(struct extent_buffer *buf) void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root; u64 transid = btrfs_header_generation(buf); int was_dirty; +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + /* + * This is a fast path so only do this check if we have sanity tests + * enabled. Normal people shouldn't be marking dummy buffers as dirty + * outside of the sanity tests. + */ + if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags))) + return; +#endif + root = BTRFS_I(buf->pages[0]->mapping->host)->root; btrfs_assert_tree_locked(buf); if (transid != root->fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " @@ -3802,7 +3782,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); - list_del_init(&root->ordered_root); + list_move_tail(&root->ordered_root, + &fs_info->ordered_roots); btrfs_destroy_ordered_extents(root); @@ -3880,24 +3861,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, return ret; } -static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t) -{ - struct btrfs_pending_snapshot *snapshot; - struct list_head splice; - - INIT_LIST_HEAD(&splice); - - list_splice_init(&t->pending_snapshots, &splice); - - while (!list_empty(&splice)) { - snapshot = list_entry(splice.next, - struct btrfs_pending_snapshot, - list); - snapshot->error = -ECANCELED; - list_del_init(&snapshot->list); - } -} - static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; @@ -4027,15 +3990,13 @@ again: void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { + btrfs_destroy_ordered_operations(cur_trans, root); + btrfs_destroy_delayed_refs(cur_trans, root); - btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, - cur_trans->dirty_pages.dirty_bytes); cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&root->fs_info->transaction_blocked_wait); - btrfs_evict_pending_snapshots(cur_trans); - cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&root->fs_info->transaction_wait); @@ -4059,63 +4020,51 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, static int btrfs_cleanup_transaction(struct btrfs_root *root) { struct btrfs_transaction *t; - LIST_HEAD(list); mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); - list_splice_init(&root->fs_info->trans_list, &list); - root->fs_info->running_transaction = NULL; - spin_unlock(&root->fs_info->trans_lock); - - while (!list_empty(&list)) { - t = list_entry(list.next, struct btrfs_transaction, list); - - btrfs_destroy_ordered_operations(t, root); - - btrfs_destroy_all_ordered_extents(root->fs_info); - - btrfs_destroy_delayed_refs(t, root); - - /* - * FIXME: cleanup wait for commit - * We needn't acquire the lock here, because we are during - * the umount, there is no other task which will change it. - */ - t->state = TRANS_STATE_COMMIT_START; - smp_mb(); - if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) - wake_up(&root->fs_info->transaction_blocked_wait); - - btrfs_evict_pending_snapshots(t); - - t->state = TRANS_STATE_UNBLOCKED; - smp_mb(); - if (waitqueue_active(&root->fs_info->transaction_wait)) - wake_up(&root->fs_info->transaction_wait); - - btrfs_destroy_delayed_inodes(root); - btrfs_assert_delayed_root_empty(root); - - btrfs_destroy_all_delalloc_inodes(root->fs_info); - - btrfs_destroy_marked_extents(root, &t->dirty_pages, - EXTENT_DIRTY); - - btrfs_destroy_pinned_extent(root, - root->fs_info->pinned_extents); - - t->state = TRANS_STATE_COMPLETED; - smp_mb(); - if (waitqueue_active(&t->commit_wait)) - wake_up(&t->commit_wait); + while (!list_empty(&root->fs_info->trans_list)) { + t = list_first_entry(&root->fs_info->trans_list, + struct btrfs_transaction, list); + if (t->state >= TRANS_STATE_COMMIT_START) { + atomic_inc(&t->use_count); + spin_unlock(&root->fs_info->trans_lock); + btrfs_wait_for_commit(root, t->transid); + btrfs_put_transaction(t); + spin_lock(&root->fs_info->trans_lock); + continue; + } + if (t == root->fs_info->running_transaction) { + t->state = TRANS_STATE_COMMIT_DOING; + spin_unlock(&root->fs_info->trans_lock); + /* + * We wait for 0 num_writers since we don't hold a trans + * handle open currently for this transaction. + */ + wait_event(t->writer_wait, + atomic_read(&t->num_writers) == 0); + } else { + spin_unlock(&root->fs_info->trans_lock); + } + btrfs_cleanup_one_transaction(t, root); - atomic_set(&t->use_count, 0); + spin_lock(&root->fs_info->trans_lock); + if (t == root->fs_info->running_transaction) + root->fs_info->running_transaction = NULL; list_del_init(&t->list); - memset(t, 0, sizeof(*t)); - kmem_cache_free(btrfs_transaction_cachep, t); - } + spin_unlock(&root->fs_info->trans_lock); + btrfs_put_transaction(t); + trace_btrfs_transaction_commit(root); + spin_lock(&root->fs_info->trans_lock); + } + spin_unlock(&root->fs_info->trans_lock); + btrfs_destroy_all_ordered_extents(root->fs_info); + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); + btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); + btrfs_destroy_all_delalloc_inodes(root->fs_info); mutex_unlock(&root->fs_info->transaction_kthread_mutex); return 0; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5ce2a7da8b11..53059df350f8 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -86,6 +86,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_root(struct btrfs_root *root); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +struct btrfs_root *btrfs_alloc_dummy_root(void); +#endif + /* * This function is used to grab the root, and avoid it is freed when we * access it. But it doesn't ensure that the tree is not dropped. diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 4b8691607373..41422a3de8ed 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -5,7 +5,6 @@ #include "btrfs_inode.h" #include "print-tree.h" #include "export.h" -#include "compat.h" #define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \ parent_objectid) / 4) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d58bef130a41..45d98d01028f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -25,7 +25,6 @@ #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/percpu_counter.h> -#include "compat.h" #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -1551,9 +1550,8 @@ again: if (ret && !insert) { err = -ENOENT; goto out; - } else if (ret) { + } else if (WARN_ON(ret)) { err = -EIO; - WARN_ON(1); goto out; } @@ -1979,7 +1977,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u64 refs; int ret; - int err = 0; path = btrfs_alloc_path(); if (!path) @@ -1992,14 +1989,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path, bytenr, num_bytes, parent, root_objectid, owner, offset, refs_to_add, extent_op); - if (ret == 0) + if (ret != -EAGAIN) goto out; - if (ret != -EAGAIN) { - err = ret; - goto out; - } - leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, item); @@ -2021,7 +2013,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, root, ret); out: btrfs_free_path(path); - return err; + return ret; } static int run_delayed_data_ref(struct btrfs_trans_handle *trans, @@ -2137,15 +2129,28 @@ again: } if (ret > 0) { if (metadata) { - btrfs_release_path(path); - metadata = 0; + if (path->slots[0] > 0) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == node->bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == node->num_bytes) + ret = 0; + } + if (ret > 0) { + btrfs_release_path(path); + metadata = 0; - key.offset = node->num_bytes; - key.type = BTRFS_EXTENT_ITEM_KEY; - goto again; + key.objectid = node->bytenr; + key.offset = node->num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + goto again; + } + } else { + err = -EIO; + goto out; } - err = -EIO; - goto out; } leaf = path->nodes[0]; @@ -2234,8 +2239,12 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, { int ret = 0; - if (trans->aborted) + if (trans->aborted) { + if (insert_reserved) + btrfs_pin_extent(root, node->bytenr, + node->num_bytes, 1); return 0; + } if (btrfs_delayed_ref_is_head(node)) { struct btrfs_delayed_ref_head *head; @@ -2411,6 +2420,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, btrfs_free_delayed_extent_op(extent_op); if (ret) { + /* + * Need to reset must_insert_reserved if + * there was an error so the abort stuff + * can cleanup the reserved space + * properly. + */ + if (must_insert_reserved) + locked_ref->must_insert_reserved = 1; btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); spin_lock(&delayed_refs->lock); btrfs_delayed_ref_unlock(locked_ref); @@ -3197,8 +3214,7 @@ again: if (ret) goto out_put; - ret = btrfs_truncate_free_space_cache(root, trans, path, - inode); + ret = btrfs_truncate_free_space_cache(root, trans, inode); if (ret) goto out_put; } @@ -3318,10 +3334,9 @@ again: last = cache->key.objectid + cache->key.offset; err = write_one_cache_group(trans, root, path, cache); + btrfs_put_block_group(cache); if (err) /* File system offline */ goto out; - - btrfs_put_block_group(cache); } while (1) { @@ -3605,10 +3620,9 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = ALIGN(bytes, root->sectorsize); - if (root == root->fs_info->tree_root || - BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { - alloc_chunk = 0; + if (btrfs_is_free_space_inode(inode)) { committed = 1; + ASSERT(current->journal_info); } data_sinfo = fs_info->data_sinfo; @@ -3636,6 +3650,16 @@ again: spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); + /* + * It is ugly that we don't call nolock join + * transaction for the free space inode case here. + * But it is safe because we only do the data space + * reservation for the free space cache in the + * transaction context, the common join transaction + * just increase the counter of the current transaction + * handler, doesn't try to acquire the trans_lock of + * the fs. + */ trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -3681,6 +3705,9 @@ commit_trans: goto again; } + trace_btrfs_space_reservation(root->fs_info, + "space_info:enospc", + data_sinfo->flags, bytes, 1); return -ENOSPC; } data_sinfo->bytes_may_use += bytes; @@ -3989,12 +4016,26 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, * the filesystem is readonly(all dirty pages are written to * the disk). */ - btrfs_start_all_delalloc_inodes(root->fs_info, 0); + btrfs_start_delalloc_roots(root->fs_info, 0); if (!current->journal_info) - btrfs_wait_all_ordered_extents(root->fs_info); + btrfs_wait_ordered_roots(root->fs_info, -1); } } +static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim) +{ + u64 bytes; + int nr; + + bytes = btrfs_calc_trans_metadata_size(root, 1); + nr = (int)div64_u64(to_reclaim, bytes); + if (!nr) + nr = 1; + return nr; +} + +#define EXTENT_SIZE_PER_ITEM (256 * 1024) + /* * shrink metadata reservation for delalloc */ @@ -4007,24 +4048,30 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, u64 delalloc_bytes; u64 max_reclaim; long time_left; - unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; - int loops = 0; + unsigned long nr_pages; + int loops; + int items; enum btrfs_reserve_flush_enum flush; + /* Calc the number of the pages we need flush for space reservation */ + items = calc_reclaim_items_nr(root, to_reclaim); + to_reclaim = items * EXTENT_SIZE_PER_ITEM; + trans = (struct btrfs_trans_handle *)current->journal_info; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; - smp_mb(); delalloc_bytes = percpu_counter_sum_positive( &root->fs_info->delalloc_bytes); if (delalloc_bytes == 0) { if (trans) return; - btrfs_wait_all_ordered_extents(root->fs_info); + if (wait_ordered) + btrfs_wait_ordered_roots(root->fs_info, items); return; } + loops = 0; while (delalloc_bytes && loops < 3) { max_reclaim = min(delalloc_bytes, to_reclaim); nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; @@ -4033,9 +4080,19 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, * We need to wait for the async pages to actually start before * we do anything. */ - wait_event(root->fs_info->async_submit_wait, - !atomic_read(&root->fs_info->async_delalloc_pages)); + max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages); + if (!max_reclaim) + goto skip_async; + + if (max_reclaim <= nr_pages) + max_reclaim = 0; + else + max_reclaim -= nr_pages; + wait_event(root->fs_info->async_submit_wait, + atomic_read(&root->fs_info->async_delalloc_pages) <= + (int)max_reclaim); +skip_async: if (!trans) flush = BTRFS_RESERVE_FLUSH_ALL; else @@ -4049,13 +4106,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, loops++; if (wait_ordered && !trans) { - btrfs_wait_all_ordered_extents(root->fs_info); + btrfs_wait_ordered_roots(root->fs_info, items); } else { time_left = schedule_timeout_killable(1); if (time_left) break; } - smp_mb(); delalloc_bytes = percpu_counter_sum_positive( &root->fs_info->delalloc_bytes); } @@ -4140,16 +4196,11 @@ static int flush_space(struct btrfs_root *root, switch (state) { case FLUSH_DELAYED_ITEMS_NR: case FLUSH_DELAYED_ITEMS: - if (state == FLUSH_DELAYED_ITEMS_NR) { - u64 bytes = btrfs_calc_trans_metadata_size(root, 1); - - nr = (int)div64_u64(num_bytes, bytes); - if (!nr) - nr = 1; - nr *= 2; - } else { + if (state == FLUSH_DELAYED_ITEMS_NR) + nr = calc_reclaim_items_nr(root, num_bytes) * 2; + else nr = -1; - } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -4332,6 +4383,10 @@ out: !block_rsv_use_bytes(global_rsv, orig_bytes)) ret = 0; } + if (ret == -ENOSPC) + trace_btrfs_space_reservation(root->fs_info, + "space_info:enospc", + space_info->flags, orig_bytes, 1); if (flushing) { spin_lock(&space_info->lock); space_info->flush = 0; @@ -4986,7 +5041,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); if (to_reserve) - trace_btrfs_space_reservation(root->fs_info,"delalloc", + trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_reserve, 1); block_rsv_add_bytes(block_rsv, to_reserve, 1); @@ -5264,6 +5319,8 @@ static int pin_down_extent(struct btrfs_root *root, set_extent_dirty(root->fs_info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); + if (reserved) + trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); return 0; } @@ -5718,9 +5775,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } extent_slot = path->slots[0]; } - } else if (ret == -ENOENT) { + } else if (WARN_ON(ret == -ENOENT)) { btrfs_print_leaf(extent_root, path->nodes[0]); - WARN_ON(1); btrfs_err(info, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", bytenr, parent, root_objectid, owner_objectid, @@ -5967,6 +6023,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, btrfs_add_free_space(cache, buf->start, buf->len); btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); + trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; } out: @@ -6594,8 +6651,6 @@ again: } } - trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); - return ret; } @@ -6707,6 +6762,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ins->objectid, ins->offset); BUG(); } + trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); return ret; } @@ -6731,13 +6787,18 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, size += sizeof(*block_info); path = btrfs_alloc_path(); - if (!path) + if (!path) { + btrfs_free_and_pin_reserved_extent(root, ins->objectid, + root->leafsize); return -ENOMEM; + } path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); if (ret) { + btrfs_free_and_pin_reserved_extent(root, ins->objectid, + root->leafsize); btrfs_free_path(path); return ret; } @@ -6779,6 +6840,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ins->objectid, ins->offset); BUG(); } + + trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize); return ret; } @@ -7983,7 +8046,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) spin_lock(&sinfo->lock); - for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) + for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) if (!list_empty(&sinfo->block_groups[i])) free_bytes += __btrfs_get_ro_block_group_free_space( &sinfo->block_groups[i]); @@ -8271,15 +8334,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) release_global_block_rsv(info); - while(!list_empty(&info->space_info)) { + while (!list_empty(&info->space_info)) { space_info = list_entry(info->space_info.next, struct btrfs_space_info, list); if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { - if (space_info->bytes_pinned > 0 || + if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_reserved > 0 || - space_info->bytes_may_use > 0) { - WARN_ON(1); + space_info->bytes_may_use > 0)) { dump_space_info(space_info, 0, 0); } } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 51731b76900d..8e457fca0a0b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -13,13 +13,13 @@ #include <linux/cleancache.h> #include "extent_io.h" #include "extent_map.h" -#include "compat.h" #include "ctree.h" #include "btrfs_inode.h" #include "volumes.h" #include "check-integrity.h" #include "locking.h" #include "rcu-string.h" +#include "backref.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -1597,11 +1597,10 @@ done: * * 1 is returned if we find something, 0 if nothing was in the tree */ -static noinline u64 find_lock_delalloc_range(struct inode *inode, - struct extent_io_tree *tree, - struct page *locked_page, - u64 *start, u64 *end, - u64 max_bytes) +STATIC u64 find_lock_delalloc_range(struct inode *inode, + struct extent_io_tree *tree, + struct page *locked_page, u64 *start, + u64 *end, u64 max_bytes) { u64 delalloc_start; u64 delalloc_end; @@ -1740,10 +1739,8 @@ u64 count_range_bits(struct extent_io_tree *tree, u64 last = 0; int found = 0; - if (search_end <= cur_start) { - WARN_ON(1); + if (WARN_ON(search_end <= cur_start)) return 0; - } spin_lock(&tree->lock); if (cur_start == 0 && bits == EXTENT_DIRTY) { @@ -1983,6 +1980,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; int ret; + ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); BUG_ON(!mirror_num); /* we can't repair anything in raid56 yet */ @@ -2039,6 +2037,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); int ret = 0; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, @@ -2060,12 +2061,12 @@ static int clean_io_failure(u64 start, struct page *page) u64 private; u64 private_failure; struct io_failure_record *failrec; - struct btrfs_fs_info *fs_info; + struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct extent_state *state; int num_copies; int did_repair = 0; int ret; - struct inode *inode = page->mapping->host; private = 0; ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, @@ -2088,6 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page) did_repair = 1; goto out; } + if (fs_info->sb->s_flags & MS_RDONLY) + goto out; spin_lock(&BTRFS_I(inode)->io_tree.lock); state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, @@ -2097,7 +2100,6 @@ static int clean_io_failure(u64 start, struct page *page) if (state && state->start <= failrec->start && state->end >= failrec->start + failrec->len - 1) { - fs_info = BTRFS_I(inode)->root->fs_info; num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { @@ -3569,9 +3571,8 @@ retry: * but no sense in crashing the users box for something * we can survive anyway. */ - if (!eb) { + if (WARN_ON(!eb)) { spin_unlock(&mapping->private_lock); - WARN_ON(1); continue; } @@ -4038,7 +4039,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, if (offset >= last) return NULL; - while(1) { + while (1) { len = last - offset; if (len == 0) break; @@ -4062,6 +4063,19 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, return NULL; } +static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx) +{ + unsigned long cnt = *((unsigned long *)ctx); + + cnt++; + *((unsigned long *)ctx) = cnt; + + /* Now we're sure that the extent is shared. */ + if (cnt > 1) + return 1; + return 0; +} + int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent) { @@ -4128,7 +4142,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, last = found_key.offset; last_for_get_extent = last + 1; } - btrfs_free_path(path); + btrfs_release_path(path); /* * we might have some extents allocated but more delalloc past those @@ -4198,7 +4212,24 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= (FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN); } else { + unsigned long ref_cnt = 0; + disko = em->block_start + offset_in_extent; + + /* + * As btrfs supports shared space, this information + * can be exported to userspace tools via + * flag FIEMAP_EXTENT_SHARED. + */ + ret = iterate_inodes_from_logical( + em->block_start, + BTRFS_I(inode)->root->fs_info, + path, count_ext_ref, &ref_cnt); + if (ret < 0 && ret != -ENOENT) + goto out_free; + + if (ref_cnt > 1) + flags |= FIEMAP_EXTENT_SHARED; } if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) flags |= FIEMAP_EXTENT_ENCODED; @@ -4230,6 +4261,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, out_free: free_extent_map(em); out: + btrfs_free_path(path); unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, &cached_state, GFP_NOFS); return ret; @@ -4455,6 +4487,23 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb) } } +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start) +{ + struct extent_buffer *eb; + + rcu_read_lock(); + eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); + if (eb && atomic_inc_not_zero(&eb->refs)) { + rcu_read_unlock(); + mark_extent_buffer_accessed(eb); + return eb; + } + rcu_read_unlock(); + + return NULL; +} + struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len) { @@ -4468,14 +4517,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, int uptodate = 1; int ret; - rcu_read_lock(); - eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); - if (eb && atomic_inc_not_zero(&eb->refs)) { - rcu_read_unlock(); - mark_extent_buffer_accessed(eb); + + eb = find_extent_buffer(tree, start); + if (eb) return eb; - } - rcu_read_unlock(); eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS); if (!eb) @@ -4534,24 +4579,17 @@ again: spin_lock(&tree->buffer_lock); ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); + spin_unlock(&tree->buffer_lock); + radix_tree_preload_end(); if (ret == -EEXIST) { - exists = radix_tree_lookup(&tree->buffer, - start >> PAGE_CACHE_SHIFT); - if (!atomic_inc_not_zero(&exists->refs)) { - spin_unlock(&tree->buffer_lock); - radix_tree_preload_end(); - exists = NULL; + exists = find_extent_buffer(tree, start); + if (exists) + goto free_eb; + else goto again; - } - spin_unlock(&tree->buffer_lock); - radix_tree_preload_end(); - mark_extent_buffer_accessed(exists); - goto free_eb; } /* add one reference for the tree */ check_buffer_tree_ref(eb); - spin_unlock(&tree->buffer_lock); - radix_tree_preload_end(); /* * there is a race where release page may have @@ -4582,23 +4620,6 @@ free_eb: return exists; } -struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, - u64 start, unsigned long len) -{ - struct extent_buffer *eb; - - rcu_read_lock(); - eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); - if (eb && atomic_inc_not_zero(&eb->refs)) { - rcu_read_unlock(); - mark_extent_buffer_accessed(eb); - return eb; - } - rcu_read_unlock(); - - return NULL; -} - static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) { struct extent_buffer *eb = @@ -5062,23 +5083,6 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, } } -static void move_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = page_address(dst_page); - if (dst_page == src_page) { - memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); - } else { - char *src_kaddr = page_address(src_page); - char *p = dst_kaddr + dst_off + len; - char *s = src_kaddr + src_off + len; - - while (len--) - *--p = *--s; - } -} - static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) { unsigned long distance = (src > dst) ? src - dst : dst - src; @@ -5189,7 +5193,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), + copy_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6dbc645f1f3d..19620c58f096 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -271,7 +271,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, - u64 start, unsigned long len); + u64 start); void free_extent_buffer(struct extent_buffer *eb); void free_extent_buffer_stale(struct extent_buffer *eb); #define WAIT_NONE 0 @@ -345,4 +345,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, int end_extent_writepage(struct page *page, int err, u64 start, u64 end); int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, int mirror_num); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +noinline u64 find_lock_delalloc_range(struct inode *inode, + struct extent_io_tree *tree, + struct page *locked_page, u64 *start, + u64 *end, u64 max_bytes); +#endif #endif diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 61adc44b7805..93fba716d7f8 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,10 +3,10 @@ #include <linux/rbtree.h> -#define EXTENT_MAP_LAST_BYTE (u64)-4 -#define EXTENT_MAP_HOLE (u64)-3 -#define EXTENT_MAP_INLINE (u64)-2 -#define EXTENT_MAP_DELALLOC (u64)-1 +#define EXTENT_MAP_LAST_BYTE ((u64)-4) +#define EXTENT_MAP_HOLE ((u64)-3) +#define EXTENT_MAP_INLINE ((u64)-2) +#define EXTENT_MAP_DELALLOC ((u64)-1) /* bits for the flags field */ #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 4f53159bdb9d..6f3848860283 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -329,6 +329,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, u64 csum_end; u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + ASSERT(start == ALIGN(start, root->sectorsize) && + (end + 1) == ALIGN(end + 1, root->sectorsize)); + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -846,10 +849,8 @@ insert: path->leave_spinning = 0; if (ret < 0) goto fail_unlock; - if (ret != 0) { - WARN_ON(1); + if (WARN_ON(ret != 0)) goto fail_unlock; - } leaf = path->nodes[0]; csum: item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 72da4df53c9a..82d0342763c5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -39,7 +39,6 @@ #include "print-tree.h" #include "tree-log.h" #include "locking.h" -#include "compat.h" #include "volumes.h" static struct kmem_cache *btrfs_inode_defrag_cachep; @@ -370,7 +369,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) u64 root_objectid = 0; atomic_inc(&fs_info->defrag_running); - while(1) { + while (1) { /* Pause the auto defragger. */ if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)) @@ -1281,6 +1280,7 @@ again: } wait_on_page_writeback(pages[i]); } + faili = num_pages - 1; err = 0; if (start_pos < inode->i_size) { struct btrfs_ordered_extent *ordered; @@ -1299,8 +1299,10 @@ again: unlock_page(pages[i]); page_cache_release(pages[i]); } - btrfs_wait_ordered_range(inode, start_pos, - last_pos - start_pos); + err = btrfs_wait_ordered_range(inode, start_pos, + last_pos - start_pos); + if (err) + goto fail; goto again; } if (ordered) @@ -1809,8 +1811,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); - if (full_sync) - btrfs_wait_ordered_range(inode, start, end - start + 1); + if (full_sync) { + ret = btrfs_wait_ordered_range(inode, start, end - start + 1); + if (ret) { + mutex_unlock(&inode->i_mutex); + goto out; + } + } atomic_inc(&root->log_batch); /* @@ -1876,27 +1883,20 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) mutex_unlock(&inode->i_mutex); if (ret != BTRFS_NO_LOG_SYNC) { - if (ret > 0) { - /* - * If we didn't already wait for ordered extents we need - * to do that now. - */ - if (!full_sync) - btrfs_wait_ordered_range(inode, start, - end - start + 1); - ret = btrfs_commit_transaction(trans, root); - } else { + if (!ret) { ret = btrfs_sync_log(trans, root); - if (ret == 0) { + if (!ret) { ret = btrfs_end_transaction(trans, root); - } else { - if (!full_sync) - btrfs_wait_ordered_range(inode, start, - end - - start + 1); - ret = btrfs_commit_transaction(trans, root); + goto out; } } + if (!full_sync) { + ret = btrfs_wait_ordered_range(inode, start, + end - start + 1); + if (ret) + goto out; + } + ret = btrfs_commit_transaction(trans, root); } else { ret = btrfs_end_transaction(trans, root); } @@ -2067,7 +2067,9 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) bool same_page = ((offset >> PAGE_CACHE_SHIFT) == ((offset + len - 1) >> PAGE_CACHE_SHIFT)); - btrfs_wait_ordered_range(inode, offset, len); + ret = btrfs_wait_ordered_range(inode, offset, len); + if (ret) + return ret; mutex_lock(&inode->i_mutex); /* @@ -2136,8 +2138,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) btrfs_put_ordered_extent(ordered); unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); - btrfs_wait_ordered_range(inode, lockstart, - lockend - lockstart + 1); + ret = btrfs_wait_ordered_range(inode, lockstart, + lockend - lockstart + 1); + if (ret) { + mutex_unlock(&inode->i_mutex); + return ret; + } } path = btrfs_alloc_path(); @@ -2308,7 +2314,10 @@ static long btrfs_fallocate(struct file *file, int mode, * wait for ordered IO before we have any locks. We'll loop again * below with the locks held. */ - btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); + ret = btrfs_wait_ordered_range(inode, alloc_start, + alloc_end - alloc_start); + if (ret) + goto out; locked_end = alloc_end - 1; while (1) { @@ -2332,8 +2341,10 @@ static long btrfs_fallocate(struct file *file, int mode, * we can't wait on the range with the transaction * running or with the extent lock held */ - btrfs_wait_ordered_range(inode, alloc_start, - alloc_end - alloc_start); + ret = btrfs_wait_ordered_range(inode, alloc_start, + alloc_end - alloc_start); + if (ret) + goto out; } else { if (ordered) btrfs_put_ordered_extent(ordered); @@ -2405,14 +2416,12 @@ out_reserve_fail: static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map *em; + struct extent_map *em = NULL; struct extent_state *cached_state = NULL; u64 lockstart = *offset; u64 lockend = i_size_read(inode); u64 start = *offset; - u64 orig_start = *offset; u64 len = i_size_read(inode); - u64 last_end = 0; int ret = 0; lockend = max_t(u64, root->sectorsize, lockend); @@ -2429,89 +2438,35 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, &cached_state); - /* - * Delalloc is such a pain. If we have a hole and we have pending - * delalloc for a portion of the hole we will get back a hole that - * exists for the entire range since it hasn't been actually written - * yet. So to take care of this case we need to look for an extent just - * before the position we want in case there is outstanding delalloc - * going on here. - */ - if (whence == SEEK_HOLE && start != 0) { - if (start <= root->sectorsize) - em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, - root->sectorsize, 0); - else - em = btrfs_get_extent_fiemap(inode, NULL, 0, - start - root->sectorsize, - root->sectorsize, 0); - if (IS_ERR(em)) { - ret = PTR_ERR(em); - goto out; - } - last_end = em->start + em->len; - if (em->block_start == EXTENT_MAP_DELALLOC) - last_end = min_t(u64, last_end, inode->i_size); - free_extent_map(em); - } - - while (1) { + while (start < inode->i_size) { em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { ret = PTR_ERR(em); + em = NULL; break; } - if (em->block_start == EXTENT_MAP_HOLE) { - if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { - if (last_end <= orig_start) { - free_extent_map(em); - ret = -ENXIO; - break; - } - } - - if (whence == SEEK_HOLE) { - *offset = start; - free_extent_map(em); - break; - } - } else { - if (whence == SEEK_DATA) { - if (em->block_start == EXTENT_MAP_DELALLOC) { - if (start >= inode->i_size) { - free_extent_map(em); - ret = -ENXIO; - break; - } - } - - if (!test_bit(EXTENT_FLAG_PREALLOC, - &em->flags)) { - *offset = start; - free_extent_map(em); - break; - } - } - } + if (whence == SEEK_HOLE && + (em->block_start == EXTENT_MAP_HOLE || + test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) + break; + else if (whence == SEEK_DATA && + (em->block_start != EXTENT_MAP_HOLE && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) + break; start = em->start + em->len; - last_end = em->start + em->len; - - if (em->block_start == EXTENT_MAP_DELALLOC) - last_end = min_t(u64, last_end, inode->i_size); - - if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { - free_extent_map(em); - ret = -ENXIO; - break; - } free_extent_map(em); + em = NULL; cond_resched(); } - if (!ret) - *offset = min(*offset, inode->i_size); -out: + free_extent_map(em); + if (!ret) { + if (whence == SEEK_DATA && start >= inode->i_size) + ret = -ENXIO; + else + *offset = min_t(loff_t, start, inode->i_size); + } unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); return ret; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index b4f9904c4c6b..057be95b1e1e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -218,7 +218,6 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, - struct btrfs_path *path, struct inode *inode) { int ret = 0; @@ -1009,8 +1008,13 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, if (ret) goto out; - - btrfs_wait_ordered_range(inode, 0, (u64)-1); + ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (ret) { + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, + GFP_NOFS); + goto out; + } key.objectid = BTRFS_FREE_SPACE_OBJECTID; key.offset = offset; @@ -2276,7 +2280,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, goto out; entry = rb_entry(node, struct btrfs_free_space, offset_index); - while(1) { + while (1) { if (entry->bytes < bytes && entry->bytes > *max_extent_size) *max_extent_size = entry->bytes; @@ -2967,19 +2971,15 @@ out: int btrfs_write_out_ino_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, - struct btrfs_path *path) + struct btrfs_path *path, + struct inode *inode) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; - struct inode *inode; int ret; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) return 0; - inode = lookup_free_ino_inode(root, path); - if (IS_ERR(inode)) - return 0; - ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); if (ret) { btrfs_delalloc_release_metadata(inode, inode->i_size); @@ -2990,7 +2990,6 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, #endif } - iput(inode); return ret; } diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index e737f92cf6d0..0cf4977ef70d 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -58,7 +58,6 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, struct btrfs_block_rsv *rsv); int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, - struct btrfs_path *path, struct inode *inode); int load_free_space_cache(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group); @@ -76,7 +75,8 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root); int btrfs_write_out_ino_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, - struct btrfs_path *path); + struct btrfs_path *path, + struct inode *inode); void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group); int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index e0b7034d6343..ec82fae07097 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -369,7 +369,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, goto out; leaf = path->nodes[0]; - item = btrfs_item_nr(leaf, path->slots[0]); + item = btrfs_item_nr(path->slots[0]); ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char); ptr += btrfs_item_size(leaf, item) - ins_len; extref = (struct btrfs_inode_extref *)ptr; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 2c66ddbbe670..ab485e57b6fe 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -78,10 +78,8 @@ again: btrfs_transaction_in_commit(fs_info)) { leaf = path->nodes[0]; - if (btrfs_header_nritems(leaf) == 0) { - WARN_ON(1); + if (WARN_ON(btrfs_header_nritems(leaf) == 0)) break; - } /* * Save the key so we can advances forward @@ -237,7 +235,7 @@ again: start_caching(root); if (objectid <= root->cache_progress || - objectid > root->highest_objectid) + objectid >= root->highest_objectid) __btrfs_add_free_space(ctl, objectid, 1); else __btrfs_add_free_space(pinned, objectid, 1); @@ -412,8 +410,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root, return 0; /* Don't save inode cache if we are deleting this root */ - if (btrfs_root_refs(&root->root_item) == 0 && - root != root->fs_info->tree_root) + if (btrfs_root_refs(&root->root_item) == 0) return 0; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) @@ -467,7 +464,7 @@ again: } if (i_size_read(inode) > 0) { - ret = btrfs_truncate_free_space_cache(root, trans, path, inode); + ret = btrfs_truncate_free_space_cache(root, trans, inode); if (ret) { if (ret != -ENOSPC) btrfs_abort_transaction(trans, root, ret); @@ -504,7 +501,7 @@ again: } btrfs_free_reserved_data_space(inode, prealloc); - ret = btrfs_write_out_ino_cache(root, trans, path); + ret = btrfs_write_out_ino_cache(root, trans, path, inode); out_put: iput(inode); out_release: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 51e3afa78354..f1a77449d032 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,7 +43,6 @@ #include <linux/btrfs.h> #include <linux/blkdev.h> #include <linux/posix_acl_xattr.h> -#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -844,7 +843,10 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(btrfs_is_free_space_inode(inode)); + if (btrfs_is_free_space_inode(inode)) { + WARN_ON_ONCE(1); + return -EINVAL; + } num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); @@ -1178,10 +1180,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, while (1) { ret = btrfs_lookup_file_extent(trans, root, path, ino, cur_offset, 0); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + if (ret < 0) goto error; - } if (ret > 0 && path->slots[0] > 0 && check_prev) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, @@ -1195,10 +1195,8 @@ next_slot: leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + if (ret < 0) goto error; - } if (ret > 0) break; leaf = path->nodes[0]; @@ -1289,10 +1287,8 @@ out_check: ret = cow_file_range(inode, locked_page, cow_start, found_key.offset - 1, page_started, nr_written, 1); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto error; - } cow_start = (u64)-1; } @@ -1339,10 +1335,8 @@ out_check: BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, cur_offset, num_bytes); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto error; - } } extent_clear_unlock_delalloc(inode, cur_offset, @@ -1364,10 +1358,8 @@ out_check: if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, end, page_started, nr_written, 1); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto error; - } } error: @@ -1551,7 +1543,13 @@ static void btrfs_clear_bit_hook(struct inode *inode, spin_unlock(&BTRFS_I(inode)->lock); } - if (*bits & EXTENT_DO_ACCOUNTING) + /* + * We don't reserve metadata space for space cache inodes so we + * don't need to call dellalloc_release_metadata if there is an + * error. + */ + if (*bits & EXTENT_DO_ACCOUNTING && + root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, len); if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID @@ -2041,10 +2039,8 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, key.offset = offset; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - WARN_ON(1); + if (WARN_ON(ret < 0)) return ret; - } ret = 0; while (1) { @@ -2133,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, old->extent_offset, fs_info, path, record_one_backref, old); - BUG_ON(ret < 0 && ret != -ENOENT); + if (ret < 0 && ret != -ENOENT) + return false; /* no backref to be processed for this extent */ if (!old->count) { @@ -2367,10 +2364,23 @@ out_unlock: return ret; } +static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) +{ + struct old_sa_defrag_extent *old, *tmp; + + if (!new) + return; + + list_for_each_entry_safe(old, tmp, &new->head, list) { + list_del(&old->list); + kfree(old); + } + kfree(new); +} + static void relink_file_extents(struct new_sa_defrag_extent *new) { struct btrfs_path *path; - struct old_sa_defrag_extent *old, *tmp; struct sa_defrag_extent_backref *backref; struct sa_defrag_extent_backref *prev = NULL; struct inode *inode; @@ -2413,16 +2423,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new) kfree(prev); btrfs_free_path(path); - - list_for_each_entry_safe(old, tmp, &new->head, list) { - list_del(&old->list); - kfree(old); - } out: + free_sa_defrag_extent(new); + atomic_dec(&root->fs_info->defrag_running); wake_up(&root->fs_info->transaction_wait); - - kfree(new); } static struct new_sa_defrag_extent * @@ -2432,7 +2437,7 @@ record_old_file_extents(struct inode *inode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; struct btrfs_key key; - struct old_sa_defrag_extent *old, *tmp; + struct old_sa_defrag_extent *old; struct new_sa_defrag_extent *new; int ret; @@ -2480,7 +2485,7 @@ record_old_file_extents(struct inode *inode, if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret < 0) - goto out_free_list; + goto out_free_path; else if (ret > 0) break; continue; @@ -2509,7 +2514,7 @@ record_old_file_extents(struct inode *inode, old = kmalloc(sizeof(*old), GFP_NOFS); if (!old) - goto out_free_list; + goto out_free_path; offset = max(new->file_pos, key.offset); end = min(new->file_pos + new->len, key.offset + num_bytes); @@ -2531,15 +2536,10 @@ next: return new; -out_free_list: - list_for_each_entry_safe(old, tmp, &new->head, list) { - list_del(&old->list); - kfree(old); - } out_free_path: btrfs_free_path(path); out_kfree: - kfree(new); + free_sa_defrag_extent(new); return NULL; } @@ -2710,8 +2710,14 @@ out: btrfs_remove_ordered_extent(inode, ordered_extent); /* for snapshot-aware defrag */ - if (new) - relink_file_extents(new); + if (new) { + if (ret) { + free_sa_defrag_extent(new); + atomic_dec(&root->fs_info->defrag_running); + } else { + relink_file_extents(new); + } + } /* once for us */ btrfs_put_ordered_extent(ordered_extent); @@ -2969,6 +2975,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret) { + atomic_dec(&root->orphan_inodes); if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &BTRFS_I(inode)->runtime_flags); @@ -3018,14 +3025,16 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, release_rsv = 1; spin_unlock(&root->orphan_lock); - if (trans && delete_item) - ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - - if (release_rsv) { - btrfs_orphan_release_metadata(inode); + if (delete_item) { atomic_dec(&root->orphan_inodes); + if (trans) + ret = btrfs_del_orphan_item(trans, root, + btrfs_ino(inode)); } + if (release_rsv) + btrfs_orphan_release_metadata(inode); + return ret; } @@ -3172,8 +3181,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { - if (!S_ISREG(inode->i_mode)) { - WARN_ON(1); + if (WARN_ON(!S_ISREG(inode->i_mode))) { iput(inode); continue; } @@ -3636,7 +3644,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, int ret; ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (!ret) { - btrfs_drop_nlink(inode); + drop_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } return ret; @@ -4230,15 +4238,16 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) while (1) { struct btrfs_ordered_extent *ordered; - btrfs_wait_ordered_range(inode, hole_start, - block_end - hole_start); + lock_extent_bits(io_tree, hole_start, block_end - 1, 0, &cached_state); - ordered = btrfs_lookup_ordered_extent(inode, hole_start); + ordered = btrfs_lookup_ordered_range(inode, hole_start, + block_end - hole_start); if (!ordered) break; unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, GFP_NOFS); + btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); } @@ -4472,8 +4481,10 @@ void btrfs_evict_inode(struct inode *inode) trace_btrfs_inode_evict(inode); truncate_inode_pages(&inode->i_data, 0); - if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || - btrfs_is_free_space_inode(inode))) + if (inode->i_nlink && + ((btrfs_root_refs(&root->root_item) != 0 && + root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) || + btrfs_is_free_space_inode(inode))) goto no_delete; if (is_bad_inode(inode)) { @@ -4490,7 +4501,8 @@ void btrfs_evict_inode(struct inode *inode) } if (inode->i_nlink > 0) { - BUG_ON(btrfs_root_refs(&root->root_item) != 0); + BUG_ON(btrfs_root_refs(&root->root_item) != 0 && + root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID); goto no_delete; } @@ -4731,14 +4743,7 @@ static void inode_tree_del(struct inode *inode) } spin_unlock(&root->inode_lock); - /* - * Free space cache has inodes in the tree root, but the tree root has a - * root_refs of 0, so this could end up dropping the tree root as a - * snapshot, so we need the extra !root->fs_info->tree_root check to - * make sure we don't drop it. - */ - if (empty && btrfs_root_refs(&root->root_item) == 0 && - root != root->fs_info->tree_root) { + if (empty && btrfs_root_refs(&root->root_item) == 0) { synchronize_srcu(&root->fs_info->subvol_srcu); spin_lock(&root->inode_lock); empty = RB_EMPTY_ROOT(&root->inode_tree); @@ -4831,10 +4836,12 @@ static struct inode *btrfs_iget_locked(struct super_block *s, { struct inode *inode; struct btrfs_iget_args args; + unsigned long hashval = btrfs_inode_hash(objectid, root); + args.ino = objectid; args.root = root; - inode = iget5_locked(s, objectid, btrfs_find_actor, + inode = iget5_locked(s, hashval, btrfs_find_actor, btrfs_init_locked_inode, (void *)&args); return inode; @@ -5048,7 +5055,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) continue; } - item = btrfs_item_nr(leaf, slot); + item = btrfs_item_nr(slot); btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid != key.objectid) @@ -5454,7 +5461,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_INODE_NODATASUM; } - insert_inode_hash(inode); + btrfs_insert_inode_hash(inode); inode_tree_add(inode); trace_btrfs_inode_new(inode); @@ -5730,7 +5737,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, goto fail; } - btrfs_inc_nlink(inode); + inc_nlink(inode); inode_inc_iversion(inode); inode->i_ctime = CURRENT_TIME; ihold(inode); @@ -5860,7 +5867,7 @@ static noinline int uncompress_inline(struct btrfs_path *path, compress_type = btrfs_file_extent_compression(leaf, item); max_size = btrfs_file_extent_ram_bytes(leaf, item); inline_size = btrfs_file_extent_inline_item_len(leaf, - btrfs_item_nr(leaf, path->slots[0])); + btrfs_item_nr(path->slots[0])); tmp = kmalloc(inline_size, GFP_NOFS); if (!tmp) return -ENOMEM; @@ -5974,7 +5981,14 @@ again: found_type = btrfs_key_type(&found_key); if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { - goto not_found; + /* + * If we backup past the first extent we want to move forward + * and see if there is an extent in front of us, otherwise we'll + * say there is a hole for our whole search range which can + * cause problems. + */ + extent_end = start; + goto next; } found_type = btrfs_file_extent_type(leaf, item); @@ -5989,7 +6003,7 @@ again: size = btrfs_file_extent_inline_len(leaf, item); extent_end = ALIGN(extent_start + size, root->sectorsize); } - +next: if (start >= extent_end) { path->slots[0]++; if (path->slots[0] >= btrfs_header_nritems(leaf)) { @@ -6173,8 +6187,7 @@ insert: write_unlock(&em_tree->lock); out: - if (em) - trace_btrfs_get_extent(root, em); + trace_btrfs_get_extent(root, em); if (path) btrfs_free_path(path); @@ -6249,7 +6262,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag /* adjust the range_start to make sure it doesn't * go backwards from the start they passed in */ - range_start = max(start,range_start); + range_start = max(start, range_start); found = found_end - range_start; if (found > 0) { @@ -7053,7 +7066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, } } else { submit_len += bvec->bv_len; - nr_pages ++; + nr_pages++; bvec++; } } @@ -7222,7 +7235,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, * outstanding dirty pages are on disk. */ count = iov_length(iov, nr_segs); - btrfs_wait_ordered_range(inode, offset, count); + ret = btrfs_wait_ordered_range(inode, offset, count); + if (ret) + return ret; if (rw & WRITE) { /* @@ -7563,7 +7578,10 @@ static int btrfs_truncate(struct inode *inode) u64 mask = root->sectorsize - 1; u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); - btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); + ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask), + (u64)-1); + if (ret) + return ret; /* * Yes ladies and gentelment, this is indeed ugly. The fact is we have @@ -7787,6 +7805,14 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return inode; } +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_destroy_inode(struct inode *inode) +{ + btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} +#endif + static void btrfs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); @@ -7857,8 +7883,7 @@ int btrfs_drop_inode(struct inode *inode) return 1; /* the snap/subvol tree is on deleting */ - if (btrfs_root_refs(&root->root_item) == 0 && - root != root->fs_info->tree_root) + if (btrfs_root_refs(&root->root_item) == 0) return 1; else return generic_drop_inode(inode); @@ -7995,8 +8020,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (ret == -EEXIST) { /* we shouldn't get * eexist without a new_inode */ - if (!new_inode) { - WARN_ON(1); + if (WARN_ON(!new_inode)) { return ret; } } else { @@ -8144,18 +8168,24 @@ out_notrans: static void btrfs_run_delalloc_work(struct btrfs_work *work) { struct btrfs_delalloc_work *delalloc_work; + struct inode *inode; delalloc_work = container_of(work, struct btrfs_delalloc_work, work); - if (delalloc_work->wait) - btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1); - else - filemap_flush(delalloc_work->inode->i_mapping); + inode = delalloc_work->inode; + if (delalloc_work->wait) { + btrfs_wait_ordered_range(inode, 0, (u64)-1); + } else { + filemap_flush(inode->i_mapping); + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + filemap_flush(inode->i_mapping); + } if (delalloc_work->delay_iput) - btrfs_add_delayed_iput(delalloc_work->inode); + btrfs_add_delayed_iput(inode); else - iput(delalloc_work->inode); + iput(inode); complete(&delalloc_work->completion); } @@ -8276,8 +8306,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) return ret; } -int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, - int delay_iput) +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) { struct btrfs_root *root; struct list_head splice; @@ -8337,14 +8366,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, int err; int drop_inode = 0; u64 objectid; - u64 index = 0 ; + u64 index = 0; int name_len; int datasize; unsigned long ptr; struct btrfs_file_extent_item *ei; struct extent_buffer *leaf; - name_len = strlen(symname) + 1; + name_len = strlen(symname); if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; @@ -8432,7 +8461,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode_set_bytes(inode, name_len); - btrfs_i_size_write(inode, name_len - 1); + btrfs_i_size_write(inode, name_len); err = btrfs_update_inode(trans, root, inode); if (err) drop_inode = 1; @@ -8491,6 +8520,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, ins.offset, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); if (ret) { + btrfs_free_reserved_extent(root, ins.objectid, + ins.offset); btrfs_abort_transaction(trans, root, ret); if (own_trans) btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9d46f60cb943..a111622598b0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -44,7 +44,6 @@ #include <linux/uuid.h> #include <linux/btrfs.h> #include <linux/uaccess.h> -#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -321,7 +320,7 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) { - struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb); struct btrfs_device *device; struct request_queue *q; struct fstrim_range range; @@ -369,9 +368,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) int btrfs_is_empty_uuid(u8 *uuid) { - static char empty_uuid[BTRFS_UUID_SIZE] = {0}; + int i; - return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); + for (i = 0; i < BTRFS_UUID_SIZE; i++) { + if (uuid[i]) + return 0; + } + return 1; } static noinline int create_subvol(struct inode *dir, @@ -436,7 +439,7 @@ static noinline int create_subvol(struct inode *dir, btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); - write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), + write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(leaf), @@ -574,7 +577,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) return ret; - btrfs_wait_ordered_extents(root); + btrfs_wait_ordered_extents(root, -1); pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); if (!pending_snapshot) @@ -688,7 +691,7 @@ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) * nfs_async_unlink(). */ -static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) +static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) { int error; @@ -842,7 +845,6 @@ static int find_new_extents(struct btrfs_root *root, { struct btrfs_path *path; struct btrfs_key min_key; - struct btrfs_key max_key; struct extent_buffer *leaf; struct btrfs_file_extent_item *extent; int type; @@ -857,15 +859,10 @@ static int find_new_extents(struct btrfs_root *root, min_key.type = BTRFS_EXTENT_DATA_KEY; min_key.offset = *off; - max_key.objectid = ino; - max_key.type = (u8)-1; - max_key.offset = (u64)-1; - path->keep_locks = 1; - while(1) { - ret = btrfs_search_forward(root, &min_key, &max_key, - path, newer_than); + while (1) { + ret = btrfs_search_forward(root, &min_key, path, newer_than); if (ret != 0) goto none; if (min_key.objectid != ino) @@ -1206,7 +1203,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ra = &file->f_ra; } - pages = kmalloc(sizeof(struct page *) * max_cluster, + pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_NOFS); if (!pages) { ret = -ENOMEM; @@ -1893,7 +1890,6 @@ static noinline int search_ioctl(struct inode *inode, { struct btrfs_root *root; struct btrfs_key key; - struct btrfs_key max_key; struct btrfs_path *path; struct btrfs_ioctl_search_key *sk = &args->key; struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; @@ -1925,15 +1921,10 @@ static noinline int search_ioctl(struct inode *inode, key.type = sk->min_type; key.offset = sk->min_offset; - max_key.objectid = sk->max_objectid; - max_key.type = sk->max_type; - max_key.offset = sk->max_offset; - path->keep_locks = 1; - while(1) { - ret = btrfs_search_forward(root, &key, &max_key, path, - sk->min_transid); + while (1) { + ret = btrfs_search_forward(root, &key, path, sk->min_transid); if (ret != 0) { if (ret > 0) ret = 0; @@ -2018,7 +2009,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.type = BTRFS_INODE_REF_KEY; key.offset = (u64)-1; - while(1) { + while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -2047,7 +2038,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, } *(ptr + len) = '/'; - read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); + read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len); if (key.offset == BTRFS_FIRST_FREE_OBJECTID) break; @@ -2058,7 +2049,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, dirid = key.objectid; } memmove(name, ptr, total_len); - name[total_len]='\0'; + name[total_len] = '\0'; ret = 0; out: btrfs_free_path(path); @@ -2098,7 +2089,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, static noinline int btrfs_ioctl_snap_destroy(struct file *file, void __user *arg) { - struct dentry *parent = fdentry(file); + struct dentry *parent = file->f_path.dentry; struct dentry *dentry; struct inode *dir = parent->d_inode; struct inode *inode; @@ -2144,7 +2135,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, inode = dentry->d_inode; dest = BTRFS_I(inode)->root; - if (!capable(CAP_SYS_ADMIN)){ + if (!capable(CAP_SYS_ADMIN)) { /* * Regular user. Only allow this with a special mount * option, when the user has write+exec access to the @@ -2727,15 +2718,10 @@ static long btrfs_ioctl_file_extent_same(struct file *file, size = sizeof(tmp) + tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info); - same = kmalloc(size, GFP_NOFS); - if (!same) { - ret = -EFAULT; - goto out; - } + same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size); - if (copy_from_user(same, - (struct btrfs_ioctl_same_args __user *)argp, size)) { - ret = -EFAULT; + if (IS_ERR(same)) { + ret = PTR_ERR(same); goto out; } @@ -3119,7 +3105,7 @@ out: static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct fd src_file; struct inode *src; @@ -3679,9 +3665,10 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - + if (root->fs_info->sb->s_flags & MS_RDONLY) { + ret = -EROFS; + goto out; + } if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { @@ -3707,7 +3694,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) if (copy_to_user(arg, p, sizeof(*p))) ret = -EFAULT; - +out: kfree(p); return ret; } @@ -4317,7 +4304,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -4557,9 +4544,15 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_logical_to_ino(root, argp); case BTRFS_IOC_SPACE_INFO: return btrfs_ioctl_space_info(root, argp); - case BTRFS_IOC_SYNC: - btrfs_sync_fs(file->f_dentry->d_sb, 1); - return 0; + case BTRFS_IOC_SYNC: { + int ret; + + ret = btrfs_start_delalloc_roots(root->fs_info, 0); + if (ret) + return ret; + ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); + return ret; + } case BTRFS_IOC_START_SYNC: return btrfs_ioctl_start_sync(root, argp); case BTRFS_IOC_WAIT_SYNC: diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index c702cb62f78a..69582d5b69d1 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -537,7 +537,9 @@ void btrfs_remove_ordered_extent(struct inode *inode, */ if (RB_EMPTY_ROOT(&tree->tree) && !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { + spin_lock(&root->fs_info->ordered_root_lock); list_del_init(&BTRFS_I(inode)->ordered_operations); + spin_unlock(&root->fs_info->ordered_root_lock); } if (!root->nr_ordered_extents) { @@ -563,10 +565,11 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work) * wait for all the ordered extents in a root. This is done when balancing * space between drives. */ -void btrfs_wait_ordered_extents(struct btrfs_root *root) +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) { struct list_head splice, works; struct btrfs_ordered_extent *ordered, *next; + int count = 0; INIT_LIST_HEAD(&splice); INIT_LIST_HEAD(&works); @@ -574,7 +577,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root) mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->ordered_extent_lock); list_splice_init(&root->ordered_extents, &splice); - while (!list_empty(&splice)) { + while (!list_empty(&splice) && nr) { ordered = list_first_entry(&splice, struct btrfs_ordered_extent, root_extent_list); list_move_tail(&ordered->root_extent_list, @@ -589,7 +592,11 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root) cond_resched(); spin_lock(&root->ordered_extent_lock); + if (nr != -1) + nr--; + count++; } + list_splice_tail(&splice, &root->ordered_extents); spin_unlock(&root->ordered_extent_lock); list_for_each_entry_safe(ordered, next, &works, work_list) { @@ -599,18 +606,21 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root) cond_resched(); } mutex_unlock(&root->fs_info->ordered_operations_mutex); + + return count; } -void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info) +void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) { struct btrfs_root *root; struct list_head splice; + int done; INIT_LIST_HEAD(&splice); spin_lock(&fs_info->ordered_root_lock); list_splice_init(&fs_info->ordered_roots, &splice); - while (!list_empty(&splice)) { + while (!list_empty(&splice) && nr) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); root = btrfs_grab_fs_root(root); @@ -619,11 +629,16 @@ void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info) &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); - btrfs_wait_ordered_extents(root); + done = btrfs_wait_ordered_extents(root, nr); btrfs_put_fs_root(root); spin_lock(&fs_info->ordered_root_lock); + if (nr != -1) { + nr -= done; + WARN_ON(nr < 0); + } } + list_splice_tail(&splice, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); } @@ -734,8 +749,9 @@ void btrfs_start_ordered_extent(struct inode *inode, /* * Used to wait on ordered extents across a large range of bytes. */ -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { + int ret = 0; u64 end; u64 orig_end; struct btrfs_ordered_extent *ordered; @@ -751,8 +767,9 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) /* start IO across the range first to instantiate any delalloc * extents */ - filemap_fdatawrite_range(inode->i_mapping, start, orig_end); - + ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + if (ret) + return ret; /* * So with compression we will find and lock a dirty page and clear the * first one as dirty, setup an async extent, and immediately return @@ -768,10 +785,15 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) * right and you are wrong. */ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, - &BTRFS_I(inode)->runtime_flags)) - filemap_fdatawrite_range(inode->i_mapping, start, orig_end); - - filemap_fdatawait_range(inode->i_mapping, start, orig_end); + &BTRFS_I(inode)->runtime_flags)) { + ret = filemap_fdatawrite_range(inode->i_mapping, start, + orig_end); + if (ret) + return ret; + } + ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end); + if (ret) + return ret; end = orig_end; while (1) { @@ -782,17 +804,20 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) btrfs_put_ordered_extent(ordered); break; } - if (ordered->file_offset + ordered->len < start) { + if (ordered->file_offset + ordered->len <= start) { btrfs_put_ordered_extent(ordered); break; } btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; + if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) + ret = -EIO; btrfs_put_ordered_extent(ordered); - if (end == 0 || end == start) + if (ret || end == 0 || end == start) break; end--; } + return ret; } /* @@ -1076,7 +1101,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, * if this file hasn't been changed since the last transaction * commit, we can safely return without doing anything */ - if (last_mod < root->fs_info->last_trans_committed) + if (last_mod <= root->fs_info->last_trans_committed) return; spin_lock(&root->fs_info->ordered_root_lock); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 0c0b35612d7a..9b0450f7ac20 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -180,7 +180,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); void btrfs_start_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry, int wait); -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, @@ -195,8 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); -void btrfs_wait_ordered_extents(struct btrfs_root *root); -void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info); +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); +void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0088bedc8631..417053b17181 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -193,7 +193,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d", btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { - item = btrfs_item_nr(l, i); + item = btrfs_item_nr(i); btrfs_item_key_to_cpu(l, &key, i); type = btrfs_key_type(&key); printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index d0ecfbd9cc9f..24ac21840a9a 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -33,7 +33,6 @@ #include <linux/raid/xor.h> #include <linux/vmalloc.h> #include <asm/div64.h> -#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4a355726151e..ce459a7cb16d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1383,6 +1383,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, { struct btrfs_root *reloc_root; struct reloc_control *rc = root->fs_info->reloc_ctl; + struct btrfs_block_rsv *rsv; int clear_rsv = 0; int ret; @@ -1396,13 +1397,14 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) return 0; - if (!trans->block_rsv) { + if (!trans->reloc_reserved) { + rsv = trans->block_rsv; trans->block_rsv = rc->block_rsv; clear_rsv = 1; } reloc_root = create_reloc_root(trans, root, root->root_key.objectid); if (clear_rsv) - trans->block_rsv = NULL; + trans->block_rsv = rsv; ret = __add_reloc_root(reloc_root); BUG_ON(ret < 0); @@ -1775,8 +1777,7 @@ again: new_ptr_gen = 0; } - if (new_bytenr > 0 && new_bytenr == old_bytenr) { - WARN_ON(1); + if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) { ret = level; break; } @@ -2058,7 +2059,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, LIST_HEAD(inode_list); struct btrfs_key key; struct btrfs_key next_key; - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; struct btrfs_root *reloc_root; struct btrfs_root_item *root_item; struct btrfs_path *path; @@ -2107,18 +2108,19 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, memset(&next_key, 0, sizeof(next_key)); while (1) { - trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); - trans->block_rsv = rc->block_rsv; - ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, BTRFS_RESERVE_FLUSH_ALL); if (ret) { - BUG_ON(ret != -EAGAIN); - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - continue; + err = ret; + goto out; } + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + goto out; + } + trans->block_rsv = rc->block_rsv; replaced = 0; max_level = level; @@ -2164,6 +2166,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, root_item->drop_level = level; btrfs_end_transaction_throttle(trans, root); + trans = NULL; btrfs_btree_balance_dirty(root); @@ -2192,7 +2195,8 @@ out: btrfs_update_reloc_root(trans, root); } - btrfs_end_transaction_throttle(trans, root); + if (trans) + btrfs_end_transaction_throttle(trans, root); btrfs_btree_balance_dirty(root); @@ -3258,7 +3262,7 @@ static int add_tree_block(struct reloc_control *rc, struct rb_node *rb_node; u32 item_size; int level = -1; - int generation; + u64 generation; eb = path->nodes[0]; item_size = btrfs_item_size_nr(eb, path->slots[0]); @@ -3407,7 +3411,6 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, struct inode *inode, u64 ino) { struct btrfs_key key; - struct btrfs_path *path; struct btrfs_root *root = fs_info->tree_root; struct btrfs_trans_handle *trans; int ret = 0; @@ -3432,22 +3435,14 @@ truncate: if (ret) goto out; - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - btrfs_free_path(path); ret = PTR_ERR(trans); goto out; } - ret = btrfs_truncate_free_space_cache(root, trans, path, inode); + ret = btrfs_truncate_free_space_cache(root, trans, inode); - btrfs_free_path(path); btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); out: @@ -3549,10 +3544,8 @@ static int find_data_references(struct reloc_control *rc, err = ret; goto out; } - if (ret > 0) { - WARN_ON(1); + if (WARN_ON(ret > 0)) goto out; - } leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -3572,11 +3565,9 @@ static int find_data_references(struct reloc_control *rc, } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid != ref_objectid || - key.type != BTRFS_EXTENT_DATA_KEY) { - WARN_ON(1); + if (WARN_ON(key.objectid != ref_objectid || + key.type != BTRFS_EXTENT_DATA_KEY)) break; - } fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -4001,16 +3992,6 @@ restart: } } - ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); - if (ret < 0) { - if (ret != -ENOSPC) { - err = ret; - WARN_ON(1); - break; - } - rc->commit_transaction = 1; - } - if (rc->commit_transaction) { rc->commit_transaction = 0; ret = btrfs_commit_transaction(trans, rc->extent_root); @@ -4241,12 +4222,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", rc->block_group->key.objectid, rc->block_group->flags); - ret = btrfs_start_all_delalloc_inodes(fs_info, 0); + ret = btrfs_start_delalloc_roots(fs_info, 0); if (ret < 0) { err = ret; goto out; } - btrfs_wait_all_ordered_extents(fs_info); + btrfs_wait_ordered_roots(fs_info, -1); while (1) { mutex_lock(&fs_info->cleaner_mutex); @@ -4264,7 +4245,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) rc->extents_found); if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { - btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); + ret = btrfs_wait_ordered_range(rc->data_inode, 0, + (u64)-1); + if (ret) { + err = ret; + goto out; + } invalidate_mapping_pages(rc->data_inode->i_mapping, 0, -1); rc->stage = UPDATE_DATA_PTRS; @@ -4481,6 +4467,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) struct btrfs_root *root = BTRFS_I(inode)->root; int ret; u64 disk_bytenr; + u64 new_bytenr; LIST_HEAD(list); ordered = btrfs_lookup_ordered_extent(inode, file_pos); @@ -4492,13 +4479,24 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) if (ret) goto out; - disk_bytenr = ordered->start; while (!list_empty(&list)) { sums = list_entry(list.next, struct btrfs_ordered_sum, list); list_del_init(&sums->list); - sums->bytenr = disk_bytenr; - disk_bytenr += sums->len; + /* + * We need to offset the new_bytenr based on where the csum is. + * We need to do this because we will read in entire prealloc + * extents but we may have written to say the middle of the + * prealloc extent, so we need to make sure the csum goes with + * the right disk offset. + * + * We can do this because the data reloc inode refers strictly + * to the on disk bytes, so we don't have to worry about + * disk_len vs real len like with real inodes since it's all + * disk length. + */ + new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); + sums->bytenr = new_bytenr; btrfs_add_ordered_sum(inode, ordered, sums); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a18e0e23f6a6..561e2f16ba3e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -938,8 +938,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) BTRFS_DEV_STAT_CORRUPTION_ERRS); } - if (sctx->readonly && !sctx->is_dev_replace) - goto did_not_correct_error; + if (sctx->readonly) { + ASSERT(!sctx->is_dev_replace); + goto out; + } if (!is_metadata && !have_csum) { struct scrub_fixup_nodatasum *fixup_nodatasum; @@ -2717,8 +2719,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, mutex_unlock(&fs_info->scrub_lock); wake_up(&fs_info->scrub_pause_wait); - dev_replace->cursor_left = dev_replace->cursor_right; - dev_replace->item_needs_writeback = 1; btrfs_put_block_group(cache); if (ret) break; @@ -2732,6 +2732,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, break; } + dev_replace->cursor_left = dev_replace->cursor_right; + dev_replace->item_needs_writeback = 1; + key.offset = found_key.offset + length; btrfs_release_path(path); } @@ -2783,7 +2786,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, { int ret = 0; - mutex_lock(&fs_info->scrub_lock); if (fs_info->scrub_workers_refcnt == 0) { if (is_dev_replace) btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, @@ -2813,21 +2815,17 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, } ++fs_info->scrub_workers_refcnt; out: - mutex_unlock(&fs_info->scrub_lock); - return ret; } static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) { - mutex_lock(&fs_info->scrub_lock); if (--fs_info->scrub_workers_refcnt == 0) { btrfs_stop_workers(&fs_info->scrub_workers); btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); btrfs_stop_workers(&fs_info->scrub_nocow_workers); } WARN_ON(fs_info->scrub_workers_refcnt < 0); - mutex_unlock(&fs_info->scrub_lock); } int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, @@ -2888,23 +2886,18 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return -EINVAL; } - ret = scrub_workers_get(fs_info, is_dev_replace); - if (ret) - return ret; mutex_lock(&fs_info->fs_devices->device_list_mutex); dev = btrfs_find_device(fs_info, devid, NULL, NULL); if (!dev || (dev->missing && !is_dev_replace)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); - scrub_workers_put(fs_info); return -ENODEV; } - mutex_lock(&fs_info->scrub_lock); + mutex_lock(&fs_info->scrub_lock); if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); - scrub_workers_put(fs_info); return -EIO; } @@ -2915,10 +2908,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, btrfs_dev_replace_unlock(&fs_info->dev_replace); mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); - scrub_workers_put(fs_info); return -EINPROGRESS; } btrfs_dev_replace_unlock(&fs_info->dev_replace); + + ret = scrub_workers_get(fs_info, is_dev_replace); + if (ret) { + mutex_unlock(&fs_info->scrub_lock); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + return ret; + } + sctx = scrub_setup_ctx(dev, is_dev_replace); if (IS_ERR(sctx)) { mutex_unlock(&fs_info->scrub_lock); @@ -2931,13 +2931,15 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, atomic_inc(&fs_info->scrubs_running); mutex_unlock(&fs_info->scrub_lock); - mutex_unlock(&fs_info->fs_devices->device_list_mutex); if (!is_dev_replace) { - down_read(&fs_info->scrub_super_lock); + /* + * by holding device list mutex, we can + * kick off writing super in log tree sync. + */ ret = scrub_supers(sctx, dev); - up_read(&fs_info->scrub_super_lock); } + mutex_unlock(&fs_info->fs_devices->device_list_mutex); if (!ret) ret = scrub_enumerate_chunks(sctx, dev, start, end, @@ -2954,10 +2956,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, mutex_lock(&fs_info->scrub_lock); dev->scrub_device = NULL; + scrub_workers_put(fs_info); mutex_unlock(&fs_info->scrub_lock); scrub_free_ctx(sctx); - scrub_workers_put(fs_info); return ret; } @@ -2987,16 +2989,6 @@ void btrfs_scrub_continue(struct btrfs_root *root) wake_up(&fs_info->scrub_pause_wait); } -void btrfs_scrub_pause_super(struct btrfs_root *root) -{ - down_write(&root->fs_info->scrub_super_lock); -} - -void btrfs_scrub_continue_super(struct btrfs_root *root) -{ - up_write(&root->fs_info->scrub_super_lock); -} - int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) { mutex_lock(&fs_info->scrub_lock); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index e46e0ed74925..6837fe87f3a6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -121,7 +121,6 @@ struct send_ctx { struct list_head name_cache_list; int name_cache_size; - struct file *cur_inode_filp; char *read_buf; }; @@ -565,10 +564,8 @@ static int begin_cmd(struct send_ctx *sctx, int cmd) { struct btrfs_cmd_header *hdr; - if (!sctx->send_buf) { - WARN_ON(1); + if (WARN_ON(!sctx->send_buf)) return -EINVAL; - } BUG_ON(sctx->send_size); @@ -791,7 +788,7 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, if (found_key->type == BTRFS_INODE_REF_KEY) { ptr = (unsigned long)btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); - item = btrfs_item_nr(eb, slot); + item = btrfs_item_nr(slot); total = btrfs_item_size(eb, item); elem_size = sizeof(*iref); } else { @@ -905,7 +902,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, eb = path->nodes[0]; slot = path->slots[0]; - item = btrfs_item_nr(eb, slot); + item = btrfs_item_nr(slot); di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); cur = 0; len = 0; @@ -2120,77 +2117,6 @@ out: } /* - * Called for regular files when sending extents data. Opens a struct file - * to read from the file. - */ -static int open_cur_inode_file(struct send_ctx *sctx) -{ - int ret = 0; - struct btrfs_key key; - struct path path; - struct inode *inode; - struct dentry *dentry; - struct file *filp; - int new = 0; - - if (sctx->cur_inode_filp) - goto out; - - key.objectid = sctx->cur_ino; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - - inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root, - &new); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - goto out; - } - - dentry = d_obtain_alias(inode); - inode = NULL; - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - goto out; - } - - path.mnt = sctx->mnt; - path.dentry = dentry; - filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred()); - dput(dentry); - dentry = NULL; - if (IS_ERR(filp)) { - ret = PTR_ERR(filp); - goto out; - } - sctx->cur_inode_filp = filp; - -out: - /* - * no xxxput required here as every vfs op - * does it by itself on failure - */ - return ret; -} - -/* - * Closes the struct file that was created in open_cur_inode_file - */ -static int close_cur_inode_file(struct send_ctx *sctx) -{ - int ret = 0; - - if (!sctx->cur_inode_filp) - goto out; - - ret = filp_close(sctx->cur_inode_filp, NULL); - sctx->cur_inode_filp = NULL; - -out: - return ret; -} - -/* * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace */ static int send_subvol_begin(struct send_ctx *sctx) @@ -3622,6 +3548,72 @@ out: return ret; } +static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) +{ + struct btrfs_root *root = sctx->send_root; + struct btrfs_fs_info *fs_info = root->fs_info; + struct inode *inode; + struct page *page; + char *addr; + struct btrfs_key key; + pgoff_t index = offset >> PAGE_CACHE_SHIFT; + pgoff_t last_index; + unsigned pg_offset = offset & ~PAGE_CACHE_MASK; + ssize_t ret = 0; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + inode = btrfs_iget(fs_info->sb, &key, root, NULL); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (offset + len > i_size_read(inode)) { + if (offset > i_size_read(inode)) + len = 0; + else + len = offset - i_size_read(inode); + } + if (len == 0) + goto out; + + last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; + while (index <= last_index) { + unsigned cur_len = min_t(unsigned, len, + PAGE_CACHE_SIZE - pg_offset); + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + if (!page) { + ret = -ENOMEM; + break; + } + + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + ret = -EIO; + break; + } + } + + addr = kmap(page); + memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); + kunmap(page); + unlock_page(page); + page_cache_release(page); + index++; + pg_offset = 0; + len -= cur_len; + ret += cur_len; + } +out: + iput(inode); + return ret; +} + /* * Read some bytes from the current inode/file and send a write command to * user space. @@ -3630,35 +3622,20 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) { int ret = 0; struct fs_path *p; - loff_t pos = offset; - int num_read = 0; - mm_segment_t old_fs; + ssize_t num_read = 0; p = fs_path_alloc(); if (!p) return -ENOMEM; - /* - * vfs normally only accepts user space buffers for security reasons. - * we only read from the file and also only provide the read_buf buffer - * to vfs. As this buffer does not come from a user space call, it's - * ok to temporary allow kernel space buffers. - */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); - ret = open_cur_inode_file(sctx); - if (ret < 0) - goto out; - - ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); - if (ret < 0) - goto out; - num_read = ret; - if (!num_read) + num_read = fill_read_buf(sctx, offset, len); + if (num_read <= 0) { + if (num_read < 0) + ret = num_read; goto out; + } ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); if (ret < 0) @@ -3677,7 +3654,6 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); tlv_put_failure: out: fs_path_free(p); - set_fs(old_fs); if (ret < 0) return ret; return num_read; @@ -3926,16 +3902,16 @@ static int is_extent_unchanged(struct send_ctx *sctx, while (key.offset < ekey->offset + left_len) { ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); right_type = btrfs_file_extent_type(eb, ei); - right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); - right_len = btrfs_file_extent_num_bytes(eb, ei); - right_offset = btrfs_file_extent_offset(eb, ei); - right_gen = btrfs_file_extent_generation(eb, ei); - if (right_type != BTRFS_FILE_EXTENT_REG) { ret = 0; goto out; } + right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); + right_len = btrfs_file_extent_num_bytes(eb, ei); + right_offset = btrfs_file_extent_offset(eb, ei); + right_gen = btrfs_file_extent_generation(eb, ei); + /* * Are we at extent 8? If yes, we know the extent is changed. * This may only happen on the first iteration. @@ -4222,10 +4198,6 @@ static int changed_inode(struct send_ctx *sctx, u64 left_gen = 0; u64 right_gen = 0; - ret = close_cur_inode_file(sctx); - if (ret < 0) - goto out; - sctx->cur_ino = key->objectid; sctx->cur_inode_new_gen = 0; @@ -4686,11 +4658,6 @@ static int send_subvol(struct send_ctx *sctx) } out: - if (!ret) - ret = close_cur_inode_file(sctx); - else - close_cur_inode_file(sctx); - free_recorded_refs(sctx); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e913328d0f2a..2d8ac1bf0cf9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -42,7 +42,6 @@ #include <linux/cleancache.h> #include <linux/ratelimit.h> #include <linux/btrfs.h> -#include "compat.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" @@ -921,7 +920,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } - btrfs_wait_all_ordered_extents(fs_info); + btrfs_wait_ordered_roots(fs_info, -1); trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { @@ -1330,6 +1329,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) * this also happens on 'umount -rf' or on shutdown, when * the filesystem is busy. */ + + /* wait for the uuid_scan task to finish */ + down(&fs_info->uuid_tree_rescan_sem); + /* avoid complains from lockdep et al. */ + up(&fs_info->uuid_tree_rescan_sem); + sb->s_flags |= MS_RDONLY; btrfs_dev_replace_suspend_for_unmount(fs_info); @@ -1465,7 +1470,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) nr_devices = fs_info->fs_devices->open_devices; BUG_ON(!nr_devices); - devices_info = kmalloc(sizeof(*devices_info) * nr_devices, + devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), GFP_NOFS); if (!devices_info) return -ENOMEM; @@ -1789,7 +1794,25 @@ static void btrfs_print_info(void) static int btrfs_run_sanity_tests(void) { - return btrfs_test_free_space_cache(); + int ret; + + ret = btrfs_init_test_fs(); + if (ret) + return ret; + + ret = btrfs_test_free_space_cache(); + if (ret) + goto out; + ret = btrfs_test_extent_buffer_operations(); + if (ret) + goto out; + ret = btrfs_test_extent_io(); + if (ret) + goto out; + ret = btrfs_test_inodes(); +out: + btrfs_destroy_test_fs(); + return ret; } static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c new file mode 100644 index 000000000000..757ef00a75a4 --- /dev/null +++ b/fs/btrfs/tests/btrfs-tests.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/magic.h> +#include "btrfs-tests.h" +#include "../ctree.h" + +static struct vfsmount *test_mnt = NULL; + +static const struct super_operations btrfs_test_super_ops = { + .alloc_inode = btrfs_alloc_inode, + .destroy_inode = btrfs_test_destroy_inode, +}; + +static struct dentry *btrfs_test_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops, + NULL, BTRFS_TEST_MAGIC); +} + +static struct file_system_type test_type = { + .name = "btrfs_test_fs", + .mount = btrfs_test_mount, + .kill_sb = kill_anon_super, +}; + +struct inode *btrfs_new_test_inode(void) +{ + return new_inode(test_mnt->mnt_sb); +} + +int btrfs_init_test_fs(void) +{ + int ret; + + ret = register_filesystem(&test_type); + if (ret) { + printk(KERN_ERR "btrfs: cannot register test file system\n"); + return ret; + } + + test_mnt = kern_mount(&test_type); + if (IS_ERR(test_mnt)) { + printk(KERN_ERR "btrfs: cannot mount test file system\n"); + unregister_filesystem(&test_type); + return ret; + } + return 0; +} + +void btrfs_destroy_test_fs(void) +{ + kern_unmount(test_mnt); + unregister_filesystem(&test_type); +} diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 580877625776..b353bc806ca0 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -24,11 +24,36 @@ #define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__) int btrfs_test_free_space_cache(void); +int btrfs_test_extent_buffer_operations(void); +int btrfs_test_extent_io(void); +int btrfs_test_inodes(void); +int btrfs_init_test_fs(void); +void btrfs_destroy_test_fs(void); +struct inode *btrfs_new_test_inode(void); #else static inline int btrfs_test_free_space_cache(void) { return 0; } +static inline int btrfs_test_extent_buffer_operations(void) +{ + return 0; +} +static inline int btrfs_init_test_fs(void) +{ + return 0; +} +static inline void btrfs_destroy_test_fs(void) +{ +} +static inline int btrfs_test_extent_io(void) +{ + return 0; +} +static inline int btrfs_test_inodes(void) +{ + return 0; +} #endif #endif diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c new file mode 100644 index 000000000000..cc286ce97d1e --- /dev/null +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -0,0 +1,229 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/slab.h> +#include "btrfs-tests.h" +#include "../ctree.h" +#include "../extent_io.h" +#include "../disk-io.h" + +static int test_btrfs_split_item(void) +{ + struct btrfs_path *path; + struct btrfs_root *root; + struct extent_buffer *eb; + struct btrfs_item *item; + char *value = "mary had a little lamb"; + char *split1 = "mary had a little"; + char *split2 = " lamb"; + char *split3 = "mary"; + char *split4 = " had a little"; + char buf[32]; + struct btrfs_key key; + u32 value_len = strlen(value); + int ret = 0; + + test_msg("Running btrfs_split_item tests\n"); + + root = btrfs_alloc_dummy_root(); + if (IS_ERR(root)) { + test_msg("Could not allocate root\n"); + return PTR_ERR(root); + } + + path = btrfs_alloc_path(); + if (!path) { + test_msg("Could not allocate path\n"); + kfree(root); + return -ENOMEM; + } + + path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096); + if (!eb) { + test_msg("Could not allocate dummy buffer\n"); + ret = -ENOMEM; + goto out; + } + path->slots[0] = 0; + + key.objectid = 0; + key.type = BTRFS_EXTENT_CSUM_KEY; + key.offset = 0; + + setup_items_for_insert(root, path, &key, &value_len, value_len, + value_len + sizeof(struct btrfs_item), 1); + item = btrfs_item_nr(0); + write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0), + value_len); + + key.offset = 3; + + /* + * Passing NULL trans here should be safe because we have plenty of + * space in this leaf to split the item without having to split the + * leaf. + */ + ret = btrfs_split_item(NULL, root, path, &key, 17); + if (ret) { + test_msg("Split item failed %d\n", ret); + goto out; + } + + /* + * Read the first slot, it should have the original key and contain only + * 'mary had a little' + */ + btrfs_item_key_to_cpu(eb, &key, 0); + if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || + key.offset != 0) { + test_msg("Invalid key at slot 0\n"); + ret = -EINVAL; + goto out; + } + + item = btrfs_item_nr(0); + if (btrfs_item_size(eb, item) != strlen(split1)) { + test_msg("Invalid len in the first split\n"); + ret = -EINVAL; + goto out; + } + + read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0), + strlen(split1)); + if (memcmp(buf, split1, strlen(split1))) { + test_msg("Data in the buffer doesn't match what it should " + "in the first split have='%.*s' want '%s'\n", + (int)strlen(split1), buf, split1); + ret = -EINVAL; + goto out; + } + + btrfs_item_key_to_cpu(eb, &key, 1); + if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || + key.offset != 3) { + test_msg("Invalid key at slot 1\n"); + ret = -EINVAL; + goto out; + } + + item = btrfs_item_nr(1); + if (btrfs_item_size(eb, item) != strlen(split2)) { + test_msg("Invalid len in the second split\n"); + ret = -EINVAL; + goto out; + } + + read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1), + strlen(split2)); + if (memcmp(buf, split2, strlen(split2))) { + test_msg("Data in the buffer doesn't match what it should " + "in the second split\n"); + ret = -EINVAL; + goto out; + } + + key.offset = 1; + /* Do it again so we test memmoving the other items in the leaf */ + ret = btrfs_split_item(NULL, root, path, &key, 4); + if (ret) { + test_msg("Second split item failed %d\n", ret); + goto out; + } + + btrfs_item_key_to_cpu(eb, &key, 0); + if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || + key.offset != 0) { + test_msg("Invalid key at slot 0\n"); + ret = -EINVAL; + goto out; + } + + item = btrfs_item_nr(0); + if (btrfs_item_size(eb, item) != strlen(split3)) { + test_msg("Invalid len in the first split\n"); + ret = -EINVAL; + goto out; + } + + read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0), + strlen(split3)); + if (memcmp(buf, split3, strlen(split3))) { + test_msg("Data in the buffer doesn't match what it should " + "in the third split"); + ret = -EINVAL; + goto out; + } + + btrfs_item_key_to_cpu(eb, &key, 1); + if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || + key.offset != 1) { + test_msg("Invalid key at slot 1\n"); + ret = -EINVAL; + goto out; + } + + item = btrfs_item_nr(1); + if (btrfs_item_size(eb, item) != strlen(split4)) { + test_msg("Invalid len in the second split\n"); + ret = -EINVAL; + goto out; + } + + read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1), + strlen(split4)); + if (memcmp(buf, split4, strlen(split4))) { + test_msg("Data in the buffer doesn't match what it should " + "in the fourth split\n"); + ret = -EINVAL; + goto out; + } + + btrfs_item_key_to_cpu(eb, &key, 2); + if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || + key.offset != 3) { + test_msg("Invalid key at slot 2\n"); + ret = -EINVAL; + goto out; + } + + item = btrfs_item_nr(2); + if (btrfs_item_size(eb, item) != strlen(split2)) { + test_msg("Invalid len in the second split\n"); + ret = -EINVAL; + goto out; + } + + read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2), + strlen(split2)); + if (memcmp(buf, split2, strlen(split2))) { + test_msg("Data in the buffer doesn't match what it should " + "in the last chunk\n"); + ret = -EINVAL; + goto out; + } +out: + btrfs_free_path(path); + kfree(root); + return ret; +} + +int btrfs_test_extent_buffer_operations(void) +{ + test_msg("Running extent buffer operation tests"); + return test_btrfs_split_item(); +} diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c new file mode 100644 index 000000000000..7e99c2f98dd0 --- /dev/null +++ b/fs/btrfs/tests/extent-io-tests.c @@ -0,0 +1,276 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/pagemap.h> +#include <linux/sched.h> +#include "btrfs-tests.h" +#include "../extent_io.h" + +#define PROCESS_UNLOCK (1 << 0) +#define PROCESS_RELEASE (1 << 1) +#define PROCESS_TEST_LOCKED (1 << 2) + +static noinline int process_page_range(struct inode *inode, u64 start, u64 end, + unsigned long flags) +{ + int ret; + struct page *pages[16]; + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long nr_pages = end_index - index + 1; + int i; + int count = 0; + int loops = 0; + + while (nr_pages > 0) { + ret = find_get_pages_contig(inode->i_mapping, index, + min_t(unsigned long, nr_pages, + ARRAY_SIZE(pages)), pages); + for (i = 0; i < ret; i++) { + if (flags & PROCESS_TEST_LOCKED && + !PageLocked(pages[i])) + count++; + if (flags & PROCESS_UNLOCK && PageLocked(pages[i])) + unlock_page(pages[i]); + page_cache_release(pages[i]); + if (flags & PROCESS_RELEASE) + page_cache_release(pages[i]); + } + nr_pages -= ret; + index += ret; + cond_resched(); + loops++; + if (loops > 100000) { + printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret); + break; + } + } + return count; +} + +static int test_find_delalloc(void) +{ + struct inode *inode; + struct extent_io_tree tmp; + struct page *page; + struct page *locked_page = NULL; + unsigned long index = 0; + u64 total_dirty = 256 * 1024 * 1024; + u64 max_bytes = 128 * 1024 * 1024; + u64 start, end, test_start; + u64 found; + int ret = -EINVAL; + + inode = btrfs_new_test_inode(); + if (!inode) { + test_msg("Failed to allocate test inode\n"); + return -ENOMEM; + } + + extent_io_tree_init(&tmp, &inode->i_data); + + /* + * First go through and create and mark all of our pages dirty, we pin + * everything to make sure our pages don't get evicted and screw up our + * test. + */ + for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + if (!page) { + test_msg("Failed to allocate test page\n"); + ret = -ENOMEM; + goto out; + } + SetPageDirty(page); + if (index) { + unlock_page(page); + } else { + page_cache_get(page); + locked_page = page; + } + } + + /* Test this scenario + * |--- delalloc ---| + * |--- search ---| + */ + set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS); + start = 0; + end = 0; + found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + &end, max_bytes); + if (!found) { + test_msg("Should have found at least one delalloc\n"); + goto out_bits; + } + if (start != 0 || end != 4095) { + test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n", + start, end); + goto out_bits; + } + unlock_extent(&tmp, start, end); + unlock_page(locked_page); + page_cache_release(locked_page); + + /* + * Test this scenario + * + * |--- delalloc ---| + * |--- search ---| + */ + test_start = 64 * 1024 * 1024; + locked_page = find_lock_page(inode->i_mapping, + test_start >> PAGE_CACHE_SHIFT); + if (!locked_page) { + test_msg("Couldn't find the locked page\n"); + goto out_bits; + } + set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS); + start = test_start; + end = 0; + found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + &end, max_bytes); + if (!found) { + test_msg("Couldn't find delalloc in our range\n"); + goto out_bits; + } + if (start != test_start || end != max_bytes - 1) { + test_msg("Expected start %Lu end %Lu, got start %Lu, end " + "%Lu\n", test_start, max_bytes - 1, start, end); + goto out_bits; + } + if (process_page_range(inode, start, end, + PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) { + test_msg("There were unlocked pages in the range\n"); + goto out_bits; + } + unlock_extent(&tmp, start, end); + /* locked_page was unlocked above */ + page_cache_release(locked_page); + + /* + * Test this scenario + * |--- delalloc ---| + * |--- search ---| + */ + test_start = max_bytes + 4096; + locked_page = find_lock_page(inode->i_mapping, test_start >> + PAGE_CACHE_SHIFT); + if (!locked_page) { + test_msg("Could'nt find the locked page\n"); + goto out_bits; + } + start = test_start; + end = 0; + found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + &end, max_bytes); + if (found) { + test_msg("Found range when we shouldn't have\n"); + goto out_bits; + } + if (end != (u64)-1) { + test_msg("Did not return the proper end offset\n"); + goto out_bits; + } + + /* + * Test this scenario + * [------- delalloc -------| + * [max_bytes]|-- search--| + * + * We are re-using our test_start from above since it works out well. + */ + set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS); + start = test_start; + end = 0; + found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + &end, max_bytes); + if (!found) { + test_msg("Didn't find our range\n"); + goto out_bits; + } + if (start != test_start || end != total_dirty - 1) { + test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n", + test_start, total_dirty - 1, start, end); + goto out_bits; + } + if (process_page_range(inode, start, end, + PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) { + test_msg("Pages in range were not all locked\n"); + goto out_bits; + } + unlock_extent(&tmp, start, end); + + /* + * Now to test where we run into a page that is no longer dirty in the + * range we want to find. + */ + page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024)) + >> PAGE_CACHE_SHIFT); + if (!page) { + test_msg("Couldn't find our page\n"); + goto out_bits; + } + ClearPageDirty(page); + page_cache_release(page); + + /* We unlocked it in the previous test */ + lock_page(locked_page); + start = test_start; + end = 0; + /* + * Currently if we fail to find dirty pages in the delalloc range we + * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If + * this changes at any point in the future we will need to fix this + * tests expected behavior. + */ + found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + &end, max_bytes); + if (!found) { + test_msg("Didn't find our range\n"); + goto out_bits; + } + if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) { + test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n", + test_start, test_start + PAGE_CACHE_SIZE - 1, start, + end); + goto out_bits; + } + if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED | + PROCESS_UNLOCK)) { + test_msg("Pages in range were not all locked\n"); + goto out_bits; + } + ret = 0; +out_bits: + clear_extent_bits(&tmp, 0, total_dirty - 1, + (unsigned long)-1, GFP_NOFS); +out: + if (locked_page) + page_cache_release(locked_page); + process_page_range(inode, 0, total_dirty - 1, + PROCESS_UNLOCK | PROCESS_RELEASE); + iput(inode); + return ret; +} + +int btrfs_test_extent_io(void) +{ + test_msg("Running find delalloc tests\n"); + return test_find_delalloc(); +} diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c new file mode 100644 index 000000000000..397d1f99a8eb --- /dev/null +++ b/fs/btrfs/tests/inode-tests.c @@ -0,0 +1,955 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include "btrfs-tests.h" +#include "../ctree.h" +#include "../btrfs_inode.h" +#include "../disk-io.h" +#include "../extent_io.h" +#include "../volumes.h" + +static struct btrfs_fs_info *alloc_dummy_fs_info(void) +{ + struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), + GFP_NOFS); + if (!fs_info) + return fs_info; + fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), + GFP_NOFS); + if (!fs_info->fs_devices) { + kfree(fs_info); + return NULL; + } + return fs_info; +} +static void free_dummy_root(struct btrfs_root *root) +{ + if (!root) + return; + if (root->fs_info) { + kfree(root->fs_info->fs_devices); + kfree(root->fs_info); + } + if (root->node) + free_extent_buffer(root->node); + kfree(root); +} + +static void insert_extent(struct btrfs_root *root, u64 start, u64 len, + u64 ram_bytes, u64 offset, u64 disk_bytenr, + u64 disk_len, u32 type, u8 compression, int slot) +{ + struct btrfs_path path; + struct btrfs_file_extent_item *fi; + struct extent_buffer *leaf = root->node; + struct btrfs_key key; + u32 value_len = sizeof(struct btrfs_file_extent_item); + + if (type == BTRFS_FILE_EXTENT_INLINE) + value_len += len; + memset(&path, 0, sizeof(path)); + + path.nodes[0] = leaf; + path.slots[0] = slot; + + key.objectid = BTRFS_FIRST_FREE_OBJECTID; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = start; + + setup_items_for_insert(root, &path, &key, &value_len, value_len, + value_len + sizeof(struct btrfs_item), 1); + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, fi, 1); + btrfs_set_file_extent_type(leaf, fi, type); + btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_len); + btrfs_set_file_extent_offset(leaf, fi, offset); + btrfs_set_file_extent_num_bytes(leaf, fi, len); + btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes); + btrfs_set_file_extent_compression(leaf, fi, compression); + btrfs_set_file_extent_encryption(leaf, fi, 0); + btrfs_set_file_extent_other_encoding(leaf, fi, 0); +} + +static void insert_inode_item_key(struct btrfs_root *root) +{ + struct btrfs_path path; + struct extent_buffer *leaf = root->node; + struct btrfs_key key; + u32 value_len = 0; + + memset(&path, 0, sizeof(path)); + + path.nodes[0] = leaf; + path.slots[0] = 0; + + key.objectid = BTRFS_INODE_ITEM_KEY; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + setup_items_for_insert(root, &path, &key, &value_len, value_len, + value_len + sizeof(struct btrfs_item), 1); +} + +/* + * Build the most complicated map of extents the earth has ever seen. We want + * this so we can test all of the corner cases of btrfs_get_extent. Here is a + * diagram of how the extents will look though this may not be possible we still + * want to make sure everything acts normally (the last number is not inclusive) + * + * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288] + * [hole ][inline][ hole ][ regular ][regular1 split][ hole ] + * + * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056] + * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ] + * + * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ] + * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent] + * + * [81920-86016] + * [ regular ] + */ +static void setup_file_extents(struct btrfs_root *root) +{ + int slot = 0; + u64 disk_bytenr = 1 * 1024 * 1024; + u64 offset = 0; + + /* First we want a hole */ + insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, + slot); + slot++; + offset += 5; + + /* + * Now we want an inline extent, I don't think this is possible but hey + * why not? Also keep in mind if we have an inline extent it counts as + * the whole first page. If we were to expand it we would have to cow + * and we wouldn't have an inline extent anymore. + */ + insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0, + slot); + slot++; + offset = 4096; + + /* Now another hole */ + insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, + slot); + slot++; + offset += 4; + + /* Now for a regular extent */ + insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096, + BTRFS_FILE_EXTENT_REG, 0, slot); + slot++; + disk_bytenr += 4096; + offset += 4095; + + /* + * Now for 3 extents that were split from a hole punch so we test + * offsets properly. + */ + insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, + BTRFS_FILE_EXTENT_REG, 0, slot); + slot++; + offset += 4096; + insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG, + 0, slot); + slot++; + offset += 4096; + insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + BTRFS_FILE_EXTENT_REG, 0, slot); + slot++; + offset += 8192; + disk_bytenr += 16384; + + /* Now for a unwritten prealloc extent */ + insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, + BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + slot++; + offset += 4096; + + /* + * We want to jack up disk_bytenr a little more so the em stuff doesn't + * merge our records. + */ + disk_bytenr += 8192; + + /* + * Now for a partially written prealloc extent, basically the same as + * the hole punch example above. Ram_bytes never changes when you mark + * extents written btw. + */ + insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, + BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + slot++; + offset += 4096; + insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384, + BTRFS_FILE_EXTENT_REG, 0, slot); + slot++; + offset += 4096; + insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, + BTRFS_FILE_EXTENT_PREALLOC, 0, slot); + slot++; + offset += 8192; + disk_bytenr += 16384; + + /* Now a normal compressed extent */ + insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096, + BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + slot++; + offset += 8192; + /* No merges */ + disk_bytenr += 8192; + + /* Now a split compressed extent */ + insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096, + BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + slot++; + offset += 4096; + insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096, + BTRFS_FILE_EXTENT_REG, 0, slot); + slot++; + offset += 4096; + insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096, + BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); + slot++; + offset += 8192; + disk_bytenr += 8192; + + /* Now extents that have a hole but no hole extent */ + insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, + BTRFS_FILE_EXTENT_REG, 0, slot); + slot++; + offset += 16384; + disk_bytenr += 4096; + insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, + BTRFS_FILE_EXTENT_REG, 0, slot); +} + +static unsigned long prealloc_only = 0; +static unsigned long compressed_only = 0; +static unsigned long vacancy_only = 0; + +static noinline int test_btrfs_get_extent(void) +{ + struct inode *inode = NULL; + struct btrfs_root *root = NULL; + struct extent_map *em = NULL; + u64 orig_start; + u64 disk_bytenr; + u64 offset; + int ret = -ENOMEM; + + inode = btrfs_new_test_inode(); + if (!inode) { + test_msg("Couldn't allocate inode\n"); + return ret; + } + + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; + BTRFS_I(inode)->location.offset = 0; + + root = btrfs_alloc_dummy_root(); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); + goto out; + } + + /* + * We do this since btrfs_get_extent wants to assign em->bdev to + * root->fs_info->fs_devices->latest_bdev. + */ + root->fs_info = alloc_dummy_fs_info(); + if (!root->fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); + goto out; + } + + root->node = alloc_dummy_extent_buffer(0, 4096); + if (!root->node) { + test_msg("Couldn't allocate dummy buffer\n"); + goto out; + } + + /* + * We will just free a dummy node if it's ref count is 2 so we need an + * extra ref so our searches don't accidently release our page. + */ + extent_buffer_get(root->node); + btrfs_set_header_nritems(root->node, 0); + btrfs_set_header_level(root->node, 0); + ret = -EINVAL; + + /* First with no extents */ + BTRFS_I(inode)->root = root; + em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0); + if (IS_ERR(em)) { + em = NULL; + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != EXTENT_MAP_HOLE) { + test_msg("Expected a hole, got %llu\n", em->block_start); + goto out; + } + if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + test_msg("Vacancy flag wasn't set properly\n"); + goto out; + } + free_extent_map(em); + btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); + + /* + * All of the magic numbers are based on the mapping setup in + * setup_file_extents, so if you change anything there you need to + * update the comment and update the expected values below. + */ + setup_file_extents(root); + + em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != EXTENT_MAP_HOLE) { + test_msg("Expected a hole, got %llu\n", em->block_start); + goto out; + } + if (em->start != 0 || em->len != 5) { + test_msg("Unexpected extent wanted start 0 len 5, got start " + "%llu len %llu\n", em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != EXTENT_MAP_INLINE) { + test_msg("Expected an inline, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4091) { + test_msg("Unexpected extent wanted start %llu len 1, got start " + "%llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + /* + * We don't test anything else for inline since it doesn't get set + * unless we have a page for it to write into. Maybe we should change + * this? + */ + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != EXTENT_MAP_HOLE) { + test_msg("Expected a hole, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4) { + test_msg("Unexpected extent wanted start %llu len 4, got start " + "%llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + /* Regular extent */ + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4095) { + test_msg("Unexpected extent wanted start %llu len 4095, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, + em->orig_start); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + /* The next 3 are split extents */ + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, + em->orig_start); + goto out; + } + disk_bytenr = em->block_start; + orig_start = em->start; + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != EXTENT_MAP_HOLE) { + test_msg("Expected a hole, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 8192) { + test_msg("Unexpected extent wanted start %llu len 8192, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + if (em->orig_start != orig_start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", + orig_start, em->orig_start); + goto out; + } + disk_bytenr += (em->start - orig_start); + if (em->block_start != disk_bytenr) { + test_msg("Wrong block start, want %llu, have %llu\n", + disk_bytenr, em->block_start); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + /* Prealloc extent */ + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != prealloc_only) { + test_msg("Unexpected flags set, want %lu have %lu\n", + prealloc_only, em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, + em->orig_start); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + /* The next 3 are a half written prealloc extent */ + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != prealloc_only) { + test_msg("Unexpected flags set, want %lu have %lu\n", + prealloc_only, em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, + em->orig_start); + goto out; + } + disk_bytenr = em->block_start; + orig_start = em->start; + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_HOLE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + if (em->orig_start != orig_start) { + test_msg("Unexpected orig offset, wanted %llu, have %llu\n", + orig_start, em->orig_start); + goto out; + } + if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) { + test_msg("Unexpected block start, wanted %llu, have %llu\n", + disk_bytenr + (em->start - em->orig_start), + em->block_start); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 8192) { + test_msg("Unexpected extent wanted start %llu len 8192, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != prealloc_only) { + test_msg("Unexpected flags set, want %lu have %lu\n", + prealloc_only, em->flags); + goto out; + } + if (em->orig_start != orig_start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start, + em->orig_start); + goto out; + } + if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) { + test_msg("Unexpected block start, wanted %llu, have %llu\n", + disk_bytenr + (em->start - em->orig_start), + em->block_start); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + /* Now for the compressed extent */ + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 8192) { + test_msg("Unexpected extent wanted start %llu len 8192, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != compressed_only) { + test_msg("Unexpected flags set, want %lu have %lu\n", + compressed_only, em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", + em->start, em->orig_start); + goto out; + } + if (em->compress_type != BTRFS_COMPRESS_ZLIB) { + test_msg("Unexpected compress type, wanted %d, got %d\n", + BTRFS_COMPRESS_ZLIB, em->compress_type); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + /* Split compressed extent */ + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != compressed_only) { + test_msg("Unexpected flags set, want %lu have %lu\n", + compressed_only, em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", + em->start, em->orig_start); + goto out; + } + if (em->compress_type != BTRFS_COMPRESS_ZLIB) { + test_msg("Unexpected compress type, wanted %d, got %d\n", + BTRFS_COMPRESS_ZLIB, em->compress_type); + goto out; + } + disk_bytenr = em->block_start; + orig_start = em->start; + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, + em->orig_start); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != disk_bytenr) { + test_msg("Block start does not match, want %llu got %llu\n", + disk_bytenr, em->block_start); + goto out; + } + if (em->start != offset || em->len != 8192) { + test_msg("Unexpected extent wanted start %llu len 8192, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != compressed_only) { + test_msg("Unexpected flags set, want %lu have %lu\n", + compressed_only, em->flags); + goto out; + } + if (em->orig_start != orig_start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", + em->start, orig_start); + goto out; + } + if (em->compress_type != BTRFS_COMPRESS_ZLIB) { + test_msg("Unexpected compress type, wanted %d, got %d\n", + BTRFS_COMPRESS_ZLIB, em->compress_type); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + /* A hole between regular extents but no hole extent */ + em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, + em->orig_start); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != EXTENT_MAP_HOLE) { + test_msg("Expected a hole extent, got %llu\n", em->block_start); + goto out; + } + /* + * Currently we just return a length that we requested rather than the + * length of the actual hole, if this changes we'll have to change this + * test. + */ + if (em->start != offset || em->len != 12288) { + test_msg("Unexpected extent wanted start %llu len 12288, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != vacancy_only) { + test_msg("Unexpected flags set, want %lu have %lu\n", + vacancy_only, em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, + em->orig_start); + goto out; + } + offset = em->start + em->len; + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != offset || em->len != 4096) { + test_msg("Unexpected extent wanted start %llu len 4096, got " + "start %llu len %llu\n", offset, em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); + goto out; + } + if (em->orig_start != em->start) { + test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, + em->orig_start); + goto out; + } + ret = 0; +out: + if (!IS_ERR(em)) + free_extent_map(em); + iput(inode); + free_dummy_root(root); + return ret; +} + +static int test_hole_first(void) +{ + struct inode *inode = NULL; + struct btrfs_root *root = NULL; + struct extent_map *em = NULL; + int ret = -ENOMEM; + + inode = btrfs_new_test_inode(); + if (!inode) { + test_msg("Couldn't allocate inode\n"); + return ret; + } + + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; + BTRFS_I(inode)->location.offset = 0; + + root = btrfs_alloc_dummy_root(); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); + goto out; + } + + root->fs_info = alloc_dummy_fs_info(); + if (!root->fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); + goto out; + } + + root->node = alloc_dummy_extent_buffer(0, 4096); + if (!root->node) { + test_msg("Couldn't allocate dummy buffer\n"); + goto out; + } + + extent_buffer_get(root->node); + btrfs_set_header_nritems(root->node, 0); + btrfs_set_header_level(root->node, 0); + BTRFS_I(inode)->root = root; + ret = -EINVAL; + + /* + * Need a blank inode item here just so we don't confuse + * btrfs_get_extent. + */ + insert_inode_item_key(root); + insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096, + BTRFS_FILE_EXTENT_REG, 0, 1); + em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != EXTENT_MAP_HOLE) { + test_msg("Expected a hole, got %llu\n", em->block_start); + goto out; + } + if (em->start != 0 || em->len != 4096) { + test_msg("Unexpected extent wanted start 0 len 4096, got start " + "%llu len %llu\n", em->start, em->len); + goto out; + } + if (em->flags != vacancy_only) { + test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only, + em->flags); + goto out; + } + free_extent_map(em); + + em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0); + if (IS_ERR(em)) { + test_msg("Got an error when we shouldn't have\n"); + goto out; + } + if (em->block_start != 4096) { + test_msg("Expected a real extent, got %llu\n", em->block_start); + goto out; + } + if (em->start != 4096 || em->len != 4096) { + test_msg("Unexpected extent wanted start 4096 len 4096, got " + "start %llu len %llu\n", em->start, em->len); + goto out; + } + if (em->flags != 0) { + test_msg("Unexpected flags set, wanted 0 got %lu\n", + em->flags); + goto out; + } + ret = 0; +out: + if (!IS_ERR(em)) + free_extent_map(em); + iput(inode); + free_dummy_root(root); + return ret; +} + +int btrfs_test_inodes(void) +{ + int ret; + + set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only); + set_bit(EXTENT_FLAG_VACANCY, &vacancy_only); + set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); + + test_msg("Running btrfs_get_extent tests\n"); + ret = test_btrfs_get_extent(); + if (ret) + return ret; + test_msg("Running hole first btrfs_get_extent test\n"); + return test_hole_first(); +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8c81bdc1ef9b..c6a872a8a468 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -57,7 +57,7 @@ static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { __TRANS_JOIN_NOLOCK), }; -static void put_transaction(struct btrfs_transaction *transaction) +void btrfs_put_transaction(struct btrfs_transaction *transaction) { WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { @@ -332,7 +332,7 @@ static void wait_current_trans(struct btrfs_root *root) wait_event(root->fs_info->transaction_wait, cur_trans->state >= TRANS_STATE_UNBLOCKED || cur_trans->aborted); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); } @@ -353,6 +353,17 @@ static int may_wait_transaction(struct btrfs_root *root, int type) return 0; } +static inline bool need_reserve_reloc_root(struct btrfs_root *root) +{ + if (!root->fs_info->reloc_ctl || + !root->ref_cows || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || + root->reloc_root) + return false; + + return true; +} + static struct btrfs_trans_handle * start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, enum btrfs_reserve_flush_enum flush) @@ -360,8 +371,9 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; u64 num_bytes = 0; - int ret; u64 qgroup_reserved = 0; + bool reloc_reserved = false; + int ret; if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return ERR_PTR(-EROFS); @@ -390,6 +402,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, } num_bytes = btrfs_calc_trans_metadata_size(root, num_items); + /* + * Do the reservation for the relocation root creation + */ + if (unlikely(need_reserve_reloc_root(root))) { + num_bytes += root->nodesize; + reloc_reserved = true; + } + ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, num_bytes, flush); @@ -451,6 +471,7 @@ again: h->delayed_ref_elem.seq = 0; h->type = type; h->allocating_chunk = false; + h->reloc_reserved = false; INIT_LIST_HEAD(&h->qgroup_ref_list); INIT_LIST_HEAD(&h->new_bgs); @@ -466,6 +487,7 @@ again: h->transid, num_bytes, 1); h->block_rsv = &root->fs_info->trans_block_rsv; h->bytes_reserved = num_bytes; + h->reloc_reserved = reloc_reserved; } h->qgroup_reserved = qgroup_reserved; @@ -610,7 +632,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) } wait_for_commit(root, cur_trans); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); out: return ret; } @@ -735,7 +757,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, smp_mb(); if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); if (current->journal_info == trans) current->journal_info = NULL; @@ -744,8 +766,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_run_delayed_iputs(root); if (trans->aborted || - test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) + test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) { + wake_up_process(info->transaction_kthread); err = -EIO; + } assert_qgroups_uptodate(trans); kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -948,16 +972,19 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, return ret; ret = btrfs_run_dev_stats(trans, root->fs_info); - WARN_ON(ret); + if (ret) + return ret; ret = btrfs_run_dev_replace(trans, root->fs_info); - WARN_ON(ret); - + if (ret) + return ret; ret = btrfs_run_qgroups(trans, root->fs_info); - BUG_ON(ret); + if (ret) + return ret; /* run_qgroups might have added some more refs */ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - BUG_ON(ret); + if (ret) + return ret; while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; @@ -1453,7 +1480,7 @@ static void do_async_commit(struct work_struct *work) * We've got freeze protection passed with the transaction. * Tell lockdep about it. */ - if (ac->newtrans->type < TRANS_JOIN_NOLOCK) + if (ac->newtrans->type & __TRANS_FREEZABLE) rwsem_acquire_read( &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 0, 1, _THIS_IP_); @@ -1494,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, * Tell lockdep we've released the freeze rwsem, since the * async commit thread will be the one to unlock it. */ - if (trans->type < TRANS_JOIN_NOLOCK) + if (ac->newtrans->type & __TRANS_FREEZABLE) rwsem_release( &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 1, _THIS_IP_); @@ -1510,7 +1537,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, if (current->journal_info == trans) current->journal_info = NULL; - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); return 0; } @@ -1552,8 +1579,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, root->fs_info->running_transaction = NULL; spin_unlock(&root->fs_info->trans_lock); - put_transaction(cur_trans); - put_transaction(cur_trans); + if (trans->type & __TRANS_FREEZABLE) + sb_end_intwrite(root->fs_info->sb); + btrfs_put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); trace_btrfs_transaction_commit(root); @@ -1571,15 +1600,19 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, int ret; ret = btrfs_run_delayed_items(trans, root); - if (ret) - return ret; - /* * running the delayed items may have added new refs. account * them now so that they hinder processing of more delayed refs * as little as possible. */ - btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); + if (ret) { + btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); + return ret; + } + + ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); + if (ret) + return ret; /* * rename don't use btrfs_join_transaction, so, once we @@ -1596,14 +1629,14 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) - return btrfs_start_all_delalloc_inodes(fs_info, 1); + return btrfs_start_delalloc_roots(fs_info, 1); return 0; } static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) { if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) - btrfs_wait_all_ordered_extents(fs_info); + btrfs_wait_ordered_roots(fs_info, -1); } int btrfs_commit_transaction(struct btrfs_trans_handle *trans, @@ -1669,7 +1702,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, cur_trans); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); return ret; } @@ -1686,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, prev_trans); - put_transaction(prev_trans); + btrfs_put_transaction(prev_trans); } else { spin_unlock(&root->fs_info->trans_lock); } @@ -1885,8 +1918,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_del_init(&cur_trans->list); spin_unlock(&root->fs_info->trans_lock); - put_transaction(cur_trans); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(root->fs_info->sb); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 5c2af8491621..7657d115067d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -92,6 +92,7 @@ struct btrfs_trans_handle { short aborted; short adding_csums; bool allocating_chunk; + bool reloc_reserved; unsigned int type; /* * this root is only needed to validate that the root passed to @@ -166,4 +167,5 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); int btrfs_transaction_blocked(struct btrfs_fs_info *info); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); +void btrfs_put_transaction(struct btrfs_transaction *transaction); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 94e05c1f118a..76928ca97741 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -37,7 +37,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int ret = 0; int wret; int level; - int is_extent = 0; int next_key_ret = 0; u64 last_ret = 0; u64 min_trans = 0; @@ -50,7 +49,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } - if (root->ref_cows == 0 && !is_extent) + if (root->ref_cows == 0) goto out; if (btrfs_test_opt(root, SSD)) @@ -85,7 +84,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, path->keep_locks = 1; - ret = btrfs_search_forward(root, &key, NULL, path, min_trans); + ret = btrfs_search_forward(root, &key, path, min_trans); if (ret < 0) goto out; if (ret > 0) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 79f057c0619a..9f7fc51ca334 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -26,7 +26,6 @@ #include "locking.h" #include "print-tree.h" #include "backref.h" -#include "compat.h" #include "tree-log.h" #include "hash.h" @@ -936,7 +935,7 @@ again: parent_objectid, victim_name, victim_name_len)) { - btrfs_inc_nlink(inode); + inc_nlink(inode); btrfs_release_path(path); ret = btrfs_unlink_inode(trans, root, dir, @@ -1006,7 +1005,7 @@ again: victim_parent = read_one_inode(root, parent_objectid); if (victim_parent) { - btrfs_inc_nlink(inode); + inc_nlink(inode); btrfs_release_path(path); ret = btrfs_unlink_inode(trans, root, @@ -1113,11 +1112,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct inode *dir; - struct inode *inode; + struct inode *dir = NULL; + struct inode *inode = NULL; unsigned long ref_ptr; unsigned long ref_end; - char *name; + char *name = NULL; int namelen; int ret; int search_done = 0; @@ -1150,13 +1149,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * care of the rest */ dir = read_one_inode(root, parent_objectid); - if (!dir) - return -ENOENT; + if (!dir) { + ret = -ENOENT; + goto out; + } inode = read_one_inode(root, inode_objectid); if (!inode) { - iput(dir); - return -EIO; + ret = -EIO; + goto out; } while (ref_ptr < ref_end) { @@ -1169,14 +1170,16 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, */ if (!dir) dir = read_one_inode(root, parent_objectid); - if (!dir) - return -ENOENT; + if (!dir) { + ret = -ENOENT; + goto out; + } } else { ret = ref_get_fields(eb, ref_ptr, &namelen, &name, &ref_index); } if (ret) - return ret; + goto out; /* if we already have a perfect match, we're done */ if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), @@ -1196,12 +1199,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, parent_objectid, ref_index, name, namelen, &search_done); - if (ret == 1) { - ret = 0; + if (ret) { + if (ret == 1) + ret = 0; goto out; } - if (ret) - goto out; } /* insert our name */ @@ -1215,6 +1217,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; kfree(name); + name = NULL; if (log_ref_ver) { iput(dir); dir = NULL; @@ -1225,6 +1228,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ret = overwrite_item(trans, root, path, eb, slot, key); out: btrfs_release_path(path); + kfree(name); iput(dir); iput(inode); return ret; @@ -1307,6 +1311,7 @@ static int count_inode_refs(struct btrfs_root *root, break; path->slots[0]--; } +process_slot: btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid != ino || @@ -1327,6 +1332,10 @@ static int count_inode_refs(struct btrfs_root *root, if (key.offset == 0) break; + if (path->slots[0] > 0) { + path->slots[0]--; + goto process_slot; + } key.offset--; btrfs_release_path(path); } @@ -1480,7 +1489,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, if (!inode->i_nlink) set_nlink(inode, 1); else - btrfs_inc_nlink(inode); + inc_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; @@ -1823,7 +1832,7 @@ again: dir_key->offset, name, name_len, 0); } - if (IS_ERR_OR_NULL(log_di)) { + if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) { btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); @@ -1841,7 +1850,7 @@ again: goto out; } - btrfs_inc_nlink(inode); + inc_nlink(inode); ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (!ret) @@ -1860,6 +1869,9 @@ again: goto again; ret = 0; goto out; + } else if (IS_ERR(log_di)) { + kfree(name); + return PTR_ERR(log_di); } btrfs_release_path(log_path); kfree(name); @@ -2118,8 +2130,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; - if (btrfs_header_level(cur) != *level) - WARN_ON(1); + WARN_ON(btrfs_header_level(cur) != *level); if (path->slots[*level] >= btrfs_header_nritems(cur)) @@ -2151,11 +2162,13 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return ret; } - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); - clean_tree_block(trans, root, next); - btrfs_wait_tree_block_writeback(next); - btrfs_tree_unlock(next); + if (trans) { + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); @@ -2227,11 +2240,13 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[*level]; - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); - clean_tree_block(trans, root, next); - btrfs_wait_tree_block_writeback(next); - btrfs_tree_unlock(next); + if (trans) { + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); ret = btrfs_free_and_pin_reserved_extent(root, @@ -2301,11 +2316,13 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[orig_level]; - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); - clean_tree_block(trans, log, next); - btrfs_wait_tree_block_writeback(next); - btrfs_tree_unlock(next); + if (trans) { + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + clean_tree_block(trans, log, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + } WARN_ON(log->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); @@ -2571,9 +2588,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * the running transaction open, so a full commit can't hop * in and cause problems either. */ - btrfs_scrub_pause_super(root); ret = write_ctree_super(trans, root->fs_info->tree_root, 1); - btrfs_scrub_continue_super(root); if (ret) { btrfs_abort_transaction(trans, root, ret); goto out_wake_log_root; @@ -2608,13 +2623,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans, .process_func = process_one_buffer }; - if (trans) { - ret = walk_log_tree(trans, log, &wc); - - /* I don't think this can happen but just in case */ - if (ret) - btrfs_abort_transaction(trans, log, ret); - } + ret = walk_log_tree(trans, log, &wc); + /* I don't think this can happen but just in case */ + if (ret) + btrfs_abort_transaction(trans, log, ret); while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, @@ -2867,7 +2879,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, u64 min_offset, u64 *last_offset_ret) { struct btrfs_key min_key; - struct btrfs_key max_key; struct btrfs_root *log = root->log_root; struct extent_buffer *src; int err = 0; @@ -2879,9 +2890,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); log = root->log_root; - max_key.objectid = ino; - max_key.offset = (u64)-1; - max_key.type = key_type; min_key.objectid = ino; min_key.type = key_type; @@ -2889,8 +2897,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, path->keep_locks = 1; - ret = btrfs_search_forward(root, &min_key, &max_key, - path, trans->transid); + ret = btrfs_search_forward(root, &min_key, path, trans->transid); /* * we didn't find anything from this transaction, see if there @@ -2943,10 +2950,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, /* find the first key from this transaction again */ ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); - if (ret != 0) { - WARN_ON(1); + if (WARN_ON(ret != 0)) goto done; - } /* * we have a block from this transaction, log every item in it @@ -3172,11 +3177,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans, struct inode *inode) { struct btrfs_inode_item *inode_item; - struct btrfs_key key; int ret; - memcpy(&key, &BTRFS_I(inode)->location, sizeof(key)); - ret = btrfs_insert_empty_item(trans, log, path, &key, + ret = btrfs_insert_empty_item(trans, log, path, + &BTRFS_I(inode)->location, sizeof(*inode_item)); if (ret && ret != -EEXIST) return ret; @@ -3375,7 +3379,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, btrfs_set_token_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG, &token); - if (em->block_start == 0) + if (em->block_start == EXTENT_MAP_HOLE) skip_csum = true; } @@ -3417,11 +3421,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, if (skip_csum) return 0; - if (em->compress_type) { - csum_offset = 0; - csum_len = block_len; - } - /* * First check and see if our csums are on our outstanding ordered * extents. @@ -3505,8 +3504,13 @@ unlocked: if (!mod_len || ret) return ret; - csum_offset = mod_start - em->start; - csum_len = mod_len; + if (em->compress_type) { + csum_offset = 0; + csum_len = block_len; + } else { + csum_offset = mod_start - em->start; + csum_len = mod_len; + } /* block start is already adjusted for the file extent offset. */ ret = btrfs_lookup_csums_range(log->fs_info->csum_root, @@ -3693,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags)) { + &BTRFS_I(inode)->runtime_flags) || + inode_only == LOG_INODE_EXISTS) { if (inode_only == LOG_INODE_ALL) fast_search = true; max_key.type = BTRFS_XATTR_ITEM_KEY; @@ -3719,7 +3724,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, while (1) { ins_nr = 0; - ret = btrfs_search_forward(root, &min_key, &max_key, + ret = btrfs_search_forward(root, &min_key, path, trans->transid); if (ret != 0) break; @@ -3769,14 +3774,14 @@ next_slot: } btrfs_release_path(path); - if (min_key.offset < (u64)-1) + if (min_key.offset < (u64)-1) { min_key.offset++; - else if (min_key.type < (u8)-1) + } else if (min_key.type < max_key.type) { min_key.type++; - else if (min_key.objectid < (u64)-1) - min_key.objectid++; - else + min_key.offset = 0; + } else { break; + } } if (ins_nr) { ret = copy_items(trans, inode, dst_path, src, ins_start_slot, @@ -3797,7 +3802,7 @@ log_extents: err = ret; goto out_unlock; } - } else { + } else if (inode_only == LOG_INODE_ALL) { struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em, *n; diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index dd0dea3766f7..fbda90004fe9 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -260,7 +260,6 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, { struct btrfs_root *root = fs_info->uuid_root; struct btrfs_key key; - struct btrfs_key max_key; struct btrfs_path *path; int ret = 0; struct extent_buffer *leaf; @@ -277,13 +276,10 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, key.objectid = 0; key.type = 0; key.offset = 0; - max_key.objectid = (u64)-1; - max_key.type = (u8)-1; - max_key.offset = (u64)-1; again_search_slot: path->keep_locks = 1; - ret = btrfs_search_forward(root, &key, &max_key, path, 0); + ret = btrfs_search_forward(root, &key, path, 0); if (ret) { if (ret > 0) ret = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 043b215769c2..92303f42baaa 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -28,7 +28,6 @@ #include <linux/raid/pq.h> #include <linux/semaphore.h> #include <asm/div64.h> -#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -666,7 +665,8 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->bdev) fs_devices->open_devices--; - if (device->writeable && !device->is_tgtdev_for_dev_replace) { + if (device->writeable && + device->devid != BTRFS_DEV_REPLACE_DEVID) { list_del_init(&device->dev_alloc_list); fs_devices->rw_devices--; } @@ -2041,6 +2041,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->in_fs_metadata = 1; device->is_tgtdev_for_dev_replace = 0; device->mode = FMODE_EXCL; + device->dev_stats_valid = 1; set_blocksize(device->bdev, 4096); if (seeding_dev) { @@ -2208,6 +2209,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->in_fs_metadata = 1; device->is_tgtdev_for_dev_replace = 1; device->mode = FMODE_EXCL; + device->dev_stats_valid = 1; set_blocksize(device->bdev, 4096); device->fs_devices = fs_info->fs_devices; list_add(&device->dev_list, &fs_info->fs_devices->devices); @@ -2550,8 +2552,7 @@ again: failed = 0; retried = true; goto again; - } else if (failed && retried) { - WARN_ON(1); + } else if (WARN_ON(failed && retried)) { ret = -ENOSPC; } error: @@ -3423,6 +3424,9 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info) int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) { + if (fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + mutex_lock(&fs_info->balance_mutex); if (!fs_info->balance_ctl) { mutex_unlock(&fs_info->balance_mutex); @@ -3488,7 +3492,7 @@ static int btrfs_uuid_scan_kthread(void *data) path->keep_locks = 1; while (1) { - ret = btrfs_search_forward(root, &key, &max_key, path, 0); + ret = btrfs_search_forward(root, &key, path, 0); if (ret) { if (ret > 0) ret = 0; @@ -4488,6 +4492,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " "%Lu-%Lu\n", logical, logical+len, em->start, em->start + em->len); + free_extent_map(em); return 1; } @@ -4668,6 +4673,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " "found %Lu-%Lu\n", logical, em->start, em->start + em->len); + free_extent_map(em); return -EINVAL; } @@ -4895,7 +4901,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_stripes = map->num_stripes; max_errors = nr_parity_stripes(map); - raid_map = kmalloc(sizeof(u64) * num_stripes, + raid_map = kmalloc_array(num_stripes, sizeof(u64), GFP_NOFS); if (!raid_map) { ret = -ENOMEM; @@ -5388,17 +5394,15 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, { struct bio_vec *prev; struct request_queue *q = bdev_get_queue(bdev); - unsigned short max_sectors = queue_max_sectors(q); + unsigned int max_sectors = queue_max_sectors(q); struct bvec_merge_data bvm = { .bi_bdev = bdev, .bi_sector = sector, .bi_rw = bio->bi_rw, }; - if (bio->bi_vcnt == 0) { - WARN_ON(1); + if (WARN_ON(bio->bi_vcnt == 0)) return 1; - } prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (bio_sectors(bio) > max_sectors) @@ -5631,10 +5635,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, struct btrfs_device *dev; u64 tmp; - if (!devid && !fs_info) { - WARN_ON(1); + if (WARN_ON(!devid && !fs_info)) return ERR_PTR(-EINVAL); - } dev = __alloc_device(); if (IS_ERR(dev)) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index b72f540c8b29..8b3cd142b373 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -43,9 +43,8 @@ struct btrfs_device { /* WRITE_SYNC bios */ struct btrfs_pending_bios pending_sync_bios; - int running_pending; u64 generation; - + int running_pending; int writeable; int in_fs_metadata; int missing; @@ -53,11 +52,11 @@ struct btrfs_device { int is_tgtdev_for_dev_replace; spinlock_t io_lock; + /* the mode sent to blkdev_get */ + fmode_t mode; struct block_device *bdev; - /* the mode sent to blkdev_get */ - fmode_t mode; struct rcu_string *name; @@ -78,16 +77,21 @@ struct btrfs_device { /* optimal io width for this device */ u32 io_width; + /* type and info about this device */ + u64 type; /* minimal io size for this device */ u32 sector_size; - /* type and info about this device */ - u64 type; /* physical drive uuid (or lvm uuid) */ u8 uuid[BTRFS_UUID_SIZE]; + /* for sending down flush barriers */ + int nobarriers; + struct bio *flush_bio; + struct completion flush_wait; + /* per-device scrub information */ struct scrub_ctx *scrub_device; @@ -103,10 +107,6 @@ struct btrfs_device { struct radix_tree_root reada_zones; struct radix_tree_root reada_extents; - /* for sending down flush barriers */ - struct bio *flush_bio; - struct completion flush_wait; - int nobarriers; /* disk I/O failure stats. For detailed description refer to * enum btrfs_dev_stat_values in ioctl.h */ @@ -132,7 +132,9 @@ struct btrfs_fs_devices { /* all of the devices in the FS, protected by a mutex * so we can safely walk it to write out the supers without - * worrying about add/remove by the multi-device code + * worrying about add/remove by the multi-device code. + * Scrubbing super can kick off supers writing by holding + * this mutex lock. */ struct mutex device_list_mutex; struct list_head devices; diff --git a/fs/char_dev.c b/fs/char_dev.c index 94b5f60076da..f77f7702fabe 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -576,7 +576,8 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data) void __init chrdev_init(void) { cdev_map = kobj_map_init(base_probe, &chrdevs_lock); - bdi_init(&directly_mappable_cdev_bdi); + if (bdi_init(&directly_mappable_cdev_bdi)) + panic("Failed to init directly mappable cdev bdi"); } diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index fc6f4f3a1a9d..4934347321d3 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -548,7 +548,13 @@ static int CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) { int rc; - unsigned int offset = CIFS_SESS_KEY_SIZE + 8; + struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *) + (ses->auth_key.response + CIFS_SESS_KEY_SIZE); + unsigned int hash_len; + + /* The MD5 hash starts at challenge_key.key */ + hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE + + offsetof(struct ntlmv2_resp, challenge.key[0])); if (!ses->server->secmech.sdeschmacmd5) { cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__); @@ -556,7 +562,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) } rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, - ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n", __func__); @@ -570,20 +576,21 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) } if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) - memcpy(ses->auth_key.response + offset, - ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); + memcpy(ntlmv2->challenge.key, + ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); else - memcpy(ses->auth_key.response + offset, - ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); + memcpy(ntlmv2->challenge.key, + ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, - ses->auth_key.response + offset, ses->auth_key.len - offset); + ntlmv2->challenge.key, hash_len); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } + /* Note that the MD5 digest over writes anon.challenge_key.key */ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, - ses->auth_key.response + CIFS_SESS_KEY_SIZE); + ntlmv2->ntlmv2_hash); if (rc) cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); @@ -627,7 +634,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) int rc; int baselen; unsigned int tilen; - struct ntlmv2_resp *buf; + struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; unsigned char *tiblob = NULL; /* target info blob */ @@ -660,13 +667,14 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } ses->auth_key.len += baselen; - buf = (struct ntlmv2_resp *) + ntlmv2 = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); - buf->blob_signature = cpu_to_le32(0x00000101); - buf->reserved = 0; - buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); - buf->reserved2 = 0; + ntlmv2->blob_signature = cpu_to_le32(0x00000101); + ntlmv2->reserved = 0; + /* Must be within 5 minutes of the server */ + ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); + ntlmv2->reserved2 = 0; memcpy(ses->auth_key.response + baselen, tiblob, tilen); @@ -706,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, - ses->auth_key.response + CIFS_SESS_KEY_SIZE, + ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 26b1c1dc93f6..d9ea7ada1378 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -261,7 +261,7 @@ struct smb_version_operations { /* query path data from the server */ int (*query_path_info)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, - FILE_ALL_INFO *, bool *); + FILE_ALL_INFO *, bool *, bool *); /* query file data from the server */ int (*query_file_info)(const unsigned int, struct cifs_tcon *, struct cifs_fid *, FILE_ALL_INFO *); @@ -381,6 +381,9 @@ struct smb_version_operations { char * (*create_lease_buf)(u8 *, u8); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *, unsigned int *); + int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, + struct cifsFileInfo *target_file, u64 src_off, u64 len, + u64 dest_off); }; struct smb_version_values { @@ -855,6 +858,9 @@ struct cifs_tcon { __le64 vol_create_time; __u32 ss_flags; /* sector size flags */ __u32 perf_sector_size; /* best sector size for perf */ + __u32 max_chunks; + __u32 max_bytes_chunk; + __u32 max_bytes_copy; #endif /* CONFIG_CIFS_SMB2 */ #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 9e5ee34de986..33df36ef9d52 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -697,7 +697,13 @@ struct ntlmssp2_name { } __attribute__((packed)); struct ntlmv2_resp { - char ntlmv2_hash[CIFS_ENCPWD_SIZE]; + union { + char ntlmv2_hash[CIFS_ENCPWD_SIZE]; + struct { + __u8 reserved[8]; + __u8 key[CIFS_SERVER_CHALLENGE_SIZE]; + } __attribute__((packed)) challenge; + } __attribute__((packed)); __le32 blob_signature; __u32 reserved; __le64 time; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 93b29474714a..124aa0230c1b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3369,11 +3369,13 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, return 0; } cifs_acl->version = cpu_to_le16(1); - if (acl_type == ACL_TYPE_ACCESS) + if (acl_type == ACL_TYPE_ACCESS) { cifs_acl->access_entry_count = cpu_to_le16(count); - else if (acl_type == ACL_TYPE_DEFAULT) + cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF); + } else if (acl_type == ACL_TYPE_DEFAULT) { cifs_acl->default_entry_count = cpu_to_le16(count); - else { + cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF); + } else { cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5384c2a640ca..11ff5f116b20 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -756,7 +756,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ } else if (rc != -EACCES) { - cifs_dbg(VFS, "Unexpected lookup error %d\n", rc); + cifs_dbg(FYI, "Unexpected lookup error %d\n", rc); /* We special case check for Access Denied - since that is a common return code */ } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7ddddf2e2504..5a5a87240fe2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3663,6 +3663,27 @@ void cifs_oplock_break(struct work_struct *work) } } +/* + * The presence of cifs_direct_io() in the address space ops vector + * allowes open() O_DIRECT flags which would have failed otherwise. + * + * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests + * so this method should never be called. + * + * Direct IO is not yet supported in the cached mode. + */ +static ssize_t +cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t pos, unsigned long nr_segs) +{ + /* + * FIXME + * Eventually need to support direct IO for non forcedirectio mounts + */ + return -EINVAL; +} + + const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, .readpages = cifs_readpages, @@ -3672,6 +3693,7 @@ const struct address_space_operations cifs_addr_ops = { .write_end = cifs_write_end, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = cifs_release_page, + .direct_IO = cifs_direct_io, .invalidatepage = cifs_invalidate_page, .launder_page = cifs_launder_page, }; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 867b7cdc794a..36f9ebb93ceb 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -542,7 +542,8 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ static void cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, - struct cifs_sb_info *cifs_sb, bool adjust_tz) + struct cifs_sb_info *cifs_sb, bool adjust_tz, + bool symlink) { struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); @@ -569,7 +570,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_createtime = le64_to_cpu(info->CreationTime); fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); - if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { + + if (symlink) { + fattr->cf_mode = S_IFLNK; + fattr->cf_dtype = DT_LNK; + } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; /* @@ -578,10 +583,6 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, */ if (!tcon->unix_ext) fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; - } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { - fattr->cf_mode = S_IFLNK; - fattr->cf_dtype = DT_LNK; - fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; @@ -626,7 +627,8 @@ cifs_get_file_info(struct file *filp) rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: - cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); + cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false, + false); break; case -EREMOTE: cifs_create_dfs_fattr(&fattr, inode->i_sb); @@ -673,6 +675,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, bool adjust_tz = false; struct cifs_fattr fattr; struct cifs_search_info *srchinf = NULL; + bool symlink = false; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -702,12 +705,12 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } data = (FILE_ALL_INFO *)buf; rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path, - data, &adjust_tz); + data, &adjust_tz, &symlink); } if (!rc) { - cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *)data, cifs_sb, - adjust_tz); + cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz, + symlink); } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index ba54bf6ab116..409b45eefe70 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -22,12 +22,120 @@ */ #include <linux/fs.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/btrfs.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" #include "cifsfs.h" +static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, + unsigned long srcfd, u64 off, u64 len, u64 destoff) +{ + int rc; + struct cifsFileInfo *smb_file_target = dst_file->private_data; + struct inode *target_inode = file_inode(dst_file); + struct cifs_tcon *target_tcon; + struct fd src_file; + struct cifsFileInfo *smb_file_src; + struct inode *src_inode; + struct cifs_tcon *src_tcon; + + cifs_dbg(FYI, "ioctl clone range\n"); + /* the destination must be opened for writing */ + if (!(dst_file->f_mode & FMODE_WRITE)) { + cifs_dbg(FYI, "file target not open for write\n"); + return -EINVAL; + } + + /* check if target volume is readonly and take reference */ + rc = mnt_want_write_file(dst_file); + if (rc) { + cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc); + return rc; + } + + src_file = fdget(srcfd); + if (!src_file.file) { + rc = -EBADF; + goto out_drop_write; + } + + if ((!src_file.file->private_data) || (!dst_file->private_data)) { + rc = -EBADF; + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); + goto out_fput; + } + + rc = -EXDEV; + smb_file_target = dst_file->private_data; + smb_file_src = src_file.file->private_data; + src_tcon = tlink_tcon(smb_file_src->tlink); + target_tcon = tlink_tcon(smb_file_target->tlink); + + /* check if source and target are on same tree connection */ + if (src_tcon != target_tcon) { + cifs_dbg(VFS, "file copy src and target on different volume\n"); + goto out_fput; + } + + src_inode = src_file.file->f_dentry->d_inode; + + /* + * Note: cifs case is easier than btrfs since server responsible for + * checks for proper open modes and file type and if it wants + * server could even support copy of range where source = target + */ + + /* so we do not deadlock racing two ioctls on same files */ + if (target_inode < src_inode) { + mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD); + } + + /* determine range to clone */ + rc = -EINVAL; + if (off + len > src_inode->i_size || off + len < off) + goto out_unlock; + if (len == 0) + len = src_inode->i_size - off; + + cifs_dbg(FYI, "about to flush pages\n"); + /* should we flush first and last page first */ + truncate_inode_pages_range(&target_inode->i_data, destoff, + PAGE_CACHE_ALIGN(destoff + len)-1); + + if (target_tcon->ses->server->ops->clone_range) + rc = target_tcon->ses->server->ops->clone_range(xid, + smb_file_src, smb_file_target, off, len, destoff); + + /* force revalidate of size and timestamps of target file now + that target is updated on the server */ + CIFS_I(target_inode)->time = 0; +out_unlock: + /* although unlocking in the reverse order from locking is not + strictly necessary here it is a little cleaner to be consistent */ + if (target_inode < src_inode) { + mutex_unlock(&src_inode->i_mutex); + mutex_unlock(&target_inode->i_mutex); + } else { + mutex_unlock(&target_inode->i_mutex); + mutex_unlock(&src_inode->i_mutex); + } +out_fput: + fdput(src_file); +out_drop_write: + mnt_drop_write_file(dst_file); + return rc; +} + long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { struct inode *inode = file_inode(filep); @@ -105,6 +213,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) cifs_dbg(FYI, "set compress flag rc %d\n", rc); } break; + case BTRFS_IOC_CLONE: + rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0); + break; default: cifs_dbg(FYI, "unsupported ioctl\n"); break; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 651a5279607b..049884552e76 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -51,7 +51,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRnoaccess, -EACCES}, {ERRbadfid, -EBADF}, {ERRbadmcb, -EIO}, - {ERRnomem, -ENOMEM}, + {ERRnomem, -EREMOTEIO}, {ERRbadmem, -EFAULT}, {ERRbadenv, -EFAULT}, {ERRbadformat, -EINVAL}, diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 53a75f3d0179..5940ecabbe6a 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -134,22 +134,6 @@ out: dput(dentry); } -/* - * Is it possible that this directory might turn out to be a DFS referral - * once we go to try and use it? - */ -static bool -cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb) -{ -#ifdef CONFIG_CIFS_DFS_UPCALL - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - - if (tcon->Flags & SMB_SHARE_IS_IN_DFS) - return true; -#endif - return false; -} - static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { @@ -159,27 +143,19 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; - /* - * Windows CIFS servers generally make DFS referrals look - * like directories in FIND_* responses with the reparse - * attribute flag also set (since DFS junctions are - * reparse points). We must revalidate at least these - * directory inodes before trying to use them (if - * they are DFS we will get PATH_NOT_COVERED back - * when queried directly and can then try to connect - * to the DFS target) - */ - if (cifs_dfs_is_possible(cifs_sb) && - (fattr->cf_cifsattrs & ATTR_REPARSE)) - fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; - } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { - fattr->cf_mode = S_IFLNK; - fattr->cf_dtype = DT_LNK; } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; } + /* + * We need to revalidate it further to make a decision about whether it + * is a symbolic link, DFS referral or a reparse point with a direct + * access like junctions, deduplicated files, NFS symlinks. + */ + if (fattr->cf_cifsattrs & ATTR_REPARSE) + fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; + /* non-unix readdir doesn't provide nlink */ fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 384cffe42850..5f5ba0dc2ee1 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -534,10 +534,12 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - FILE_ALL_INFO *data, bool *adjustTZ) + FILE_ALL_INFO *data, bool *adjustTZ, bool *symlink) { int rc; + *symlink = false; + /* could do find first instead but this returns more info */ rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -554,6 +556,23 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, CIFS_MOUNT_MAP_SPECIAL_CHR); *adjustTZ = true; } + + if (!rc && (le32_to_cpu(data->Attributes) & ATTR_REPARSE)) { + int tmprc; + int oplock = 0; + __u16 netfid; + + /* Need to check if this is a symbolic link or not */ + tmprc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, + FILE_READ_ATTRIBUTES, 0, &netfid, &oplock, + NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (tmprc == -EOPNOTSUPP) + *symlink = true; + else + CIFSSMBClose(xid, tcon, netfid); + } + return rc; } diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 78ff88c467b9..84c012a6aba0 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -123,12 +123,13 @@ move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - FILE_ALL_INFO *data, bool *adjust_tz) + FILE_ALL_INFO *data, bool *adjust_tz, bool *symlink) { int rc; struct smb2_file_all_info *smb2_data; *adjust_tz = false; + *symlink = false; smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, GFP_KERNEL); @@ -136,9 +137,16 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, return -ENOMEM; rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, - FILE_READ_ATTRIBUTES, FILE_OPEN, - OPEN_REPARSE_POINT, smb2_data, - SMB2_OP_QUERY_INFO); + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, + smb2_data, SMB2_OP_QUERY_INFO); + if (rc == -EOPNOTSUPP) { + *symlink = true; + /* Failed on a symbolic link - query a reparse point info */ + rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, + FILE_READ_ATTRIBUTES, FILE_OPEN, + OPEN_REPARSE_POINT, smb2_data, + SMB2_OP_QUERY_INFO); + } if (rc) goto out; diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 7c2f45c06fc2..94bd4fbb13d3 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -306,7 +306,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_NONEXISTENT_SECTOR, -EIO, "STATUS_NONEXISTENT_SECTOR"}, {STATUS_MORE_PROCESSING_REQUIRED, -EIO, "STATUS_MORE_PROCESSING_REQUIRED"}, - {STATUS_NO_MEMORY, -ENOMEM, "STATUS_NO_MEMORY"}, + {STATUS_NO_MEMORY, -EREMOTEIO, "STATUS_NO_MEMORY"}, {STATUS_CONFLICTING_ADDRESSES, -EADDRINUSE, "STATUS_CONFLICTING_ADDRESSES"}, {STATUS_NOT_MAPPED_VIEW, -EIO, "STATUS_NOT_MAPPED_VIEW"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c571be8cb76e..11dde4b24f8a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -494,6 +494,85 @@ smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, } static int +SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + struct copychunk_ioctl *pcchunk) +{ + int rc; + unsigned int ret_data_len; + struct resume_key_req *res_key; + + rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, + FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, + NULL, 0 /* no input */, + (char **)&res_key, &ret_data_len); + + if (rc) { + cifs_dbg(VFS, "refcpy ioctl error %d getting resume key\n", rc); + goto req_res_key_exit; + } + if (ret_data_len < sizeof(struct resume_key_req)) { + cifs_dbg(VFS, "Invalid refcopy resume key length\n"); + rc = -EINVAL; + goto req_res_key_exit; + } + memcpy(pcchunk->SourceKey, res_key->ResumeKey, COPY_CHUNK_RES_KEY_SIZE); + +req_res_key_exit: + kfree(res_key); + return rc; +} + +static int +smb2_clone_range(const unsigned int xid, + struct cifsFileInfo *srcfile, + struct cifsFileInfo *trgtfile, u64 src_off, + u64 len, u64 dest_off) +{ + int rc; + unsigned int ret_data_len; + struct copychunk_ioctl *pcchunk; + char *retbuf = NULL; + + pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); + + if (pcchunk == NULL) + return -ENOMEM; + + cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n"); + /* Request a key from the server to identify the source of the copy */ + rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink), + srcfile->fid.persistent_fid, + srcfile->fid.volatile_fid, pcchunk); + + /* Note: request_res_key sets res_key null only if rc !=0 */ + if (rc) + return rc; + + /* For now array only one chunk long, will make more flexible later */ + pcchunk->ChunkCount = __constant_cpu_to_le32(1); + pcchunk->Reserved = 0; + pcchunk->SourceOffset = cpu_to_le64(src_off); + pcchunk->TargetOffset = cpu_to_le64(dest_off); + pcchunk->Length = cpu_to_le32(len); + pcchunk->Reserved2 = 0; + + /* Request that server copy to target from src file identified by key */ + rc = SMB2_ioctl(xid, tlink_tcon(trgtfile->tlink), + trgtfile->fid.persistent_fid, + trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, + true /* is_fsctl */, (char *)pcchunk, + sizeof(struct copychunk_ioctl), &retbuf, &ret_data_len); + + /* BB need to special case rc = EINVAL to alter chunk size */ + + cifs_dbg(FYI, "rc %d data length out %d\n", rc, ret_data_len); + + kfree(pcchunk); + return rc; +} + +static int smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { @@ -1017,6 +1096,7 @@ struct smb_version_operations smb20_operations = { .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, + .clone_range = smb2_clone_range, }; struct smb_version_operations smb21_operations = { @@ -1090,6 +1170,7 @@ struct smb_version_operations smb21_operations = { .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, + .clone_range = smb2_clone_range, }; struct smb_version_operations smb30_operations = { @@ -1165,6 +1246,7 @@ struct smb_version_operations smb30_operations = { .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, + .clone_range = smb2_clone_range, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8ab05b0d6778..d65270c290a1 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -630,6 +630,8 @@ ssetup_ntlmssp_authenticate: goto ssetup_exit; ses->session_flags = le16_to_cpu(rsp->SessionFlags); + if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) + cifs_dbg(VFS, "SMB3 encryption not supported yet\n"); ssetup_exit: free_rsp_buf(resp_buftype, rsp); @@ -717,6 +719,14 @@ static inline void cifs_stats_fail_inc(struct cifs_tcon *tcon, uint16_t code) #define MAX_SHARENAME_LENGTH (255 /* server */ + 80 /* share */ + 1 /* NULL */) +/* These are similar values to what Windows uses */ +static inline void init_copy_chunk_defaults(struct cifs_tcon *tcon) +{ + tcon->max_chunks = 256; + tcon->max_bytes_chunk = 1048576; + tcon->max_bytes_copy = 16777216; +} + int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, const struct nls_table *cp) @@ -818,7 +828,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); - + init_copy_chunk_defaults(tcon); tcon_exit: free_rsp_buf(resp_buftype, rsp); kfree(unc_path); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6183b1b7550f..f88320bbb477 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -122,6 +122,23 @@ struct smb2_pdu { __le16 StructureSize2; /* size of wct area (varies, request specific) */ } __packed; +struct smb2_transform_hdr { + __be32 smb2_buf_length; /* big endian on wire */ + /* length is only two or three bytes - with + one or two byte type preceding it that MBZ */ + __u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */ + __u8 Signature[16]; + __u8 Nonce[11]; + __u8 Reserved[5]; + __le32 OriginalMessageSize; + __u16 Reserved1; + __le16 EncryptionAlgorithm; + __u64 SessionId; +} __packed; + +/* Encryption Algorithms */ +#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001) + /* * SMB2 flag definitions */ @@ -237,6 +254,7 @@ struct smb2_sess_setup_req { /* Currently defined SessionFlags */ #define SMB2_SESSION_FLAG_IS_GUEST 0x0001 #define SMB2_SESSION_FLAG_IS_NULL 0x0002 +#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004 struct smb2_sess_setup_rsp { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 9 */ @@ -534,9 +552,16 @@ struct create_durable { } Data; } __packed; +#define COPY_CHUNK_RES_KEY_SIZE 24 +struct resume_key_req { + char ResumeKey[COPY_CHUNK_RES_KEY_SIZE]; + __le32 ContextLength; /* MBZ */ + char Context[0]; /* ignored, Windows sets to 4 bytes of zero */ +} __packed; + /* this goes in the ioctl buffer when doing a copychunk request */ struct copychunk_ioctl { - char SourceKey[24]; + char SourceKey[COPY_CHUNK_RES_KEY_SIZE]; __le32 ChunkCount; /* we are only sending 1 */ __le32 Reserved; /* array will only be one chunk long for us */ @@ -546,6 +571,12 @@ struct copychunk_ioctl { __u32 Reserved2; } __packed; +struct copychunk_ioctl_rsp { + __le32 ChunksWritten; + __le32 ChunkBytesWritten; + __le32 TotalBytesWritten; +} __packed; + /* Response and Request are the same format */ struct validate_negotiate_info { __le32 Capabilities; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 313813e4c19b..b4eea105b08c 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -61,7 +61,7 @@ extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, - bool *adjust_tz); + bool *adjust_tz, bool *symlink); extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, __u64 size, struct cifs_sb_info *cifs_sb, bool set_alloc); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 277bd1be21fd..e081acbac2e7 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -56,29 +56,28 @@ static void configfs_d_iput(struct dentry * dentry, struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { - BUG_ON(sd->s_dentry != dentry); /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); - sd->s_dentry = NULL; + /* Coordinate with configfs_attach_attr where will increase + * sd->s_count and update sd->s_dentry to new allocated one. + * Only set sd->dentry to null when this dentry is the only + * sd owner. + * If not do so, configfs_d_iput may run just after + * configfs_attach_attr and set sd->s_dentry to null + * even it's still in use. + */ + if (atomic_read(&sd->s_count) <= 2) + sd->s_dentry = NULL; + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } iput(inode); } -/* - * We _must_ delete our dentries on last dput, as the chain-to-parent - * behavior is required to clear the parents of default_groups. - */ -static int configfs_d_delete(const struct dentry *dentry) -{ - return 1; -} - const struct dentry_operations configfs_dentry_ops = { .d_iput = configfs_d_iput, - /* simple_delete_dentry() isn't exported */ - .d_delete = configfs_d_delete, + .d_delete = always_delete_dentry, }; #ifdef CONFIG_LOCKDEP @@ -426,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den struct configfs_attribute * attr = sd->s_element; int error; + spin_lock(&configfs_dirent_lock); dentry->d_fsdata = configfs_get(sd); sd->s_dentry = dentry; + spin_unlock(&configfs_dirent_lock); + error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, configfs_init_file); if (error) { diff --git a/fs/coredump.c b/fs/coredump.c index 62406b6959b6..bc3fbcd32558 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -695,7 +695,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) while (nr) { if (dump_interrupted()) return 0; - n = vfs_write(file, addr, nr, &pos); + n = __kernel_write(file, addr, nr, &pos); if (n <= 0) return 0; file->f_pos = pos; @@ -733,7 +733,7 @@ int dump_align(struct coredump_params *cprm, int align) { unsigned mod = cprm->written & (align - 1); if (align & (align - 1)) - return -EINVAL; - return mod ? dump_skip(cprm, align - mod) : 0; + return 0; + return mod ? dump_skip(cprm, align - mod) : 1; } EXPORT_SYMBOL(dump_align); diff --git a/fs/dcache.c b/fs/dcache.c index 1f24cd684c51..4bdb300b16e2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; -/** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked - * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. - */ -static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) -{ - if (!(*seq & 1)) /* Even */ - *seq = read_seqbegin(lock); - else /* Odd */ - read_seqlock_excl(lock); -} - -static inline int need_seqretry(seqlock_t *lock, int seq) -{ - return !(seq & 1) && read_seqretry(lock, seq); -} - -static inline void done_seqretry(seqlock_t *lock, int seq) -{ - if (seq & 1) - read_sequnlock_excl(lock); -} - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq) * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ -#define D_HASHBITS d_hash_shift -#define D_HASHMASK d_hash_mask static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; @@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); + hash = hash + (hash >> d_hash_shift); + return dentry_hashtable + (hash & d_hash_mask); } /* Statistics gathering. */ @@ -469,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) { list_del(&dentry->d_u.d_child); /* - * Inform try_to_ascend() that we are no longer attached to the + * Inform d_walk() that we are no longer attached to the * dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; @@ -1069,34 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb) } EXPORT_SYMBOL(shrink_dcache_sb); -/* - * This tries to ascend one level of parenthood, but - * we can race with renaming, so we need to re-check - * the parenthood after dropping the lock and check - * that the sequence number still matches. - */ -static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) -{ - struct dentry *new = old->d_parent; - - rcu_read_lock(); - spin_unlock(&old->d_lock); - spin_lock(&new->d_lock); - - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (new != old->d_parent || - (old->d_flags & DCACHE_DENTRY_KILLED) || - need_seqretry(&rename_lock, seq)) { - spin_unlock(&new->d_lock); - new = NULL; - } - rcu_read_unlock(); - return new; -} - /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk @@ -1185,9 +1126,24 @@ resume: */ if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, seq); - if (!this_parent) + this_parent = child->d_parent; + + rcu_read_lock(); + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (this_parent != child->d_parent || + (child->d_flags & DCACHE_DENTRY_KILLED) || + need_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); goto rename_retry; + } + rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -2606,7 +2562,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target) dentry_lock_for_move(dentry, target); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&target->d_seq); + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ @@ -2738,7 +2694,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry_lock_for_move(anon, dentry); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&anon->d_seq); + write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); dparent = dentry->d_parent; @@ -2912,9 +2868,9 @@ static int prepend_path(const struct path *path, const struct path *root, char **buffer, int *buflen) { - struct dentry *dentry = path->dentry; - struct vfsmount *vfsmnt = path->mnt; - struct mount *mnt = real_mount(vfsmnt); + struct dentry *dentry; + struct vfsmount *vfsmnt; + struct mount *mnt; int error = 0; unsigned seq, m_seq = 0; char *bptr; @@ -2924,10 +2880,14 @@ static int prepend_path(const struct path *path, restart_mnt: read_seqbegin_or_lock(&mount_lock, &m_seq); seq = 0; + rcu_read_lock(); restart: bptr = *buffer; blen = *buflen; error = 0; + dentry = path->dentry; + vfsmnt = path->mnt; + mnt = real_mount(vfsmnt); read_seqbegin_or_lock(&rename_lock, &seq); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2971,6 +2931,9 @@ restart: goto restart; } done_seqretry(&rename_lock, seq); + + if (!(m_seq & 1)) + rcu_read_unlock(); if (need_seqretry(&mount_lock, m_seq)) { m_seq = 1; goto restart_mnt; diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 60a327863b11..e7cfbaf8d0e2 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -74,14 +74,16 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info) return 0; } -static struct genl_ops dlm_nl_ops = { - .cmd = DLM_CMD_HELLO, - .doit = user_cmd, +static struct genl_ops dlm_nl_ops[] = { + { + .cmd = DLM_CMD_HELLO, + .doit = user_cmd, + }, }; int __init dlm_netlink_init(void) { - return genl_register_family_with_ops(&family, &dlm_nl_ops, 1); + return genl_register_family_with_ops(&family, dlm_nl_ops); } void dlm_netlink_exit(void) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 000eae2782b6..2f6735dbf1a9 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -392,7 +392,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, wait_for_completion(&ecr->completion); rc = ecr->rc; - INIT_COMPLETION(ecr->completion); + reinit_completion(&ecr->completion); } out: ablkcipher_request_free(req); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index a8766b880c07..becc725a1953 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) return 0; } -/* - * Retaining negative dentries for an in-memory filesystem just wastes - * memory and lookup time: arrange for them to be deleted immediately. - */ -static int efivarfs_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - static struct dentry_operations efivarfs_d_ops = { .d_compare = efivarfs_d_compare, .d_hash = efivarfs_d_hash, - .d_delete = efivarfs_delete_dentry, + .d_delete = always_delete_dentry, }; static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) diff --git a/fs/exec.c b/fs/exec.c index 977319fd77f3..7ea097f6b341 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1380,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval) return retval; - retval = audit_bprm(bprm); - if (retval) - return retval; - retval = -ENOENT; retry: read_lock(&binfmt_lock); @@ -1431,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm) ret = search_binary_handler(bprm); if (ret >= 0) { + audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); current->did_exec = 1; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index dc5d572ebd6a..6ea7b1436bbc 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -640,6 +640,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) struct ext4_group_desc *gdp; ext4_group_t i; ext4_group_t ngroups = ext4_get_groups_count(sb); + struct ext4_group_info *grp; #ifdef EXT4FS_DEBUG struct ext4_super_block *es; ext4_fsblk_t bitmap_count; @@ -655,7 +656,11 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += ext4_free_group_clusters(sb, gdp); + grp = NULL; + if (EXT4_SB(sb)->s_group_info) + grp = ext4_get_group_info(sb, i); + if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + desc_count += ext4_free_group_clusters(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_block_bitmap(sb, i); if (bitmap_bh == NULL) @@ -679,7 +684,11 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += ext4_free_group_clusters(sb, gdp); + grp = NULL; + if (EXT4_SB(sb)->s_group_info) + grp = ext4_get_group_info(sb, i); + if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + desc_count += ext4_free_group_clusters(sb, gdp); } return desc_count; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d01d62315f7e..e6185031c1cc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -29,6 +29,7 @@ #include <linux/wait.h> #include <linux/blockgroup_lock.h> #include <linux/percpu_counter.h> +#include <linux/ratelimit.h> #include <crypto/hash.h> #ifdef __KERNEL__ #include <linux/compat.h> @@ -1314,6 +1315,11 @@ struct ext4_sb_info { unsigned long s_es_last_sorted; struct percpu_counter s_extent_cache_cnt; spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; + + /* Ratelimit ext4 messages. */ + struct ratelimit_state s_err_ratelimit_state; + struct ratelimit_state s_warning_ratelimit_state; + struct ratelimit_state s_msg_ratelimit_state; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1396,7 +1402,18 @@ static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ } +/* Add these declarations here only so that these functions can be + * found by name. Otherwise, they are very hard to locate. */ +static inline int ext4_test_inode_flag(struct inode *inode, int bit); +static inline void ext4_set_inode_flag(struct inode *inode, int bit); +static inline void ext4_clear_inode_flag(struct inode *inode, int bit); EXT4_INODE_BIT_FNS(flag, flags, 0) + +/* Add these declarations here only so that these functions can be + * found by name. Otherwise, they are very hard to locate. */ +static inline int ext4_test_inode_state(struct inode *inode, int bit); +static inline void ext4_set_inode_state(struct inode *inode, int bit); +static inline void ext4_clear_inode_state(struct inode *inode, int bit); #if (BITS_PER_LONG < 64) EXT4_INODE_BIT_FNS(state, state_flags, 0) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 54d52afcdb19..35f65cf4f318 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1666,7 +1666,7 @@ int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - unsigned short ext1_ee_len, ext2_ee_len, max_len; + unsigned short ext1_ee_len, ext2_ee_len; /* * Make sure that both extents are initialized. We don't merge @@ -1677,11 +1677,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) return 0; - if (ext4_ext_is_uninitialized(ex1)) - max_len = EXT_UNINIT_MAX_LEN; - else - max_len = EXT_INIT_MAX_LEN; - ext1_ee_len = ext4_ext_get_actual_len(ex1); ext2_ee_len = ext4_ext_get_actual_len(ex2); @@ -1694,7 +1689,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. */ - if (ext1_ee_len + ext2_ee_len > max_len) + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; #ifdef AGGRESSIVE_TEST if (ext1_ee_len >= 4) @@ -1720,7 +1715,6 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, struct ext4_extent_header *eh; unsigned int depth, len; int merge_done = 0; - int uninitialized = 0; depth = ext_depth(inode); BUG_ON(path[depth].p_hdr == NULL); @@ -1730,12 +1724,8 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) break; /* merge with next extent! */ - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(ex + 1)); - if (uninitialized) - ext4_ext_mark_uninitialized(ex); if (ex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - ex - 1) @@ -1890,7 +1880,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *npath = NULL; int depth, len, err; ext4_lblk_t next; - unsigned uninitialized = 0; int mb_flags = 0; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { @@ -1942,18 +1931,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, if (err) return err; - /* - * ext4_can_extents_be_merged should have checked - * that either both extents are uninitialized, or - * both aren't. Thus we need to check only one of - * them here. - */ - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); - if (uninitialized) - ext4_ext_mark_uninitialized(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -1976,20 +1955,10 @@ prepend: if (err) return err; - /* - * ext4_can_extents_be_merged should have checked - * that either both extents are uninitialized, or - * both aren't. Thus we need to check only one of - * them here. - */ - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; ex->ee_block = newext->ee_block; ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); - if (uninitialized) - ext4_ext_mark_uninitialized(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 137193ff389b..0ee59a6644e2 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -432,7 +432,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ext4fs_dirhash(qstr->name, qstr->len, &hinfo); grp = hinfo.hash; } else - get_random_bytes(&grp, sizeof(grp)); + grp = prandom_u32(); parent_group = (unsigned)grp % ngroups; for (i = 0; i < ngroups; i++) { g = (parent_group + i) % ngroups; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d9ecbf1113a7..bae987549dc3 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -994,11 +994,9 @@ static int ext4_add_dirent_to_inline(handle_t *handle, struct inode *dir = dentry->d_parent->d_inode; const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; - unsigned short reclen; int err; struct ext4_dir_entry_2 *de; - reclen = EXT4_DIR_REC_LEN(namelen); err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start, inline_size, name, namelen, &de); @@ -1442,6 +1440,7 @@ int ext4_read_inline_dir(struct file *file, if (ret < 0) goto out; + ret = 0; sb = inode->i_sb; parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); offset = ctx->pos; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e274e9c1171f..075763474118 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2178,6 +2178,9 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) * * @handle - handle for journal operations * @mpd - extent to map + * @give_up_on_write - we set this to true iff there is a fatal error and there + * is no hope of writing the data. The caller should discard + * dirty pages to avoid infinite loops. * * The function maps extent starting at mpd->lblk of length mpd->len. If it is * delayed, blocks are allocated, if it is unwritten, we may need to convert @@ -2295,6 +2298,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) struct address_space *mapping = mpd->inode->i_mapping; struct pagevec pvec; unsigned int nr_pages; + long left = mpd->wbc->nr_to_write; pgoff_t index = mpd->first_page; pgoff_t end = mpd->last_page; int tag; @@ -2330,6 +2334,17 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) if (page->index > end) goto out; + /* + * Accumulated enough dirty pages? This doesn't apply + * to WB_SYNC_ALL mode. For integrity sync we have to + * keep going because someone may be concurrently + * dirtying pages, and we might have synced a lot of + * newly appeared dirty pages, but have not synced all + * of the old dirty pages. + */ + if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0) + goto out; + /* If we can't merge this page, we are done. */ if (mpd->map.m_len > 0 && mpd->next_page != page->index) goto out; @@ -2364,19 +2379,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) if (err <= 0) goto out; err = 0; - - /* - * Accumulated enough dirty pages? This doesn't apply - * to WB_SYNC_ALL mode. For integrity sync we have to - * keep going because someone may be concurrently - * dirtying pages, and we might have synced a lot of - * newly appeared dirty pages, but have not synced all - * of the old dirty pages. - */ - if (mpd->wbc->sync_mode == WB_SYNC_NONE && - mpd->next_page - mpd->first_page >= - mpd->wbc->nr_to_write) - goto out; + left--; } pagevec_release(&pvec); cond_resched(); @@ -2420,16 +2423,15 @@ static int ext4_writepages(struct address_space *mapping, * because that could violate lock ordering on umount */ if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - return 0; + goto out_writepages; if (ext4_should_journal_data(inode)) { struct blk_plug plug; - int ret; blk_start_plug(&plug); ret = write_cache_pages(mapping, wbc, __writepage, mapping); blk_finish_plug(&plug); - return ret; + goto out_writepages; } /* @@ -2442,8 +2444,10 @@ static int ext4_writepages(struct address_space *mapping, * *never* be called, so if that ever happens, we would want * the stack trace. */ - if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) - return -EROFS; + if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { + ret = -EROFS; + goto out_writepages; + } if (ext4_should_dioread_nolock(inode)) { /* @@ -4690,6 +4694,15 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, generic_fillattr(inode, stat); /* + * If there is inline data in the inode, the inode will normally not + * have data blocks allocated (it may have an external xattr block). + * Report at least one sector for such files, so tools like tar, rsync, + * others doen't incorrectly think the file is completely sparse. + */ + if (unlikely(ext4_has_inline_data(inode))) + stat->blocks += (stat->size + 511) >> 9; + + /* * We can't update i_blocks if the block allocation is delayed * otherwise in the case of system crash before the real block * allocation is done, we will have i_blocks inconsistent with @@ -4700,9 +4713,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * blocks for this file. */ delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), - EXT4_I(inode)->i_reserved_data_blocks); - - stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); + EXT4_I(inode)->i_reserved_data_blocks); + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9); return 0; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a41e3ba8cfaa..4d113efa024c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4794,8 +4794,8 @@ do_more: " group:%d block:%d count:%lu failed" " with %d", block_group, bit, count, err); - } - + } else + EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 214461e42a05..04434ad3e8e0 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -259,7 +259,7 @@ static unsigned int mmp_new_seq(void) u32 new_seq; do { - get_random_bytes(&new_seq, sizeof(u32)); + new_seq = prandom_u32(); } while (new_seq > EXT4_MMP_SEQ_MAX); return new_seq; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index d7d0c7b46ed4..d488f80ee32d 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -197,14 +197,15 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head) static void ext4_add_complete_io(ext4_io_end_t *io_end) { struct ext4_inode_info *ei = EXT4_I(io_end->inode); + struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb); struct workqueue_struct *wq; unsigned long flags; /* Only reserved conversions from writeback should enter here */ WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); - WARN_ON(!io_end->handle); + WARN_ON(!io_end->handle && sbi->s_journal); spin_lock_irqsave(&ei->i_completed_io_lock, flags); - wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq; + wq = sbi->rsv_conversion_wq; if (list_empty(&ei->i_rsv_conversion_list)) queue_work(wq, &ei->i_rsv_conversion_work); list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2c2e6cbc6bed..c977f4e4e63b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -411,20 +411,26 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); } +#define ext4_error_ratelimit(sb) \ + ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ + "EXT4-fs error") + void __ext4_error(struct super_block *sb, const char *function, unsigned int line, const char *fmt, ...) { struct va_format vaf; va_list args; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", - sb->s_id, function, line, current->comm, &vaf); - va_end(args); + if (ext4_error_ratelimit(sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", + sb->s_id, function, line, current->comm, &vaf); + va_end(args); + } save_error_info(sb, function, line); - ext4_handle_error(sb); } @@ -438,22 +444,23 @@ void __ext4_error_inode(struct inode *inode, const char *function, es->s_last_error_ino = cpu_to_le32(inode->i_ino); es->s_last_error_block = cpu_to_le64(block); + if (ext4_error_ratelimit(inode->i_sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (block) + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: block %llu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, &vaf); + else + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, &vaf); + va_end(args); + } save_error_info(inode->i_sb, function, line); - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - if (block) - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " - "inode #%lu: block %llu: comm %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - block, current->comm, &vaf); - else - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " - "inode #%lu: comm %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - current->comm, &vaf); - va_end(args); - ext4_handle_error(inode->i_sb); } @@ -469,27 +476,28 @@ void __ext4_error_file(struct file *file, const char *function, es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); + if (ext4_error_ratelimit(inode->i_sb)) { + path = d_path(&(file->f_path), pathname, sizeof(pathname)); + if (IS_ERR(path)) + path = "(unknown)"; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (block) + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "block %llu: comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, path, &vaf); + else + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, path, &vaf); + va_end(args); + } save_error_info(inode->i_sb, function, line); - path = d_path(&(file->f_path), pathname, sizeof(pathname)); - if (IS_ERR(path)) - path = "(unknown)"; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - if (block) - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: " - "block %llu: comm %s: path %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - block, current->comm, path, &vaf); - else - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: " - "comm %s: path %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - current->comm, path, &vaf); - va_end(args); - ext4_handle_error(inode->i_sb); } @@ -543,11 +551,13 @@ void __ext4_std_error(struct super_block *sb, const char *function, (sb->s_flags & MS_RDONLY)) return; - errstr = ext4_decode_error(sb, errno, nbuf); - printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", - sb->s_id, function, line, errstr); - save_error_info(sb, function, line); + if (ext4_error_ratelimit(sb)) { + errstr = ext4_decode_error(sb, errno, nbuf); + printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", + sb->s_id, function, line, errstr); + } + save_error_info(sb, function, line); ext4_handle_error(sb); } @@ -597,6 +607,9 @@ void __ext4_msg(struct super_block *sb, struct va_format vaf; va_list args; + if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) + return; + va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -610,6 +623,10 @@ void __ext4_warning(struct super_block *sb, const char *function, struct va_format vaf; va_list args; + if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), + "EXT4-fs warning")) + return; + va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -633,18 +650,20 @@ __acquires(bitlock) es->s_last_error_block = cpu_to_le64(block); __save_error_info(sb, function, line); - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", - sb->s_id, function, line, grp); - if (ino) - printk(KERN_CONT "inode %lu: ", ino); - if (block) - printk(KERN_CONT "block %llu:", (unsigned long long) block); - printk(KERN_CONT "%pV\n", &vaf); - va_end(args); + if (ext4_error_ratelimit(sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", + sb->s_id, function, line, grp); + if (ino) + printk(KERN_CONT "inode %lu: ", ino); + if (block) + printk(KERN_CONT "block %llu:", + (unsigned long long) block); + printk(KERN_CONT "%pV\n", &vaf); + va_end(args); + } if (test_opt(sb, ERRORS_CONT)) { ext4_commit_super(sb, 0); @@ -2606,6 +2625,12 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), @@ -2623,6 +2648,12 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(max_writeback_mb_bump), ATTR_LIST(extent_max_zeroout_kb), ATTR_LIST(trigger_fs_error), + ATTR_LIST(err_ratelimit_interval_ms), + ATTR_LIST(err_ratelimit_burst), + ATTR_LIST(warning_ratelimit_interval_ms), + ATTR_LIST(warning_ratelimit_burst), + ATTR_LIST(msg_ratelimit_interval_ms), + ATTR_LIST(msg_ratelimit_burst), NULL, }; @@ -3037,7 +3068,6 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr; - unsigned long rnd; elr = kzalloc(sizeof(*elr), GFP_KERNEL); if (!elr) @@ -3052,10 +3082,8 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, * spread the inode table initialization requests * better. */ - get_random_bytes(&rnd, sizeof(rnd)); - elr->lr_next_sched = jiffies + (unsigned long)rnd % - (EXT4_DEF_LI_MAX_START_DELAY * HZ); - + elr->lr_next_sched = jiffies + (prandom_u32() % + (EXT4_DEF_LI_MAX_START_DELAY * HZ)); return elr; } @@ -4118,6 +4146,11 @@ no_journal: if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ + /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ + ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); + ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); + ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); + kfree(orig_data); return 0; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 03e9bebba198..1423c4816a47 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1352,6 +1352,7 @@ retry: new_extra_isize = s_min_extra_isize; kfree(is); is = NULL; kfree(bs); bs = NULL; + brelse(bh); goto retry; } error = -1; diff --git a/fs/fs_struct.c b/fs/fs_struct.c index d8ac61d0c932..7dca743b2ce1 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -161,6 +161,6 @@ EXPORT_SYMBOL(current_umask); struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), - .seq = SEQCNT_ZERO, + .seq = SEQCNT_ZERO(init_fs.seq), .umask = 0022, }; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index dcb821617774..53d35c504240 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -799,7 +799,7 @@ void fscache_enqueue_object(struct fscache_object *object) */ bool fscache_object_sleep_till_congested(signed long *timeoutp) { - wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait); + wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait); DEFINE_WAIT(wait); if (fscache_object_congested()) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e66a8009aff1..c8420f7e4db6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1899,7 +1899,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gi->nhash = 0; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref)); + } while (gi->sdp != gi->gl->gl_sbd || + __lockref_is_dead(&gi->gl->gl_lockref)); return 0; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1615df16cf4e..7119504159f1 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1171,8 +1171,11 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (d != NULL) dentry = d; if (dentry->d_inode) { - if (!(*opened & FILE_OPENED)) + if (!(*opened & FILE_OPENED)) { + if (d == NULL) + dget(dentry); return finish_no_open(file, dentry); + } dput(d); return 0; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c8423d6de6c3..2a6ba06bee6f 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl) static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); - memcpy(&gen, lvb_bits, sizeof(uint32_t)); + memcpy(&gen, lvb_bits, sizeof(__le32)); *lvb_gen = le32_to_cpu(gen); } static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); gen = cpu_to_le32(lvb_gen); - memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); + memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); } static int all_jid_bits_clear(char *lvb) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 453b50eaddec..98236d0df3ca 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -667,7 +667,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct buffer_head *bh; struct page *page; void *kaddr, *ptr; - struct gfs2_quota q, *qp; + struct gfs2_quota q; int err, nbytes; u64 size; @@ -683,28 +683,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, return err; err = -EIO; - qp = &q; - qp->qu_value = be64_to_cpu(qp->qu_value); - qp->qu_value += change; - qp->qu_value = cpu_to_be64(qp->qu_value); - qd->qd_qb.qb_value = qp->qu_value; + be64_add_cpu(&q.qu_value, change); + qd->qd_qb.qb_value = q.qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_warn = qp->qu_warn; + q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_warn = q.qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_limit = qp->qu_limit; + q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_limit = q.qu_limit; } if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_value = qp->qu_value; + q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_value = q.qu_value; } } /* Write the quota into the quota file on disk */ - ptr = qp; + ptr = &q; nbytes = sizeof(struct gfs2_quota); get_a_page: page = find_or_create_page(mapping, index, GFP_NOFS); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d83abdd5635..c8d6161bd682 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1127,7 +1127,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; } - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); @@ -1161,7 +1161,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd) if (rgd->rd_flags & GFS2_RDF_UPTODATE) return 0; - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) return gfs2_rgrp_bh_get(rgd); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index efc85b1377cc..3c6136f98c73 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -129,7 +129,7 @@ static int can_set_xattr(struct inode *inode, const char *name, static void hfsplus_init_header_node(struct inode *attr_file, u32 clump_size, - char *buf, size_t node_size) + char *buf, u16 node_size) { struct hfs_bnode_desc *desc; struct hfs_btree_header_rec *head; @@ -139,8 +139,9 @@ static void hfsplus_init_header_node(struct inode *attr_file, char *bmp; u32 used_nodes; u32 used_bmp_bytes; + loff_t tmp; - hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %zu\n", + hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %u\n", clump_size, node_size); /* The end of the node contains list of record offsets */ @@ -154,7 +155,9 @@ static void hfsplus_init_header_node(struct inode *attr_file, head = (struct hfs_btree_header_rec *)(buf + offset); head->node_size = cpu_to_be16(node_size); - head->node_count = cpu_to_be32(i_size_read(attr_file) / node_size); + tmp = i_size_read(attr_file); + do_div(tmp, node_size); + head->node_count = cpu_to_be32(tmp); head->free_nodes = cpu_to_be32(be32_to_cpu(head->node_count) - 1); head->clump_size = cpu_to_be32(clump_size); head->attributes |= cpu_to_be32(HFS_TREE_BIGKEYS | HFS_TREE_VARIDXKEYS); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 25437280a207..db23ce1bd903 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) -static int hostfs_d_delete(const struct dentry *dentry) -{ - return 1; -} - -static const struct dentry_operations hostfs_dentry_ops = { - .d_delete = hostfs_d_delete, -}; - /* Changed in hostfs_args before the kernel starts running */ static char *root_ino = ""; static int append = 0; @@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; - sb->s_d_op = &hostfs_dentry_ops; + sb->s_d_op = &simple_dentry_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as <NULL> by sprintf: avoid that. */ diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index fe3c0527545f..09b3ed455724 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -515,6 +515,10 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) c = JFFS2_SB_INFO(sb); + /* Do not support the MLC nand */ + if (c->mtd->type == MTD_MLCNANDFLASH) + return -EINVAL; + #ifndef CONFIG_JFFS2_FS_WRITEBUFFER if (c->mtd->type == MTD_NANDFLASH) { pr_err("Cannot operate on NAND flash unless jffs2 NAND support is compiled in\n"); diff --git a/fs/libfs.c b/fs/libfs.c index 5de06947ba5e..a1844244246f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs); * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ -static int simple_delete_dentry(const struct dentry *dentry) +int always_delete_dentry(const struct dentry *dentry) { return 1; } +EXPORT_SYMBOL(always_delete_dentry); + +const struct dentry_operations simple_dentry_operations = { + .d_delete = always_delete_dentry, +}; +EXPORT_SYMBOL(simple_dentry_operations); /* * Lookup the data. This is trivial - if the dentry didn't already @@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry) */ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - static const struct dentry_operations simple_dentry_operations = { - .d_delete = simple_delete_dentry, - }; - if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (!dentry->d_sb->s_d_op) diff --git a/fs/locks.c b/fs/locks.c index f99d52bdd05a..92a0f0a52b06 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1494,6 +1494,7 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp if (is_deleg && arg == F_WRLCK) { /* Write delegations are not currently supported: */ + mutex_unlock(&inode->i_mutex); WARN_ON_ONCE(1); return -EINVAL; } diff --git a/fs/namei.c b/fs/namei.c index e029a4cbff7d..8f77a8cea289 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2435,6 +2435,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) */ static inline int may_create(struct inode *dir, struct dentry *child) { + audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 38c1768b4142..3dece03f2fc8 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -116,17 +116,17 @@ config NFS_V4_2 config PNFS_FILE_LAYOUT tristate depends on NFS_V4_1 - default m + default NFS_V4 config PNFS_BLOCK tristate depends on NFS_V4_1 && BLK_DEV_DM - default m + default NFS_V4 config PNFS_OBJLAYOUT tristate depends on NFS_V4_1 && SCSI_OSD_ULD - default m + default NFS_V4 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN string "NFSv4.1 Implementation ID Domain" diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c8e729deb4f7..059c01b67a71 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -244,7 +244,7 @@ static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); spin_lock(&tbl->slot_tbl_lock); if (tbl->highest_used_slotid != NFS4_NO_SLOT) { - INIT_COMPLETION(tbl->complete); + reinit_completion(&tbl->complete); spin_unlock(&tbl->slot_tbl_lock); return wait_for_completion_interruptible(&tbl->complete); } @@ -2093,10 +2093,15 @@ again: nfs4_root_machine_cred(clp); goto again; } - if (i > 2) + if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) break; case -NFS4ERR_CLID_INUSE: case -NFS4ERR_WRONGSEC: + /* No point in retrying if we already used RPC_AUTH_UNIX */ + if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) { + status = -EPERM; + break; + } clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 317d6fc2160e..910ed906eb82 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1614,7 +1614,7 @@ static int nfs_parse_mount_options(char *raw, goto out_minorversion_mismatch; if (mnt->options & NFS_OPTION_MIGRATION && - mnt->version != 4 && mnt->minorversion != 0) + (mnt->version != 4 || mnt->minorversion != 0)) goto out_migration_misuse; /* diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index dc8f1ef665ce..f994e750e0d1 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -95,7 +95,7 @@ config NFSD_V4_SECURITY_LABEL Smack policies on NFSv4 files, say N. WARNING: there is still a chance of backwards-incompatible protocol changes. - For now we recommend "Y" only for developers and testers." + For now we recommend "Y" only for developers and testers. config NFSD_FAULT_INJECTION bool "NFS server manual fault injection" diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5f38ea36e266..8513c598fabf 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -536,16 +536,12 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (err) goto out3; exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); - if (!uid_valid(exp.ex_anon_uid)) - goto out3; /* anon gid */ err = get_int(&mesg, &an_int); if (err) goto out3; exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); - if (!gid_valid(exp.ex_anon_gid)) - goto out3; /* fsid */ err = get_int(&mesg, &an_int); @@ -583,6 +579,26 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_uuid); if (err) goto out4; + /* + * No point caching this if it would immediately expire. + * Also, this protects exportfs's dummy export from the + * anon_uid/anon_gid checks: + */ + if (exp.h.expiry_time < seconds_since_boot()) + goto out4; + /* + * For some reason exportfs has been passing down an + * invalid (-1) uid & gid on the "dummy" export which it + * uses to test export support. To make sure exportfs + * sees errors from check_export we therefore need to + * delay these checks till after check_export: + */ + err = -EINVAL; + if (!uid_valid(exp.ex_anon_uid)) + goto out4; + if (!gid_valid(exp.ex_anon_gid)) + goto out4; + err = 0; } expp = svc_export_lookup(&exp); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f36a30a9f2d1..105d6fa7c514 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -402,11 +402,16 @@ static void remove_stid(struct nfs4_stid *s) idr_remove(stateids, s->sc_stateid.si_opaque.so_id); } +static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) +{ + kmem_cache_free(slab, s); +} + void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { - kmem_cache_free(deleg_slab, dp); + nfs4_free_stid(deleg_slab, &dp->dl_stid); num_delegations--; } } @@ -610,7 +615,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { remove_stid(&stp->st_stid); - kmem_cache_free(stateid_slab, stp); + nfs4_free_stid(stateid_slab, &stp->st_stid); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -668,7 +673,6 @@ static void unhash_open_stateid(struct nfs4_ol_stateid *stp) static void release_open_stateid(struct nfs4_ol_stateid *stp) { unhash_open_stateid(stp); - unhash_stid(&stp->st_stid); free_generic_stateid(stp); } @@ -690,7 +694,6 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; if (s) { - unhash_stid(&s->st_stid); free_generic_stateid(s); oo->oo_last_closed_stid = NULL; } @@ -1127,6 +1130,11 @@ destroy_client(struct nfs4_client *clp) dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); destroy_delegation(dp); } + list_splice_init(&clp->cl_revoked, &reaplist); + while (!list_empty(&reaplist)) { + dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); + destroy_revoked_delegation(dp); + } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); release_openowner(oo); @@ -3154,7 +3162,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: - unhash_stid(&dp->dl_stid); + remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; @@ -3995,10 +4003,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); - if (cstate->minorversion) { - unhash_stid(&stp->st_stid); + if (cstate->minorversion) free_generic_stateid(stp); - } else + else oo->oo_last_closed_stid = stp; if (list_empty(&oo->oo_owner.so_stateids)) { @@ -5119,7 +5126,6 @@ out_recovery: return ret; } -/* should be called with the state lock held */ void nfs4_state_shutdown_net(struct net *net) { @@ -5130,6 +5136,7 @@ nfs4_state_shutdown_net(struct net *net) cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); + nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { @@ -5144,6 +5151,7 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); + nfs4_unlock_state(); } void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d9454fe5653f..ee7237f99f54 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -141,8 +141,8 @@ xdr_error: \ static void next_decode_page(struct nfsd4_compoundargs *argp) { - argp->pagelist++; argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; @@ -411,6 +411,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, label->data = kzalloc(dummy32 + 1, GFP_KERNEL); if (!label->data) return nfserr_jukebox; + label->len = dummy32; defer_free(argp, kfree, label->data); memcpy(label->data, buf, dummy32); } @@ -945,13 +946,16 @@ static __be32 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { DECODE_HEAD; - + + if (argp->minorversion >= 1) + return nfserr_notsupp; + status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); if (status) return status; READ_BUF(4); READ32(open_conf->oc_seqid); - + DECODE_TAIL; } @@ -991,6 +995,14 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +{ + if (argp->minorversion == 0) + return nfs_ok; + return nfserr_notsupp; +} + +static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { DECODE_HEAD; @@ -1061,6 +1073,9 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(sizeof(clientid_t)); COPYMEM(clientid, sizeof(clientid_t)); @@ -1111,6 +1126,9 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); @@ -1137,6 +1155,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(8 + NFS4_VERIFIER_SIZE); COPYMEM(&scd_c->sc_clientid, 8); COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); @@ -1208,6 +1229,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) len -= pages * PAGE_SIZE; argp->p = (__be32 *)page_address(argp->pagelist[0]); + argp->pagelist++; argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } argp->p += XDR_QUADLEN(len); @@ -1220,6 +1242,9 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(12); COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); READ32(rlockowner->rl_owner.len); @@ -1519,7 +1544,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, [OP_READ] = (nfsd4_dec)nfsd4_decode_read, [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, @@ -1536,46 +1561,6 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, -}; - -static nfsd4_dec nfsd41_dec_ops[] = { - [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] = (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp, - [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, /* new operations for NFSv4.1 */ [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, @@ -1599,24 +1584,53 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, }; -struct nfsd4_minorversion_ops { - nfsd4_dec *decoders; - int nops; -}; +static inline bool +nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) +{ + if (op->opnum < FIRST_NFS4_OP) + return false; + else if (argp->minorversion == 0 && op->opnum > LAST_NFS40_OP) + return false; + else if (argp->minorversion == 1 && op->opnum > LAST_NFS41_OP) + return false; + else if (argp->minorversion == 2 && op->opnum > LAST_NFS42_OP) + return false; + return true; +} -static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { - [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, - [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, - [2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, -}; +/* + * Return a rough estimate of the maximum possible reply size. Note the + * estimate includes rpc headers so is meant to be passed to + * svc_reserve, not svc_reserve_auth. + * + * Also note the current compound encoding permits only one operation to + * use pages beyond the first one, so the maximum possible length is the + * maximum over these values, not the sum. + */ +static int nfsd4_max_reply(u32 opnum) +{ + switch (opnum) { + case OP_READLINK: + case OP_READDIR: + /* + * Both of these ops take a single page for data and put + * the head and tail in another page: + */ + return 2 * PAGE_SIZE; + case OP_READ: + return INT_MAX; + default: + return PAGE_SIZE; + } +} static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; struct nfsd4_op *op; - struct nfsd4_minorversion_ops *ops; bool cachethis = false; + int max_reply = PAGE_SIZE; int i; READ_BUF(4); @@ -1640,10 +1654,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } } - if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion)) + if (argp->minorversion > NFSD_SUPPORTED_MINOR_VERSION) argp->opcnt = 0; - ops = &nfsd4_minorversion[argp->minorversion]; for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; @@ -1651,8 +1664,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) READ_BUF(4); READ32(op->opnum); - if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) - op->status = ops->decoders[op->opnum](argp, &op->u); + if (nfsd4_opnum_in_range(argp, op)) + op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; @@ -1667,10 +1680,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) * op in the compound wants to be cached: */ cachethis |= nfsd4_cache_this_op(op); + + max_reply = max(max_reply, nfsd4_max_reply(op->opnum)); } /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; + if (max_reply != INT_MAX) + svc_reserve(argp->rqstp, max_reply); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; DECODE_TAIL; @@ -2375,7 +2392,7 @@ out_acl: if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { if ((buflen -= 8) < 0) goto out_resource; - WRITE64(~(u64)0); + WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes); } if (bmval0 & FATTR4_WORD0_MAXLINK) { if ((buflen -= 4) < 0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3d0e15ae6f72..3c37b160dcad 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -598,22 +598,20 @@ fh_update(struct svc_fh *fhp) _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); } else { if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - goto out; + return 0; _fh_update(fhp, fhp->fh_export, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) return nfserr_opnotsupp; } -out: return 0; - out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); - goto out; + return nfserr_serverfault; out_negative: printk(KERN_ERR "fh_update: %pd2 still negative!\n", dentry); - goto out; + return nfserr_serverfault; } /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 94b5f5d2bfed..7eea63cada1d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -298,41 +298,12 @@ commit_metadata(struct svc_fh *fhp) } /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects. */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - struct dentry *dentry; - struct inode *inode; - int accmode = NFSD_MAY_SATTR; - umode_t ftype = 0; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; - - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - goto out; - /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -342,8 +313,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. It is only - * valid if the filesystem does not define it's own i_op->setattr. + * it is not an interface that we should be using. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -369,30 +339,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~BOTH_TIME_SET; } } - - /* - * The size case is special. - * It changes the file as well as the attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, - NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserr; - - size_change = 1; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) { - put_write_access(inode); - goto out_nfserr; - } - } /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { @@ -415,32 +361,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } } +} - /* Change the attributes. */ +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + int host_err; - iap->ia_valid |= ATTR_CTIME; + if (iap->ia_size < inode->i_size) { + __be32 err; - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - host_err = nfsd_break_lease(inode); - if (host_err) - goto out_nfserr; - fh_lock(fhp); + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; + } - host_err = notify_change(dentry, iap, NULL); - err = nfserrno(host_err); - fh_unlock(fhp); + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; + + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; + +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); +} + +/* + * Set various file attributes. After this call fhp needs an fh_put. + */ +__be32 +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) +{ + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; + int host_err; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + + if (!iap->ia_valid) + goto out; + + nfsd_sanitize_attrs(inode, iap); + + /* + * The size case is special, it changes the file in addition to the + * attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; } + + iap->ia_valid |= ATTR_CTIME; + + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; + } + + host_err = nfsd_break_lease(inode); + if (host_err) + goto out_put_write_access_nfserror; + + fh_lock(fhp); + host_err = notify_change(dentry, iap, NULL); + fh_unlock(fhp); + +out_put_write_access_nfserror: + err = nfserrno(host_err); +out_put_write_access: if (size_change) put_write_access(inode); if (!err) commit_metadata(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } #if defined(CONFIG_NFSD_V2_ACL) || \ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3a44a648dae7..3407b2c62b21 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1304,7 +1304,7 @@ static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) { wait_for_completion(&mw->mw_complete); /* Re-arm the completion in case we want to wait on it again */ - INIT_COMPLETION(mw->mw_complete); + reinit_completion(&mw->mw_complete); return mw->mw_status; } @@ -1355,7 +1355,7 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, else ret = mw->mw_status; /* Re-arm the completion in case we want to wait on it again */ - INIT_COMPLETION(mw->mw_complete); + reinit_completion(&mw->mw_complete); return ret; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 1485e38daaa3..03c8d747be48 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1151,10 +1151,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - kloginuid = make_kuid(file->f_cred->user_ns, loginuid); - if (!uid_valid(kloginuid)) { - length = -EINVAL; - goto out_free_page; + + /* is userspace tring to explicitly UNSET the loginuid? */ + if (loginuid == AUDIT_UID_UNSET) { + kloginuid = INVALID_UID; + } else { + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; + } } length = audit_set_loginuid(kloginuid); diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index b701eaa482bf..51942d5abcec 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -29,7 +29,6 @@ static int show_console_dev(struct seq_file *m, void *v) char flags[ARRAY_SIZE(con_flags) + 1]; struct console *con = v; unsigned int a; - int len; dev_t dev = 0; if (con->device) { @@ -47,11 +46,10 @@ static int show_console_dev(struct seq_file *m, void *v) con_flags[a].name : ' '; flags[a] = 0; - seq_printf(m, "%s%d%n", con->name, con->index, &len); - len = 21 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-', + seq_setwidth(m, 21 - 1); + seq_printf(m, "%s%d", con->name, con->index); + seq_pad(m, ' '); + seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con->write ? 'W' : '-', con->unblank ? 'U' : '-', flags); if (dev) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 737e15615b04..cca93b6fb9a9 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -175,22 +175,6 @@ static const struct inode_operations proc_link_inode_operations = { }; /* - * As some entries in /proc are volatile, we want to - * get rid of unused dentries. This could be made - * smarter: we could keep a "volatile" flag in the - * inode to indicate which ones to keep. - */ -static int proc_delete_dentry(const struct dentry * dentry) -{ - return 1; -} - -static const struct dentry_operations proc_dentry_operations = -{ - .d_delete = proc_delete_dentry, -}; - -/* * Don't create negative dentries here, return -ENOENT by hand * instead. */ @@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_dentry_operations); + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index c805d5b69ba1..a77d2b299199 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -1,8 +1,8 @@ #include <linux/fs.h> -#include <linux/hugetlb.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/mmzone.h> #include <linux/proc_fs.h> diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 49a7fff2e83a..9ae46b87470d 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = { .setattr = proc_setattr, }; -static int ns_delete_dentry(const struct dentry *dentry) -{ - /* Don't cache namespace inodes when not in use */ - return 1; -} - static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) { struct inode *inode = dentry->d_inode; @@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) const struct dentry_operations ns_dentry_operations = { - .d_delete = ns_delete_dentry, + .d_delete = always_delete_dentry, .d_dname = ns_dname, }; diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index ccfd99bd1c5a..5f9bc8a746c9 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -39,7 +39,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags, len; + int flags; flags = region->vm_flags; file = region->vm_file; @@ -50,8 +50,9 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) ino = inode->i_ino; } + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", region->vm_start, region->vm_end, flags & VM_READ ? 'r' : '-', @@ -59,13 +60,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', ((loff_t)region->vm_pgoff) << PAGE_SHIFT, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); if (file) { - len = 25 + sizeof(void *) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); + seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index abbe825d20ff..fb52b548080d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -62,7 +62,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, + (PTRS_PER_PTE * sizeof(pte_t) * + atomic_long_read(&mm->nr_ptes)) >> 10, swap << (PAGE_SHIFT-10)); } @@ -83,14 +84,6 @@ unsigned long task_statm(struct mm_struct *mm, return mm->total_vm; } -static void pad_len_spaces(struct seq_file *m, int len) -{ - len = 25 + sizeof(void*) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); -} - #ifdef CONFIG_NUMA /* * These functions are for numa_maps but called in generic **maps seq_file @@ -268,7 +261,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; - int len; const char *name = NULL; if (file) { @@ -286,7 +278,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) if (stack_guard_page_end(vma, end)) end -= PAGE_SIZE; - seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", start, end, flags & VM_READ ? 'r' : '-', @@ -294,14 +287,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ if (file) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_path(m, &file->f_path, "\n"); goto done; } @@ -333,7 +326,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) name = "[stack]"; } else { /* Thread stack in /proc/PID/maps */ - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_printf(m, "[stack:%d]", tid); } } @@ -341,7 +334,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) done: if (name) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_puts(m, name); } seq_putc(m, '\n'); @@ -505,9 +498,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); mss->anonymous_thp += HPAGE_PMD_SIZE; return 0; } @@ -998,13 +991,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, { struct vm_area_struct *vma; struct pagemapread *pm = walk->private; + spinlock_t *ptl; pte_t *pte; int err = 0; pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); - if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { + if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { int pmd_flags2; if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) @@ -1022,7 +1016,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (err) break; } - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); return err; } @@ -1324,7 +1318,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, md = walk->private; - if (pmd_trans_huge_lock(pmd, md->vma) == 1) { + if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) { pte_t huge_pte = *(pte_t *)pmd; struct page *page; @@ -1332,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (page) gather_stats(page, md, pte_dirty(huge_pte), HPAGE_PMD_SIZE/PAGE_SIZE); - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); return 0; } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 56123a6f462e..678455d2d683 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -123,14 +123,6 @@ unsigned long task_statm(struct mm_struct *mm, return size; } -static void pad_len_spaces(struct seq_file *m, int len) -{ - len = 25 + sizeof(void*) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); -} - /* * display a single VMA to a sequenced file */ @@ -142,7 +134,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags, len; + int flags; unsigned long long pgoff = 0; flags = vma->vm_flags; @@ -155,8 +147,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; } + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', @@ -164,16 +157,16 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); if (file) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } else if (mm) { pid_t tid = vm_is_stack(priv->task, vma, is_pid); if (tid != 0) { - pad_len_spaces(m, len); + seq_pad(m, ' '); /* * Thread stack in /proc/PID/task/TID/maps or * the main process stack. diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 16e8abb7709b..72d29177998e 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -9,13 +9,25 @@ #include <net/netlink.h> #include <net/genetlink.h> +static const struct genl_multicast_group quota_mcgrps[] = { + { .name = "events", }, +}; + /* Netlink family structure for quota */ static struct genl_family quota_genl_family = { - .id = GENL_ID_GENERATE, + /* + * Needed due to multicast group ID abuse - old code assumed + * the family ID was also a valid multicast group ID (which + * isn't true) and userspace might thus rely on it. Assign a + * static ID for this group to make dealing with that easier. + */ + .id = GENL_ID_VFS_DQUOT, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, .maxattr = QUOTA_NL_A_MAX, + .mcgrps = quota_mcgrps, + .n_mcgrps = ARRAY_SIZE(quota_mcgrps), }; /** @@ -78,7 +90,7 @@ void quota_send_warning(struct kqid qid, dev_t dev, goto attr_err_out; genlmsg_end(skb, msg_head); - genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); + genlmsg_multicast("a_genl_family, skb, 0, 0, GFP_NOFS); return; attr_err_out: printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); diff --git a/fs/seq_file.c b/fs/seq_file.c index a290157265ef..1d641bb108d2 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -136,6 +136,7 @@ static int traverse(struct seq_file *m, loff_t offset) Eoverflow: m->op->stop(m, p); kfree(m->buf); + m->count = 0; m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); return !m->buf ? -ENOMEM : -EAGAIN; } @@ -232,10 +233,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) goto Fill; m->op->stop(m, p); kfree(m->buf); + m->count = 0; m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); if (!m->buf) goto Enomem; - m->count = 0; m->version = 0; pos = m->index; p = m->op->start(m, &pos); @@ -766,6 +767,21 @@ int seq_write(struct seq_file *seq, const void *data, size_t len) } EXPORT_SYMBOL(seq_write); +/** + * seq_pad - write padding spaces to buffer + * @m: seq_file identifying the buffer to which data should be written + * @c: the byte to append after padding if non-zero + */ +void seq_pad(struct seq_file *m, char c) +{ + int size = m->pad_until - m->count; + if (size > 0) + seq_printf(m, "%*s", size, ""); + if (c) + seq_putc(m, c); +} +EXPORT_SYMBOL(seq_pad); + struct list_head *seq_list_start(struct list_head *head, loff_t pos) { struct list_head *lh; diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index c70111ebefd4..b6fa8657dcbc 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,78 @@ config SQUASHFS If unsure, say N. +choice + prompt "File decompression options" + depends on SQUASHFS + help + Squashfs now supports two options for decompressing file + data. Traditionally Squashfs has decompressed into an + intermediate buffer and then memcopied it into the page cache. + Squashfs now supports the ability to decompress directly into + the page cache. + + If unsure, select "Decompress file data into an intermediate buffer" + +config SQUASHFS_FILE_CACHE + bool "Decompress file data into an intermediate buffer" + help + Decompress file data into an intermediate buffer and then + memcopy it into the page cache. + +config SQUASHFS_FILE_DIRECT + bool "Decompress files directly into the page cache" + help + Directly decompress file data into the page cache. + Doing so can significantly improve performance because + it eliminates a memcpy and it also removes the lock contention + on the single buffer. + +endchoice + +choice + prompt "Decompressor parallelisation options" + depends on SQUASHFS + help + Squashfs now supports three parallelisation options for + decompression. Each one exhibits various trade-offs between + decompression performance and CPU and memory usage. + + If in doubt, select "Single threaded compression" + +config SQUASHFS_DECOMP_SINGLE + bool "Single threaded compression" + help + Traditionally Squashfs has used single-threaded decompression. + Only one block (data or metadata) can be decompressed at any + one time. This limits CPU and memory usage to a minimum. + +config SQUASHFS_DECOMP_MULTI + bool "Use multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + If you have a parallel I/O workload and your system has enough memory, + using this option may improve overall I/O performance. + + This decompressor implementation uses up to two parallel + decompressors per core. It dynamically allocates decompressors + on a demand basis. + +config SQUASHFS_DECOMP_MULTI_PERCPU + bool "Use percpu multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + This decompressor implementation uses a maximum of one + decompressor per core. It uses percpu variables to ensure + decompression is load-balanced across the cores. + +endchoice + config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 110b0476f3b4..4132520b4ff2 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,6 +5,11 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 41d108ecc9be..0cea9b9236d0 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -36,6 +36,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" /* * Read the metadata block length, this is stored in the first two @@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb, * generated a larger block - this does occasionally happen with compression * algorithms). */ -int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, - int length, u64 *next_index, int srclength, int pages) +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head **bh; int offset = index & ((1 << msblk->devblksize_log2) - 1); u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, page = 0, avail; + int bytes, compressed, b = 0, k = 0, avail, i; - bh = kcalloc(((srclength + msblk->devblksize - 1) + bh = kcalloc(((output->length + msblk->devblksize - 1) >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); if (bh == NULL) return -ENOMEM; @@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, *next_index = index + length; TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", - index, compressed ? "" : "un", length, srclength); + index, compressed ? "" : "un", length, output->length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto read_failure; @@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed ? "" : "un", length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto block_release; @@ -158,9 +159,15 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, ll_rw_block(READ, b - 1, bh + 1); } + for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + } + if (compressed) { - length = squashfs_decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + length = squashfs_decompress(msblk, bh, b, offset, length, + output); if (length < 0) goto read_failure; } else { @@ -168,22 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, * Block is uncompressed. */ int in, pg_offset = 0; + void *data = squashfs_first_page(output); for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { - page++; + data = squashfs_next_page(output); pg_offset = 0; } avail = min_t(int, in, PAGE_CACHE_SIZE - pg_offset); - memcpy(buffer[page] + pg_offset, - bh[k]->b_data + offset, avail); + memcpy(data + pg_offset, bh[k]->b_data + offset, + avail); in -= avail; pg_offset += avail; offset += avail; @@ -191,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, offset = 0; put_bh(bh[k]); } + squashfs_finish_page(output); } kfree(bh); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index af0b73802592..1cb70a0b2168 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -56,6 +56,7 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" +#include "page_actor.h" /* * Look-up block in cache, and increment usage count. If not in cache, read @@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, entry->error = 0; spin_unlock(&cache->lock); - entry->length = squashfs_read_data(sb, entry->data, - block, length, &entry->next_index, - cache->block_size, cache->pages); + entry->length = squashfs_read_data(sb, block, length, + &entry->next_index, entry->actor); spin_lock(&cache->lock); @@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) kfree(cache->entry[i].data[j]); kfree(cache->entry[i].data); } + kfree(cache->entry[i].actor); } kfree(cache->entry); @@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, goto cleanup; } } + + entry->actor = squashfs_page_actor_init(entry->data, + cache->pages, 0); + if (entry->actor == NULL) { + ERROR("Failed to allocate %s cache entry\n", name); + goto cleanup; + } } return cache; @@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; int i, res; void *table, *buffer, **data; + struct squashfs_page_actor *actor; table = buffer = kmalloc(length, GFP_KERNEL); if (table == NULL) @@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) goto failed; } + actor = squashfs_page_actor_init(data, pages, length); + if (actor == NULL) { + res = -ENOMEM; + goto failed2; + } + for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) data[i] = buffer; - res = squashfs_read_data(sb, data, block, length | - SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); + res = squashfs_read_data(sb, block, length | + SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); kfree(data); + kfree(actor); if (res < 0) goto failed; return table; +failed2: + kfree(data); failed: kfree(table); return ERR_PTR(res); diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 3f6271d86abc..ac22fe73b0ad 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -30,6 +30,7 @@ #include "squashfs_fs_sb.h" #include "decompressor.h" #include "squashfs.h" +#include "page_actor.h" /* * This file (and decompressor.h) implements a decompressor framework for @@ -37,29 +38,29 @@ */ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { - NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 + NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 }; #ifndef CONFIG_SQUASHFS_LZO static const struct squashfs_decompressor squashfs_lzo_comp_ops = { - NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 + NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 }; #endif #ifndef CONFIG_SQUASHFS_XZ static const struct squashfs_decompressor squashfs_xz_comp_ops = { - NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 + NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 }; #endif #ifndef CONFIG_SQUASHFS_ZLIB static const struct squashfs_decompressor squashfs_zlib_comp_ops = { - NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 + NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 }; #endif static const struct squashfs_decompressor squashfs_unknown_comp_ops = { - NULL, NULL, NULL, 0, "unknown", 0 + NULL, NULL, NULL, NULL, 0, "unknown", 0 }; static const struct squashfs_decompressor *decompressor[] = { @@ -83,10 +84,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) } -void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) +static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *strm, *buffer = NULL; + void *buffer = NULL, *comp_opts; + struct squashfs_page_actor *actor = NULL; int length = 0; /* @@ -94,23 +96,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) */ if (SQUASHFS_COMP_OPTS(flags)) { buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (buffer == NULL) - return ERR_PTR(-ENOMEM); + if (buffer == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + actor = squashfs_page_actor_init(&buffer, 1, 0); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } - length = squashfs_read_data(sb, &buffer, - sizeof(struct squashfs_super_block), 0, NULL, - PAGE_CACHE_SIZE, 1); + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); if (length < 0) { - strm = ERR_PTR(length); - goto finished; + comp_opts = ERR_PTR(length); + goto out; } } - strm = msblk->decompressor->init(msblk, buffer, length); + comp_opts = squashfs_comp_opts(msblk, buffer, length); -finished: +out: + kfree(actor); kfree(buffer); + return comp_opts; +} + + +void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + void *stream, *comp_opts = get_comp_opts(sb, flags); + + if (IS_ERR(comp_opts)) + return comp_opts; + + stream = squashfs_decompressor_create(msblk, comp_opts); + if (IS_ERR(stream)) + kfree(comp_opts); - return strm; + return stream; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 330073e29029..af0985321808 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -24,28 +24,22 @@ */ struct squashfs_decompressor { - void *(*init)(struct squashfs_sb_info *, void *, int); + void *(*init)(struct squashfs_sb_info *, void *); + void *(*comp_opts)(struct squashfs_sb_info *, void *, int); void (*free)(void *); - int (*decompress)(struct squashfs_sb_info *, void **, - struct buffer_head **, int, int, int, int, int); + int (*decompress)(struct squashfs_sb_info *, void *, + struct buffer_head **, int, int, int, + struct squashfs_page_actor *); int id; char *name; int supported; }; -static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, - void *s) +static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int length) { - if (msblk->decompressor) - msblk->decompressor->free(s); -} - -static inline int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) -{ - return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + return msblk->decompressor->comp_opts ? + msblk->decompressor->comp_opts(msblk, buff, length) : NULL; } #ifdef CONFIG_SQUASHFS_XZ diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c new file mode 100644 index 000000000000..d6008a636479 --- /dev/null +++ b/fs/squashfs/decompressor_multi.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2013 + * Minchan Kim <minchan@kernel.org> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/cpumask.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression in the + * decompressor framework + */ + + +/* + * The reason that multiply two is that a CPU can request new I/O + * while it is waiting previous request. + */ +#define MAX_DECOMPRESSOR (num_online_cpus() * 2) + + +int squashfs_max_decompressors(void) +{ + return MAX_DECOMPRESSOR; +} + + +struct squashfs_stream { + void *comp_opts; + struct list_head strm_list; + struct mutex mutex; + int avail_decomp; + wait_queue_head_t wait; +}; + + +struct decomp_stream { + void *stream; + struct list_head list; +}; + + +static void put_decomp_stream(struct decomp_stream *decomp_strm, + struct squashfs_stream *stream) +{ + mutex_lock(&stream->mutex); + list_add(&decomp_strm->list, &stream->strm_list); + mutex_unlock(&stream->mutex); + wake_up(&stream->wait); +} + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct decomp_stream *decomp_strm = NULL; + int err = -ENOMEM; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + goto out; + + stream->comp_opts = comp_opts; + mutex_init(&stream->mutex); + INIT_LIST_HEAD(&stream->strm_list); + init_waitqueue_head(&stream->wait); + + /* + * We should have a decompressor at least as default + * so if we fail to allocate new decompressor dynamically, + * we could always fall back to default decompressor and + * file system works. + */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto out; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + err = PTR_ERR(decomp_strm->stream); + goto out; + } + + list_add(&decomp_strm->list, &stream->strm_list); + stream->avail_decomp = 1; + return stream; + +out: + kfree(decomp_strm); + kfree(stream); + return ERR_PTR(err); +} + + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + if (stream) { + struct decomp_stream *decomp_strm; + + while (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + msblk->decompressor->free(decomp_strm->stream); + kfree(decomp_strm); + stream->avail_decomp--; + } + WARN_ON(stream->avail_decomp); + kfree(stream->comp_opts); + kfree(stream); + } +} + + +static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, + struct squashfs_stream *stream) +{ + struct decomp_stream *decomp_strm; + + while (1) { + mutex_lock(&stream->mutex); + + /* There is available decomp_stream */ + if (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + mutex_unlock(&stream->mutex); + break; + } + + /* + * If there is no available decomp and already full, + * let's wait for releasing decomp from other users. + */ + if (stream->avail_decomp >= MAX_DECOMPRESSOR) + goto wait; + + /* Let's allocate new decomp */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto wait; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + kfree(decomp_strm); + goto wait; + } + + stream->avail_decomp++; + WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); + + mutex_unlock(&stream->mutex); + break; +wait: + /* + * If system memory is tough, let's for other's + * releasing instead of hurting VM because it could + * make page cache thrashing. + */ + mutex_unlock(&stream->mutex); + wait_event(stream->wait, + !list_empty(&stream->strm_list)); + } + + return decomp_strm; +} + + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); + res = msblk->decompressor->decompress(msblk, decomp_stream->stream, + bh, b, offset, length, output); + put_decomp_stream(decomp_stream, stream); + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + return res; +} diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c new file mode 100644 index 000000000000..23a9c28ad8ea --- /dev/null +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/percpu.h> +#include <linux/buffer_head.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression using percpu + * variables, one thread per cpu core. + */ + +struct squashfs_stream { + void *stream; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct squashfs_stream __percpu *percpu; + int err, cpu; + + percpu = alloc_percpu(struct squashfs_stream); + if (percpu == NULL) + return ERR_PTR(-ENOMEM); + + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + } + + kfree(comp_opts); + return (__force void *) percpu; + +out: + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + if (!IS_ERR_OR_NULL(stream->stream)) + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream; + int cpu; + + if (msblk->stream) { + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream = get_cpu_ptr(percpu); + int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); + put_cpu_ptr(stream); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return num_possible_cpus(); +} diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c new file mode 100644 index 000000000000..a6c75929a00e --- /dev/null +++ b/fs/squashfs/decompressor_single.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements single-threaded decompression in the + * decompressor framework + */ + +struct squashfs_stream { + void *stream; + struct mutex mutex; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + int err = -ENOMEM; + + stream = kmalloc(sizeof(*stream), GFP_KERNEL); + if (stream == NULL) + goto out; + + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + + kfree(comp_opts); + mutex_init(&stream->mutex); + return stream; + +out: + kfree(stream); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + + if (stream) { + msblk->decompressor->free(stream->stream); + kfree(stream); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + + mutex_lock(&stream->mutex); + res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); + mutex_unlock(&stream->mutex); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return 1; +} diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 8ca62c28fe12..e5c9689062ba 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return le32_to_cpu(size); } - -static int squashfs_readpage(struct file *file, struct page *page) +/* Copy data into page cache */ +void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, + int bytes, int offset) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes, i, offset = 0, sparse = 0; - struct squashfs_cache_entry *buffer = NULL; void *pageaddr; - - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); - int start_index = page->index & ~mask; - int end_index = start_index | mask; - int file_end = i_size_read(inode) >> msblk->block_log; - - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); - - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - goto out; - - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - /* - * Reading a datablock from disk. Need to read block list - * to get location and block size. - */ - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) - goto error_out; - - if (bsize == 0) { /* hole */ - bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - sparse = 1; - } else { - /* - * Read and decompress datablock. - */ - buffer = squashfs_get_datablock(inode->i_sb, - block, bsize); - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x" - "\n", block, bsize); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = buffer->length; - } - } else { - /* - * Datablock is stored inside a fragment (tail-end packed - * block). - */ - buffer = squashfs_get_fragment(inode->i_sb, - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x\n", - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = i_size_read(inode) & (msblk->block_size - 1); - offset = squashfs_i(inode)->fragment_offset; - } + int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = page->index & ~mask, end_index = start_index | mask; /* * Loop copying datablock into pages. As the datablock likely covers @@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page) for (i = start_index; i <= end_index && bytes > 0; i++, bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { struct page *push_page; - int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); + int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0; TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); @@ -475,11 +413,75 @@ skip_page: if (i != page->index) page_cache_release(push_page); } +} + +/* Read datablock stored packed inside a fragment (tail-end packed block) */ +static int squashfs_readpage_fragment(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + else + squashfs_copy_cache(page, buffer, i_size_read(inode) & + (msblk->block_size - 1), + squashfs_i(inode)->fragment_offset); + + squashfs_cache_put(buffer); + return res; +} - if (!sparse) - squashfs_cache_put(buffer); +static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int bytes = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + squashfs_copy_cache(page, NULL, bytes, 0); return 0; +} + +static int squashfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); + int file_end = i_size_read(inode) >> msblk->block_log; + int res; + void *pageaddr; + + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(inode)->start); + + if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + goto out; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; + + if (bsize == 0) + res = squashfs_readpage_sparse(page, index, file_end); + else + res = squashfs_readpage_block(page, block, bsize); + } else + res = squashfs_readpage_fragment(page); + + if (!res) + return 0; error_out: SetPageError(page); diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 000000000000..f2310d2a2019 --- /dev/null +++ b/fs/squashfs/file_cache.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* Read separately compressed datablock and memcopy into page cache */ +int squashfs_readpage_block(struct page *page, u64 block, int bsize) +{ + struct inode *i = page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + else + squashfs_copy_cache(page, buffer, buffer->length, 0); + + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c new file mode 100644 index 000000000000..2943b2bfae48 --- /dev/null +++ b/fs/squashfs/file_direct.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" +#include "page_actor.h" + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page); + +/* Read separately compressed datablock directly into page cache */ +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) + +{ + struct inode *inode = target_page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + + int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = target_page->index & ~mask; + int end_index = start_index | mask; + int i, n, pages, missing_pages, bytes, res = -ENOMEM; + struct page **page; + struct squashfs_page_actor *actor; + void *pageaddr; + + if (end_index > file_end) + end_index = file_end; + + pages = end_index - start_index + 1; + + page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); + if (page == NULL) + return res; + + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(page, pages, 0); + if (actor == NULL) + goto out; + + /* Try to grab all the pages covered by the Squashfs block */ + for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + page[i] = (n == target_page->index) ? target_page : + grab_cache_page_nowait(target_page->mapping, n); + + if (page[i] == NULL) { + missing_pages++; + continue; + } + + if (PageUptodate(page[i])) { + unlock_page(page[i]); + page_cache_release(page[i]); + page[i] = NULL; + missing_pages++; + } + } + + if (missing_pages) { + /* + * Couldn't get one or more pages, this page has either + * been VM reclaimed, but others are still in the page cache + * and uptodate, or we're racing with another thread in + * squashfs_readpage also trying to grab them. Fall back to + * using an intermediate buffer. + */ + res = squashfs_read_cache(target_page, block, bsize, pages, + page); + goto out; + } + + /* Decompress directly into the page cache buffers */ + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + if (res < 0) + goto mark_errored; + + /* Last page may have trailing bytes not filled */ + bytes = res % PAGE_CACHE_SIZE; + if (bytes) { + pageaddr = kmap_atomic(page[pages - 1]); + memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + kunmap_atomic(pageaddr); + } + + /* Mark pages as uptodate, unlock and release */ + for (i = 0; i < pages; i++) { + flush_dcache_page(page[i]); + SetPageUptodate(page[i]); + unlock_page(page[i]); + if (page[i] != target_page) + page_cache_release(page[i]); + } + + kfree(actor); + kfree(page); + + return 0; + +mark_errored: + /* Decompression failed, mark pages as errored. Target_page is + * dealt with by the caller + */ + for (i = 0; i < pages; i++) { + if (page[i] == target_page) + continue; + flush_dcache_page(page[i]); + SetPageError(page[i]); + unlock_page(page[i]); + page_cache_release(page[i]); + } + +out: + kfree(actor); + kfree(page); + return res; +} + + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page) +{ + struct inode *i = target_page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int bytes = buffer->length, res = buffer->error, n, offset = 0; + void *pageaddr; + + if (res) { + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + goto out; + } + + for (n = 0; n < pages && bytes > 0; n++, + bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { + int avail = min_t(int, bytes, PAGE_CACHE_SIZE); + + if (page[n] == NULL) + continue; + + pageaddr = kmap_atomic(page[n]); + squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + kunmap_atomic(pageaddr); + flush_dcache_page(page[n]); + SetPageUptodate(page[n]); + unlock_page(page[n]); + if (page[n] != target_page) + page_cache_release(page[n]); + } + +out: + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 00f4dfc5f088..244b9fbfff7b 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -31,13 +31,14 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_lzo { void *input; void *output; }; -static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) +static void *lzo_init(struct squashfs_sb_info *msblk, void *buff) { int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); @@ -74,22 +75,16 @@ static void lzo_free(void *strm) } -static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { - struct squashfs_lzo *stream = msblk->stream; - void *buff = stream->input; + struct squashfs_lzo *stream = strm; + void *buff = stream->input, *data; int avail, i, bytes = length, res; - size_t out_len = srclength; - - mutex_lock(&msblk->read_data_mutex); + size_t out_len = output->length; for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - avail = min(bytes, msblk->devblksize - offset); memcpy(buff, bh[i]->b_data + offset, avail); buff += avail; @@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, goto failed; res = bytes = (int)out_len; - for (i = 0, buff = stream->output; bytes && i < pages; i++) { - avail = min_t(int, bytes, PAGE_CACHE_SIZE); - memcpy(buffer[i], buff, avail); - buff += avail; - bytes -= avail; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } else { + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } } + squashfs_finish_page(output); - mutex_unlock(&msblk->read_data_mutex); return res; -block_release: - for (; i < b; i++) - put_bh(bh[i]); - failed: - mutex_unlock(&msblk->read_data_mutex); - - ERROR("lzo decompression failed, data probably corrupt\n"); return -EIO; } diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c new file mode 100644 index 000000000000..5a1c11f56441 --- /dev/null +++ b/fs/squashfs/page_actor.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include "page_actor.h" + +/* + * This file contains implementations of page_actor for decompressing into + * an intermediate buffer, and for decompressing directly into the + * page cache. + * + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). + */ + +/* Implementation of page_actor for decompressing into intermediate buffer */ +static void *cache_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->buffer[0]; +} + +static void *cache_next_page(struct squashfs_page_actor *actor) +{ + if (actor->next_page == actor->pages) + return NULL; + + return actor->buffer[actor->next_page++]; +} + +static void cache_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} + +struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->buffer = buffer; + actor->pages = pages; + actor->next_page = 0; + actor->squashfs_first_page = cache_first_page; + actor->squashfs_next_page = cache_next_page; + actor->squashfs_finish_page = cache_finish_page; + return actor; +} + +/* Implementation of page_actor for decompressing directly into page cache. */ +static void *direct_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->pageaddr = kmap_atomic(actor->page[0]); +} + +static void *direct_next_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); + + return actor->pageaddr = actor->next_page == actor->pages ? NULL : + kmap_atomic(actor->page[actor->next_page++]); +} + +static void direct_finish_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); +} + +struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + actor->pageaddr = NULL; + actor->squashfs_first_page = direct_first_page; + actor->squashfs_next_page = direct_next_page; + actor->squashfs_finish_page = direct_finish_page; + return actor; +} diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h new file mode 100644 index 000000000000..26dd82008b82 --- /dev/null +++ b/fs/squashfs/page_actor.h @@ -0,0 +1,81 @@ +#ifndef PAGE_ACTOR_H +#define PAGE_ACTOR_H +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef CONFIG_SQUASHFS_FILE_DIRECT +struct squashfs_page_actor { + void **page; + int pages; + int length; + int next_page; +}; + +static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + return actor; +} + +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->page[0]; +} + +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->next_page == actor->pages ? NULL : + actor->page[actor->next_page++]; +} + +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} +#else +struct squashfs_page_actor { + union { + void **buffer; + struct page **page; + }; + void *pageaddr; + void *(*squashfs_first_page)(struct squashfs_page_actor *); + void *(*squashfs_next_page)(struct squashfs_page_actor *); + void (*squashfs_finish_page)(struct squashfs_page_actor *); + int pages; + int length; + int next_page; +}; + +extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page + **, int, int); +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_first_page(actor); +} +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_next_page(actor); +} +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + actor->squashfs_finish_page(actor); +} +#endif +#endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d1266516ed08..9e1bb79f7e6f 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,8 +28,8 @@ #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) /* block.c */ -extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, - int, int); +extern int squashfs_read_data(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int); /* decompressor.c */ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); -extern void *squashfs_decompressor_init(struct super_block *, unsigned short); +extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); + +/* decompressor_xxx.c */ +extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); +extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); +extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, + int, int, int, struct squashfs_page_actor *); +extern int squashfs_max_decompressors(void); /* export.c */ extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, @@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); +/* file.c */ +void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, + int); + +/* file_xxx.c */ +extern int squashfs_readpage_block(struct page *, u64, int); + /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 52934a22f296..1da565cb50c3 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -50,6 +50,7 @@ struct squashfs_cache_entry { wait_queue_head_t wait_queue; struct squashfs_cache *cache; void **data; + struct squashfs_page_actor *actor; }; struct squashfs_sb_info { @@ -63,10 +64,9 @@ struct squashfs_sb_info { __le64 *id_table; __le64 *fragment_index; __le64 *xattr_id_table; - struct mutex read_data_mutex; struct mutex meta_index_mutex; struct meta_index *meta_index; - void *stream; + struct squashfs_stream *stream; __le64 *inode_lookup_table; u64 inode_table; u64 directory_table; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 60553a9053ca..202df6312d4e 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); msblk->devblksize_log2 = ffz(~msblk->devblksize); - mutex_init(&msblk->read_data_mutex); mutex_init(&msblk->meta_index_mutex); /* @@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; /* Allocate read_page block */ - msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); + msblk->read_page = squashfs_cache_init("data", + squashfs_max_decompressors(), msblk->block_size); if (msblk->read_page == NULL) { ERROR("Failed to allocate read_page block\n"); goto failed_mount; } - msblk->stream = squashfs_decompressor_init(sb, flags); + msblk->stream = squashfs_decompressor_setup(sb, flags); if (IS_ERR(msblk->stream)) { err = PTR_ERR(msblk->stream); msblk->stream = NULL; @@ -336,7 +336,7 @@ failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); - squashfs_decompressor_free(msblk, msblk->stream); + squashfs_decompressor_destroy(msblk); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); @@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); - squashfs_decompressor_free(sbi, sbi->stream); + squashfs_decompressor_destroy(sbi); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 1760b7d108f6..c609624e4b8a 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -32,44 +32,70 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_xz { struct xz_dec *state; struct xz_buf buf; }; -struct comp_opts { +struct disk_comp_opts { __le32 dictionary_size; __le32 flags; }; -static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, - int len) +struct comp_opts { + int dict_size; +}; + +static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int len) { - struct comp_opts *comp_opts = buff; - struct squashfs_xz *stream; - int dict_size = msblk->block_size; - int err, n; + struct disk_comp_opts *comp_opts = buff; + struct comp_opts *opts; + int err = 0, n; + + opts = kmalloc(sizeof(*opts), GFP_KERNEL); + if (opts == NULL) { + err = -ENOMEM; + goto out2; + } if (comp_opts) { /* check compressor options are the expected length */ if (len < sizeof(*comp_opts)) { err = -EIO; - goto failed; + goto out; } - dict_size = le32_to_cpu(comp_opts->dictionary_size); + opts->dict_size = le32_to_cpu(comp_opts->dictionary_size); /* the dictionary size should be 2^n or 2^n+2^(n+1) */ - n = ffs(dict_size) - 1; - if (dict_size != (1 << n) && dict_size != (1 << n) + + n = ffs(opts->dict_size) - 1; + if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) + (1 << (n + 1))) { err = -EIO; - goto failed; + goto out; } - } + } else + /* use defaults */ + opts->dict_size = max_t(int, msblk->block_size, + SQUASHFS_METADATA_SIZE); + + return opts; + +out: + kfree(opts); +out2: + return ERR_PTR(err); +} + - dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); +static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) +{ + struct comp_opts *comp_opts = buff; + struct squashfs_xz *stream; + int err; stream = kmalloc(sizeof(*stream), GFP_KERNEL); if (stream == NULL) { @@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, goto failed; } - stream->state = xz_dec_init(XZ_PREALLOC, dict_size); + stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size); if (stream->state == NULL) { kfree(stream); err = -ENOMEM; @@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm) } -static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { enum xz_ret xz_err; - int avail, total = 0, k = 0, page = 0; - struct squashfs_xz *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + int avail, total = 0, k = 0; + struct squashfs_xz *stream = strm; xz_dec_reset(stream->state); stream->buf.in_pos = 0; stream->buf.in_size = 0; stream->buf.out_pos = 0; stream->buf.out_size = PAGE_CACHE_SIZE; - stream->buf.out = buffer[page++]; + stream->buf.out = squashfs_first_page(output); do { if (stream->buf.in_pos == stream->buf.in_size && k < b) { avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; offset = 0; } - if (stream->buf.out_pos == stream->buf.out_size - && page < pages) { - stream->buf.out = buffer[page++]; - stream->buf.out_pos = 0; - total += PAGE_CACHE_SIZE; + if (stream->buf.out_pos == stream->buf.out_size) { + stream->buf.out = squashfs_next_page(output); + if (stream->buf.out != NULL) { + stream->buf.out_pos = 0; + total += PAGE_CACHE_SIZE; + } } xz_err = xz_dec_run(stream->state, &stream->buf); @@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (xz_err == XZ_OK); - if (xz_err != XZ_STREAM_END) { - ERROR("xz_dec_run error, data probably corrupt\n"); - goto release_mutex; - } - - if (k < b) { - ERROR("xz_uncompress error, input remaining\n"); - goto release_mutex; - } + squashfs_finish_page(output); - total += stream->buf.out_pos; - mutex_unlock(&msblk->read_data_mutex); - return total; + if (xz_err != XZ_STREAM_END || k < b) + goto out; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return total + stream->buf.out_pos; +out: for (; k < b; k++) put_bh(bh[k]); @@ -172,6 +184,7 @@ release_mutex: const struct squashfs_decompressor squashfs_xz_comp_ops = { .init = squashfs_xz_init, + .comp_opts = squashfs_xz_comp_opts, .free = squashfs_xz_free, .decompress = squashfs_xz_uncompress, .id = XZ_COMPRESSION, diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 55d918fd2d86..8727caba6882 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -32,8 +32,9 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" -static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) +static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) { z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); if (stream == NULL) @@ -61,44 +62,37 @@ static void zlib_free(void *strm) } -static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { - int zlib_err, zlib_init = 0; - int k = 0, page = 0; - z_stream *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + int zlib_err, zlib_init = 0, k = 0; + z_stream *stream = strm; - stream->avail_out = 0; + stream->avail_out = PAGE_CACHE_SIZE; + stream->next_out = squashfs_first_page(output); stream->avail_in = 0; do { if (stream->avail_in == 0 && k < b) { int avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; } - if (stream->avail_out == 0 && page < pages) { - stream->next_out = buffer[page++]; - stream->avail_out = PAGE_CACHE_SIZE; + if (stream->avail_out == 0) { + stream->next_out = squashfs_next_page(output); + if (stream->next_out != NULL) + stream->avail_out = PAGE_CACHE_SIZE; } if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { - ERROR("zlib_inflateInit returned unexpected " - "result 0x%x, srclength %d\n", - zlib_err, srclength); - goto release_mutex; + squashfs_finish_page(output); + goto out; } zlib_init = 1; } @@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (zlib_err == Z_OK); - if (zlib_err != Z_STREAM_END) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + squashfs_finish_page(output); - zlib_err = zlib_inflateEnd(stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + if (zlib_err != Z_STREAM_END) + goto out; - if (k < b) { - ERROR("zlib_uncompress error, data remaining\n"); - goto release_mutex; - } + zlib_err = zlib_inflateEnd(stream); + if (zlib_err != Z_OK) + goto out; - length = stream->total_out; - mutex_unlock(&msblk->read_data_mutex); - return length; + if (k < b) + goto out; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return stream->total_out; +out: for (; k < b; k++) put_bh(bh[k]); diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 0719e4db93f2..c21f43506661 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -66,12 +66,14 @@ xfs-y += xfs_alloc.o \ xfs_bmap_btree.o \ xfs_btree.o \ xfs_da_btree.o \ + xfs_da_format.o \ xfs_dir2.o \ xfs_dir2_block.o \ xfs_dir2_data.o \ xfs_dir2_leaf.o \ xfs_dir2_node.o \ xfs_dir2_sf.o \ + xfs_dquot_buf.o \ xfs_ialloc.o \ xfs_ialloc_btree.o \ xfs_icreate_item.o \ @@ -103,7 +105,11 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs_qm_bhv.o \ xfs_qm.o \ xfs_quotaops.o -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o + +# xfs_rtbitmap is shared with libxfs +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \ + xfs_rtbitmap.o + xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += xfs_stats.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index a02cfb9e3bce..66a36befc5c0 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -63,17 +63,6 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) } void * -kmem_zalloc(size_t size, xfs_km_flags_t flags) -{ - void *ptr; - - ptr = kmem_alloc(size, flags); - if (ptr) - memset((char *)ptr, 0, (int)size); - return ptr; -} - -void * kmem_zalloc_large(size_t size, xfs_km_flags_t flags) { void *ptr; @@ -128,14 +117,3 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } - -void * -kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) -{ - void *ptr; - - ptr = kmem_zone_alloc(zone, flags); - if (ptr) - memset((char *)ptr, 0, kmem_cache_size(zone)); - return ptr; -} diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 3a7371cab508..64db0e53edea 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -32,6 +32,7 @@ typedef unsigned __bitwise xfs_km_flags_t; #define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u) #define KM_NOFS ((__force xfs_km_flags_t)0x0004u) #define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u) +#define KM_ZERO ((__force xfs_km_flags_t)0x0010u) /* * We use a special process flag to avoid recursive callbacks into @@ -43,7 +44,7 @@ kmem_flags_convert(xfs_km_flags_t flags) { gfp_t lflags; - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_ZERO)); if (flags & KM_NOSLEEP) { lflags = GFP_ATOMIC | __GFP_NOWARN; @@ -52,11 +53,14 @@ kmem_flags_convert(xfs_km_flags_t flags) if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) lflags &= ~__GFP_FS; } + + if (flags & KM_ZERO) + lflags |= __GFP_ZERO; + return lflags; } extern void *kmem_alloc(size_t, xfs_km_flags_t); -extern void *kmem_zalloc(size_t, xfs_km_flags_t); extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); extern void kmem_free(const void *); @@ -64,6 +68,12 @@ extern void kmem_free(const void *); extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); +static inline void * +kmem_zalloc(size_t size, xfs_km_flags_t flags) +{ + return kmem_alloc(size, flags | KM_ZERO); +} + /* * Zone interfaces */ @@ -102,6 +112,11 @@ kmem_zone_destroy(kmem_zone_t *zone) } extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); -extern void *kmem_zone_zalloc(kmem_zone_t *, xfs_km_flags_t); + +static inline void * +kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) +{ + return kmem_zone_alloc(zone, flags | KM_ZERO); +} #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 0e2f37efedd0..370eb3e121d1 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -16,15 +16,15 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" +#include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" #include "xfs_ag.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_acl.h" +#include "xfs_attr.h" #include "xfs_trace.h" #include <linux/slab.h> #include <linux/xattr.h> diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 1cb740afd674..3fc109819c34 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -128,8 +128,6 @@ typedef struct xfs_agf { extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); -extern const struct xfs_buf_ops xfs_agf_buf_ops; - /* * Size of the unlinked inode hash table in the agi. */ @@ -191,8 +189,6 @@ typedef struct xfs_agi { extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); -extern const struct xfs_buf_ops xfs_agi_buf_ops; - /* * The third a.g. block contains the a.g. freelist, an array * of block pointers to blocks owned by the allocation btree code. diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 5a1393f5e020..9eab2dfdcbb5 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -17,25 +17,25 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_error.h" #include "xfs_cksum.h" #include "xfs_trace.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" +#include "xfs_log.h" struct workqueue_struct *xfs_alloc_wq; @@ -2294,6 +2294,8 @@ xfs_read_agf( { int error; + trace_xfs_read_agf(mp, agno); + ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, @@ -2324,8 +2326,9 @@ xfs_alloc_read_agf( struct xfs_perag *pag; /* per allocation group data */ int error; - ASSERT(agno != NULLAGNUMBER); + trace_xfs_alloc_read_agf(mp, agno); + ASSERT(agno != NULLAGNUMBER); error = xfs_read_agf(mp, tp, agno, (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, bpp); diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 99d0a6101558..feacb061bab7 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -231,7 +231,4 @@ xfs_alloc_get_rec( xfs_extlen_t *len, /* output: length of extent */ int *stat); /* output: success/failure */ -extern const struct xfs_buf_ops xfs_agf_buf_ops; -extern const struct xfs_buf_ops xfs_agfl_buf_ops; - #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index cafc90251d19..13085429e523 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -17,23 +17,21 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_trans.h" STATIC struct xfs_btree_cur * diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index e3a3f7424192..45e189e7e81c 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -27,39 +27,6 @@ struct xfs_btree_cur; struct xfs_mount; /* - * There are two on-disk btrees, one sorted by blockno and one sorted - * by blockcount and blockno. All blocks look the same to make the code - * simpler; if we have time later, we'll make the optimizations. - */ -#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */ -#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */ -#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */ -#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */ - -/* - * Data record/key structure - */ -typedef struct xfs_alloc_rec { - __be32 ar_startblock; /* starting block number */ - __be32 ar_blockcount; /* count of free blocks */ -} xfs_alloc_rec_t, xfs_alloc_key_t; - -typedef struct xfs_alloc_rec_incore { - xfs_agblock_t ar_startblock; /* starting block number */ - xfs_extlen_t ar_blockcount; /* count of free blocks */ -} xfs_alloc_rec_incore_t; - -/* btree pointer type */ -typedef __be32 xfs_alloc_ptr_t; - -/* - * Block numbers in the AG: - * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. - */ -#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) -#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) - -/* * Btree block header size depends on a superblock flag. */ #define XFS_ALLOC_BLOCK_LEN(mp) \ @@ -95,6 +62,4 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, xfs_agnumber_t, xfs_btnum_t); extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); -extern const struct xfs_buf_ops xfs_allocbt_buf_ops; - #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e51e581454e9..71c8c9d2b882 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -16,14 +16,15 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_log.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_trans.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -31,6 +32,8 @@ #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include <linux/aio.h> #include <linux/gfp.h> #include <linux/mpage.h> @@ -333,7 +336,7 @@ xfs_map_blocks( if (type == XFS_IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { - error = xfs_iomap_write_allocate(ip, offset, count, imap); + error = xfs_iomap_write_allocate(ip, offset, imap); if (!error) trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); return -XFS_ERROR(error); @@ -1569,8 +1572,7 @@ xfs_vm_write_begin( ASSERT(len <= PAGE_CACHE_SIZE); - page = grab_cache_page_write_begin(mapping, index, - flags | AOP_FLAG_NOFS); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index ddcf2267ffa6..b86127072ac3 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -17,23 +17,24 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" #include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_alloc.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_attr_remote.h" @@ -41,6 +42,7 @@ #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" +#include "xfs_dinode.h" /* * xfs_attr.c diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index bb24b07cbedb..09480c57f069 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -18,22 +18,20 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" +#include "xfs_inode.h" #include "xfs_alloc.h" -#include "xfs_btree.h" #include "xfs_attr_remote.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" @@ -41,7 +39,8 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trace.h" -#include "xfs_trans_priv.h" +#include "xfs_dinode.h" +#include "xfs_dir2.h" /* * Look at all the extents for this logical region, @@ -232,13 +231,13 @@ xfs_attr3_node_inactive( } node = bp->b_addr; - xfs_da3_node_hdr_from_disk(&ichdr, node); + dp->d_ops->node_hdr_from_disk(&ichdr, node); parent_blkno = bp->b_bn; if (!ichdr.count) { xfs_trans_brelse(*trans, bp); return 0; } - btree = xfs_da3_node_tree_p(node); + btree = dp->d_ops->node_tree_p(node); child_fsb = be32_to_cpu(btree[0].before); xfs_trans_brelse(*trans, bp); /* no locks for later trans */ diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 86db20a9cc02..7b126f46a2f9 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -18,32 +18,31 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" +#include "xfs_bmap_btree.h" #include "xfs_bmap.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_dinode.h" +#include "xfs_dir2.h" /* @@ -918,8 +917,8 @@ xfs_attr3_leaf_to_node( if (error) goto out; node = bp1->b_addr; - xfs_da3_node_hdr_from_disk(&icnodehdr, node); - btree = xfs_da3_node_tree_p(node); + dp->d_ops->node_hdr_from_disk(&icnodehdr, node); + btree = dp->d_ops->node_tree_p(node); leaf = bp2->b_addr; xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf); @@ -929,7 +928,7 @@ xfs_attr3_leaf_to_node( btree[0].hashval = entries[icleafhdr.count - 1].hashval; btree[0].before = cpu_to_be32(blkno); icnodehdr.count = 1; - xfs_da3_node_hdr_to_disk(node, &icnodehdr); + dp->d_ops->node_hdr_to_disk(node, &icnodehdr); xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1); error = 0; out: diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index c1022138c7e6..3ec5ec0b8678 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -19,16 +19,6 @@ #ifndef __XFS_ATTR_LEAF_H__ #define __XFS_ATTR_LEAF_H__ -/* - * Attribute storage layout, internal structure, access macros, etc. - * - * Attribute lists are structured around Btrees where all the data - * elements are in the leaf nodes. Attribute names are hashed into an int, - * then that int is used as the index into the Btree. Since the hashval - * of an attribute name may not be unique, we may have duplicate keys. The - * internal links in the Btree are logical block offsets into the file. - */ - struct attrlist; struct attrlist_cursor_kern; struct xfs_attr_list_context; @@ -38,226 +28,6 @@ struct xfs_da_state_blk; struct xfs_inode; struct xfs_trans; -/*======================================================================== - * Attribute structure when equal to XFS_LBSIZE(mp) bytes. - *========================================================================*/ - -/* - * This is the structure of the leaf nodes in the Btree. - * - * Struct leaf_entry's are packed from the top. Name/values grow from the - * bottom but are not packed. The freemap contains run-length-encoded entries - * for the free bytes after the leaf_entry's, but only the N largest such, - * smaller runs are dropped. When the freemap doesn't show enough space - * for an allocation, we compact the name/value area and try again. If we - * still don't have enough space, then we have to split the block. The - * name/value structs (both local and remote versions) must be 32bit aligned. - * - * Since we have duplicate hash keys, for each key that matches, compare - * the actual name string. The root and intermediate node search always - * takes the first-in-the-block key match found, so we should only have - * to work "forw"ard. If none matches, continue with the "forw"ard leaf - * nodes until the hash key changes or the attribute name is found. - * - * We store the fact that an attribute is a ROOT/USER/SECURE attribute in - * the leaf_entry. The namespaces are independent only because we also look - * at the namespace bit when we are looking for a matching attribute name. - * - * We also store an "incomplete" bit in the leaf_entry. It shows that an - * attribute is in the middle of being created and should not be shown to - * the user if we crash during the time that the bit is set. We clear the - * bit when we have finished setting up the attribute. We do this because - * we cannot create some large attributes inside a single transaction, and we - * need some indication that we weren't finished if we crash in the middle. - */ -#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ - -typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ - __be16 base; /* base of free region */ - __be16 size; /* length of free region */ -} xfs_attr_leaf_map_t; - -typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */ - xfs_da_blkinfo_t info; /* block type, links, etc. */ - __be16 count; /* count of active leaf_entry's */ - __be16 usedbytes; /* num bytes of names/values stored */ - __be16 firstused; /* first used byte in name area */ - __u8 holes; /* != 0 if blk needs compaction */ - __u8 pad1; - xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE]; - /* N largest free regions */ -} xfs_attr_leaf_hdr_t; - -typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ - __be32 hashval; /* hash value of name */ - __be16 nameidx; /* index into buffer of name/value */ - __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ - __u8 pad2; /* unused pad byte */ -} xfs_attr_leaf_entry_t; - -typedef struct xfs_attr_leaf_name_local { - __be16 valuelen; /* number of bytes in value */ - __u8 namelen; /* length of name bytes */ - __u8 nameval[1]; /* name/value bytes */ -} xfs_attr_leaf_name_local_t; - -typedef struct xfs_attr_leaf_name_remote { - __be32 valueblk; /* block number of value bytes */ - __be32 valuelen; /* number of bytes in value */ - __u8 namelen; /* length of name bytes */ - __u8 name[1]; /* name bytes */ -} xfs_attr_leaf_name_remote_t; - -typedef struct xfs_attr_leafblock { - xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ - xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ - xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ - xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ -} xfs_attr_leafblock_t; - -/* - * CRC enabled leaf structures. Called "version 3" structures to match the - * version number of the directory and dablk structures for this feature, and - * attr2 is already taken by the variable inode attribute fork size feature. - */ -struct xfs_attr3_leaf_hdr { - struct xfs_da3_blkinfo info; - __be16 count; - __be16 usedbytes; - __be16 firstused; - __u8 holes; - __u8 pad1; - struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; - __be32 pad2; /* 64 bit alignment */ -}; - -#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc)) - -struct xfs_attr3_leafblock { - struct xfs_attr3_leaf_hdr hdr; - struct xfs_attr_leaf_entry entries[1]; - - /* - * The rest of the block contains the following structures after the - * leaf entries, growing from the bottom up. The variables are never - * referenced, the locations accessed purely from helper functions. - * - * struct xfs_attr_leaf_name_local - * struct xfs_attr_leaf_name_remote - */ -}; - -/* - * incore, neutral version of the attribute leaf header - */ -struct xfs_attr3_icleaf_hdr { - __uint32_t forw; - __uint32_t back; - __uint16_t magic; - __uint16_t count; - __uint16_t usedbytes; - __uint16_t firstused; - __u8 holes; - struct { - __uint16_t base; - __uint16_t size; - } freemap[XFS_ATTR_LEAF_MAPSIZE]; -}; - -/* - * Flags used in the leaf_entry[i].flags field. - * NOTE: the INCOMPLETE bit must not collide with the flags bits specified - * on the system call, they are "or"ed together for various operations. - */ -#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ -#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ -#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */ -#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */ -#define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT) -#define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) -#define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT) -#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) - -/* - * Conversion macros for converting namespace bits from argument flags - * to ondisk flags. - */ -#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE) -#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) -#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK) -#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK) -#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ - ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) -#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ - ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) - -/* - * Alignment for namelist and valuelist entries (since they are mixed - * there can be only one alignment value) - */ -#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) - -static inline int -xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) -{ - if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) - return sizeof(struct xfs_attr3_leaf_hdr); - return sizeof(struct xfs_attr_leaf_hdr); -} - -static inline struct xfs_attr_leaf_entry * -xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp) -{ - if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) - return &((struct xfs_attr3_leafblock *)leafp)->entries[0]; - return &leafp->entries[0]; -} - -/* - * Cast typed pointers for "local" and "remote" name/value structs. - */ -static inline char * -xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx) -{ - struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp); - - return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)]; -} - -static inline xfs_attr_leaf_name_remote_t * -xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) -{ - return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx); -} - -static inline xfs_attr_leaf_name_local_t * -xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) -{ - return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx); -} - -/* - * Calculate total bytes used (including trailing pad for alignment) for - * a "local" name/value structure, a "remote" name/value structure, and - * a pointer which might be either. - */ -static inline int xfs_attr_leaf_entsize_remote(int nlen) -{ - return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ - XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); -} - -static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) -{ - return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + - XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); -} - -static inline int xfs_attr_leaf_entsize_local_max(int bsize) -{ - return (((bsize) >> 1) + ((bsize) >> 2)); -} - /* * Used to keep a list of "remote value" extents when unlinking an inode. */ @@ -336,6 +106,4 @@ void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to, void xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to, struct xfs_attr3_icleaf_hdr *from); -extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; - #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index cbc80d485177..2d174b128153 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -18,31 +18,29 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_dinode.h" +#include "xfs_dir2.h" STATIC int xfs_attr_shortform_compare(const void *a, const void *b) @@ -229,6 +227,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) struct xfs_da_node_entry *btree; int error, i; struct xfs_buf *bp; + struct xfs_inode *dp = context->dp; trace_xfs_attr_node_list(context); @@ -242,7 +241,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) */ bp = NULL; if (cursor->blkno > 0) { - error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1, + error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if ((error != 0) && (error != EFSCORRUPTED)) return(error); @@ -292,7 +291,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) for (;;) { __uint16_t magic; - error = xfs_da3_node_read(NULL, context->dp, + error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if (error) @@ -312,8 +311,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return XFS_ERROR(EFSCORRUPTED); } - xfs_da3_node_hdr_from_disk(&nodehdr, node); - btree = xfs_da3_node_tree_p(node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); for (i = 0; i < nodehdr.count; btree++, i++) { if (cursor->hashval <= be32_to_cpu(btree->hashval)) { @@ -349,8 +348,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) break; cursor->blkno = leafhdr.forw; xfs_trans_brelse(NULL, bp); - error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1, - &bp); + error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp); if (error) return error; } diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 712a502de619..739e0a52deda 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c @@ -18,20 +18,19 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_error.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_alloc.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -42,6 +41,7 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_buf_item.h" +#include "xfs_error.h" #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h index 92a8fd7977cc..5a9acfa156d7 100644 --- a/fs/xfs/xfs_attr_remote.h +++ b/fs/xfs/xfs_attr_remote.h @@ -18,35 +18,6 @@ #ifndef __XFS_ATTR_REMOTE_H__ #define __XFS_ATTR_REMOTE_H__ -#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */ - -/* - * There is one of these headers per filesystem block in a remote attribute. - * This is done to ensure there is a 1:1 mapping between the attribute value - * length and the number of blocks needed to store the attribute. This makes the - * verification of a buffer a little more complex, but greatly simplifies the - * allocation, reading and writing of these attributes as we don't have to guess - * the number of blocks needed to store the attribute data. - */ -struct xfs_attr3_rmt_hdr { - __be32 rm_magic; - __be32 rm_offset; - __be32 rm_bytes; - __be32 rm_crc; - uuid_t rm_uuid; - __be64 rm_owner; - __be64 rm_blkno; - __be64 rm_lsn; -}; - -#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc) - -#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \ - ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ - sizeof(struct xfs_attr3_rmt_hdr) : 0)) - -extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; - int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen); int xfs_attr_rmtval_get(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index 48228848f5ae..0e8885a59646 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -16,10 +16,8 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" +#include "xfs_log_format.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" /* * XFS bit manipulation routines, used in non-realtime code. diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index f47e65c30be6..3ef11b22e750 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -17,39 +17,37 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_mount.h" -#include "xfs_itable.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" #include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_attr_leaf.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_buf_item.h" -#include "xfs_filestream.h" #include "xfs_trace.h" #include "xfs_symlink.h" +#include "xfs_attr_leaf.h" +#include "xfs_dinode.h" +#include "xfs_filestream.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -1139,6 +1137,7 @@ xfs_bmap_add_attrfork( int committed; /* xaction was committed */ int logflags; /* logging flags */ int error; /* error return value */ + int cancel_flags = 0; ASSERT(XFS_IFORK_Q(ip) == 0); @@ -1149,19 +1148,20 @@ xfs_bmap_add_attrfork( if (rsvd) tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); - if (error) - goto error0; + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + cancel_flags = XFS_TRANS_RELEASE_LOG_RES; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); - if (error) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); - return error; - } + if (error) + goto trans_cancel; + cancel_flags |= XFS_TRANS_ABORT; if (XFS_IFORK_Q(ip)) - goto error1; + goto trans_cancel; if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { /* * For inodes coming from pre-6.2 filesystems. @@ -1171,7 +1171,7 @@ xfs_bmap_add_attrfork( } ASSERT(ip->i_d.di_anextents == 0); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); switch (ip->i_d.di_format) { @@ -1193,7 +1193,7 @@ xfs_bmap_add_attrfork( default: ASSERT(0); error = XFS_ERROR(EINVAL); - goto error1; + goto trans_cancel; } ASSERT(ip->i_afp == NULL); @@ -1221,7 +1221,7 @@ xfs_bmap_add_attrfork( if (logflags) xfs_trans_log_inode(tp, ip, logflags); if (error) - goto error2; + goto bmap_cancel; if (!xfs_sb_version_hasattr(&mp->m_sb) || (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { __int64_t sbfields = 0; @@ -1244,14 +1244,16 @@ xfs_bmap_add_attrfork( error = xfs_bmap_finish(&tp, &flist, &committed); if (error) - goto error2; - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -error2: + goto bmap_cancel; + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + +bmap_cancel: xfs_bmap_cancel(&flist); -error1: +trans_cancel: + xfs_trans_cancel(tp, cancel_flags); xfs_iunlock(ip, XFS_ILOCK_EXCL); -error0: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); return error; } @@ -1482,7 +1484,7 @@ xfs_bmap_search_extents( xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x lastx: %x\n", + "blkcnt: %llx extent-state: %x lastx: %x", (unsigned long long)ip->i_ino, (unsigned long long)gotp->br_startblock, (unsigned long long)gotp->br_startoff, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index bb8de8e399c4..706bc3f777cb 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -17,27 +17,26 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" -#include "xfs_itable.h" +#include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_dinode.h" /* * Determine the extent state. diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index e367461a638e..6e42e1e50b89 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -18,9 +18,6 @@ #ifndef __XFS_BMAP_BTREE_H__ #define __XFS_BMAP_BTREE_H__ -#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ -#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */ - struct xfs_btree_cur; struct xfs_btree_block; struct xfs_mount; @@ -28,85 +25,6 @@ struct xfs_inode; struct xfs_trans; /* - * Bmap root header, on-disk form only. - */ -typedef struct xfs_bmdr_block { - __be16 bb_level; /* 0 is a leaf */ - __be16 bb_numrecs; /* current # of data records */ -} xfs_bmdr_block_t; - -/* - * Bmap btree record and extent descriptor. - * l0:63 is an extent flag (value 1 indicates non-normal). - * l0:9-62 are startoff. - * l0:0-8 and l1:21-63 are startblock. - * l1:0-20 are blockcount. - */ -#define BMBT_EXNTFLAG_BITLEN 1 -#define BMBT_STARTOFF_BITLEN 54 -#define BMBT_STARTBLOCK_BITLEN 52 -#define BMBT_BLOCKCOUNT_BITLEN 21 - -typedef struct xfs_bmbt_rec { - __be64 l0, l1; -} xfs_bmbt_rec_t; - -typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ -typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; - -typedef struct xfs_bmbt_rec_host { - __uint64_t l0, l1; -} xfs_bmbt_rec_host_t; - -/* - * Values and macros for delayed-allocation startblock fields. - */ -#define STARTBLOCKVALBITS 17 -#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20) -#define DSTARTBLOCKMASKBITS (15 + 20) -#define STARTBLOCKMASK \ - (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) -#define DSTARTBLOCKMASK \ - (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) - -static inline int isnullstartblock(xfs_fsblock_t x) -{ - return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; -} - -static inline int isnulldstartblock(xfs_dfsbno_t x) -{ - return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; -} - -static inline xfs_fsblock_t nullstartblock(int k) -{ - ASSERT(k < (1 << STARTBLOCKVALBITS)); - return STARTBLOCKMASK | (k); -} - -static inline xfs_filblks_t startblockval(xfs_fsblock_t x) -{ - return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); -} - -/* - * Possible extent formats. - */ -typedef enum { - XFS_EXTFMT_NOSTATE = 0, - XFS_EXTFMT_HASSTATE -} xfs_exntfmt_t; - -/* - * Possible extent states. - */ -typedef enum { - XFS_EXT_NORM, XFS_EXT_UNWRITTEN, - XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID -} xfs_exntst_t; - -/* * Extent state and extent format macros. */ #define XFS_EXTFMT_INODE(x) \ @@ -115,27 +33,6 @@ typedef enum { #define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN) /* - * Incore version of above. - */ -typedef struct xfs_bmbt_irec -{ - xfs_fileoff_t br_startoff; /* starting file offset */ - xfs_fsblock_t br_startblock; /* starting block number */ - xfs_filblks_t br_blockcount; /* number of blocks */ - xfs_exntst_t br_state; /* extent state */ -} xfs_bmbt_irec_t; - -/* - * Key structure for non-leaf levels of the tree. - */ -typedef struct xfs_bmbt_key { - __be64 br_startoff; /* starting file offset */ -} xfs_bmbt_key_t, xfs_bmdr_key_t; - -/* btree pointer type */ -typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; - -/* * Btree block header size depends on a superblock flag. */ #define XFS_BMBT_BLOCK_LEN(mp) \ @@ -243,6 +140,4 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); -extern const struct xfs_buf_ops xfs_bmbt_buf_ops; - #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 97f952caea74..5887e41c0323 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -18,31 +18,31 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_trans.h" #include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_log.h" +#include "xfs_dinode.h" /* Kernel only BMAP related definitions and functions */ @@ -965,32 +965,12 @@ xfs_free_eofblocks( return error; } -/* - * xfs_alloc_file_space() - * This routine allocates disk space for the given file. - * - * If alloc_type == 0, this request is for an ALLOCSP type - * request which will change the file size. In this case, no - * DMAPI event will be generated by the call. A TRUNCATE event - * will be generated later by xfs_setattr. - * - * If alloc_type != 0, this request is for a RESVSP type - * request, and a DMAPI DM_EVENT_WRITE will be generated if the - * lower block boundary byte address is less than the file's - * length. - * - * RETURNS: - * 0 on success - * errno on error - * - */ -STATIC int +int xfs_alloc_file_space( - xfs_inode_t *ip, + struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len, - int alloc_type, - int attr_flags) + int alloc_type) { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; @@ -1232,24 +1212,11 @@ xfs_zero_remaining_bytes( return error; } -/* - * xfs_free_file_space() - * This routine frees disk space for the given file. - * - * This routine is only called by xfs_change_file_space - * for an UNRESVSP type call. - * - * RETURNS: - * 0 on success - * errno on error - * - */ -STATIC int +int xfs_free_file_space( - xfs_inode_t *ip, + struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len, - int attr_flags) + xfs_off_t len) { int committed; int done; @@ -1267,7 +1234,6 @@ xfs_free_file_space( int rt; xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; - int need_iolock = 1; mp = ip->i_mount; @@ -1284,20 +1250,15 @@ xfs_free_file_space( startoffset_fsb = XFS_B_TO_FSB(mp, offset); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); - if (attr_flags & XFS_ATTR_NOLOCK) - need_iolock = 0; - if (need_iolock) { - xfs_ilock(ip, XFS_IOLOCK_EXCL); - /* wait for the completion of any pending DIOs */ - inode_dio_wait(VFS_I(ip)); - } + /* wait for the completion of any pending DIOs */ + inode_dio_wait(VFS_I(ip)); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, -1); if (error) - goto out_unlock_iolock; + goto out; truncate_pagecache_range(VFS_I(ip), ioffset, -1); /* @@ -1311,7 +1272,7 @@ xfs_free_file_space( error = xfs_bmapi_read(ip, startoffset_fsb, 1, &imap, &nimap, 0); if (error) - goto out_unlock_iolock; + goto out; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { xfs_daddr_t block; @@ -1326,7 +1287,7 @@ xfs_free_file_space( error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1, &imap, &nimap, 0); if (error) - goto out_unlock_iolock; + goto out; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); @@ -1412,27 +1373,23 @@ xfs_free_file_space( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - out_unlock_iolock: - if (need_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + out: return error; error0: xfs_bmap_cancel(&free_list); error1: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); - xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : - XFS_ILOCK_EXCL); - return error; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + goto out; } -STATIC int +int xfs_zero_file_space( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len, - int attr_flags) + xfs_off_t len) { struct xfs_mount *mp = ip->i_mount; uint granularity; @@ -1453,9 +1410,6 @@ xfs_zero_file_space( ASSERT(start_boundary >= offset); ASSERT(end_boundary <= offset + len); - if (!(attr_flags & XFS_ATTR_NOLOCK)) - xfs_ilock(ip, XFS_IOLOCK_EXCL); - if (start_boundary < end_boundary - 1) { /* punch out the page cache over the conversion range */ truncate_pagecache_range(VFS_I(ip), start_boundary, @@ -1463,16 +1417,16 @@ xfs_zero_file_space( /* convert the blocks */ error = xfs_alloc_file_space(ip, start_boundary, end_boundary - start_boundary - 1, - XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT, - attr_flags); + XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT); if (error) - goto out_unlock; + goto out; /* We've handled the interior of the range, now for the edges */ - if (start_boundary != offset) + if (start_boundary != offset) { error = xfs_iozero(ip, offset, start_boundary - offset); - if (error) - goto out_unlock; + if (error) + goto out; + } if (end_boundary != offset + len) error = xfs_iozero(ip, end_boundary, @@ -1486,197 +1440,12 @@ xfs_zero_file_space( error = xfs_iozero(ip, offset, len); } -out_unlock: - if (!(attr_flags & XFS_ATTR_NOLOCK)) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); +out: return error; } /* - * xfs_change_file_space() - * This routine allocates or frees disk space for the given file. - * The user specified parameters are checked for alignment and size - * limitations. - * - * RETURNS: - * 0 on success - * errno on error - * - */ -int -xfs_change_file_space( - xfs_inode_t *ip, - int cmd, - xfs_flock64_t *bf, - xfs_off_t offset, - int attr_flags) -{ - xfs_mount_t *mp = ip->i_mount; - int clrprealloc; - int error; - xfs_fsize_t fsize; - int setprealloc; - xfs_off_t startoffset; - xfs_trans_t *tp; - struct iattr iattr; - - if (!S_ISREG(ip->i_d.di_mode)) - return XFS_ERROR(EINVAL); - - switch (bf->l_whence) { - case 0: /*SEEK_SET*/ - break; - case 1: /*SEEK_CUR*/ - bf->l_start += offset; - break; - case 2: /*SEEK_END*/ - bf->l_start += XFS_ISIZE(ip); - break; - default: - return XFS_ERROR(EINVAL); - } - - /* - * length of <= 0 for resv/unresv/zero is invalid. length for - * alloc/free is ignored completely and we have no idea what userspace - * might have set it to, so set it to zero to allow range - * checks to pass. - */ - switch (cmd) { - case XFS_IOC_ZERO_RANGE: - case XFS_IOC_RESVSP: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP: - case XFS_IOC_UNRESVSP64: - if (bf->l_len <= 0) - return XFS_ERROR(EINVAL); - break; - default: - bf->l_len = 0; - break; - } - - if (bf->l_start < 0 || - bf->l_start > mp->m_super->s_maxbytes || - bf->l_start + bf->l_len < 0 || - bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) - return XFS_ERROR(EINVAL); - - bf->l_whence = 0; - - startoffset = bf->l_start; - fsize = XFS_ISIZE(ip); - - setprealloc = clrprealloc = 0; - switch (cmd) { - case XFS_IOC_ZERO_RANGE: - error = xfs_zero_file_space(ip, startoffset, bf->l_len, - attr_flags); - if (error) - return error; - setprealloc = 1; - break; - - case XFS_IOC_RESVSP: - case XFS_IOC_RESVSP64: - error = xfs_alloc_file_space(ip, startoffset, bf->l_len, - XFS_BMAPI_PREALLOC, attr_flags); - if (error) - return error; - setprealloc = 1; - break; - - case XFS_IOC_UNRESVSP: - case XFS_IOC_UNRESVSP64: - if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, - attr_flags))) - return error; - break; - - case XFS_IOC_ALLOCSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP: - case XFS_IOC_FREESP64: - /* - * These operations actually do IO when extending the file, but - * the allocation is done seperately to the zeroing that is - * done. This set of operations need to be serialised against - * other IO operations, such as truncate and buffered IO. We - * need to take the IOLOCK here to serialise the allocation and - * zeroing IO to prevent other IOLOCK holders (e.g. getbmap, - * truncate, direct IO) from racing against the transient - * allocated but not written state we can have here. - */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); - if (startoffset > fsize) { - error = xfs_alloc_file_space(ip, fsize, - startoffset - fsize, 0, - attr_flags | XFS_ATTR_NOLOCK); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - break; - } - } - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = startoffset; - - error = xfs_setattr_size(ip, &iattr, - attr_flags | XFS_ATTR_NOLOCK); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - - if (error) - return error; - - clrprealloc = 1; - break; - - default: - ASSERT(0); - return XFS_ERROR(EINVAL); - } - - /* - * update the inode timestamp, mode, and prealloc flag bits - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - if ((attr_flags & XFS_ATTR_DMI) == 0) { - ip->i_d.di_mode &= ~S_ISUID; - - /* - * Note that we don't have to worry about mandatory - * file locking being disabled here because we only - * clear the S_ISGID bit if the Group execute bit is - * on, but if it was on then mandatory locking wouldn't - * have been enabled. - */ - if (ip->i_d.di_mode & S_IXGRP) - ip->i_d.di_mode &= ~S_ISGID; - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - } - if (setprealloc) - ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; - else if (clrprealloc) - ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (attr_flags & XFS_ATTR_SYNC) - xfs_trans_set_sync(tp); - return xfs_trans_commit(tp, 0); -} - -/* * We need to check that the format of the data fork in the temporary inode is * valid for the target inode before doing the swap. This is not a problem with * attr1 because of the fixed fork offset, but attr2 has a dynamically sized diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 061260946f7a..900747b25772 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -93,9 +93,12 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip, int *is_empty); /* preallocation and hole punch interface */ -int xfs_change_file_space(struct xfs_inode *ip, int cmd, - xfs_flock64_t *bf, xfs_off_t offset, - int attr_flags); +int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t len, int alloc_type); +int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t len); +int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t len); /* EOF block manipulation functions */ bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 5690e102243d..9adaae4f3e2f 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -17,18 +17,16 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_buf_item.h" #include "xfs_btree.h" diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 06729b67ad58..91e34f21bace 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -27,73 +27,6 @@ struct xfs_trans; extern kmem_zone_t *xfs_btree_cur_zone; /* - * This nonsense is to make -wlint happy. - */ -#define XFS_LOOKUP_EQ ((xfs_lookup_t)XFS_LOOKUP_EQi) -#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi) -#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi) - -#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi) -#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) -#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) -#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) - -/* - * Generic btree header. - * - * This is a combination of the actual format used on disk for short and long - * format btrees. The first three fields are shared by both format, but the - * pointers are different and should be used with care. - * - * To get the size of the actual short or long form headers please use the size - * macros below. Never use sizeof(xfs_btree_block). - * - * The blkno, crc, lsn, owner and uuid fields are only available in filesystems - * with the crc feature bit, and all accesses to them must be conditional on - * that flag. - */ -struct xfs_btree_block { - __be32 bb_magic; /* magic number for block type */ - __be16 bb_level; /* 0 is a leaf */ - __be16 bb_numrecs; /* current # of data records */ - union { - struct { - __be32 bb_leftsib; - __be32 bb_rightsib; - - __be64 bb_blkno; - __be64 bb_lsn; - uuid_t bb_uuid; - __be32 bb_owner; - __le32 bb_crc; - } s; /* short form pointers */ - struct { - __be64 bb_leftsib; - __be64 bb_rightsib; - - __be64 bb_blkno; - __be64 bb_lsn; - uuid_t bb_uuid; - __be64 bb_owner; - __le32 bb_crc; - __be32 bb_pad; /* padding for alignment */ - } l; /* long form pointers */ - } bb_u; /* rest */ -}; - -#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ -#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ - -/* sizes of CRC enabled btree blocks */ -#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40) -#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48) - -#define XFS_BTREE_SBLOCK_CRC_OFF \ - offsetof(struct xfs_btree_block, bb_u.s.bb_crc) -#define XFS_BTREE_LBLOCK_CRC_OFF \ - offsetof(struct xfs_btree_block, bb_u.l.bb_crc) - -/* * Generic key, ptr and record wrapper structures. * * These are disk format structures, and are converted where necessary @@ -119,6 +52,18 @@ union xfs_btree_rec { }; /* + * This nonsense is to make -wlint happy. + */ +#define XFS_LOOKUP_EQ ((xfs_lookup_t)XFS_LOOKUP_EQi) +#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi) +#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi) + +#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi) +#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) +#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) +#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) + +/* * For logging record fields. */ #define XFS_BB_MAGIC (1 << 0) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 263470075ea2..c7f0b77dcb00 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -34,12 +34,13 @@ #include <linux/backing-dev.h> #include <linux/freezer.h> -#include "xfs_sb.h" +#include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_log.h" +#include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_trace.h" +#include "xfs_log.h" static kmem_zone_t *xfs_buf_zone; @@ -590,7 +591,7 @@ found: error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { xfs_warn(target->bt_mount, - "%s: failed to map pages\n", __func__); + "%s: failed to map pagesn", __func__); xfs_buf_relse(bp); return NULL; } @@ -809,7 +810,7 @@ xfs_buf_get_uncached( error = _xfs_buf_map_pages(bp, 0); if (unlikely(error)) { xfs_warn(target->bt_mount, - "%s: failed to map pages\n", __func__); + "%s: failed to map pages", __func__); goto fail_free_mem; } @@ -1618,7 +1619,7 @@ xfs_setsize_buftarg_flags( bdevname(btp->bt_bdev, name); xfs_warn(btp->bt_mount, - "Cannot set_blocksize to %u on device %s\n", + "Cannot set_blocksize to %u on device %s", sectorsize, name); return EINVAL; } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index f1d85cfc0a54..a64f67ba25d3 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -17,17 +17,18 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_error.h" #include "xfs_trace.h" +#include "xfs_log.h" kmem_zone_t *xfs_buf_item_zone; @@ -808,7 +809,7 @@ xfs_buf_item_init( * Mark bytes first through last inclusive as dirty in the buf * item's bitmap. */ -void +static void xfs_buf_item_log_segment( struct xfs_buf_log_item *bip, uint first, diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index db6371087fe8..3f3455a41510 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -71,10 +71,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *, void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); -void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, - enum xfs_blft); -void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp); - extern kmem_zone_t *xfs_buf_item_zone; #endif /* __XFS_BUF_ITEM_H__ */ diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 20bf8e8002d6..796272a2e129 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -18,20 +18,20 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" @@ -129,56 +129,6 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } -void -xfs_da3_node_hdr_from_disk( - struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from) -{ - ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || - from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); - - if (from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { - struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; - - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->__count); - to->level = be16_to_cpu(hdr3->__level); - return; - } - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.__count); - to->level = be16_to_cpu(from->hdr.__level); -} - -void -xfs_da3_node_hdr_to_disk( - struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from) -{ - ASSERT(from->magic == XFS_DA_NODE_MAGIC || - from->magic == XFS_DA3_NODE_MAGIC); - - if (from->magic == XFS_DA3_NODE_MAGIC) { - struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; - - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->__count = cpu_to_be16(from->count); - hdr3->__level = cpu_to_be16(from->level); - return; - } - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.__count = cpu_to_be16(from->count); - to->hdr.__level = cpu_to_be16(from->level); -} - static bool xfs_da3_node_verify( struct xfs_buf *bp) @@ -186,8 +136,11 @@ xfs_da3_node_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_da_intnode *hdr = bp->b_addr; struct xfs_da3_icnode_hdr ichdr; + const struct xfs_dir_ops *ops; - xfs_da3_node_hdr_from_disk(&ichdr, hdr); + ops = xfs_dir_get_ops(mp, NULL); + + ops->node_hdr_from_disk(&ichdr, hdr); if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_node_hdr *hdr3 = bp->b_addr; @@ -354,11 +307,12 @@ xfs_da3_node_create( struct xfs_da3_icnode_hdr ichdr = {0}; struct xfs_buf *bp; int error; + struct xfs_inode *dp = args->dp; trace_xfs_da_node_create(args); ASSERT(level <= XFS_DA_NODE_MAXDEPTH); - error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); + error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); if (error) return(error); bp->b_ops = &xfs_da3_node_buf_ops; @@ -377,9 +331,9 @@ xfs_da3_node_create( } ichdr.level = level; - xfs_da3_node_hdr_to_disk(node, &ichdr); + dp->d_ops->node_hdr_to_disk(node, &ichdr); xfs_trans_log_buf(tp, bp, - XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); + XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); *bpp = bp; return(0); @@ -589,8 +543,8 @@ xfs_da3_root_split( oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { struct xfs_da3_icnode_hdr nodehdr; - xfs_da3_node_hdr_from_disk(&nodehdr, oldroot); - btree = xfs_da3_node_tree_p(oldroot); + dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); + btree = dp->d_ops->node_tree_p(oldroot); size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); level = nodehdr.level; @@ -604,8 +558,8 @@ xfs_da3_root_split( struct xfs_dir2_leaf_entry *ents; leaf = (xfs_dir2_leaf_t *)oldroot; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -649,14 +603,14 @@ xfs_da3_root_split( return error; node = bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); - btree = xfs_da3_node_tree_p(node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); btree[0].hashval = cpu_to_be32(blk1->hashval); btree[0].before = cpu_to_be32(blk1->blkno); btree[1].hashval = cpu_to_be32(blk2->hashval); btree[1].before = cpu_to_be32(blk2->blkno); nodehdr.count = 2; - xfs_da3_node_hdr_to_disk(node, &nodehdr); + dp->d_ops->node_hdr_to_disk(node, &nodehdr); #ifdef DEBUG if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || @@ -693,11 +647,12 @@ xfs_da3_node_split( int newcount; int error; int useextra; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_split(state->args); node = oldblk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); /* * With V2 dirs the extra block is data or freespace. @@ -744,7 +699,7 @@ xfs_da3_node_split( * If we had double-split op below us, then add the extra block too. */ node = oldblk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); if (oldblk->index <= nodehdr.count) { oldblk->index++; xfs_da3_node_add(state, oldblk, addblk); @@ -793,15 +748,16 @@ xfs_da3_node_rebalance( int count; int tmp; int swap = 0; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_rebalance(state->args); node1 = blk1->bp->b_addr; node2 = blk2->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr1, node1); - xfs_da3_node_hdr_from_disk(&nodehdr2, node2); - btree1 = xfs_da3_node_tree_p(node1); - btree2 = xfs_da3_node_tree_p(node2); + dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); + dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); + btree1 = dp->d_ops->node_tree_p(node1); + btree2 = dp->d_ops->node_tree_p(node2); /* * Figure out how many entries need to move, and in which direction. @@ -814,10 +770,10 @@ xfs_da3_node_rebalance( tmpnode = node1; node1 = node2; node2 = tmpnode; - xfs_da3_node_hdr_from_disk(&nodehdr1, node1); - xfs_da3_node_hdr_from_disk(&nodehdr2, node2); - btree1 = xfs_da3_node_tree_p(node1); - btree2 = xfs_da3_node_tree_p(node2); + dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); + dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); + btree1 = dp->d_ops->node_tree_p(node1); + btree2 = dp->d_ops->node_tree_p(node2); swap = 1; } @@ -879,15 +835,14 @@ xfs_da3_node_rebalance( /* * Log header of node 1 and all current bits of node 2. */ - xfs_da3_node_hdr_to_disk(node1, &nodehdr1); + dp->d_ops->node_hdr_to_disk(node1, &nodehdr1); xfs_trans_log_buf(tp, blk1->bp, - XFS_DA_LOGRANGE(node1, &node1->hdr, - xfs_da3_node_hdr_size(node1))); + XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size)); - xfs_da3_node_hdr_to_disk(node2, &nodehdr2); + dp->d_ops->node_hdr_to_disk(node2, &nodehdr2); xfs_trans_log_buf(tp, blk2->bp, XFS_DA_LOGRANGE(node2, &node2->hdr, - xfs_da3_node_hdr_size(node2) + + dp->d_ops->node_hdr_size + (sizeof(btree2[0]) * nodehdr2.count))); /* @@ -897,10 +852,10 @@ xfs_da3_node_rebalance( if (swap) { node1 = blk1->bp->b_addr; node2 = blk2->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr1, node1); - xfs_da3_node_hdr_from_disk(&nodehdr2, node2); - btree1 = xfs_da3_node_tree_p(node1); - btree2 = xfs_da3_node_tree_p(node2); + dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); + dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); + btree1 = dp->d_ops->node_tree_p(node1); + btree2 = dp->d_ops->node_tree_p(node2); } blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval); blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); @@ -927,12 +882,13 @@ xfs_da3_node_add( struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_node_entry *btree; int tmp; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_add(state->args); node = oldblk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); - btree = xfs_da3_node_tree_p(node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); ASSERT(newblk->blkno != 0); @@ -955,9 +911,9 @@ xfs_da3_node_add( tmp + sizeof(*btree))); nodehdr.count += 1; - xfs_da3_node_hdr_to_disk(node, &nodehdr); + dp->d_ops->node_hdr_to_disk(node, &nodehdr); xfs_trans_log_buf(state->args->trans, oldblk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); + XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); /* * Copy the last hash value from the oldblk to propagate upwards. @@ -1094,6 +1050,7 @@ xfs_da3_root_join( struct xfs_da3_icnode_hdr oldroothdr; struct xfs_da_node_entry *btree; int error; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_root_join(state->args); @@ -1101,7 +1058,7 @@ xfs_da3_root_join( args = state->args; oldroot = root_blk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&oldroothdr, oldroot); + dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot); ASSERT(oldroothdr.forw == 0); ASSERT(oldroothdr.back == 0); @@ -1115,10 +1072,10 @@ xfs_da3_root_join( * Read in the (only) child block, then copy those bytes into * the root block's buffer and free the original child block. */ - btree = xfs_da3_node_tree_p(oldroot); + btree = dp->d_ops->node_tree_p(oldroot); child = be32_to_cpu(btree[0].before); ASSERT(child != 0); - error = xfs_da3_node_read(args->trans, args->dp, child, -1, &bp, + error = xfs_da3_node_read(args->trans, dp, child, -1, &bp, args->whichfork); if (error) return error; @@ -1168,6 +1125,7 @@ xfs_da3_node_toosmall( int error; int retval; int i; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_toosmall(state->args); @@ -1179,7 +1137,7 @@ xfs_da3_node_toosmall( blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->b_addr; node = (xfs_da_intnode_t *)info; - xfs_da3_node_hdr_from_disk(&nodehdr, node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); if (nodehdr.count > (state->node_ents >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return(0); /* blk over 50%, don't try to join */ @@ -1231,13 +1189,13 @@ xfs_da3_node_toosmall( blkno = nodehdr.back; if (blkno == 0) continue; - error = xfs_da3_node_read(state->args->trans, state->args->dp, + error = xfs_da3_node_read(state->args->trans, dp, blkno, -1, &bp, state->args->whichfork); if (error) return(error); node = bp->b_addr; - xfs_da3_node_hdr_from_disk(&thdr, node); + dp->d_ops->node_hdr_from_disk(&thdr, node); xfs_trans_brelse(state->args->trans, bp); if (count - thdr.count >= 0) @@ -1275,6 +1233,7 @@ xfs_da3_node_toosmall( */ STATIC uint xfs_da3_node_lasthash( + struct xfs_inode *dp, struct xfs_buf *bp, int *count) { @@ -1283,12 +1242,12 @@ xfs_da3_node_lasthash( struct xfs_da3_icnode_hdr nodehdr; node = bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); if (count) *count = nodehdr.count; if (!nodehdr.count) return 0; - btree = xfs_da3_node_tree_p(node); + btree = dp->d_ops->node_tree_p(node); return be32_to_cpu(btree[nodehdr.count - 1].hashval); } @@ -1307,6 +1266,7 @@ xfs_da3_fixhashpath( xfs_dahash_t lasthash=0; int level; int count; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_fixhashpath(state->args); @@ -1319,12 +1279,12 @@ xfs_da3_fixhashpath( return; break; case XFS_DIR2_LEAFN_MAGIC: - lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count); + lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count); if (count == 0) return; break; case XFS_DA_NODE_MAGIC: - lasthash = xfs_da3_node_lasthash(blk->bp, &count); + lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count); if (count == 0) return; break; @@ -1333,8 +1293,8 @@ xfs_da3_fixhashpath( struct xfs_da3_icnode_hdr nodehdr; node = blk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); - btree = xfs_da3_node_tree_p(node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); if (be32_to_cpu(btree->hashval) == lasthash) break; blk->hashval = lasthash; @@ -1360,11 +1320,12 @@ xfs_da3_node_remove( struct xfs_da_node_entry *btree; int index; int tmp; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_remove(state->args); node = drop_blk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); ASSERT(drop_blk->index < nodehdr.count); ASSERT(drop_blk->index >= 0); @@ -1372,7 +1333,7 @@ xfs_da3_node_remove( * Copy over the offending entry, or just zero it out. */ index = drop_blk->index; - btree = xfs_da3_node_tree_p(node); + btree = dp->d_ops->node_tree_p(node); if (index < nodehdr.count - 1) { tmp = nodehdr.count - index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); @@ -1385,9 +1346,9 @@ xfs_da3_node_remove( xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); nodehdr.count -= 1; - xfs_da3_node_hdr_to_disk(node, &nodehdr); + dp->d_ops->node_hdr_to_disk(node, &nodehdr); xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); + XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); /* * Copy the last hash value from the block to propagate upwards. @@ -1414,15 +1375,16 @@ xfs_da3_node_unbalance( struct xfs_trans *tp; int sindex; int tmp; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_unbalance(state->args); drop_node = drop_blk->bp->b_addr; save_node = save_blk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&drop_hdr, drop_node); - xfs_da3_node_hdr_from_disk(&save_hdr, save_node); - drop_btree = xfs_da3_node_tree_p(drop_node); - save_btree = xfs_da3_node_tree_p(save_node); + dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node); + dp->d_ops->node_hdr_from_disk(&save_hdr, save_node); + drop_btree = dp->d_ops->node_tree_p(drop_node); + save_btree = dp->d_ops->node_tree_p(save_node); tp = state->args->trans; /* @@ -1456,10 +1418,10 @@ xfs_da3_node_unbalance( memcpy(&save_btree[sindex], &drop_btree[0], tmp); save_hdr.count += drop_hdr.count; - xfs_da3_node_hdr_to_disk(save_node, &save_hdr); + dp->d_ops->node_hdr_to_disk(save_node, &save_hdr); xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, &save_node->hdr, - xfs_da3_node_hdr_size(save_node))); + dp->d_ops->node_hdr_size)); /* * Save the last hashval in the remaining block for upward propagation. @@ -1501,6 +1463,7 @@ xfs_da3_node_lookup_int( int max; int error; int retval; + struct xfs_inode *dp = state->args->dp; args = state->args; @@ -1536,7 +1499,8 @@ xfs_da3_node_lookup_int( if (blk->magic == XFS_DIR2_LEAFN_MAGIC || blk->magic == XFS_DIR3_LEAFN_MAGIC) { blk->magic = XFS_DIR2_LEAFN_MAGIC; - blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); + blk->hashval = xfs_dir2_leafn_lasthash(args->dp, + blk->bp, NULL); break; } @@ -1547,8 +1511,8 @@ xfs_da3_node_lookup_int( * Search an intermediate node for a match. */ node = blk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); - btree = xfs_da3_node_tree_p(node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); max = nodehdr.count; blk->hashval = be32_to_cpu(btree[max - 1].hashval); @@ -1643,6 +1607,7 @@ xfs_da3_node_lookup_int( */ STATIC int xfs_da3_node_order( + struct xfs_inode *dp, struct xfs_buf *node1_bp, struct xfs_buf *node2_bp) { @@ -1655,10 +1620,10 @@ xfs_da3_node_order( node1 = node1_bp->b_addr; node2 = node2_bp->b_addr; - xfs_da3_node_hdr_from_disk(&node1hdr, node1); - xfs_da3_node_hdr_from_disk(&node2hdr, node2); - btree1 = xfs_da3_node_tree_p(node1); - btree2 = xfs_da3_node_tree_p(node2); + dp->d_ops->node_hdr_from_disk(&node1hdr, node1); + dp->d_ops->node_hdr_from_disk(&node2hdr, node2); + btree1 = dp->d_ops->node_tree_p(node1); + btree2 = dp->d_ops->node_tree_p(node2); if (node1hdr.count > 0 && node2hdr.count > 0 && ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || @@ -1685,6 +1650,7 @@ xfs_da3_blk_link( struct xfs_buf *bp; int before = 0; int error; + struct xfs_inode *dp = state->args->dp; /* * Set up environment. @@ -1702,10 +1668,10 @@ xfs_da3_blk_link( before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); break; case XFS_DIR2_LEAFN_MAGIC: - before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); + before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp); break; case XFS_DA_NODE_MAGIC: - before = xfs_da3_node_order(old_blk->bp, new_blk->bp); + before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp); break; } @@ -1720,7 +1686,7 @@ xfs_da3_blk_link( new_info->forw = cpu_to_be32(old_blk->blkno); new_info->back = old_info->back; if (old_info->back) { - error = xfs_da3_node_read(args->trans, args->dp, + error = xfs_da3_node_read(args->trans, dp, be32_to_cpu(old_info->back), -1, &bp, args->whichfork); if (error) @@ -1741,7 +1707,7 @@ xfs_da3_blk_link( new_info->forw = old_info->forw; new_info->back = cpu_to_be32(old_blk->blkno); if (old_info->forw) { - error = xfs_da3_node_read(args->trans, args->dp, + error = xfs_da3_node_read(args->trans, dp, be32_to_cpu(old_info->forw), -1, &bp, args->whichfork); if (error) @@ -1861,6 +1827,7 @@ xfs_da3_path_shift( xfs_dablk_t blkno = 0; int level; int error; + struct xfs_inode *dp = state->args->dp; trace_xfs_da_path_shift(state->args); @@ -1876,8 +1843,8 @@ xfs_da3_path_shift( level = (path->active-1) - 1; /* skip bottom layer in path */ for (blk = &path->blk[level]; level >= 0; blk--, level--) { node = blk->bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); - btree = xfs_da3_node_tree_p(node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); if (forward && (blk->index < nodehdr.count - 1)) { blk->index++; @@ -1911,7 +1878,7 @@ xfs_da3_path_shift( * Read the next child block. */ blk->blkno = blkno; - error = xfs_da3_node_read(args->trans, args->dp, blkno, -1, + error = xfs_da3_node_read(args->trans, dp, blkno, -1, &blk->bp, args->whichfork); if (error) return(error); @@ -1933,8 +1900,8 @@ xfs_da3_path_shift( case XFS_DA3_NODE_MAGIC: blk->magic = XFS_DA_NODE_MAGIC; node = (xfs_da_intnode_t *)info; - xfs_da3_node_hdr_from_disk(&nodehdr, node); - btree = xfs_da3_node_tree_p(node); + dp->d_ops->node_hdr_from_disk(&nodehdr, node); + btree = dp->d_ops->node_tree_p(node); blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); if (forward) blk->index = 0; @@ -1947,16 +1914,15 @@ xfs_da3_path_shift( blk->magic = XFS_ATTR_LEAF_MAGIC; ASSERT(level == path->active-1); blk->index = 0; - blk->hashval = xfs_attr_leaf_lasthash(blk->bp, - NULL); + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); break; case XFS_DIR2_LEAFN_MAGIC: case XFS_DIR3_LEAFN_MAGIC: blk->magic = XFS_DIR2_LEAFN_MAGIC; ASSERT(level == path->active-1); blk->index = 0; - blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, - NULL); + blk->hashval = xfs_dir2_leafn_lasthash(args->dp, + blk->bp, NULL); break; default: ASSERT(0); @@ -2163,7 +2129,7 @@ xfs_da3_swap_lastblock( struct xfs_dir2_leaf *dead_leaf2; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr par_hdr; - struct xfs_inode *ip; + struct xfs_inode *dp; struct xfs_trans *tp; struct xfs_mount *mp; struct xfs_buf *dead_buf; @@ -2187,12 +2153,12 @@ xfs_da3_swap_lastblock( dead_buf = *dead_bufp; dead_blkno = *dead_blknop; tp = args->trans; - ip = args->dp; + dp = args->dp; w = args->whichfork; ASSERT(w == XFS_DATA_FORK); - mp = ip->i_mount; + mp = dp->i_mount; lastoff = mp->m_dirfreeblk; - error = xfs_bmap_last_before(tp, ip, &lastoff, w); + error = xfs_bmap_last_before(tp, dp, &lastoff, w); if (error) return error; if (unlikely(lastoff == 0)) { @@ -2204,7 +2170,7 @@ xfs_da3_swap_lastblock( * Read the last block in the btree space. */ last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; - error = xfs_da3_node_read(tp, ip, last_blkno, -1, &last_buf, w); + error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); if (error) return error; /* @@ -2222,16 +2188,16 @@ xfs_da3_swap_lastblock( struct xfs_dir2_leaf_entry *ents; dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, dead_leaf2); - ents = xfs_dir3_leaf_ents_p(dead_leaf2); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); + ents = dp->d_ops->leaf_ents_p(dead_leaf2); dead_level = 0; dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); } else { struct xfs_da3_icnode_hdr deadhdr; dead_node = (xfs_da_intnode_t *)dead_info; - xfs_da3_node_hdr_from_disk(&deadhdr, dead_node); - btree = xfs_da3_node_tree_p(dead_node); + dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); + btree = dp->d_ops->node_tree_p(dead_node); dead_level = deadhdr.level; dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); } @@ -2240,7 +2206,7 @@ xfs_da3_swap_lastblock( * If the moved block has a left sibling, fix up the pointers. */ if ((sib_blkno = be32_to_cpu(dead_info->back))) { - error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); + error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); if (error) goto done; sib_info = sib_buf->b_addr; @@ -2262,7 +2228,7 @@ xfs_da3_swap_lastblock( * If the moved block has a right sibling, fix up the pointers. */ if ((sib_blkno = be32_to_cpu(dead_info->forw))) { - error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); + error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); if (error) goto done; sib_info = sib_buf->b_addr; @@ -2286,11 +2252,11 @@ xfs_da3_swap_lastblock( * Walk down the tree looking for the parent of the moved block. */ for (;;) { - error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w); + error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); if (error) goto done; par_node = par_buf->b_addr; - xfs_da3_node_hdr_from_disk(&par_hdr, par_node); + dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); if (level >= 0 && level != par_hdr.level + 1) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); @@ -2298,7 +2264,7 @@ xfs_da3_swap_lastblock( goto done; } level = par_hdr.level; - btree = xfs_da3_node_tree_p(par_node); + btree = dp->d_ops->node_tree_p(par_node); for (entno = 0; entno < par_hdr.count && be32_to_cpu(btree[entno].hashval) < dead_hash; @@ -2337,18 +2303,18 @@ xfs_da3_swap_lastblock( error = XFS_ERROR(EFSCORRUPTED); goto done; } - error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w); + error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); if (error) goto done; par_node = par_buf->b_addr; - xfs_da3_node_hdr_from_disk(&par_hdr, par_node); + dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); if (par_hdr.level != level) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); goto done; } - btree = xfs_da3_node_tree_p(par_node); + btree = dp->d_ops->node_tree_p(par_node); entno = 0; } /* diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index b1f267995dea..6e95ea79f5d7 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -23,146 +23,7 @@ struct xfs_bmap_free; struct xfs_inode; struct xfs_trans; struct zone; - -/*======================================================================== - * Directory Structure when greater than XFS_LBSIZE(mp) bytes. - *========================================================================*/ - -/* - * This structure is common to both leaf nodes and non-leaf nodes in the Btree. - * - * It is used to manage a doubly linked list of all blocks at the same - * level in the Btree, and to identify which type of block this is. - */ -#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ -#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ -#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ -#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ - -typedef struct xfs_da_blkinfo { - __be32 forw; /* previous block in list */ - __be32 back; /* following block in list */ - __be16 magic; /* validity check on block */ - __be16 pad; /* unused */ -} xfs_da_blkinfo_t; - -/* - * CRC enabled directory structure types - * - * The headers change size for the additional verification information, but - * otherwise the tree layouts and contents are unchanged. Hence the da btree - * code can use the struct xfs_da_blkinfo for manipulating the tree links and - * magic numbers without modification for both v2 and v3 nodes. - */ -#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */ -#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */ -#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */ -#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */ - -struct xfs_da3_blkinfo { - /* - * the node link manipulation code relies on the fact that the first - * element of this structure is the struct xfs_da_blkinfo so it can - * ignore the differences in the rest of the structures. - */ - struct xfs_da_blkinfo hdr; - __be32 crc; /* CRC of block */ - __be64 blkno; /* first block of the buffer */ - __be64 lsn; /* sequence number of last write */ - uuid_t uuid; /* filesystem we belong to */ - __be64 owner; /* inode that owns the block */ -}; - -/* - * This is the structure of the root and intermediate nodes in the Btree. - * The leaf nodes are defined above. - * - * Entries are not packed. - * - * Since we have duplicate keys, use a binary search but always follow - * all match in the block, not just the first match found. - */ -#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ - -typedef struct xfs_da_node_hdr { - struct xfs_da_blkinfo info; /* block type, links, etc. */ - __be16 __count; /* count of active entries */ - __be16 __level; /* level above leaves (leaf == 0) */ -} xfs_da_node_hdr_t; - -struct xfs_da3_node_hdr { - struct xfs_da3_blkinfo info; /* block type, links, etc. */ - __be16 __count; /* count of active entries */ - __be16 __level; /* level above leaves (leaf == 0) */ - __be32 __pad32; -}; - -#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc)) - -typedef struct xfs_da_node_entry { - __be32 hashval; /* hash value for this descendant */ - __be32 before; /* Btree block before this key */ -} xfs_da_node_entry_t; - -typedef struct xfs_da_intnode { - struct xfs_da_node_hdr hdr; - struct xfs_da_node_entry __btree[]; -} xfs_da_intnode_t; - -struct xfs_da3_intnode { - struct xfs_da3_node_hdr hdr; - struct xfs_da_node_entry __btree[]; -}; - -/* - * In-core version of the node header to abstract the differences in the v2 and - * v3 disk format of the headers. Callers need to convert to/from disk format as - * appropriate. - */ -struct xfs_da3_icnode_hdr { - __uint32_t forw; - __uint32_t back; - __uint16_t magic; - __uint16_t count; - __uint16_t level; -}; - -extern void xfs_da3_node_hdr_from_disk(struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from); -extern void xfs_da3_node_hdr_to_disk(struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from); - -static inline int -__xfs_da3_node_hdr_size(bool v3) -{ - if (v3) - return sizeof(struct xfs_da3_node_hdr); - return sizeof(struct xfs_da_node_hdr); -} -static inline int -xfs_da3_node_hdr_size(struct xfs_da_intnode *dap) -{ - bool v3 = dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC); - - return __xfs_da3_node_hdr_size(v3); -} - -static inline struct xfs_da_node_entry * -xfs_da3_node_tree_p(struct xfs_da_intnode *dap) -{ - if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { - struct xfs_da3_intnode *dap3 = (struct xfs_da3_intnode *)dap; - return dap3->__btree; - } - return dap->__btree; -} - -extern void xfs_da3_intnode_from_disk(struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from); -extern void xfs_da3_intnode_to_disk(struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from); - -#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize +struct xfs_dir_ops; /*======================================================================== * Btree searching and modification structure definitions. @@ -309,8 +170,6 @@ int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp, int which_fork); -extern const struct xfs_buf_ops xfs_da3_node_buf_ops; - /* * Utility routines. */ diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/xfs_da_format.c new file mode 100644 index 000000000000..e6c83e1fbc8a --- /dev/null +++ b/fs/xfs/xfs_da_format.c @@ -0,0 +1,907 @@ +/* + * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_inode.h" +#include "xfs_dir2.h" + +/* + * Shortform directory ops + */ +static int +xfs_dir2_sf_entsize( + struct xfs_dir2_sf_hdr *hdr, + int len) +{ + int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ + + count += len; /* name */ + count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : + sizeof(xfs_dir2_ino4_t); /* ino # */ + return count; +} + +static int +xfs_dir3_sf_entsize( + struct xfs_dir2_sf_hdr *hdr, + int len) +{ + return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t); +} + +static struct xfs_dir2_sf_entry * +xfs_dir2_sf_nextentry( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); +} + +static struct xfs_dir2_sf_entry * +xfs_dir3_sf_nextentry( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen)); +} + + +/* + * For filetype enabled shortform directories, the file type field is stored at + * the end of the name. Because it's only a single byte, endian conversion is + * not necessary. For non-filetype enable directories, the type is always + * unknown and we never store the value. + */ +static __uint8_t +xfs_dir2_sfe_get_ftype( + struct xfs_dir2_sf_entry *sfep) +{ + return XFS_DIR3_FT_UNKNOWN; +} + +static void +xfs_dir2_sfe_put_ftype( + struct xfs_dir2_sf_entry *sfep, + __uint8_t ftype) +{ + ASSERT(ftype < XFS_DIR3_FT_MAX); +} + +static __uint8_t +xfs_dir3_sfe_get_ftype( + struct xfs_dir2_sf_entry *sfep) +{ + __uint8_t ftype; + + ftype = sfep->name[sfep->namelen]; + if (ftype >= XFS_DIR3_FT_MAX) + return XFS_DIR3_FT_UNKNOWN; + return ftype; +} + +static void +xfs_dir3_sfe_put_ftype( + struct xfs_dir2_sf_entry *sfep, + __uint8_t ftype) +{ + ASSERT(ftype < XFS_DIR3_FT_MAX); + + sfep->name[sfep->namelen] = ftype; +} + +/* + * Inode numbers in short-form directories can come in two versions, + * either 4 bytes or 8 bytes wide. These helpers deal with the + * two forms transparently by looking at the headers i8count field. + * + * For 64-bit inode number the most significant byte must be zero. + */ +static xfs_ino_t +xfs_dir2_sf_get_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *from) +{ + if (hdr->i8count) + return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; + else + return get_unaligned_be32(&from->i4.i); +} + +static void +xfs_dir2_sf_put_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *to, + xfs_ino_t ino) +{ + ASSERT((ino & 0xff00000000000000ULL) == 0); + + if (hdr->i8count) + put_unaligned_be64(ino, &to->i8.i); + else + put_unaligned_be32(ino, &to->i4.i); +} + +static xfs_ino_t +xfs_dir2_sf_get_parent_ino( + struct xfs_dir2_sf_hdr *hdr) +{ + return xfs_dir2_sf_get_ino(hdr, &hdr->parent); +} + +static void +xfs_dir2_sf_put_parent_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); +} + +/* + * In short-form directory entries the inode numbers are stored at variable + * offset behind the entry name. If the entry stores a filetype value, then it + * sits between the name and the inode number. Hence the inode numbers may only + * be accessed through the helpers below. + */ +static xfs_ino_t +xfs_dir2_sfe_get_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return xfs_dir2_sf_get_ino(hdr, + (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]); +} + +static void +xfs_dir2_sfe_put_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, + (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino); +} + +static xfs_ino_t +xfs_dir3_sfe_get_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return xfs_dir2_sf_get_ino(hdr, + (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]); +} + +static void +xfs_dir3_sfe_put_ino( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, + (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino); +} + + +/* + * Directory data block operations + */ + +/* + * For special situations, the dirent size ends up fixed because we always know + * what the size of the entry is. That's true for the "." and "..", and + * therefore we know that they are a fixed size and hence their offsets are + * constant, as is the first entry. + * + * Hence, this calculation is written as a macro to be able to be calculated at + * compile time and so certain offsets can be calculated directly in the + * structure initaliser via the macro. There are two macros - one for dirents + * with ftype and without so there are no unresolvable conditionals in the + * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power + * of 2 and the compiler doesn't reject it (unlike roundup()). + */ +#define XFS_DIR2_DATA_ENTSIZE(n) \ + round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ + sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN) + +#define XFS_DIR3_DATA_ENTSIZE(n) \ + round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ + sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \ + XFS_DIR2_DATA_ALIGN) + +static int +xfs_dir2_data_entsize( + int n) +{ + return XFS_DIR2_DATA_ENTSIZE(n); +} + +static int +xfs_dir3_data_entsize( + int n) +{ + return XFS_DIR3_DATA_ENTSIZE(n); +} + +static __uint8_t +xfs_dir2_data_get_ftype( + struct xfs_dir2_data_entry *dep) +{ + return XFS_DIR3_FT_UNKNOWN; +} + +static void +xfs_dir2_data_put_ftype( + struct xfs_dir2_data_entry *dep, + __uint8_t ftype) +{ + ASSERT(ftype < XFS_DIR3_FT_MAX); +} + +static __uint8_t +xfs_dir3_data_get_ftype( + struct xfs_dir2_data_entry *dep) +{ + __uint8_t ftype = dep->name[dep->namelen]; + + ASSERT(ftype < XFS_DIR3_FT_MAX); + if (ftype >= XFS_DIR3_FT_MAX) + return XFS_DIR3_FT_UNKNOWN; + return ftype; +} + +static void +xfs_dir3_data_put_ftype( + struct xfs_dir2_data_entry *dep, + __uint8_t type) +{ + ASSERT(type < XFS_DIR3_FT_MAX); + ASSERT(dep->namelen != 0); + + dep->name[dep->namelen] = type; +} + +/* + * Pointer to an entry's tag word. + */ +static __be16 * +xfs_dir2_data_entry_tag_p( + struct xfs_dir2_data_entry *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); +} + +static __be16 * +xfs_dir3_data_entry_tag_p( + struct xfs_dir2_data_entry *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16)); +} + +/* + * location of . and .. in data space (always block 0) + */ +static struct xfs_dir2_data_entry * +xfs_dir2_data_dot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_dotdot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR2_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_first_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR2_DATA_ENTSIZE(1) + + XFS_DIR2_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_ftype_data_dotdot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_ftype_data_first_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1) + + XFS_DIR3_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_dot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_dotdot_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_first_entry_p( + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1) + + XFS_DIR3_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_free * +xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) +{ + return hdr->bestfree; +} + +static struct xfs_dir2_data_free * +xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) +{ + return ((struct xfs_dir3_data_hdr *)hdr)->best_free; +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_unused * +xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_unused *) + ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + +static struct xfs_dir2_data_unused * +xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_unused *) + ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + + +/* + * Directory Leaf block operations + */ +static int +xfs_dir2_max_leaf_ents(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - sizeof(struct xfs_dir2_leaf_hdr)) / + (uint)sizeof(struct xfs_dir2_leaf_entry); +} + +static struct xfs_dir2_leaf_entry * +xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) +{ + return lp->__ents; +} + +static int +xfs_dir3_max_leaf_ents(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - sizeof(struct xfs_dir3_leaf_hdr)) / + (uint)sizeof(struct xfs_dir2_leaf_entry); +} + +static struct xfs_dir2_leaf_entry * +xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) +{ + return ((struct xfs_dir3_leaf *)lp)->__ents; +} + +static void +xfs_dir2_leaf_hdr_from_disk( + struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from) +{ + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.count); + to->stale = be16_to_cpu(from->hdr.stale); + + ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || + to->magic == XFS_DIR2_LEAFN_MAGIC); +} + +static void +xfs_dir2_leaf_hdr_to_disk( + struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from) +{ + ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || + from->magic == XFS_DIR2_LEAFN_MAGIC); + + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.count = cpu_to_be16(from->count); + to->hdr.stale = cpu_to_be16(from->stale); +} + +static void +xfs_dir3_leaf_hdr_from_disk( + struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from) +{ + struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; + + to->forw = be32_to_cpu(hdr3->info.hdr.forw); + to->back = be32_to_cpu(hdr3->info.hdr.back); + to->magic = be16_to_cpu(hdr3->info.hdr.magic); + to->count = be16_to_cpu(hdr3->count); + to->stale = be16_to_cpu(hdr3->stale); + + ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC || + to->magic == XFS_DIR3_LEAFN_MAGIC); +} + +static void +xfs_dir3_leaf_hdr_to_disk( + struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from) +{ + struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; + + ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || + from->magic == XFS_DIR3_LEAFN_MAGIC); + + hdr3->info.hdr.forw = cpu_to_be32(from->forw); + hdr3->info.hdr.back = cpu_to_be32(from->back); + hdr3->info.hdr.magic = cpu_to_be16(from->magic); + hdr3->count = cpu_to_be16(from->count); + hdr3->stale = cpu_to_be16(from->stale); +} + + +/* + * Directory/Attribute Node block operations + */ +static struct xfs_da_node_entry * +xfs_da2_node_tree_p(struct xfs_da_intnode *dap) +{ + return dap->__btree; +} + +static struct xfs_da_node_entry * +xfs_da3_node_tree_p(struct xfs_da_intnode *dap) +{ + return ((struct xfs_da3_intnode *)dap)->__btree; +} + +static void +xfs_da2_node_hdr_from_disk( + struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from) +{ + ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.__count); + to->level = be16_to_cpu(from->hdr.__level); +} + +static void +xfs_da2_node_hdr_to_disk( + struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from) +{ + ASSERT(from->magic == XFS_DA_NODE_MAGIC); + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.__count = cpu_to_be16(from->count); + to->hdr.__level = cpu_to_be16(from->level); +} + +static void +xfs_da3_node_hdr_from_disk( + struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from) +{ + struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; + + ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); + to->forw = be32_to_cpu(hdr3->info.hdr.forw); + to->back = be32_to_cpu(hdr3->info.hdr.back); + to->magic = be16_to_cpu(hdr3->info.hdr.magic); + to->count = be16_to_cpu(hdr3->__count); + to->level = be16_to_cpu(hdr3->__level); +} + +static void +xfs_da3_node_hdr_to_disk( + struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from) +{ + struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; + + ASSERT(from->magic == XFS_DA3_NODE_MAGIC); + hdr3->info.hdr.forw = cpu_to_be32(from->forw); + hdr3->info.hdr.back = cpu_to_be32(from->back); + hdr3->info.hdr.magic = cpu_to_be16(from->magic); + hdr3->__count = cpu_to_be16(from->count); + hdr3->__level = cpu_to_be16(from->level); +} + + +/* + * Directory free space block operations + */ +static int +xfs_dir2_free_max_bests(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / + sizeof(xfs_dir2_data_off_t); +} + +static __be16 * +xfs_dir2_free_bests_p(struct xfs_dir2_free *free) +{ + return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr)); +} + +/* + * Convert data space db to the corresponding free db. + */ +static xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static int +xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return db % xfs_dir2_free_max_bests(mp); +} + +static int +xfs_dir3_free_max_bests(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - sizeof(struct xfs_dir3_free_hdr)) / + sizeof(xfs_dir2_data_off_t); +} + +static __be16 * +xfs_dir3_free_bests_p(struct xfs_dir2_free *free) +{ + return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr)); +} + +/* + * Convert data space db to the corresponding free db. + */ +static xfs_dir2_db_t +xfs_dir3_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static int +xfs_dir3_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return db % xfs_dir3_free_max_bests(mp); +} + +static void +xfs_dir2_free_hdr_from_disk( + struct xfs_dir3_icfree_hdr *to, + struct xfs_dir2_free *from) +{ + to->magic = be32_to_cpu(from->hdr.magic); + to->firstdb = be32_to_cpu(from->hdr.firstdb); + to->nvalid = be32_to_cpu(from->hdr.nvalid); + to->nused = be32_to_cpu(from->hdr.nused); + ASSERT(to->magic == XFS_DIR2_FREE_MAGIC); +} + +static void +xfs_dir2_free_hdr_to_disk( + struct xfs_dir2_free *to, + struct xfs_dir3_icfree_hdr *from) +{ + ASSERT(from->magic == XFS_DIR2_FREE_MAGIC); + + to->hdr.magic = cpu_to_be32(from->magic); + to->hdr.firstdb = cpu_to_be32(from->firstdb); + to->hdr.nvalid = cpu_to_be32(from->nvalid); + to->hdr.nused = cpu_to_be32(from->nused); +} + +static void +xfs_dir3_free_hdr_from_disk( + struct xfs_dir3_icfree_hdr *to, + struct xfs_dir2_free *from) +{ + struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; + + to->magic = be32_to_cpu(hdr3->hdr.magic); + to->firstdb = be32_to_cpu(hdr3->firstdb); + to->nvalid = be32_to_cpu(hdr3->nvalid); + to->nused = be32_to_cpu(hdr3->nused); + + ASSERT(to->magic == XFS_DIR3_FREE_MAGIC); +} + +static void +xfs_dir3_free_hdr_to_disk( + struct xfs_dir2_free *to, + struct xfs_dir3_icfree_hdr *from) +{ + struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; + + ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); + + hdr3->hdr.magic = cpu_to_be32(from->magic); + hdr3->firstdb = cpu_to_be32(from->firstdb); + hdr3->nvalid = cpu_to_be32(from->nvalid); + hdr3->nused = cpu_to_be32(from->nused); +} + +static const struct xfs_dir_ops xfs_dir2_ops = { + .sf_entsize = xfs_dir2_sf_entsize, + .sf_nextentry = xfs_dir2_sf_nextentry, + .sf_get_ftype = xfs_dir2_sfe_get_ftype, + .sf_put_ftype = xfs_dir2_sfe_put_ftype, + .sf_get_ino = xfs_dir2_sfe_get_ino, + .sf_put_ino = xfs_dir2_sfe_put_ino, + .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, + .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + + .data_entsize = xfs_dir2_data_entsize, + .data_get_ftype = xfs_dir2_data_get_ftype, + .data_put_ftype = xfs_dir2_data_put_ftype, + .data_entry_tag_p = xfs_dir2_data_entry_tag_p, + .data_bestfree_p = xfs_dir2_data_bestfree_p, + + .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), + .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR2_DATA_ENTSIZE(1), + .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR2_DATA_ENTSIZE(1) + + XFS_DIR2_DATA_ENTSIZE(2), + .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), + + .data_dot_entry_p = xfs_dir2_data_dot_entry_p, + .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p, + .data_first_entry_p = xfs_dir2_data_first_entry_p, + .data_entry_p = xfs_dir2_data_entry_p, + .data_unused_p = xfs_dir2_data_unused_p, + + .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), + .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, + .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, + .leaf_max_ents = xfs_dir2_max_leaf_ents, + .leaf_ents_p = xfs_dir2_leaf_ents_p, + + .node_hdr_size = sizeof(struct xfs_da_node_hdr), + .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, + .node_tree_p = xfs_da2_node_tree_p, + + .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), + .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, + .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, + .free_max_bests = xfs_dir2_free_max_bests, + .free_bests_p = xfs_dir2_free_bests_p, + .db_to_fdb = xfs_dir2_db_to_fdb, + .db_to_fdindex = xfs_dir2_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir2_ftype_ops = { + .sf_entsize = xfs_dir3_sf_entsize, + .sf_nextentry = xfs_dir3_sf_nextentry, + .sf_get_ftype = xfs_dir3_sfe_get_ftype, + .sf_put_ftype = xfs_dir3_sfe_put_ftype, + .sf_get_ino = xfs_dir3_sfe_get_ino, + .sf_put_ino = xfs_dir3_sfe_put_ino, + .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, + .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + + .data_entsize = xfs_dir3_data_entsize, + .data_get_ftype = xfs_dir3_data_get_ftype, + .data_put_ftype = xfs_dir3_data_put_ftype, + .data_entry_tag_p = xfs_dir3_data_entry_tag_p, + .data_bestfree_p = xfs_dir2_data_bestfree_p, + + .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), + .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1), + .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1) + + XFS_DIR3_DATA_ENTSIZE(2), + .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), + + .data_dot_entry_p = xfs_dir2_data_dot_entry_p, + .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p, + .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p, + .data_entry_p = xfs_dir2_data_entry_p, + .data_unused_p = xfs_dir2_data_unused_p, + + .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), + .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, + .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, + .leaf_max_ents = xfs_dir2_max_leaf_ents, + .leaf_ents_p = xfs_dir2_leaf_ents_p, + + .node_hdr_size = sizeof(struct xfs_da_node_hdr), + .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, + .node_tree_p = xfs_da2_node_tree_p, + + .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), + .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, + .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, + .free_max_bests = xfs_dir2_free_max_bests, + .free_bests_p = xfs_dir2_free_bests_p, + .db_to_fdb = xfs_dir2_db_to_fdb, + .db_to_fdindex = xfs_dir2_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir3_ops = { + .sf_entsize = xfs_dir3_sf_entsize, + .sf_nextentry = xfs_dir3_sf_nextentry, + .sf_get_ftype = xfs_dir3_sfe_get_ftype, + .sf_put_ftype = xfs_dir3_sfe_put_ftype, + .sf_get_ino = xfs_dir3_sfe_get_ino, + .sf_put_ino = xfs_dir3_sfe_put_ino, + .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, + .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + + .data_entsize = xfs_dir3_data_entsize, + .data_get_ftype = xfs_dir3_data_get_ftype, + .data_put_ftype = xfs_dir3_data_put_ftype, + .data_entry_tag_p = xfs_dir3_data_entry_tag_p, + .data_bestfree_p = xfs_dir3_data_bestfree_p, + + .data_dot_offset = sizeof(struct xfs_dir3_data_hdr), + .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1), + .data_first_offset = sizeof(struct xfs_dir3_data_hdr) + + XFS_DIR3_DATA_ENTSIZE(1) + + XFS_DIR3_DATA_ENTSIZE(2), + .data_entry_offset = sizeof(struct xfs_dir3_data_hdr), + + .data_dot_entry_p = xfs_dir3_data_dot_entry_p, + .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p, + .data_first_entry_p = xfs_dir3_data_first_entry_p, + .data_entry_p = xfs_dir3_data_entry_p, + .data_unused_p = xfs_dir3_data_unused_p, + + .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr), + .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk, + .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk, + .leaf_max_ents = xfs_dir3_max_leaf_ents, + .leaf_ents_p = xfs_dir3_leaf_ents_p, + + .node_hdr_size = sizeof(struct xfs_da3_node_hdr), + .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, + .node_tree_p = xfs_da3_node_tree_p, + + .free_hdr_size = sizeof(struct xfs_dir3_free_hdr), + .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk, + .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk, + .free_max_bests = xfs_dir3_free_max_bests, + .free_bests_p = xfs_dir3_free_bests_p, + .db_to_fdb = xfs_dir3_db_to_fdb, + .db_to_fdindex = xfs_dir3_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir2_nondir_ops = { + .node_hdr_size = sizeof(struct xfs_da_node_hdr), + .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, + .node_tree_p = xfs_da2_node_tree_p, +}; + +static const struct xfs_dir_ops xfs_dir3_nondir_ops = { + .node_hdr_size = sizeof(struct xfs_da3_node_hdr), + .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, + .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, + .node_tree_p = xfs_da3_node_tree_p, +}; + +/* + * Return the ops structure according to the current config. If we are passed + * an inode, then that overrides the default config we use which is based on + * feature bits. + */ +const struct xfs_dir_ops * +xfs_dir_get_ops( + struct xfs_mount *mp, + struct xfs_inode *dp) +{ + if (dp) + return dp->d_ops; + if (mp->m_dir_inode_ops) + return mp->m_dir_inode_ops; + if (xfs_sb_version_hascrc(&mp->m_sb)) + return &xfs_dir3_ops; + if (xfs_sb_version_hasftype(&mp->m_sb)) + return &xfs_dir2_ftype_ops; + return &xfs_dir2_ops; +} + +const struct xfs_dir_ops * +xfs_nondir_get_ops( + struct xfs_mount *mp, + struct xfs_inode *dp) +{ + if (dp) + return dp->d_ops; + if (mp->m_nondir_inode_ops) + return mp->m_nondir_inode_ops; + if (xfs_sb_version_hascrc(&mp->m_sb)) + return &xfs_dir3_nondir_ops; + return &xfs_dir2_nondir_ops; +} diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_da_format.h index 9cf67381adf6..a19d3f8f639c 100644 --- a/fs/xfs/xfs_dir2_format.h +++ b/fs/xfs/xfs_da_format.h @@ -16,8 +16,113 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef __XFS_DIR2_FORMAT_H__ -#define __XFS_DIR2_FORMAT_H__ +#ifndef __XFS_DA_FORMAT_H__ +#define __XFS_DA_FORMAT_H__ + +/*======================================================================== + * Directory Structure when greater than XFS_LBSIZE(mp) bytes. + *========================================================================*/ + +/* + * This structure is common to both leaf nodes and non-leaf nodes in the Btree. + * + * It is used to manage a doubly linked list of all blocks at the same + * level in the Btree, and to identify which type of block this is. + */ +#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ +#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ +#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ +#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ + +typedef struct xfs_da_blkinfo { + __be32 forw; /* previous block in list */ + __be32 back; /* following block in list */ + __be16 magic; /* validity check on block */ + __be16 pad; /* unused */ +} xfs_da_blkinfo_t; + +/* + * CRC enabled directory structure types + * + * The headers change size for the additional verification information, but + * otherwise the tree layouts and contents are unchanged. Hence the da btree + * code can use the struct xfs_da_blkinfo for manipulating the tree links and + * magic numbers without modification for both v2 and v3 nodes. + */ +#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */ +#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */ +#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */ +#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */ + +struct xfs_da3_blkinfo { + /* + * the node link manipulation code relies on the fact that the first + * element of this structure is the struct xfs_da_blkinfo so it can + * ignore the differences in the rest of the structures. + */ + struct xfs_da_blkinfo hdr; + __be32 crc; /* CRC of block */ + __be64 blkno; /* first block of the buffer */ + __be64 lsn; /* sequence number of last write */ + uuid_t uuid; /* filesystem we belong to */ + __be64 owner; /* inode that owns the block */ +}; + +/* + * This is the structure of the root and intermediate nodes in the Btree. + * The leaf nodes are defined above. + * + * Entries are not packed. + * + * Since we have duplicate keys, use a binary search but always follow + * all match in the block, not just the first match found. + */ +#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ + +typedef struct xfs_da_node_hdr { + struct xfs_da_blkinfo info; /* block type, links, etc. */ + __be16 __count; /* count of active entries */ + __be16 __level; /* level above leaves (leaf == 0) */ +} xfs_da_node_hdr_t; + +struct xfs_da3_node_hdr { + struct xfs_da3_blkinfo info; /* block type, links, etc. */ + __be16 __count; /* count of active entries */ + __be16 __level; /* level above leaves (leaf == 0) */ + __be32 __pad32; +}; + +#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc)) + +typedef struct xfs_da_node_entry { + __be32 hashval; /* hash value for this descendant */ + __be32 before; /* Btree block before this key */ +} xfs_da_node_entry_t; + +typedef struct xfs_da_intnode { + struct xfs_da_node_hdr hdr; + struct xfs_da_node_entry __btree[]; +} xfs_da_intnode_t; + +struct xfs_da3_intnode { + struct xfs_da3_node_hdr hdr; + struct xfs_da_node_entry __btree[]; +}; + +/* + * In-core version of the node header to abstract the differences in the v2 and + * v3 disk format of the headers. Callers need to convert to/from disk format as + * appropriate. + */ +struct xfs_da3_icnode_hdr { + __uint32_t forw; + __uint32_t back; + __uint16_t magic; + __uint16_t count; + __uint16_t level; +}; + +#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize /* * Directory version 2. @@ -189,79 +294,6 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count)); } -static inline int -xfs_dir3_sf_entsize( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *hdr, - int len) -{ - int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ - - count += len; /* name */ - count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : - sizeof(xfs_dir2_ino4_t); /* ino # */ - if (xfs_sb_version_hasftype(&mp->m_sb)) - count += sizeof(__uint8_t); /* file type */ - return count; -} - -static inline struct xfs_dir2_sf_entry * -xfs_dir3_sf_nextentry( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir3_sf_entsize(mp, hdr, sfep->namelen)); -} - -/* - * in dir3 shortform directories, the file type field is stored at a variable - * offset after the inode number. Because it's only a single byte, endian - * conversion is not necessary. - */ -static inline __uint8_t * -xfs_dir3_sfe_ftypep( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (__uint8_t *)&sfep->name[sfep->namelen]; -} - -static inline __uint8_t -xfs_dir3_sfe_get_ftype( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - __uint8_t *ftp; - - if (!xfs_sb_version_hasftype(&mp->m_sb)) - return XFS_DIR3_FT_UNKNOWN; - - ftp = xfs_dir3_sfe_ftypep(hdr, sfep); - if (*ftp >= XFS_DIR3_FT_MAX) - return XFS_DIR3_FT_UNKNOWN; - return *ftp; -} - -static inline void -xfs_dir3_sfe_put_ftype( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype) -{ - __uint8_t *ftp; - - ASSERT(ftype < XFS_DIR3_FT_MAX); - - if (!xfs_sb_version_hasftype(&mp->m_sb)) - return; - ftp = xfs_dir3_sfe_ftypep(hdr, sfep); - *ftp = ftype; -} - /* * Data block structures. * @@ -345,17 +377,6 @@ struct xfs_dir3_data_hdr { #define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc) -static inline struct xfs_dir2_data_free * -xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) -{ - if (hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - struct xfs_dir3_data_hdr *hdr3 = (struct xfs_dir3_data_hdr *)hdr; - return hdr3->best_free; - } - return hdr->bestfree; -} - /* * Active entry in a data block. * @@ -389,72 +410,6 @@ typedef struct xfs_dir2_data_unused { } xfs_dir2_data_unused_t; /* - * Size of a data entry. - */ -static inline int -__xfs_dir3_data_entsize( - bool ftype, - int n) -{ - int size = offsetof(struct xfs_dir2_data_entry, name[0]); - - size += n; - size += sizeof(xfs_dir2_data_off_t); - if (ftype) - size += sizeof(__uint8_t); - return roundup(size, XFS_DIR2_DATA_ALIGN); -} -static inline int -xfs_dir3_data_entsize( - struct xfs_mount *mp, - int n) -{ - bool ftype = xfs_sb_version_hasftype(&mp->m_sb) ? true : false; - return __xfs_dir3_data_entsize(ftype, n); -} - -static inline __uint8_t -xfs_dir3_dirent_get_ftype( - struct xfs_mount *mp, - struct xfs_dir2_data_entry *dep) -{ - if (xfs_sb_version_hasftype(&mp->m_sb)) { - __uint8_t type = dep->name[dep->namelen]; - - ASSERT(type < XFS_DIR3_FT_MAX); - if (type < XFS_DIR3_FT_MAX) - return type; - - } - return XFS_DIR3_FT_UNKNOWN; -} - -static inline void -xfs_dir3_dirent_put_ftype( - struct xfs_mount *mp, - struct xfs_dir2_data_entry *dep, - __uint8_t type) -{ - ASSERT(type < XFS_DIR3_FT_MAX); - ASSERT(dep->namelen != 0); - - if (xfs_sb_version_hasftype(&mp->m_sb)) - dep->name[dep->namelen] = type; -} - -/* - * Pointer to an entry's tag word. - */ -static inline __be16 * -xfs_dir3_data_entry_tag_p( - struct xfs_mount *mp, - struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir3_data_entsize(mp, dep->namelen) - sizeof(__be16)); -} - -/* * Pointer to a freespace's tag word. */ static inline __be16 * @@ -464,93 +419,6 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) be16_to_cpu(dup->length) - sizeof(__be16)); } -static inline size_t -xfs_dir3_data_hdr_size(bool dir3) -{ - if (dir3) - return sizeof(struct xfs_dir3_data_hdr); - return sizeof(struct xfs_dir2_data_hdr); -} - -static inline size_t -xfs_dir3_data_entry_offset(struct xfs_dir2_data_hdr *hdr) -{ - bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); - return xfs_dir3_data_hdr_size(dir3); -} - -static inline struct xfs_dir2_data_entry * -xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_entry_offset(hdr)); -} - -static inline struct xfs_dir2_data_unused * -xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_unused *) - ((char *)hdr + xfs_dir3_data_entry_offset(hdr)); -} - -/* - * Offsets of . and .. in data space (always block 0) - * - * XXX: there is scope for significant optimisation of the logic here. Right - * now we are checking for "dir3 format" over and over again. Ideally we should - * only do it once for each operation. - */ -static inline xfs_dir2_data_aoff_t -xfs_dir3_data_dot_offset(struct xfs_mount *mp) -{ - return xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); -} - -static inline xfs_dir2_data_aoff_t -xfs_dir3_data_dotdot_offset(struct xfs_mount *mp) -{ - return xfs_dir3_data_dot_offset(mp) + - xfs_dir3_data_entsize(mp, 1); -} - -static inline xfs_dir2_data_aoff_t -xfs_dir3_data_first_offset(struct xfs_mount *mp) -{ - return xfs_dir3_data_dotdot_offset(mp) + - xfs_dir3_data_entsize(mp, 2); -} - -/* - * location of . and .. in data space (always block 0) - */ -static inline struct xfs_dir2_data_entry * -xfs_dir3_data_dot_entry_p( - struct xfs_mount *mp, - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_dot_offset(mp)); -} - -static inline struct xfs_dir2_data_entry * -xfs_dir3_data_dotdot_entry_p( - struct xfs_mount *mp, - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_dotdot_offset(mp)); -} - -static inline struct xfs_dir2_data_entry * -xfs_dir3_data_first_entry_p( - struct xfs_mount *mp, - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + xfs_dir3_data_first_offset(mp)); -} - /* * Leaf block structures. * @@ -645,39 +513,6 @@ struct xfs_dir3_leaf { #define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc) -extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from); - -static inline int -xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp) -{ - if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || - lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) - return sizeof(struct xfs_dir3_leaf_hdr); - return sizeof(struct xfs_dir2_leaf_hdr); -} - -static inline int -xfs_dir3_max_leaf_ents(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) -{ - return (mp->m_dirblksize - xfs_dir3_leaf_hdr_size(lp)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -/* - * Get address of the bestcount field in the single-leaf block. - */ -static inline struct xfs_dir2_leaf_entry * -xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) -{ - if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || - lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { - struct xfs_dir3_leaf *lp3 = (struct xfs_dir3_leaf *)lp; - return lp3->__ents; - } - return lp->__ents; -} - /* * Get address of the bestcount field in the single-leaf block. */ @@ -869,48 +704,6 @@ struct xfs_dir3_icfree_hdr { }; -void xfs_dir3_free_hdr_from_disk(struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from); - -static inline int -xfs_dir3_free_hdr_size(struct xfs_mount *mp) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return sizeof(struct xfs_dir3_free_hdr); - return sizeof(struct xfs_dir2_free_hdr); -} - -static inline int -xfs_dir3_free_max_bests(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - xfs_dir3_free_hdr_size(mp)) / - sizeof(xfs_dir2_data_off_t); -} - -static inline __be16 * -xfs_dir3_free_bests_p(struct xfs_mount *mp, struct xfs_dir2_free *free) -{ - return (__be16 *)((char *)free + xfs_dir3_free_hdr_size(mp)); -} - -/* - * Convert data space db to the corresponding free db. - */ -static inline xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static inline int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return db % xfs_dir3_free_max_bests(mp); -} - /* * Single block format. * @@ -961,4 +754,262 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); } -#endif /* __XFS_DIR2_FORMAT_H__ */ + +/* + * Attribute storage layout + * + * Attribute lists are structured around Btrees where all the data + * elements are in the leaf nodes. Attribute names are hashed into an int, + * then that int is used as the index into the Btree. Since the hashval + * of an attribute name may not be unique, we may have duplicate keys. The + * internal links in the Btree are logical block offsets into the file. + * + *======================================================================== + * Attribute structure when equal to XFS_LBSIZE(mp) bytes. + *======================================================================== + * + * Struct leaf_entry's are packed from the top. Name/values grow from the + * bottom but are not packed. The freemap contains run-length-encoded entries + * for the free bytes after the leaf_entry's, but only the N largest such, + * smaller runs are dropped. When the freemap doesn't show enough space + * for an allocation, we compact the name/value area and try again. If we + * still don't have enough space, then we have to split the block. The + * name/value structs (both local and remote versions) must be 32bit aligned. + * + * Since we have duplicate hash keys, for each key that matches, compare + * the actual name string. The root and intermediate node search always + * takes the first-in-the-block key match found, so we should only have + * to work "forw"ard. If none matches, continue with the "forw"ard leaf + * nodes until the hash key changes or the attribute name is found. + * + * We store the fact that an attribute is a ROOT/USER/SECURE attribute in + * the leaf_entry. The namespaces are independent only because we also look + * at the namespace bit when we are looking for a matching attribute name. + * + * We also store an "incomplete" bit in the leaf_entry. It shows that an + * attribute is in the middle of being created and should not be shown to + * the user if we crash during the time that the bit is set. We clear the + * bit when we have finished setting up the attribute. We do this because + * we cannot create some large attributes inside a single transaction, and we + * need some indication that we weren't finished if we crash in the middle. + */ +#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ + +typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ + __be16 base; /* base of free region */ + __be16 size; /* length of free region */ +} xfs_attr_leaf_map_t; + +typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */ + xfs_da_blkinfo_t info; /* block type, links, etc. */ + __be16 count; /* count of active leaf_entry's */ + __be16 usedbytes; /* num bytes of names/values stored */ + __be16 firstused; /* first used byte in name area */ + __u8 holes; /* != 0 if blk needs compaction */ + __u8 pad1; + xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE]; + /* N largest free regions */ +} xfs_attr_leaf_hdr_t; + +typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ + __be32 hashval; /* hash value of name */ + __be16 nameidx; /* index into buffer of name/value */ + __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ + __u8 pad2; /* unused pad byte */ +} xfs_attr_leaf_entry_t; + +typedef struct xfs_attr_leaf_name_local { + __be16 valuelen; /* number of bytes in value */ + __u8 namelen; /* length of name bytes */ + __u8 nameval[1]; /* name/value bytes */ +} xfs_attr_leaf_name_local_t; + +typedef struct xfs_attr_leaf_name_remote { + __be32 valueblk; /* block number of value bytes */ + __be32 valuelen; /* number of bytes in value */ + __u8 namelen; /* length of name bytes */ + __u8 name[1]; /* name bytes */ +} xfs_attr_leaf_name_remote_t; + +typedef struct xfs_attr_leafblock { + xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ + xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ + xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ + xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ +} xfs_attr_leafblock_t; + +/* + * CRC enabled leaf structures. Called "version 3" structures to match the + * version number of the directory and dablk structures for this feature, and + * attr2 is already taken by the variable inode attribute fork size feature. + */ +struct xfs_attr3_leaf_hdr { + struct xfs_da3_blkinfo info; + __be16 count; + __be16 usedbytes; + __be16 firstused; + __u8 holes; + __u8 pad1; + struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; + __be32 pad2; /* 64 bit alignment */ +}; + +#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc)) + +struct xfs_attr3_leafblock { + struct xfs_attr3_leaf_hdr hdr; + struct xfs_attr_leaf_entry entries[1]; + + /* + * The rest of the block contains the following structures after the + * leaf entries, growing from the bottom up. The variables are never + * referenced, the locations accessed purely from helper functions. + * + * struct xfs_attr_leaf_name_local + * struct xfs_attr_leaf_name_remote + */ +}; + +/* + * incore, neutral version of the attribute leaf header + */ +struct xfs_attr3_icleaf_hdr { + __uint32_t forw; + __uint32_t back; + __uint16_t magic; + __uint16_t count; + __uint16_t usedbytes; + __uint16_t firstused; + __u8 holes; + struct { + __uint16_t base; + __uint16_t size; + } freemap[XFS_ATTR_LEAF_MAPSIZE]; +}; + +/* + * Flags used in the leaf_entry[i].flags field. + * NOTE: the INCOMPLETE bit must not collide with the flags bits specified + * on the system call, they are "or"ed together for various operations. + */ +#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ +#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ +#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */ +#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */ +#define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT) +#define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) +#define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT) +#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) + +/* + * Conversion macros for converting namespace bits from argument flags + * to ondisk flags. + */ +#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK) +#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK) +#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ + ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) +#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ + ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) + +/* + * Alignment for namelist and valuelist entries (since they are mixed + * there can be only one alignment value) + */ +#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) + +static inline int +xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) +{ + if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) + return sizeof(struct xfs_attr3_leaf_hdr); + return sizeof(struct xfs_attr_leaf_hdr); +} + +static inline struct xfs_attr_leaf_entry * +xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp) +{ + if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) + return &((struct xfs_attr3_leafblock *)leafp)->entries[0]; + return &leafp->entries[0]; +} + +/* + * Cast typed pointers for "local" and "remote" name/value structs. + */ +static inline char * +xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx) +{ + struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp); + + return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)]; +} + +static inline xfs_attr_leaf_name_remote_t * +xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) +{ + return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx); +} + +static inline xfs_attr_leaf_name_local_t * +xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) +{ + return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx); +} + +/* + * Calculate total bytes used (including trailing pad for alignment) for + * a "local" name/value structure, a "remote" name/value structure, and + * a pointer which might be either. + */ +static inline int xfs_attr_leaf_entsize_remote(int nlen) +{ + return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ + XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); +} + +static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) +{ + return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + + XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); +} + +static inline int xfs_attr_leaf_entsize_local_max(int bsize) +{ + return (((bsize) >> 1) + ((bsize) >> 2)); +} + + + +/* + * Remote attribute block format definition + * + * There is one of these headers per filesystem block in a remote attribute. + * This is done to ensure there is a 1:1 mapping between the attribute value + * length and the number of blocks needed to store the attribute. This makes the + * verification of a buffer a little more complex, but greatly simplifies the + * allocation, reading and writing of these attributes as we don't have to guess + * the number of blocks needed to store the attribute data. + */ +#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */ + +struct xfs_attr3_rmt_hdr { + __be32 rm_magic; + __be32 rm_offset; + __be32 rm_bytes; + __be32 rm_crc; + uuid_t rm_uuid; + __be64 rm_owner; + __be64 rm_blkno; + __be64 rm_lsn; +}; + +#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc) + +#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \ + ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ + sizeof(struct xfs_attr3_rmt_hdr) : 0)) + +#endif /* __XFS_DA_FORMAT_H__ */ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index edf203ab50af..ce16ef02997a 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -17,25 +17,24 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" +#include "xfs_dinode.h" struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; @@ -96,13 +95,17 @@ xfs_dir_mount( ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb)); ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= XFS_MAX_BLOCKSIZE); + + mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); + mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); + mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog; mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp)); mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp)); mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp)); - nodehdr_size = __xfs_da3_node_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); + nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) / (uint)sizeof(xfs_da_node_entry_t); mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) / @@ -113,6 +116,7 @@ xfs_dir_mount( mp->m_dirnameops = &xfs_ascii_ci_nameops; else mp->m_dirnameops = &xfs_default_nameops; + } /* diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index 9910401327d4..cec70e0781ab 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -32,6 +32,83 @@ struct xfs_dir2_data_unused; extern struct xfs_name xfs_name_dotdot; /* + * directory operations vector for encode/decode routines + */ +struct xfs_dir_ops { + int (*sf_entsize)(struct xfs_dir2_sf_hdr *hdr, int len); + struct xfs_dir2_sf_entry * + (*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep); + __uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep); + void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep, + __uint8_t ftype); + xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep); + void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino); + xfs_ino_t (*sf_get_parent_ino)(struct xfs_dir2_sf_hdr *hdr); + void (*sf_put_parent_ino)(struct xfs_dir2_sf_hdr *hdr, + xfs_ino_t ino); + + int (*data_entsize)(int len); + __uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep); + void (*data_put_ftype)(struct xfs_dir2_data_entry *dep, + __uint8_t ftype); + __be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep); + struct xfs_dir2_data_free * + (*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr); + + xfs_dir2_data_aoff_t data_dot_offset; + xfs_dir2_data_aoff_t data_dotdot_offset; + xfs_dir2_data_aoff_t data_first_offset; + size_t data_entry_offset; + + struct xfs_dir2_data_entry * + (*data_dot_entry_p)(struct xfs_dir2_data_hdr *hdr); + struct xfs_dir2_data_entry * + (*data_dotdot_entry_p)(struct xfs_dir2_data_hdr *hdr); + struct xfs_dir2_data_entry * + (*data_first_entry_p)(struct xfs_dir2_data_hdr *hdr); + struct xfs_dir2_data_entry * + (*data_entry_p)(struct xfs_dir2_data_hdr *hdr); + struct xfs_dir2_data_unused * + (*data_unused_p)(struct xfs_dir2_data_hdr *hdr); + + int leaf_hdr_size; + void (*leaf_hdr_to_disk)(struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from); + void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from); + int (*leaf_max_ents)(struct xfs_mount *mp); + struct xfs_dir2_leaf_entry * + (*leaf_ents_p)(struct xfs_dir2_leaf *lp); + + int node_hdr_size; + void (*node_hdr_to_disk)(struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from); + void (*node_hdr_from_disk)(struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from); + struct xfs_da_node_entry * + (*node_tree_p)(struct xfs_da_intnode *dap); + + int free_hdr_size; + void (*free_hdr_to_disk)(struct xfs_dir2_free *to, + struct xfs_dir3_icfree_hdr *from); + void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to, + struct xfs_dir2_free *from); + int (*free_max_bests)(struct xfs_mount *mp); + __be16 * (*free_bests_p)(struct xfs_dir2_free *free); + xfs_dir2_db_t (*db_to_fdb)(struct xfs_mount *mp, xfs_dir2_db_t db); + int (*db_to_fdindex)(struct xfs_mount *mp, xfs_dir2_db_t db); +}; + +extern const struct xfs_dir_ops * + xfs_dir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); +extern const struct xfs_dir_ops * + xfs_nondir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); + +/* * Generic directory interface routines */ extern void xfs_dir_startup(void); @@ -65,37 +142,30 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); /* * Interface routines used by userspace utilities */ -extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); -extern void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp, - xfs_ino_t ino); -extern xfs_ino_t xfs_dir3_sfe_get_ino(struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep); -extern void xfs_dir3_sfe_put_ino(struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino); - extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_buf *bp); -extern void xfs_dir2_data_freescan(struct xfs_mount *mp, +extern void xfs_dir2_data_freescan(struct xfs_inode *dp, struct xfs_dir2_data_hdr *hdr, int *loghead); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp, - struct xfs_dir2_data_entry *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, +extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf *bp, struct xfs_dir2_data_entry *dep); +extern void xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_buf *bp); extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_dir2_data_unused *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp, +extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); +extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf *bp, struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp, - struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, - xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); extern struct xfs_dir2_data_free *xfs_dir2_data_freefind( - struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup); + struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, + struct xfs_dir2_data_unused *dup); extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops; diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 12dad188939d..90cdbf4b5f19 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -18,25 +18,25 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_dinode.h" /* * Local function prototypes. @@ -168,6 +168,7 @@ xfs_dir3_block_init( static void xfs_dir2_block_need_space( + struct xfs_inode *dp, struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_block_tail *btp, struct xfs_dir2_leaf_entry *blp, @@ -183,7 +184,7 @@ xfs_dir2_block_need_space( struct xfs_dir2_data_unused *enddup = NULL; *compact = 0; - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); /* * If there are stale entries we'll use one for the leaf. @@ -280,6 +281,7 @@ out: static void xfs_dir2_block_compact( struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_buf *bp, struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_block_tail *btp, @@ -312,7 +314,7 @@ xfs_dir2_block_compact( *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); *lfloghigh -= be32_to_cpu(btp->stale) - 1; be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); - xfs_dir2_data_make_free(tp, bp, + xfs_dir2_data_make_free(tp, dp, bp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), needlog, &needscan); @@ -323,7 +325,7 @@ xfs_dir2_block_compact( * This needs to happen before the next call to use_free. */ if (needscan) - xfs_dir2_data_freescan(tp->t_mountp, hdr, needlog); + xfs_dir2_data_freescan(dp, hdr, needlog); } /* @@ -369,7 +371,7 @@ xfs_dir2_block_addname( if (error) return error; - len = xfs_dir3_data_entsize(mp, args->namelen); + len = dp->d_ops->data_entsize(args->namelen); /* * Set up pointers to parts of the block. @@ -382,7 +384,7 @@ xfs_dir2_block_addname( * Find out if we can reuse stale entries or whether we need extra * space for entry and new leaf. */ - xfs_dir2_block_need_space(hdr, btp, blp, &tagp, &dup, + xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup, &enddup, &compact, len); /* @@ -418,7 +420,7 @@ xfs_dir2_block_addname( * If need to compact the leaf entries, do it now. */ if (compact) { - xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, + xfs_dir2_block_compact(tp, dp, bp, hdr, btp, blp, &needlog, &lfloghigh, &lfloglow); /* recalculate blp post-compaction */ blp = xfs_dir2_block_leaf_p(btp); @@ -453,7 +455,7 @@ xfs_dir2_block_addname( /* * Mark the space needed for the new leaf entry, now in use. */ - xfs_dir2_data_use_free(tp, bp, enddup, + xfs_dir2_data_use_free(tp, dp, bp, enddup, (xfs_dir2_data_aoff_t) ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - sizeof(*blp)), @@ -468,7 +470,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); needscan = 0; } /* @@ -540,7 +542,7 @@ xfs_dir2_block_addname( /* * Mark space for the data entry used. */ - xfs_dir2_data_use_free(tp, bp, dup, + xfs_dir2_data_use_free(tp, dp, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* @@ -549,18 +551,18 @@ xfs_dir2_block_addname( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); - xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); - tagp = xfs_dir3_data_entry_tag_p(mp, dep); + dp->d_ops->data_put_ftype(dep, args->filetype); + tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Clean up the bestfree array and log the header, tail, and entry. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, bp); + xfs_dir2_data_log_header(tp, dp, bp); xfs_dir2_block_log_tail(tp, bp); - xfs_dir2_data_log_entry(tp, bp, dep); + xfs_dir2_data_log_entry(tp, dp, bp, dep); xfs_dir3_data_check(dp, bp); return 0; } @@ -642,7 +644,7 @@ xfs_dir2_block_lookup( * Fill in inode number, CI name if appropriate, release the block. */ args->inumber = be64_to_cpu(dep->inumber); - args->filetype = xfs_dir3_dirent_get_ftype(mp, dep); + args->filetype = dp->d_ops->data_get_ftype(dep); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(args->trans, bp); return XFS_ERROR(error); @@ -799,9 +801,9 @@ xfs_dir2_block_removename( * Mark the data entry's space free. */ needlog = needscan = 0; - xfs_dir2_data_make_free(tp, bp, + xfs_dir2_data_make_free(tp, dp, bp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), - xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); + dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* * Fix up the block tail. */ @@ -816,9 +818,9 @@ xfs_dir2_block_removename( * Fix up bestfree, log the header if necessary. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, bp); + xfs_dir2_data_log_header(tp, dp, bp); xfs_dir3_data_check(dp, bp); /* * See if the size as a shortform is good enough. @@ -875,8 +877,8 @@ xfs_dir2_block_replace( * Change the inode number to the new value. */ dep->inumber = cpu_to_be64(args->inumber); - xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); - xfs_dir2_data_log_entry(args->trans, bp, dep); + dp->d_ops->data_put_ftype(dep, args->filetype); + xfs_dir2_data_log_entry(args->trans, dp, bp, dep); xfs_dir3_data_check(dp, bp); return 0; } @@ -934,8 +936,8 @@ xfs_dir2_leaf_to_block( tp = args->trans; mp = dp->i_mount; leaf = lbp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); ltp = xfs_dir2_leaf_tail_p(mp, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || @@ -949,7 +951,7 @@ xfs_dir2_leaf_to_block( while (dp->i_d.di_size > mp->m_dirblksize) { int hdrsz; - hdrsz = xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); + hdrsz = dp->d_ops->data_entry_offset; bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == mp->m_dirblksize - hdrsz) { @@ -999,7 +1001,7 @@ xfs_dir2_leaf_to_block( /* * Use up the space at the end of the block (blp/btp). */ - xfs_dir2_data_use_free(tp, dbp, dup, mp->m_dirblksize - size, size, + xfs_dir2_data_use_free(tp, dp, dbp, dup, mp->m_dirblksize - size, size, &needlog, &needscan); /* * Initialize the block tail. @@ -1023,9 +1025,9 @@ xfs_dir2_leaf_to_block( * Scan the bestfree if we need it and log the data block header. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dbp); + xfs_dir2_data_log_header(tp, dp, dbp); /* * Pitch the old leaf block. */ @@ -1136,9 +1138,9 @@ xfs_dir2_sf_to_block( * The whole thing is initialized to free by the init routine. * Say we're using the leaf and tail area. */ - dup = xfs_dir3_data_unused_p(hdr); + dup = dp->d_ops->data_unused_p(hdr); needlog = needscan = 0; - xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, + xfs_dir2_data_use_free(tp, dp, bp, dup, mp->m_dirblksize - i, i, &needlog, &needscan); ASSERT(needscan == 0); /* @@ -1152,38 +1154,38 @@ xfs_dir2_sf_to_block( /* * Remove the freespace, we'll manage it. */ - xfs_dir2_data_use_free(tp, bp, dup, + xfs_dir2_data_use_free(tp, dp, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), be16_to_cpu(dup->length), &needlog, &needscan); /* * Create entry for . */ - dep = xfs_dir3_data_dot_entry_p(mp, hdr); + dep = dp->d_ops->data_dot_entry_p(hdr); dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; - xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR); - tagp = xfs_dir3_data_entry_tag_p(mp, dep); + dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); + tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, bp, dep); + xfs_dir2_data_log_entry(tp, dp, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)hdr)); /* * Create entry for .. */ - dep = xfs_dir3_data_dotdot_entry_p(mp, hdr); - dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); + dep = dp->d_ops->data_dotdot_entry_p(hdr); + dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; - xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR); - tagp = xfs_dir3_data_entry_tag_p(mp, dep); + dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); + tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, bp, dep); + xfs_dir2_data_log_entry(tp, dp, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)hdr)); - offset = xfs_dir3_data_first_offset(mp); + offset = dp->d_ops->data_first_offset; /* * Loop over existing entries, stuff them in. */ @@ -1214,7 +1216,9 @@ xfs_dir2_sf_to_block( *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( ((char *)dup - (char *)hdr)); xfs_dir2_data_log_unused(tp, bp, dup); - xfs_dir2_data_freeinsert(hdr, dup, &dummy); + xfs_dir2_data_freeinsert(hdr, + dp->d_ops->data_bestfree_p(hdr), + dup, &dummy); offset += be16_to_cpu(dup->length); continue; } @@ -1222,14 +1226,13 @@ xfs_dir2_sf_to_block( * Copy a real entry. */ dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); - dep->inumber = cpu_to_be64(xfs_dir3_sfe_get_ino(mp, sfp, sfep)); + dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep)); dep->namelen = sfep->namelen; - xfs_dir3_dirent_put_ftype(mp, dep, - xfs_dir3_sfe_get_ftype(mp, sfp, sfep)); + dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep)); memcpy(dep->name, sfep->name, dep->namelen); - tagp = xfs_dir3_data_entry_tag_p(mp, dep); + tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, bp, dep); + xfs_dir2_data_log_entry(tp, dp, bp, dep); name.name = sfep->name; name.len = sfep->namelen; blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> @@ -1240,7 +1243,7 @@ xfs_dir2_sf_to_block( if (++i == sfp->count) sfep = NULL; else - sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); + sfep = dp->d_ops->sf_nextentry(sfp, sfep); } /* Done with the temporary buffer */ kmem_free(sfp); diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 47e1326c169a..70acff4ee173 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -18,20 +18,19 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" @@ -63,11 +62,18 @@ __xfs_dir3_data_check( char *p; /* current data position */ int stale; /* count of stale leaves */ struct xfs_name name; + const struct xfs_dir_ops *ops; mp = bp->b_target->bt_mount; + + /* + * We can be passed a null dp here from a verifier, so we need to go the + * hard way to get them. + */ + ops = xfs_dir_get_ops(mp, dp); + hdr = bp->b_addr; - bf = xfs_dir3_data_bestfree_p(hdr); - p = (char *)xfs_dir3_data_entry_p(hdr); + p = (char *)ops->data_entry_p(hdr); switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): @@ -75,6 +81,16 @@ __xfs_dir3_data_check( btp = xfs_dir2_block_tail_p(mp, hdr); lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; + + /* + * The number of leaf entries is limited by the size of the + * block and the amount of space used by the data entries. + * We don't know how much space is used by the data entries yet, + * so just ensure that the count falls somewhere inside the + * block right now. + */ + XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < + ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): @@ -85,10 +101,11 @@ __xfs_dir3_data_check( return EFSCORRUPTED; } - count = lastfree = freeseen = 0; /* * Account for zero bestfree entries. */ + bf = ops->data_bestfree_p(hdr); + count = lastfree = freeseen = 0; if (!bf[0].length) { XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); freeseen |= 1 << 0; @@ -121,7 +138,7 @@ __xfs_dir3_data_check( XFS_WANT_CORRUPTED_RETURN( be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == (char *)dup - (char *)hdr); - dfp = xfs_dir2_data_freefind(hdr, dup); + dfp = xfs_dir2_data_freefind(hdr, bf, dup); if (dfp) { i = (int)(dfp - bf); XFS_WANT_CORRUPTED_RETURN( @@ -147,10 +164,10 @@ __xfs_dir3_data_check( XFS_WANT_CORRUPTED_RETURN( !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); XFS_WANT_CORRUPTED_RETURN( - be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) == + be16_to_cpu(*ops->data_entry_tag_p(dep)) == (char *)dep - (char *)hdr); XFS_WANT_CORRUPTED_RETURN( - xfs_dir3_dirent_get_ftype(mp, dep) < XFS_DIR3_FT_MAX); + ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -168,7 +185,7 @@ __xfs_dir3_data_check( } XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); } - p += xfs_dir3_data_entsize(mp, dep->namelen); + p += ops->data_entsize(dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. @@ -327,19 +344,18 @@ xfs_dir3_data_readahead( */ xfs_dir2_data_free_t * xfs_dir2_data_freefind( - xfs_dir2_data_hdr_t *hdr, /* data block */ - xfs_dir2_data_unused_t *dup) /* data unused entry */ + struct xfs_dir2_data_hdr *hdr, /* data block header */ + struct xfs_dir2_data_free *bf, /* bestfree table pointer */ + struct xfs_dir2_data_unused *dup) /* unused space */ { xfs_dir2_data_free_t *dfp; /* bestfree entry */ xfs_dir2_data_aoff_t off; /* offset value needed */ - struct xfs_dir2_data_free *bf; #ifdef DEBUG int matched; /* matched the value */ int seenzero; /* saw a 0 bestfree entry */ #endif off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); - bf = xfs_dir3_data_bestfree_p(hdr); #ifdef DEBUG /* @@ -399,11 +415,11 @@ xfs_dir2_data_freefind( */ xfs_dir2_data_free_t * /* entry inserted */ xfs_dir2_data_freeinsert( - xfs_dir2_data_hdr_t *hdr, /* data block pointer */ - xfs_dir2_data_unused_t *dup, /* unused space */ + struct xfs_dir2_data_hdr *hdr, /* data block pointer */ + struct xfs_dir2_data_free *dfp, /* bestfree table pointer */ + struct xfs_dir2_data_unused *dup, /* unused space */ int *loghead) /* log the data header (out) */ { - xfs_dir2_data_free_t *dfp; /* bestfree table pointer */ xfs_dir2_data_free_t new; /* new bestfree entry */ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || @@ -411,7 +427,6 @@ xfs_dir2_data_freeinsert( hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - dfp = xfs_dir3_data_bestfree_p(hdr); new.length = dup->length; new.offset = cpu_to_be16((char *)dup - (char *)hdr); @@ -444,11 +459,11 @@ xfs_dir2_data_freeinsert( */ STATIC void xfs_dir2_data_freeremove( - xfs_dir2_data_hdr_t *hdr, /* data block header */ - xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ + struct xfs_dir2_data_hdr *hdr, /* data block header */ + struct xfs_dir2_data_free *bf, /* bestfree table pointer */ + struct xfs_dir2_data_free *dfp, /* bestfree entry pointer */ int *loghead) /* out: log data header */ { - struct xfs_dir2_data_free *bf; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -458,7 +473,6 @@ xfs_dir2_data_freeremove( /* * It's the first entry, slide the next 2 up. */ - bf = xfs_dir3_data_bestfree_p(hdr); if (dfp == &bf[0]) { bf[0] = bf[1]; bf[1] = bf[2]; @@ -486,9 +500,9 @@ xfs_dir2_data_freeremove( */ void xfs_dir2_data_freescan( - xfs_mount_t *mp, /* filesystem mount point */ - xfs_dir2_data_hdr_t *hdr, /* data block header */ - int *loghead) /* out: log data header */ + struct xfs_inode *dp, + struct xfs_dir2_data_hdr *hdr, + int *loghead) { xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* active data entry */ @@ -505,19 +519,19 @@ xfs_dir2_data_freescan( /* * Start by clearing the table. */ - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); *loghead = 1; /* * Set up pointers. */ - p = (char *)xfs_dir3_data_entry_p(hdr); + p = (char *)dp->d_ops->data_entry_p(hdr); if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(mp, hdr); + btp = xfs_dir2_block_tail_p(dp->i_mount, hdr); endp = (char *)xfs_dir2_block_leaf_p(btp); } else - endp = (char *)hdr + mp->m_dirblksize; + endp = (char *)hdr + dp->i_mount->m_dirblksize; /* * Loop over the block's entries. */ @@ -529,7 +543,7 @@ xfs_dir2_data_freescan( if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); - xfs_dir2_data_freeinsert(hdr, dup, loghead); + xfs_dir2_data_freeinsert(hdr, bf, dup, loghead); p += be16_to_cpu(dup->length); } /* @@ -538,8 +552,8 @@ xfs_dir2_data_freescan( else { dep = (xfs_dir2_data_entry_t *)p; ASSERT((char *)dep - (char *)hdr == - be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep))); - p += xfs_dir3_data_entsize(mp, dep->namelen); + be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep))); + p += dp->d_ops->data_entsize(dep->namelen); } } } @@ -594,8 +608,8 @@ xfs_dir3_data_init( } else hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - bf = xfs_dir3_data_bestfree_p(hdr); - bf[0].offset = cpu_to_be16(xfs_dir3_data_entry_offset(hdr)); + bf = dp->d_ops->data_bestfree_p(hdr); + bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { bf[i].length = 0; bf[i].offset = 0; @@ -604,17 +618,17 @@ xfs_dir3_data_init( /* * Set up an unused entry for the block's body. */ - dup = xfs_dir3_data_unused_p(hdr); + dup = dp->d_ops->data_unused_p(hdr); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - t = mp->m_dirblksize - (uint)xfs_dir3_data_entry_offset(hdr); + t = mp->m_dirblksize - (uint)dp->d_ops->data_entry_offset; bf[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); /* * Log it and return it. */ - xfs_dir2_data_log_header(tp, bp); + xfs_dir2_data_log_header(tp, dp, bp); xfs_dir2_data_log_unused(tp, bp, dup); *bpp = bp; return 0; @@ -626,11 +640,11 @@ xfs_dir3_data_init( void xfs_dir2_data_log_entry( struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { struct xfs_dir2_data_hdr *hdr = bp->b_addr; - struct xfs_mount *mp = tp->t_mountp; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || @@ -638,7 +652,7 @@ xfs_dir2_data_log_entry( hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), - (uint)((char *)(xfs_dir3_data_entry_tag_p(mp, dep) + 1) - + (uint)((char *)(dp->d_ops->data_entry_tag_p(dep) + 1) - (char *)hdr - 1)); } @@ -648,16 +662,19 @@ xfs_dir2_data_log_entry( void xfs_dir2_data_log_header( struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_buf *bp) { - xfs_dir2_data_hdr_t *hdr = bp->b_addr; +#ifdef DEBUG + struct xfs_dir2_data_hdr *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); +#endif - xfs_trans_log_buf(tp, bp, 0, xfs_dir3_data_entry_offset(hdr) - 1); + xfs_trans_log_buf(tp, bp, 0, dp->d_ops->data_entry_offset - 1); } /* @@ -698,6 +715,7 @@ xfs_dir2_data_log_unused( void xfs_dir2_data_make_free( struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, /* starting byte offset */ xfs_dir2_data_aoff_t len, /* length in bytes */ @@ -735,7 +753,7 @@ xfs_dir2_data_make_free( * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > xfs_dir3_data_entry_offset(hdr)) { + if (offset > dp->d_ops->data_entry_offset) { __be16 *tagp; /* tag just before us */ tagp = (__be16 *)((char *)hdr + offset) - 1; @@ -761,15 +779,15 @@ xfs_dir2_data_make_free( * Previous and following entries are both free, * merge everything into a single free entry. */ - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); if (prevdup && postdup) { xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ /* * See if prevdup and/or postdup are in bestfree table. */ - dfp = xfs_dir2_data_freefind(hdr, prevdup); - dfp2 = xfs_dir2_data_freefind(hdr, postdup); + dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); + dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup); /* * We need a rescan unless there are exactly 2 free entries * namely our two. Then we know what's happening, otherwise @@ -797,12 +815,13 @@ xfs_dir2_data_make_free( ASSERT(dfp2 == dfp); dfp2 = &bf[1]; } - xfs_dir2_data_freeremove(hdr, dfp2, needlogp); - xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp); + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); /* * Now insert the new entry. */ - dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup, + needlogp); ASSERT(dfp == &bf[0]); ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); @@ -813,7 +832,7 @@ xfs_dir2_data_make_free( * The entry before us is free, merge with it. */ else if (prevdup) { - dfp = xfs_dir2_data_freefind(hdr, prevdup); + dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); @@ -824,8 +843,8 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. @@ -839,7 +858,7 @@ xfs_dir2_data_make_free( * The following entry is free, merge with it. */ else if (postdup) { - dfp = xfs_dir2_data_freefind(hdr, postdup); + dfp = xfs_dir2_data_freefind(hdr, bf, postdup); newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); @@ -852,8 +871,8 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. @@ -873,7 +892,7 @@ xfs_dir2_data_make_free( *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); } *needscanp = needscan; } @@ -884,6 +903,7 @@ xfs_dir2_data_make_free( void xfs_dir2_data_use_free( struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup, /* unused entry */ xfs_dir2_data_aoff_t offset, /* starting offset to use */ @@ -913,9 +933,9 @@ xfs_dir2_data_use_free( /* * Look up the entry in the bestfree table. */ - dfp = xfs_dir2_data_freefind(hdr, dup); oldlen = be16_to_cpu(dup->length); - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); + dfp = xfs_dir2_data_freefind(hdr, bf, dup); ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); /* * Check for alignment with front and back of the entry. @@ -932,7 +952,8 @@ xfs_dir2_data_use_free( if (dfp) { needscan = (bf[2].offset != 0); if (!needscan) - xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeremove(hdr, bf, dfp, + needlogp); } } /* @@ -950,8 +971,9 @@ xfs_dir2_data_use_free( * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, + needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); @@ -977,8 +999,9 @@ xfs_dir2_data_use_free( * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, + needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); @@ -1017,9 +1040,11 @@ xfs_dir2_data_use_free( if (dfp) { needscan = (bf[2].length != 0); if (!needscan) { - xfs_dir2_data_freeremove(hdr, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup, needlogp); - xfs_dir2_data_freeinsert(hdr, newdup2, + xfs_dir2_data_freeremove(hdr, bf, dfp, + needlogp); + xfs_dir2_data_freeinsert(hdr, bf, newdup, + needlogp); + xfs_dir2_data_freeinsert(hdr, bf, newdup2, needlogp); } } diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 1021c8356d08..ae47ec6e16c4 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -18,23 +18,21 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" @@ -52,21 +50,21 @@ static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); * Pop an assert if something is wrong. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(mp, bp) \ +#define xfs_dir3_leaf_check(dp, bp) \ do { \ - if (!xfs_dir3_leaf1_check((mp), (bp))) \ + if (!xfs_dir3_leaf1_check((dp), (bp))) \ ASSERT(0); \ } while (0); STATIC bool xfs_dir3_leaf1_check( - struct xfs_mount *mp, + struct xfs_inode *dp, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; struct xfs_dir3_icleaf_hdr leafhdr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; @@ -75,71 +73,16 @@ xfs_dir3_leaf1_check( } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) return false; - return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); + return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } #else -#define xfs_dir3_leaf_check(mp, bp) +#define xfs_dir3_leaf_check(dp, bp) #endif -void -xfs_dir3_leaf_hdr_from_disk( - struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from) -{ - if (from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || - from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.count); - to->stale = be16_to_cpu(from->hdr.stale); - } else { - struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; - - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->count); - to->stale = be16_to_cpu(hdr3->stale); - } - - ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || - to->magic == XFS_DIR3_LEAF1_MAGIC || - to->magic == XFS_DIR2_LEAFN_MAGIC || - to->magic == XFS_DIR3_LEAFN_MAGIC); -} - -void -xfs_dir3_leaf_hdr_to_disk( - struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from) -{ - ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || - from->magic == XFS_DIR3_LEAF1_MAGIC || - from->magic == XFS_DIR2_LEAFN_MAGIC || - from->magic == XFS_DIR3_LEAFN_MAGIC); - - if (from->magic == XFS_DIR2_LEAF1_MAGIC || - from->magic == XFS_DIR2_LEAFN_MAGIC) { - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.count = cpu_to_be16(from->count); - to->hdr.stale = cpu_to_be16(from->stale); - } else { - struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; - - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->count = cpu_to_be16(from->count); - hdr3->stale = cpu_to_be16(from->stale); - } -} - bool xfs_dir3_leaf_check_int( struct xfs_mount *mp, + struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf) { @@ -147,8 +90,21 @@ xfs_dir3_leaf_check_int( xfs_dir2_leaf_tail_t *ltp; int stale; int i; + const struct xfs_dir_ops *ops; + struct xfs_dir3_icleaf_hdr leafhdr; - ents = xfs_dir3_leaf_ents_p(leaf); + /* + * we can be passed a null dp here from a verifier, so we need to go the + * hard way to get them. + */ + ops = xfs_dir_get_ops(mp, dp); + + if (!hdr) { + ops->leaf_hdr_from_disk(&leafhdr, leaf); + hdr = &leafhdr; + } + + ents = ops->leaf_ents_p(leaf); ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* @@ -156,7 +112,7 @@ xfs_dir3_leaf_check_int( * Should factor in the size of the bests table as well. * We can deduce a value for that from di_size. */ - if (hdr->count > xfs_dir3_max_leaf_ents(mp, leaf)) + if (hdr->count > ops->leaf_max_ents(mp)) return false; /* Leaves and bests don't overlap in leaf format. */ @@ -192,7 +148,6 @@ xfs_dir3_leaf_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_dir2_leaf *leaf = bp->b_addr; - struct xfs_dir3_icleaf_hdr leafhdr; ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); @@ -214,8 +169,7 @@ xfs_dir3_leaf_verify( return false; } - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); + return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); } static void @@ -401,7 +355,7 @@ xfs_dir3_leaf_get_buf( return error; xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic); - xfs_dir3_leaf_log_header(tp, bp); + xfs_dir3_leaf_log_header(tp, dp, bp); if (magic == XFS_DIR2_LEAF1_MAGIC) xfs_dir3_leaf_log_tail(tp, bp); *bpp = bp; @@ -462,31 +416,31 @@ xfs_dir2_block_to_leaf( xfs_dir3_data_check(dp, dbp); btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); - bf = xfs_dir3_data_bestfree_p(hdr); - ents = xfs_dir3_leaf_ents_p(leaf); + bf = dp->d_ops->data_bestfree_p(hdr); + ents = dp->d_ops->leaf_ents_p(leaf); /* * Set the counts in the leaf header. */ - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); leafhdr.count = be32_to_cpu(btp->count); leafhdr.stale = be32_to_cpu(btp->stale); - xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, lbp); + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, dp, lbp); /* * Could compact these but I think we always do the conversion * after squeezing out stale entries. */ memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, lbp, 0, leafhdr.count - 1); + xfs_dir3_leaf_log_ents(tp, dp, lbp, 0, leafhdr.count - 1); needscan = 0; needlog = 1; /* * Make the space formerly occupied by the leaf entries and block * tail be free. */ - xfs_dir2_data_make_free(tp, dbp, + xfs_dir2_data_make_free(tp, dp, dbp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - (char *)blp), @@ -502,7 +456,7 @@ xfs_dir2_block_to_leaf( hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); /* * Set up leaf tail and bests table. */ @@ -514,8 +468,8 @@ xfs_dir2_block_to_leaf( * Log the data header and leaf bests table. */ if (needlog) - xfs_dir2_data_log_header(tp, dbp); - xfs_dir3_leaf_check(mp, lbp); + xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir3_leaf_check(dp, lbp); xfs_dir3_data_check(dp, dbp); xfs_dir3_leaf_log_bests(tp, lbp, 0, 0); return 0; @@ -699,10 +653,10 @@ xfs_dir2_leaf_addname( index = xfs_dir2_leaf_search_hash(args, lbp); leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); - length = xfs_dir3_data_entsize(mp, args->namelen); + length = dp->d_ops->data_entsize(args->namelen); /* * See if there are any entries with the same hash value @@ -864,7 +818,7 @@ xfs_dir2_leaf_addname( else xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block); hdr = dbp->b_addr; - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); bestsp[use_block] = bf[0].length; grown = 1; } else { @@ -880,7 +834,7 @@ xfs_dir2_leaf_addname( return error; } hdr = dbp->b_addr; - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); grown = 0; } /* @@ -893,7 +847,7 @@ xfs_dir2_leaf_addname( /* * Mark the initial part of our freespace in use for the new entry. */ - xfs_dir2_data_use_free(tp, dbp, dup, + xfs_dir2_data_use_free(tp, dp, dbp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* @@ -903,20 +857,20 @@ xfs_dir2_leaf_addname( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); - xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); - tagp = xfs_dir3_data_entry_tag_p(mp, dep); + dp->d_ops->data_put_ftype(dep, args->filetype); + tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Need to scan fix up the bestfree table. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); /* * Need to log the data block's header. */ if (needlog) - xfs_dir2_data_log_header(tp, dbp); - xfs_dir2_data_log_entry(tp, dbp, dep); + xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_entry(tp, dp, dbp, dep); /* * If the bests table needs to be changed, do it. * Log the change unless we've already done that. @@ -939,10 +893,10 @@ xfs_dir2_leaf_addname( /* * Log the leaf fields and give up the buffers. */ - xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, lbp); - xfs_dir3_leaf_log_ents(tp, lbp, lfloglow, lfloghigh); - xfs_dir3_leaf_check(mp, lbp); + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_log_ents(tp, dp, lbp, lfloglow, lfloghigh); + xfs_dir3_leaf_check(dp, lbp); xfs_dir3_data_check(dp, dbp); return 0; } @@ -962,6 +916,7 @@ xfs_dir3_leaf_compact( int loglow; /* first leaf entry to log */ int to; /* target leaf index */ struct xfs_dir2_leaf_entry *ents; + struct xfs_inode *dp = args->dp; leaf = bp->b_addr; if (!leafhdr->stale) @@ -970,7 +925,7 @@ xfs_dir3_leaf_compact( /* * Compress out the stale entries in place. */ - ents = xfs_dir3_leaf_ents_p(leaf); + ents = dp->d_ops->leaf_ents_p(leaf); for (from = to = 0, loglow = -1; from < leafhdr->count; from++) { if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) continue; @@ -991,10 +946,10 @@ xfs_dir3_leaf_compact( leafhdr->count -= leafhdr->stale; leafhdr->stale = 0; - xfs_dir3_leaf_hdr_to_disk(leaf, leafhdr); - xfs_dir3_leaf_log_header(args->trans, bp); + dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); + xfs_dir3_leaf_log_header(args->trans, dp, bp); if (loglow != -1) - xfs_dir3_leaf_log_ents(args->trans, bp, loglow, to - 1); + xfs_dir3_leaf_log_ents(args->trans, dp, bp, loglow, to - 1); } /* @@ -1121,10 +1076,11 @@ xfs_dir3_leaf_log_bests( */ void xfs_dir3_leaf_log_ents( - xfs_trans_t *tp, /* transaction pointer */ - struct xfs_buf *bp, /* leaf buffer */ - int first, /* first entry to log */ - int last) /* last entry to log */ + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_buf *bp, + int first, + int last) { xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */ xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ @@ -1136,7 +1092,7 @@ xfs_dir3_leaf_log_ents( leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - ents = xfs_dir3_leaf_ents_p(leaf); + ents = dp->d_ops->leaf_ents_p(leaf); firstlep = &ents[first]; lastlep = &ents[last]; xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), @@ -1149,6 +1105,7 @@ xfs_dir3_leaf_log_ents( void xfs_dir3_leaf_log_header( struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; @@ -1159,7 +1116,7 @@ xfs_dir3_leaf_log_header( leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), - xfs_dir3_leaf_hdr_size(leaf) - 1); + dp->d_ops->leaf_hdr_size - 1); } /* @@ -1214,9 +1171,9 @@ xfs_dir2_leaf_lookup( } tp = args->trans; dp = args->dp; - xfs_dir3_leaf_check(dp->i_mount, lbp); + xfs_dir3_leaf_check(dp, lbp); leaf = lbp->b_addr; - ents = xfs_dir3_leaf_ents_p(leaf); + ents = dp->d_ops->leaf_ents_p(leaf); /* * Get to the leaf entry and contained data entry address. */ @@ -1232,7 +1189,7 @@ xfs_dir2_leaf_lookup( * Return the found inode number & CI name if appropriate */ args->inumber = be64_to_cpu(dep->inumber); - args->filetype = xfs_dir3_dirent_get_ftype(dp->i_mount, dep); + args->filetype = dp->d_ops->data_get_ftype(dep); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(tp, dbp); xfs_trans_brelse(tp, lbp); @@ -1279,9 +1236,9 @@ xfs_dir2_leaf_lookup_int( *lbpp = lbp; leaf = lbp->b_addr; - xfs_dir3_leaf_check(mp, lbp); - ents = xfs_dir3_leaf_ents_p(leaf); - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir3_leaf_check(dp, lbp); + ents = dp->d_ops->leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); /* * Look for the first leaf entry with our hash value. @@ -1415,9 +1372,9 @@ xfs_dir2_leaf_removename( leaf = lbp->b_addr; hdr = dbp->b_addr; xfs_dir3_data_check(dp, dbp); - bf = xfs_dir3_data_bestfree_p(hdr); - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); + bf = dp->d_ops->data_bestfree_p(hdr); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); /* * Point to the leaf entry, use that to point to the data entry. */ @@ -1433,27 +1390,27 @@ xfs_dir2_leaf_removename( /* * Mark the former data entry unused. */ - xfs_dir2_data_make_free(tp, dbp, + xfs_dir2_data_make_free(tp, dp, dbp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), - xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); + dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* * We just mark the leaf entry stale by putting a null in it. */ leafhdr.stale++; - xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, lbp); + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, dp, lbp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(tp, lbp, index, index); + xfs_dir3_leaf_log_ents(tp, dp, lbp, index, index); /* * Scan the freespace in the data block again if necessary, * log the data block header if necessary. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dbp); + xfs_dir2_data_log_header(tp, dp, dbp); /* * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. @@ -1467,7 +1424,7 @@ xfs_dir2_leaf_removename( * If the data block is now empty then get rid of the data block. */ if (be16_to_cpu(bf[0].length) == - mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)) { + mp->m_dirblksize - dp->d_ops->data_entry_offset) { ASSERT(db != mp->m_dirdatablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* @@ -1478,7 +1435,7 @@ xfs_dir2_leaf_removename( */ if (error == ENOSPC && args->total == 0) error = 0; - xfs_dir3_leaf_check(mp, lbp); + xfs_dir3_leaf_check(dp, lbp); return error; } dbp = NULL; @@ -1512,7 +1469,7 @@ xfs_dir2_leaf_removename( else if (db != mp->m_dirdatablk) dbp = NULL; - xfs_dir3_leaf_check(mp, lbp); + xfs_dir3_leaf_check(dp, lbp); /* * See if we can convert to block form. */ @@ -1547,7 +1504,7 @@ xfs_dir2_leaf_replace( } dp = args->dp; leaf = lbp->b_addr; - ents = xfs_dir3_leaf_ents_p(leaf); + ents = dp->d_ops->leaf_ents_p(leaf); /* * Point to the leaf entry, get data address from it. */ @@ -1563,10 +1520,10 @@ xfs_dir2_leaf_replace( * Put the new inode number in, log it. */ dep->inumber = cpu_to_be64(args->inumber); - xfs_dir3_dirent_put_ftype(dp->i_mount, dep, args->filetype); + dp->d_ops->data_put_ftype(dep, args->filetype); tp = args->trans; - xfs_dir2_data_log_entry(tp, dbp, dep); - xfs_dir3_leaf_check(dp->i_mount, lbp); + xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir3_leaf_check(dp, lbp); xfs_trans_brelse(tp, lbp); return 0; } @@ -1592,8 +1549,8 @@ xfs_dir2_leaf_search_hash( struct xfs_dir3_icleaf_hdr leafhdr; leaf = lbp->b_addr; - ents = xfs_dir3_leaf_ents_p(leaf); - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = args->dp->d_ops->leaf_ents_p(leaf); + args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); /* * Note, the table cannot be empty, so we have to go through the loop. @@ -1661,12 +1618,12 @@ xfs_dir2_leaf_trim_data( #ifdef DEBUG { struct xfs_dir2_data_hdr *hdr = dbp->b_addr; - struct xfs_dir2_data_free *bf = xfs_dir3_data_bestfree_p(hdr); + struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr); ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); ASSERT(be16_to_cpu(bf[0].length) == - mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)); + mp->m_dirblksize - dp->d_ops->data_entry_offset); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); } #endif @@ -1782,7 +1739,7 @@ xfs_dir2_node_to_leaf( return 0; lbp = state->path.blk[0].bp; leaf = lbp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -1794,7 +1751,7 @@ xfs_dir2_node_to_leaf( if (error) return error; free = fbp->b_addr; - xfs_dir3_free_hdr_from_disk(&freehdr, free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); ASSERT(!freehdr.firstdb); @@ -1828,14 +1785,14 @@ xfs_dir2_node_to_leaf( /* * Set up the leaf bests table. */ - memcpy(xfs_dir2_leaf_bests_p(ltp), xfs_dir3_free_bests_p(mp, free), + memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free), freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); - xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, lbp); + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, dp, lbp); xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir3_leaf_log_tail(tp, lbp); - xfs_dir3_leaf_check(mp, lbp); + xfs_dir3_leaf_check(dp, lbp); /* * Get rid of the freespace block. diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 4c3dba7ffb74..56369d4509d5 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -18,22 +18,21 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" @@ -55,21 +54,21 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args, * Check internal consistency of a leafn block. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(mp, bp) \ +#define xfs_dir3_leaf_check(dp, bp) \ do { \ - if (!xfs_dir3_leafn_check((mp), (bp))) \ + if (!xfs_dir3_leafn_check((dp), (bp))) \ ASSERT(0); \ } while (0); static bool xfs_dir3_leafn_check( - struct xfs_mount *mp, + struct xfs_inode *dp, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; struct xfs_dir3_icleaf_hdr leafhdr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; @@ -78,10 +77,10 @@ xfs_dir3_leafn_check( } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) return false; - return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); + return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } #else -#define xfs_dir3_leaf_check(mp, bp) +#define xfs_dir3_leaf_check(dp, bp) #endif static bool @@ -193,53 +192,6 @@ xfs_dir2_free_try_read( return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp); } - -void -xfs_dir3_free_hdr_from_disk( - struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from) -{ - if (from->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)) { - to->magic = be32_to_cpu(from->hdr.magic); - to->firstdb = be32_to_cpu(from->hdr.firstdb); - to->nvalid = be32_to_cpu(from->hdr.nvalid); - to->nused = be32_to_cpu(from->hdr.nused); - } else { - struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; - - to->magic = be32_to_cpu(hdr3->hdr.magic); - to->firstdb = be32_to_cpu(hdr3->firstdb); - to->nvalid = be32_to_cpu(hdr3->nvalid); - to->nused = be32_to_cpu(hdr3->nused); - } - - ASSERT(to->magic == XFS_DIR2_FREE_MAGIC || - to->magic == XFS_DIR3_FREE_MAGIC); -} - -static void -xfs_dir3_free_hdr_to_disk( - struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from) -{ - ASSERT(from->magic == XFS_DIR2_FREE_MAGIC || - from->magic == XFS_DIR3_FREE_MAGIC); - - if (from->magic == XFS_DIR2_FREE_MAGIC) { - to->hdr.magic = cpu_to_be32(from->magic); - to->hdr.firstdb = cpu_to_be32(from->firstdb); - to->hdr.nvalid = cpu_to_be32(from->nvalid); - to->hdr.nused = cpu_to_be32(from->nused); - } else { - struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; - - hdr3->hdr.magic = cpu_to_be32(from->magic); - hdr3->firstdb = cpu_to_be32(from->firstdb); - hdr3->nvalid = cpu_to_be32(from->nvalid); - hdr3->nused = cpu_to_be32(from->nused); - } -} - static int xfs_dir3_free_get_buf( struct xfs_trans *tp, @@ -277,7 +229,7 @@ xfs_dir3_free_get_buf( uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid); } else hdr.magic = XFS_DIR2_FREE_MAGIC; - xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr); + dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr); *bpp = bp; return 0; } @@ -288,6 +240,7 @@ xfs_dir3_free_get_buf( STATIC void xfs_dir2_free_log_bests( struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_buf *bp, int first, /* first entry to log */ int last) /* last entry to log */ @@ -296,7 +249,7 @@ xfs_dir2_free_log_bests( __be16 *bests; free = bp->b_addr; - bests = xfs_dir3_free_bests_p(tp->t_mountp, free); + bests = dp->d_ops->free_bests_p(free); ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); xfs_trans_log_buf(tp, bp, @@ -311,6 +264,7 @@ xfs_dir2_free_log_bests( static void xfs_dir2_free_log_header( struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_buf *bp) { #ifdef DEBUG @@ -320,7 +274,7 @@ xfs_dir2_free_log_header( ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); #endif - xfs_trans_log_buf(tp, bp, 0, xfs_dir3_free_hdr_size(tp->t_mountp) - 1); + xfs_trans_log_buf(tp, bp, 0, dp->d_ops->free_hdr_size - 1); } /* @@ -369,7 +323,7 @@ xfs_dir2_leaf_to_node( return error; free = fbp->b_addr; - xfs_dir3_free_hdr_from_disk(&freehdr, free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); ASSERT(be32_to_cpu(ltp->bestcount) <= @@ -380,7 +334,7 @@ xfs_dir2_leaf_to_node( * Count active entries. */ from = xfs_dir2_leaf_bests_p(ltp); - to = xfs_dir3_free_bests_p(mp, free); + to = dp->d_ops->free_bests_p(free); for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { if ((off = be16_to_cpu(*from)) != NULLDATAOFF) n++; @@ -393,9 +347,9 @@ xfs_dir2_leaf_to_node( freehdr.nused = n; freehdr.nvalid = be32_to_cpu(ltp->bestcount); - xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_bests(tp, fbp, 0, freehdr.nvalid - 1); - xfs_dir2_free_log_header(tp, fbp); + dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); + xfs_dir2_free_log_bests(tp, dp, fbp, 0, freehdr.nvalid - 1); + xfs_dir2_free_log_header(tp, dp, fbp); /* * Converting the leaf to a leafnode is just a matter of changing the @@ -409,8 +363,8 @@ xfs_dir2_leaf_to_node( leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); lbp->b_ops = &xfs_dir3_leafn_buf_ops; xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF); - xfs_dir3_leaf_log_header(tp, lbp); - xfs_dir3_leaf_check(mp, lbp); + xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_check(dp, lbp); return 0; } @@ -443,8 +397,8 @@ xfs_dir2_leafn_add( mp = dp->i_mount; tp = args->trans; leaf = bp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); /* * Quick check just to make sure we are not going to index @@ -460,7 +414,7 @@ xfs_dir2_leafn_add( * a compact. */ - if (leafhdr.count == xfs_dir3_max_leaf_ents(mp, leaf)) { + if (leafhdr.count == dp->d_ops->leaf_max_ents(mp)) { if (!leafhdr.stale) return XFS_ERROR(ENOSPC); compact = leafhdr.stale > 1; @@ -498,30 +452,30 @@ xfs_dir2_leafn_add( lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, args->blkno, args->index)); - xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, bp); - xfs_dir3_leaf_log_ents(tp, bp, lfloglow, lfloghigh); - xfs_dir3_leaf_check(mp, bp); + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, dp, bp); + xfs_dir3_leaf_log_ents(tp, dp, bp, lfloglow, lfloghigh); + xfs_dir3_leaf_check(dp, bp); return 0; } #ifdef DEBUG static void xfs_dir2_free_hdr_check( - struct xfs_mount *mp, + struct xfs_inode *dp, struct xfs_buf *bp, xfs_dir2_db_t db) { struct xfs_dir3_icfree_hdr hdr; - xfs_dir3_free_hdr_from_disk(&hdr, bp->b_addr); + dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); - ASSERT((hdr.firstdb % xfs_dir3_free_max_bests(mp)) == 0); + ASSERT((hdr.firstdb % dp->d_ops->free_max_bests(dp->i_mount)) == 0); ASSERT(hdr.firstdb <= db); ASSERT(db < hdr.firstdb + hdr.nvalid); } #else -#define xfs_dir2_free_hdr_check(mp, dp, db) +#define xfs_dir2_free_hdr_check(dp, bp, db) #endif /* DEBUG */ /* @@ -530,6 +484,7 @@ xfs_dir2_free_hdr_check( */ xfs_dahash_t /* hash value */ xfs_dir2_leafn_lasthash( + struct xfs_inode *dp, struct xfs_buf *bp, /* leaf buffer */ int *count) /* count of entries in leaf */ { @@ -537,7 +492,7 @@ xfs_dir2_leafn_lasthash( struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -547,7 +502,7 @@ xfs_dir2_leafn_lasthash( if (!leafhdr.count) return 0; - ents = xfs_dir3_leaf_ents_p(leaf); + ents = dp->d_ops->leaf_ents_p(leaf); return be32_to_cpu(ents[leafhdr.count - 1].hashval); } @@ -584,10 +539,10 @@ xfs_dir2_leafn_lookup_for_addname( tp = args->trans; mp = dp->i_mount; leaf = bp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); - xfs_dir3_leaf_check(mp, bp); + xfs_dir3_leaf_check(dp, bp); ASSERT(leafhdr.count > 0); /* @@ -605,7 +560,7 @@ xfs_dir2_leafn_lookup_for_addname( ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); } - length = xfs_dir3_data_entsize(mp, args->namelen); + length = dp->d_ops->data_entsize(args->namelen); /* * Loop over leaf entries with the right hash value. */ @@ -637,7 +592,7 @@ xfs_dir2_leafn_lookup_for_addname( * Convert the data block to the free block * holding its freespace information. */ - newfdb = xfs_dir2_db_to_fdb(mp, newdb); + newfdb = dp->d_ops->db_to_fdb(mp, newdb); /* * If it's not the one we have in hand, read it in. */ @@ -655,16 +610,16 @@ xfs_dir2_leafn_lookup_for_addname( return error; free = curbp->b_addr; - xfs_dir2_free_hdr_check(mp, curbp, curdb); + xfs_dir2_free_hdr_check(dp, curbp, curdb); } /* * Get the index for our entry. */ - fi = xfs_dir2_db_to_fdindex(mp, curdb); + fi = dp->d_ops->db_to_fdindex(mp, curdb); /* * If it has room, return it. */ - bests = xfs_dir3_free_bests_p(mp, free); + bests = dp->d_ops->free_bests_p(free); if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) { XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); @@ -734,10 +689,10 @@ xfs_dir2_leafn_lookup_for_entry( tp = args->trans; mp = dp->i_mount; leaf = bp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); - xfs_dir3_leaf_check(mp, bp); + xfs_dir3_leaf_check(dp, bp); ASSERT(leafhdr.count > 0); /* @@ -816,7 +771,7 @@ xfs_dir2_leafn_lookup_for_entry( xfs_trans_brelse(tp, state->extrablk.bp); args->cmpresult = cmp; args->inumber = be64_to_cpu(dep->inumber); - args->filetype = xfs_dir3_dirent_get_ftype(mp, dep); + args->filetype = dp->d_ops->data_get_ftype(dep); *indexp = index; state->extravalid = 1; state->extrablk.bp = curbp; @@ -907,7 +862,7 @@ xfs_dir3_leafn_moveents( if (start_d < dhdr->count) { memmove(&dents[start_d + count], &dents[start_d], (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, bp_d, start_d + count, + xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, start_d + count, count + dhdr->count - 1); } /* @@ -929,7 +884,8 @@ xfs_dir3_leafn_moveents( */ memcpy(&dents[start_d], &sents[start_s], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1); + xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, + start_d, start_d + count - 1); /* * If there are source entries after the ones we copied, @@ -938,7 +894,8 @@ xfs_dir3_leafn_moveents( if (start_s + count < shdr->count) { memmove(&sents[start_s], &sents[start_s + count], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1); + xfs_dir3_leaf_log_ents(tp, args->dp, bp_s, + start_s, start_s + count - 1); } /* @@ -956,6 +913,7 @@ xfs_dir3_leafn_moveents( */ int /* sort order */ xfs_dir2_leafn_order( + struct xfs_inode *dp, struct xfs_buf *leaf1_bp, /* leaf1 buffer */ struct xfs_buf *leaf2_bp) /* leaf2 buffer */ { @@ -966,10 +924,10 @@ xfs_dir2_leafn_order( struct xfs_dir3_icleaf_hdr hdr1; struct xfs_dir3_icleaf_hdr hdr2; - xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1); - xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2); - ents1 = xfs_dir3_leaf_ents_p(leaf1); - ents2 = xfs_dir3_leaf_ents_p(leaf2); + dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); + dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); + ents1 = dp->d_ops->leaf_ents_p(leaf1); + ents2 = dp->d_ops->leaf_ents_p(leaf2); if (hdr1.count > 0 && hdr2.count > 0 && (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) || @@ -1007,12 +965,13 @@ xfs_dir2_leafn_rebalance( struct xfs_dir2_leaf_entry *ents2; struct xfs_dir3_icleaf_hdr hdr1; struct xfs_dir3_icleaf_hdr hdr2; + struct xfs_inode *dp = state->args->dp; args = state->args; /* * If the block order is wrong, swap the arguments. */ - if ((swap = xfs_dir2_leafn_order(blk1->bp, blk2->bp))) { + if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) { xfs_da_state_blk_t *tmp; /* temp for block swap */ tmp = blk1; @@ -1021,10 +980,10 @@ xfs_dir2_leafn_rebalance( } leaf1 = blk1->bp->b_addr; leaf2 = blk2->bp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1); - xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2); - ents1 = xfs_dir3_leaf_ents_p(leaf1); - ents2 = xfs_dir3_leaf_ents_p(leaf2); + dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); + dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); + ents1 = dp->d_ops->leaf_ents_p(leaf1); + ents2 = dp->d_ops->leaf_ents_p(leaf2); oldsum = hdr1.count + hdr2.count; #if defined(DEBUG) || defined(XFS_WARN) @@ -1070,13 +1029,13 @@ xfs_dir2_leafn_rebalance( ASSERT(hdr1.stale + hdr2.stale == oldstale); /* log the changes made when moving the entries */ - xfs_dir3_leaf_hdr_to_disk(leaf1, &hdr1); - xfs_dir3_leaf_hdr_to_disk(leaf2, &hdr2); - xfs_dir3_leaf_log_header(args->trans, blk1->bp); - xfs_dir3_leaf_log_header(args->trans, blk2->bp); + dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); + dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); + xfs_dir3_leaf_log_header(args->trans, dp, blk1->bp); + xfs_dir3_leaf_log_header(args->trans, dp, blk2->bp); - xfs_dir3_leaf_check(args->dp->i_mount, blk1->bp); - xfs_dir3_leaf_check(args->dp->i_mount, blk2->bp); + xfs_dir3_leaf_check(dp, blk1->bp); + xfs_dir3_leaf_check(dp, blk2->bp); /* * Mark whether we're inserting into the old or new leaf. @@ -1097,11 +1056,11 @@ xfs_dir2_leafn_rebalance( * Finally sanity check just to make sure we are not returning a * negative index */ - if(blk2->index < 0) { + if (blk2->index < 0) { state->inleaf = 1; blk2->index = 0; - xfs_alert(args->dp->i_mount, - "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n", + xfs_alert(dp->i_mount, + "%s: picked the wrong leaf? reverting original leaf: blk1->index %d", __func__, blk1->index); } } @@ -1120,17 +1079,17 @@ xfs_dir3_data_block_free( int logfree = 0; __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; + struct xfs_inode *dp = args->dp; - xfs_dir3_free_hdr_from_disk(&freehdr, free); - - bests = xfs_dir3_free_bests_p(tp->t_mountp, free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + bests = dp->d_ops->free_bests_p(free); if (hdr) { /* * Data block is not empty, just set the free entry to the new * value. */ bests[findex] = cpu_to_be16(longest); - xfs_dir2_free_log_bests(tp, fbp, findex, findex); + xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); return 0; } @@ -1157,8 +1116,8 @@ xfs_dir3_data_block_free( logfree = 1; } - xfs_dir3_free_hdr_to_disk(free, &freehdr); - xfs_dir2_free_log_header(tp, fbp); + dp->d_ops->free_hdr_to_disk(free, &freehdr); + xfs_dir2_free_log_header(tp, dp, fbp); /* * If there are no useful entries left in the block, get rid of the @@ -1182,7 +1141,7 @@ xfs_dir3_data_block_free( /* Log the free entry that changed, unless we got rid of it. */ if (logfree) - xfs_dir2_free_log_bests(tp, fbp, findex, findex); + xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); return 0; } @@ -1222,8 +1181,8 @@ xfs_dir2_leafn_remove( tp = args->trans; mp = dp->i_mount; leaf = bp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); /* * Point to the entry we're removing. @@ -1243,11 +1202,11 @@ xfs_dir2_leafn_remove( * Log the leaf block changes. */ leafhdr.stale++; - xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, bp); + dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, dp, bp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(tp, bp, index, index); + xfs_dir3_leaf_log_ents(tp, dp, bp, index, index); /* * Make the data entry free. Keep track of the longest freespace @@ -1256,19 +1215,19 @@ xfs_dir2_leafn_remove( dbp = dblk->bp; hdr = dbp->b_addr; dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); longest = be16_to_cpu(bf[0].length); needlog = needscan = 0; - xfs_dir2_data_make_free(tp, dbp, off, - xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); + xfs_dir2_data_make_free(tp, dp, dbp, off, + dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); /* * Rescan the data block freespaces for bestfree. * Log the data block header if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dbp); + xfs_dir2_data_log_header(tp, dp, dbp); xfs_dir3_data_check(dp, dbp); /* * If the longest data block freespace changes, need to update @@ -1285,7 +1244,7 @@ xfs_dir2_leafn_remove( * Convert the data block number to a free block, * read in the free block. */ - fdb = xfs_dir2_db_to_fdb(mp, db); + fdb = dp->d_ops->db_to_fdb(mp, db); error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb), &fbp); if (error) @@ -1294,22 +1253,22 @@ xfs_dir2_leafn_remove( #ifdef DEBUG { struct xfs_dir3_icfree_hdr freehdr; - xfs_dir3_free_hdr_from_disk(&freehdr, free); - ASSERT(freehdr.firstdb == xfs_dir3_free_max_bests(mp) * + dp->d_ops->free_hdr_from_disk(&freehdr, free); + ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(mp) * (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); } #endif /* * Calculate which entry we need to fix. */ - findex = xfs_dir2_db_to_fdindex(mp, db); + findex = dp->d_ops->db_to_fdindex(mp, db); longest = be16_to_cpu(bf[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ if (longest == mp->m_dirblksize - - xfs_dir3_data_entry_offset(hdr)) { + dp->d_ops->data_entry_offset) { /* * Try to punch out the data block. */ @@ -1336,12 +1295,12 @@ xfs_dir2_leafn_remove( return error; } - xfs_dir3_leaf_check(mp, bp); + xfs_dir3_leaf_check(dp, bp); /* * Return indication of whether this leaf block is empty enough * to justify trying to join it with a neighbor. */ - *rval = (xfs_dir3_leaf_hdr_size(leaf) + + *rval = (dp->d_ops->leaf_hdr_size + (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < mp->m_dir_magicpct; return 0; @@ -1360,13 +1319,14 @@ xfs_dir2_leafn_split( xfs_dablk_t blkno; /* new leaf block number */ int error; /* error return value */ xfs_mount_t *mp; /* filesystem mount point */ + struct xfs_inode *dp; /* * Allocate space for a new leaf node. */ args = state->args; - mp = args->dp->i_mount; - ASSERT(args != NULL); + dp = args->dp; + mp = dp->i_mount; ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); error = xfs_da_grow_inode(args, &blkno); if (error) { @@ -1401,10 +1361,10 @@ xfs_dir2_leafn_split( /* * Update last hashval in each block since we added the name. */ - oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL); - newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL); - xfs_dir3_leaf_check(mp, oldblk->bp); - xfs_dir3_leaf_check(mp, newblk->bp); + oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL); + newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL); + xfs_dir3_leaf_check(dp, oldblk->bp); + xfs_dir3_leaf_check(dp, newblk->bp); return error; } @@ -1434,6 +1394,7 @@ xfs_dir2_leafn_toosmall( int rval; /* result from path_shift */ struct xfs_dir3_icleaf_hdr leafhdr; struct xfs_dir2_leaf_entry *ents; + struct xfs_inode *dp = state->args->dp; /* * Check for the degenerate case of the block being over 50% full. @@ -1442,12 +1403,12 @@ xfs_dir2_leafn_toosmall( */ blk = &state->path.blk[state->path.active - 1]; leaf = blk->bp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); - xfs_dir3_leaf_check(state->args->dp->i_mount, blk->bp); + dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_check(dp, blk->bp); count = leafhdr.count - leafhdr.stale; - bytes = xfs_dir3_leaf_hdr_size(leaf) + count * sizeof(ents[0]); + bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); if (bytes > (state->blocksize >> 1)) { /* * Blk over 50%, don't try to join. @@ -1492,7 +1453,7 @@ xfs_dir2_leafn_toosmall( /* * Read the sibling leaf block. */ - error = xfs_dir3_leafn_read(state->args->trans, state->args->dp, + error = xfs_dir3_leafn_read(state->args->trans, dp, blkno, -1, &bp); if (error) return error; @@ -1504,8 +1465,8 @@ xfs_dir2_leafn_toosmall( bytes = state->blocksize - (state->blocksize >> 2); leaf = bp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf); - ents = xfs_dir3_leaf_ents_p(leaf); + dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); + ents = dp->d_ops->leaf_ents_p(leaf); count += hdr2.count - hdr2.stale; bytes -= count * sizeof(ents[0]); @@ -1559,6 +1520,7 @@ xfs_dir2_leafn_unbalance( struct xfs_dir3_icleaf_hdr drophdr; struct xfs_dir2_leaf_entry *sents; struct xfs_dir2_leaf_entry *dents; + struct xfs_inode *dp = state->args->dp; args = state->args; ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); @@ -1566,10 +1528,10 @@ xfs_dir2_leafn_unbalance( drop_leaf = drop_blk->bp->b_addr; save_leaf = save_blk->bp->b_addr; - xfs_dir3_leaf_hdr_from_disk(&savehdr, save_leaf); - xfs_dir3_leaf_hdr_from_disk(&drophdr, drop_leaf); - sents = xfs_dir3_leaf_ents_p(save_leaf); - dents = xfs_dir3_leaf_ents_p(drop_leaf); + dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf); + dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf); + sents = dp->d_ops->leaf_ents_p(save_leaf); + dents = dp->d_ops->leaf_ents_p(drop_leaf); /* * If there are any stale leaf entries, take this opportunity @@ -1584,7 +1546,7 @@ xfs_dir2_leafn_unbalance( * Move the entries from drop to the appropriate end of save. */ drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval); - if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp)) + if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp)) xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0, save_blk->bp, &savehdr, sents, 0, drophdr.count); @@ -1595,13 +1557,13 @@ xfs_dir2_leafn_unbalance( save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval); /* log the changes made when moving the entries */ - xfs_dir3_leaf_hdr_to_disk(save_leaf, &savehdr); - xfs_dir3_leaf_hdr_to_disk(drop_leaf, &drophdr); - xfs_dir3_leaf_log_header(args->trans, save_blk->bp); - xfs_dir3_leaf_log_header(args->trans, drop_blk->bp); + dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); + dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); + xfs_dir3_leaf_log_header(args->trans, dp, save_blk->bp); + xfs_dir3_leaf_log_header(args->trans, dp, drop_blk->bp); - xfs_dir3_leaf_check(args->dp->i_mount, save_blk->bp); - xfs_dir3_leaf_check(args->dp->i_mount, drop_blk->bp); + xfs_dir3_leaf_check(dp, save_blk->bp); + xfs_dir3_leaf_check(dp, drop_blk->bp); } /* @@ -1712,7 +1674,7 @@ xfs_dir2_node_addname_int( dp = args->dp; mp = dp->i_mount; tp = args->trans; - length = xfs_dir3_data_entsize(mp, args->namelen); + length = dp->d_ops->data_entsize(args->namelen); /* * If we came in with a freespace block that means that lookup * found an entry with our hash value. This is the freespace @@ -1726,8 +1688,8 @@ xfs_dir2_node_addname_int( ifbno = fblk->blkno; free = fbp->b_addr; findex = fblk->index; - bests = xfs_dir3_free_bests_p(mp, free); - xfs_dir3_free_hdr_from_disk(&freehdr, free); + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); /* * This means the free entry showed that the data block had @@ -1819,8 +1781,8 @@ xfs_dir2_node_addname_int( * and the freehdr are actually initialised if they are placed * there, so we have to do it here to avoid warnings. Blech. */ - bests = xfs_dir3_free_bests_p(mp, free); - xfs_dir3_free_hdr_from_disk(&freehdr, free); + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); if (be16_to_cpu(bests[findex]) != NULLDATAOFF && be16_to_cpu(bests[findex]) >= length) dbno = freehdr.firstdb + findex; @@ -1871,7 +1833,7 @@ xfs_dir2_node_addname_int( * Get the freespace block corresponding to the data block * that was just allocated. */ - fbno = xfs_dir2_db_to_fdb(mp, dbno); + fbno = dp->d_ops->db_to_fdb(mp, dbno); error = xfs_dir2_free_try_read(tp, dp, xfs_dir2_db_to_da(mp, fbno), &fbp); @@ -1888,12 +1850,12 @@ xfs_dir2_node_addname_int( if (error) return error; - if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { + if (unlikely(dp->d_ops->db_to_fdb(mp, dbno) != fbno)) { xfs_alert(mp, "%s: dir ino %llu needed freesp block %lld for\n" " data block %lld, got %lld ifbno %llu lastfbno %d", __func__, (unsigned long long)dp->i_ino, - (long long)xfs_dir2_db_to_fdb(mp, dbno), + (long long)dp->d_ops->db_to_fdb(mp, dbno), (long long)dbno, (long long)fbno, (unsigned long long)ifbno, lastfbno); if (fblk) { @@ -1918,30 +1880,30 @@ xfs_dir2_node_addname_int( if (error) return error; free = fbp->b_addr; - bests = xfs_dir3_free_bests_p(mp, free); - xfs_dir3_free_hdr_from_disk(&freehdr, free); + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); /* * Remember the first slot as our empty slot. */ freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * - xfs_dir3_free_max_bests(mp); + dp->d_ops->free_max_bests(mp); } else { free = fbp->b_addr; - bests = xfs_dir3_free_bests_p(mp, free); - xfs_dir3_free_hdr_from_disk(&freehdr, free); + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); } /* * Set the freespace block index from the data block number. */ - findex = xfs_dir2_db_to_fdindex(mp, dbno); + findex = dp->d_ops->db_to_fdindex(mp, dbno); /* * If it's after the end of the current entries in the * freespace block, extend that table. */ if (findex >= freehdr.nvalid) { - ASSERT(findex < xfs_dir3_free_max_bests(mp)); + ASSERT(findex < dp->d_ops->free_max_bests(mp)); freehdr.nvalid = findex + 1; /* * Tag new entry so nused will go up. @@ -1954,8 +1916,8 @@ xfs_dir2_node_addname_int( */ if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { freehdr.nused++; - xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_header(tp, fbp); + dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); + xfs_dir2_free_log_header(tp, dp, fbp); } /* * Update the real value in the table. @@ -1963,7 +1925,7 @@ xfs_dir2_node_addname_int( * change again. */ hdr = dbp->b_addr; - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); bests[findex] = bf[0].length; logfree = 1; } @@ -1985,7 +1947,7 @@ xfs_dir2_node_addname_int( if (error) return error; hdr = dbp->b_addr; - bf = xfs_dir3_data_bestfree_p(hdr); + bf = dp->d_ops->data_bestfree_p(hdr); logfree = 0; } ASSERT(be16_to_cpu(bf[0].length) >= length); @@ -1998,7 +1960,7 @@ xfs_dir2_node_addname_int( /* * Mark the first part of the unused space, inuse for us. */ - xfs_dir2_data_use_free(tp, dbp, dup, + xfs_dir2_data_use_free(tp, dp, dbp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* @@ -2008,24 +1970,24 @@ xfs_dir2_node_addname_int( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); - xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); - tagp = xfs_dir3_data_entry_tag_p(mp, dep); + dp->d_ops->data_put_ftype(dep, args->filetype); + tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dbp, dep); + xfs_dir2_data_log_entry(tp, dp, dbp, dep); /* * Rescan the block for bestfree if needed. */ if (needscan) - xfs_dir2_data_freescan(mp, hdr, &needlog); + xfs_dir2_data_freescan(dp, hdr, &needlog); /* * Log the data block header if needed. */ if (needlog) - xfs_dir2_data_log_header(tp, dbp); + xfs_dir2_data_log_header(tp, dp, dbp); /* * If the freespace entry is now wrong, update it. */ - bests = xfs_dir3_free_bests_p(mp, free); /* gcc is so stupid */ + bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */ if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) { bests[findex] = bf[0].length; logfree = 1; @@ -2034,7 +1996,7 @@ xfs_dir2_node_addname_int( * Log the freespace entry if needed. */ if (logfree) - xfs_dir2_free_log_bests(tp, fbp, findex, findex); + xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); /* * Return the data block and offset in args, then drop the data block. */ @@ -2212,7 +2174,7 @@ xfs_dir2_node_replace( blk = &state->path.blk[state->path.active - 1]; ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); leaf = blk->bp->b_addr; - ents = xfs_dir3_leaf_ents_p(leaf); + ents = args->dp->d_ops->leaf_ents_p(leaf); lep = &ents[blk->index]; ASSERT(state->extravalid); /* @@ -2229,8 +2191,9 @@ xfs_dir2_node_replace( * Fill in the new inode number and log the entry. */ dep->inumber = cpu_to_be64(inum); - xfs_dir3_dirent_put_ftype(state->mp, dep, args->filetype); - xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep); + args->dp->d_ops->data_put_ftype(dep, args->filetype); + xfs_dir2_data_log_entry(args->trans, args->dp, + state->extrablk.bp, dep); rval = 0; } /* @@ -2285,7 +2248,7 @@ xfs_dir2_node_trim_free( if (!bp) return 0; free = bp->b_addr; - xfs_dir3_free_hdr_from_disk(&freehdr, free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); /* * If there are used entries, there's nothing to do. diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 1bad84c40829..8b9d2281f85b 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h @@ -59,7 +59,8 @@ extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, extern struct xfs_dir2_data_free * xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, - struct xfs_dir2_data_unused *dup, int *loghead); + struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, + int *loghead); extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, struct xfs_buf **bpp); @@ -76,9 +77,9 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, struct xfs_buf **bpp, __uint16_t magic); -extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, - int first, int last); -extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, +extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf *bp, int first, int last); +extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_buf *bp); extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); @@ -93,21 +94,18 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, int lowstale, int highstale, int *lfloglow, int *lfloghigh); extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); -extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from); -extern void xfs_dir3_leaf_hdr_to_disk(struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from); -extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, +extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); /* xfs_dir2_node.c */ extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_buf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp, + struct xfs_buf *bp, int *count); extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp, struct xfs_da_args *args, int *indexp, struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp, +extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp, struct xfs_buf *leaf2_bp); extern int xfs_dir2_leafn_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 8f84153e98a8..c4e50c6ed584 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -18,23 +18,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_bmap.h" +#include "xfs_trans.h" +#include "xfs_dinode.h" /* * Directory file type support functions @@ -119,9 +119,9 @@ xfs_dir2_sf_getdents( * mp->m_dirdatablk. */ dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - xfs_dir3_data_dot_offset(mp)); + dp->d_ops->data_dot_offset); dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - xfs_dir3_data_dotdot_offset(mp)); + dp->d_ops->data_dotdot_offset); /* * Put . entry unless we're starting past it. @@ -136,7 +136,7 @@ xfs_dir2_sf_getdents( * Put .. entry unless we're starting past it. */ if (ctx->pos <= dotdot_offset) { - ino = xfs_dir2_sf_get_parent_ino(sfp); + ino = dp->d_ops->sf_get_parent_ino(sfp); ctx->pos = dotdot_offset & 0x7fffffff; if (!dir_emit(ctx, "..", 2, ino, DT_DIR)) return 0; @@ -153,17 +153,17 @@ xfs_dir2_sf_getdents( xfs_dir2_sf_get_offset(sfep)); if (ctx->pos > off) { - sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); + sfep = dp->d_ops->sf_nextentry(sfp, sfep); continue; } - ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); - filetype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep); + ino = dp->d_ops->sf_get_ino(sfp, sfep); + filetype = dp->d_ops->sf_get_ftype(sfep); ctx->pos = off & 0x7fffffff; if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino, xfs_dir3_get_dtype(mp, filetype))) return 0; - sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); + sfep = dp->d_ops->sf_nextentry(sfp, sfep); } ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & @@ -213,7 +213,7 @@ xfs_dir2_block_getdents( * Set up values for the loop. */ btp = xfs_dir2_block_tail_p(mp, hdr); - ptr = (char *)xfs_dir3_data_entry_p(hdr); + ptr = (char *)dp->d_ops->data_entry_p(hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); /* @@ -237,7 +237,7 @@ xfs_dir2_block_getdents( /* * Bump pointer for the next iteration. */ - ptr += xfs_dir3_data_entsize(mp, dep->namelen); + ptr += dp->d_ops->data_entsize(dep->namelen); /* * The entry is before the desired starting point, skip it. */ @@ -248,7 +248,7 @@ xfs_dir2_block_getdents( (char *)dep - (char *)hdr); ctx->pos = cook & 0x7fffffff; - filetype = xfs_dir3_dirent_get_ftype(mp, dep); + filetype = dp->d_ops->data_get_ftype(dep); /* * If it didn't fit, set the final offset to here & return. */ @@ -578,13 +578,13 @@ xfs_dir2_leaf_getdents( /* * Find our position in the block. */ - ptr = (char *)xfs_dir3_data_entry_p(hdr); + ptr = (char *)dp->d_ops->data_entry_p(hdr); byteoff = xfs_dir2_byte_to_off(mp, curoff); /* * Skip past the header. */ if (byteoff == 0) - curoff += xfs_dir3_data_entry_offset(hdr); + curoff += dp->d_ops->data_entry_offset; /* * Skip past entries until we reach our offset. */ @@ -601,7 +601,7 @@ xfs_dir2_leaf_getdents( } dep = (xfs_dir2_data_entry_t *)ptr; length = - xfs_dir3_data_entsize(mp, dep->namelen); + dp->d_ops->data_entsize(dep->namelen); ptr += length; } /* @@ -632,8 +632,8 @@ xfs_dir2_leaf_getdents( } dep = (xfs_dir2_data_entry_t *)ptr; - length = xfs_dir3_data_entsize(mp, dep->namelen); - filetype = xfs_dir3_dirent_get_ftype(mp, dep); + length = dp->d_ops->data_entsize(dep->namelen); + filetype = dp->d_ops->data_get_ftype(dep); ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; if (!dir_emit(ctx, (char *)dep->name, dep->namelen, diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 3ef6d402084c..aafc6e46cb58 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -17,22 +17,22 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_error.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_trace.h" +#include "xfs_dinode.h" /* * Prototypes for internal functions. @@ -57,89 +57,6 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args); #endif /* XFS_BIG_INUMS */ /* - * Inode numbers in short-form directories can come in two versions, - * either 4 bytes or 8 bytes wide. These helpers deal with the - * two forms transparently by looking at the headers i8count field. - * - * For 64-bit inode number the most significant byte must be zero. - */ -static xfs_ino_t -xfs_dir2_sf_get_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *from) -{ - if (hdr->i8count) - return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; - else - return get_unaligned_be32(&from->i4.i); -} - -static void -xfs_dir2_sf_put_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *to, - xfs_ino_t ino) -{ - ASSERT((ino & 0xff00000000000000ULL) == 0); - - if (hdr->i8count) - put_unaligned_be64(ino, &to->i8.i); - else - put_unaligned_be32(ino, &to->i4.i); -} - -xfs_ino_t -xfs_dir2_sf_get_parent_ino( - struct xfs_dir2_sf_hdr *hdr) -{ - return xfs_dir2_sf_get_ino(hdr, &hdr->parent); -} - -void -xfs_dir2_sf_put_parent_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); -} - -/* - * In short-form directory entries the inode numbers are stored at variable - * offset behind the entry name. If the entry stores a filetype value, then it - * sits between the name and the inode number. Hence the inode numbers may only - * be accessed through the helpers below. - */ -static xfs_dir2_inou_t * -xfs_dir3_sfe_inop( - struct xfs_mount *mp, - struct xfs_dir2_sf_entry *sfep) -{ - __uint8_t *ptr = &sfep->name[sfep->namelen]; - if (xfs_sb_version_hasftype(&mp->m_sb)) - ptr++; - return (xfs_dir2_inou_t *)ptr; -} - -xfs_ino_t -xfs_dir3_sfe_get_ino( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, xfs_dir3_sfe_inop(mp, sfep)); -} - -void -xfs_dir3_sfe_put_ino( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, xfs_dir3_sfe_inop(mp, sfep), ino); -} - -/* * Given a block directory (dp/block), calculate its size as a shortform (sf) * directory and a header for the sf directory, if it will fit it the * space currently present in the inode. If it won't fit, the output @@ -226,7 +143,7 @@ xfs_dir2_block_sfsize( */ sfhp->count = count; sfhp->i8count = i8count; - xfs_dir2_sf_put_parent_ino(sfhp, parent); + dp->d_ops->sf_put_parent_ino(sfhp, parent); return size; } @@ -293,7 +210,7 @@ xfs_dir2_block_to_sf( * Set up to loop over the block's entries. */ btp = xfs_dir2_block_tail_p(mp, hdr); - ptr = (char *)xfs_dir3_data_entry_p(hdr); + ptr = (char *)dp->d_ops->data_entry_p(hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); sfep = xfs_dir2_sf_firstentry(sfp); /* @@ -321,7 +238,7 @@ xfs_dir2_block_to_sf( else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') ASSERT(be64_to_cpu(dep->inumber) == - xfs_dir2_sf_get_parent_ino(sfp)); + dp->d_ops->sf_get_parent_ino(sfp)); /* * Normal entry, copy it into shortform. */ @@ -331,14 +248,14 @@ xfs_dir2_block_to_sf( (xfs_dir2_data_aoff_t) ((char *)dep - (char *)hdr)); memcpy(sfep->name, dep->name, dep->namelen); - xfs_dir3_sfe_put_ino(mp, sfp, sfep, - be64_to_cpu(dep->inumber)); - xfs_dir3_sfe_put_ftype(mp, sfp, sfep, - xfs_dir3_dirent_get_ftype(mp, dep)); + dp->d_ops->sf_put_ino(sfp, sfep, + be64_to_cpu(dep->inumber)); + dp->d_ops->sf_put_ftype(sfep, + dp->d_ops->data_get_ftype(dep)); - sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); + sfep = dp->d_ops->sf_nextentry(sfp, sfep); } - ptr += xfs_dir3_data_entsize(mp, dep->namelen); + ptr += dp->d_ops->data_entsize(dep->namelen); } ASSERT((char *)sfep - (char *)sfp == size); xfs_dir2_sf_check(args); @@ -389,7 +306,7 @@ xfs_dir2_sf_addname( /* * Compute entry (and change in) size. */ - add_entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen); + add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen); incr_isize = add_entsize; objchange = 0; #if XFS_BIG_INUMS @@ -483,8 +400,7 @@ xfs_dir2_sf_addname_easy( /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, - xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen), + xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. @@ -497,8 +413,8 @@ xfs_dir2_sf_addname_easy( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, args->inumber); - xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, args->filetype); + dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); + dp->d_ops->sf_put_ftype(sfep, args->filetype); /* * Update the header and inode. @@ -557,13 +473,13 @@ xfs_dir2_sf_addname_hard( * to insert the new entry. * If it's going to end up at the end then oldsfep will point there. */ - for (offset = xfs_dir3_data_first_offset(mp), + for (offset = dp->d_ops->data_first_offset, oldsfep = xfs_dir2_sf_firstentry(oldsfp), - add_datasize = xfs_dir3_data_entsize(mp, args->namelen), + add_datasize = dp->d_ops->data_entsize(args->namelen), eof = (char *)oldsfep == &buf[old_isize]; !eof; - offset = new_offset + xfs_dir3_data_entsize(mp, oldsfep->namelen), - oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep), + offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen), + oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep), eof = (char *)oldsfep == &buf[old_isize]) { new_offset = xfs_dir2_sf_get_offset(oldsfep); if (offset + add_datasize <= new_offset) @@ -592,8 +508,8 @@ xfs_dir2_sf_addname_hard( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - xfs_dir3_sfe_put_ino(mp, sfp, sfep, args->inumber); - xfs_dir3_sfe_put_ftype(mp, sfp, sfep, args->filetype); + dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); + dp->d_ops->sf_put_ftype(sfep, args->filetype); sfp->count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) @@ -603,7 +519,7 @@ xfs_dir2_sf_addname_hard( * If there's more left to copy, do that. */ if (!eof) { - sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); + sfep = dp->d_ops->sf_nextentry(sfp, sfep); memcpy(sfep, oldsfep, old_isize - nbytes); } kmem_free(buf); @@ -639,8 +555,8 @@ xfs_dir2_sf_addname_pick( mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - size = xfs_dir3_data_entsize(mp, args->namelen); - offset = xfs_dir3_data_first_offset(mp); + size = dp->d_ops->data_entsize(args->namelen); + offset = dp->d_ops->data_first_offset; sfep = xfs_dir2_sf_firstentry(sfp); holefit = 0; /* @@ -652,8 +568,8 @@ xfs_dir2_sf_addname_pick( if (!holefit) holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); offset = xfs_dir2_sf_get_offset(sfep) + - xfs_dir3_data_entsize(mp, sfep->namelen); - sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); + dp->d_ops->data_entsize(sfep->namelen); + sfep = dp->d_ops->sf_nextentry(sfp, sfep); } /* * Calculate data bytes used excluding the new entry, if this @@ -713,21 +629,20 @@ xfs_dir2_sf_check( mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - offset = xfs_dir3_data_first_offset(mp); - ino = xfs_dir2_sf_get_parent_ino(sfp); + offset = dp->d_ops->data_first_offset; + ino = dp->d_ops->sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep)) { + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); - ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); + ino = dp->d_ops->sf_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; offset = xfs_dir2_sf_get_offset(sfep) + - xfs_dir3_data_entsize(mp, sfep->namelen); - ASSERT(xfs_dir3_sfe_get_ftype(mp, sfp, sfep) < - XFS_DIR3_FT_MAX); + dp->d_ops->data_entsize(sfep->namelen); + ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); } ASSERT(i8count == sfp->i8count); ASSERT(XFS_BIG_INUMS || i8count == 0); @@ -783,7 +698,7 @@ xfs_dir2_sf_create( /* * Now can put in the inode number, since i8count is set. */ - xfs_dir2_sf_put_parent_ino(sfp, pino); + dp->d_ops->sf_put_parent_ino(sfp, pino); sfp->count = 0; dp->i_d.di_size = size; xfs_dir2_sf_check(args); @@ -838,7 +753,7 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - args->inumber = xfs_dir2_sf_get_parent_ino(sfp); + args->inumber = dp->d_ops->sf_get_parent_ino(sfp); args->cmpresult = XFS_CMP_EXACT; args->filetype = XFS_DIR3_FT_DIR; return XFS_ERROR(EEXIST); @@ -848,7 +763,7 @@ xfs_dir2_sf_lookup( */ ci_sfep = NULL; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { /* * Compare name and if it's an exact match, return the inode * number. If it's the first case-insensitive match, store the @@ -858,10 +773,8 @@ xfs_dir2_sf_lookup( sfep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; - args->inumber = xfs_dir3_sfe_get_ino(dp->i_mount, - sfp, sfep); - args->filetype = xfs_dir3_sfe_get_ftype(dp->i_mount, - sfp, sfep); + args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); + args->filetype = dp->d_ops->sf_get_ftype(sfep); if (cmp == XFS_CMP_EXACT) return XFS_ERROR(EEXIST); ci_sfep = sfep; @@ -917,10 +830,10 @@ xfs_dir2_sf_removename( * Find the one we're deleting. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { - ASSERT(xfs_dir3_sfe_get_ino(dp->i_mount, sfp, sfep) == + ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) == args->inumber); break; } @@ -934,7 +847,7 @@ xfs_dir2_sf_removename( * Calculate sizes. */ byteoff = (int)((char *)sfep - (char *)sfp); - entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen); + entsize = dp->d_ops->sf_entsize(sfp, args->namelen); newsize = oldsize - entsize; /* * Copy the part if any after the removed entry, sliding it down. @@ -1041,28 +954,25 @@ xfs_dir2_sf_replace( if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir2_sf_get_parent_ino(sfp); + ino = dp->d_ops->sf_get_parent_ino(sfp); ASSERT(args->inumber != ino); #endif - xfs_dir2_sf_put_parent_ino(sfp, args->inumber); + dp->d_ops->sf_put_parent_ino(sfp, args->inumber); } /* * Normal entry, look for the name. */ else { for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { #if XFS_BIG_INUMS || defined(DEBUG) - ino = xfs_dir3_sfe_get_ino(dp->i_mount, - sfp, sfep); + ino = dp->d_ops->sf_get_ino(sfp, sfep); ASSERT(args->inumber != ino); #endif - xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, - args->inumber); - xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, - args->filetype); + dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); + dp->d_ops->sf_put_ftype(sfep, args->filetype); break; } } @@ -1165,22 +1075,21 @@ xfs_dir2_sf_toino4( */ sfp->count = oldsfp->count; sfp->i8count = 0; - xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); + dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); i < sfp->count; - i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep), - oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) { + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), + oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - xfs_dir3_sfe_put_ino(mp, sfp, sfep, - xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep)); - xfs_dir3_sfe_put_ftype(mp, sfp, sfep, - xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep)); + dp->d_ops->sf_put_ino(sfp, sfep, + dp->d_ops->sf_get_ino(oldsfp, oldsfep)); + dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); } /* * Clean up the inode. @@ -1244,22 +1153,21 @@ xfs_dir2_sf_toino8( */ sfp->count = oldsfp->count; sfp->i8count = 1; - xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); + dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); i < sfp->count; - i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep), - oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) { + i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), + oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - xfs_dir3_sfe_put_ino(mp, sfp, sfep, - xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep)); - xfs_dir3_sfe_put_ftype(mp, sfp, sfep, - xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep)); + dp->d_ops->sf_put_ino(sfp, sfep, + dp->d_ops->sf_get_ino(oldsfp, oldsfep)); + dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); } /* * Clean up the inode. diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 45560ee1a4ba..8367d6dc18c9 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -17,22 +17,21 @@ */ #include "xfs.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_quota.h" -#include "xfs_alloc_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" #include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_extent_busy.h" #include "xfs_discard.h" #include "xfs_trace.h" +#include "xfs_log.h" STATIC int xfs_trim_extents( diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 1ee776d477c3..6b1e695caf0e 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -18,28 +18,28 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" #include "xfs_cksum.h" #include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_bmap_btree.h" /* * Lock order: @@ -292,118 +292,6 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; } -STATIC bool -xfs_dquot_buf_verify_crc( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; - int ndquots; - int i; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return true; - - /* - * if we are in log recovery, the quota subsystem has not been - * initialised so we have no quotainfo structure. In that case, we need - * to manually calculate the number of dquots in the buffer. - */ - if (mp->m_quotainfo) - ndquots = mp->m_quotainfo->qi_dqperchunk; - else - ndquots = xfs_qm_calc_dquots_per_chunk(mp, - XFS_BB_TO_FSB(mp, bp->b_length)); - - for (i = 0; i < ndquots; i++, d++) { - if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), - XFS_DQUOT_CRC_OFF)) - return false; - if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) - return false; - } - return true; -} - -STATIC bool -xfs_dquot_buf_verify( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; - xfs_dqid_t id = 0; - int ndquots; - int i; - - /* - * if we are in log recovery, the quota subsystem has not been - * initialised so we have no quotainfo structure. In that case, we need - * to manually calculate the number of dquots in the buffer. - */ - if (mp->m_quotainfo) - ndquots = mp->m_quotainfo->qi_dqperchunk; - else - ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length); - - /* - * On the first read of the buffer, verify that each dquot is valid. - * We don't know what the id of the dquot is supposed to be, just that - * they should be increasing monotonically within the buffer. If the - * first id is corrupt, then it will fail on the second dquot in the - * buffer so corruptions could point to the wrong dquot in this case. - */ - for (i = 0; i < ndquots; i++) { - struct xfs_disk_dquot *ddq; - int error; - - ddq = &d[i].dd_diskdq; - - if (i == 0) - id = be32_to_cpu(ddq->d_id); - - error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, - "xfs_dquot_buf_verify"); - if (error) - return false; - } - return true; -} - -static void -xfs_dquot_buf_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } -} - -/* - * we don't calculate the CRC here as that is done when the dquot is flushed to - * the buffer after the update is done. This ensures that the dquot in the - * buffer always has an up-to-date CRC value. - */ -void -xfs_dquot_buf_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (!xfs_dquot_buf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - return; - } -} - -const struct xfs_buf_ops xfs_dquot_buf_ops = { - .verify_read = xfs_dquot_buf_read_verify, - .verify_write = xfs_dquot_buf_write_verify, -}; - /* * Allocate a block and fill it with dquots. * This is called when the bmapi finds a hole. @@ -514,6 +402,7 @@ xfs_qm_dqalloc( return (error); } + STATIC int xfs_qm_dqrepair( struct xfs_mount *mp, @@ -547,7 +436,7 @@ xfs_qm_dqrepair( /* Do the actual repair of dquots in this buffer */ for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { ddq = &d[i].dd_diskdq; - error = xfs_qm_dqcheck(mp, ddq, firstid + i, + error = xfs_dqcheck(mp, ddq, firstid + i, dqp->dq_flags & XFS_DQ_ALLTYPES, XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); if (error) { @@ -1133,7 +1022,7 @@ xfs_qm_dqflush( /* * A simple sanity check in case we got a corrupted dquot.. */ - error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, + error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)"); if (error) { xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 55abbca2883d..d22ed0053c32 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -172,6 +172,4 @@ static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) return dqp; } -extern const struct xfs_buf_ops xfs_dquot_buf_ops; - #endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c new file mode 100644 index 000000000000..d401457d2f25 --- /dev/null +++ b/fs/xfs/xfs_dquot_buf.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_qm.h" +#include "xfs_error.h" +#include "xfs_cksum.h" +#include "xfs_trace.h" + +int +xfs_calc_dquots_per_chunk( + struct xfs_mount *mp, + unsigned int nbblks) /* basic block units */ +{ + unsigned int ndquots; + + ASSERT(nbblks > 0); + ndquots = BBTOB(nbblks); + do_div(ndquots, sizeof(xfs_dqblk_t)); + + return ndquots; +} + +/* + * Do some primitive error checking on ondisk dquot data structures. + */ +int +xfs_dqcheck( + struct xfs_mount *mp, + xfs_disk_dquot_t *ddq, + xfs_dqid_t id, + uint type, /* used only when IO_dorepair is true */ + uint flags, + char *str) +{ + xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; + int errs = 0; + + /* + * We can encounter an uninitialized dquot buffer for 2 reasons: + * 1. If we crash while deleting the quotainode(s), and those blks got + * used for user data. This is because we take the path of regular + * file deletion; however, the size field of quotainodes is never + * updated, so all the tricks that we play in itruncate_finish + * don't quite matter. + * + * 2. We don't play the quota buffers when there's a quotaoff logitem. + * But the allocation will be replayed so we'll end up with an + * uninitialized quota block. + * + * This is all fine; things are still consistent, and we haven't lost + * any quota information. Just don't complain about bad dquot blks. + */ + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", + str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); + errs++; + } + if (ddq->d_version != XFS_DQUOT_VERSION) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", + str, id, ddq->d_version, XFS_DQUOT_VERSION); + errs++; + } + + if (ddq->d_flags != XFS_DQ_USER && + ddq->d_flags != XFS_DQ_PROJ && + ddq->d_flags != XFS_DQ_GROUP) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, unknown flags 0x%x", + str, id, ddq->d_flags); + errs++; + } + + if (id != -1 && id != be32_to_cpu(ddq->d_id)) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : ondisk-dquot 0x%p, ID mismatch: " + "0x%x expected, found id 0x%x", + str, ddq, id, be32_to_cpu(ddq->d_id)); + errs++; + } + + if (!errs && ddq->d_id) { + if (ddq->d_blk_softlimit && + be64_to_cpu(ddq->d_bcount) > + be64_to_cpu(ddq->d_blk_softlimit)) { + if (!ddq->d_btimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + if (ddq->d_ino_softlimit && + be64_to_cpu(ddq->d_icount) > + be64_to_cpu(ddq->d_ino_softlimit)) { + if (!ddq->d_itimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + if (ddq->d_rtb_softlimit && + be64_to_cpu(ddq->d_rtbcount) > + be64_to_cpu(ddq->d_rtb_softlimit)) { + if (!ddq->d_rtbtimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + } + + if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) + return errs; + + if (flags & XFS_QMOPT_DOWARN) + xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); + + /* + * Typically, a repair is only requested by quotacheck. + */ + ASSERT(id != -1); + ASSERT(flags & XFS_QMOPT_DQREPAIR); + memset(d, 0, sizeof(xfs_dqblk_t)); + + d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); + d->dd_diskdq.d_version = XFS_DQUOT_VERSION; + d->dd_diskdq.d_flags = type; + d->dd_diskdq.d_id = cpu_to_be32(id); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); + xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } + + return errs; +} + +STATIC bool +xfs_dquot_buf_verify_crc( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + int ndquots; + int i; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return true; + + /* + * if we are in log recovery, the quota subsystem has not been + * initialised so we have no quotainfo structure. In that case, we need + * to manually calculate the number of dquots in the buffer. + */ + if (mp->m_quotainfo) + ndquots = mp->m_quotainfo->qi_dqperchunk; + else + ndquots = xfs_calc_dquots_per_chunk(mp, + XFS_BB_TO_FSB(mp, bp->b_length)); + + for (i = 0; i < ndquots; i++, d++) { + if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF)) + return false; + if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) + return false; + } + return true; +} + +STATIC bool +xfs_dquot_buf_verify( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + xfs_dqid_t id = 0; + int ndquots; + int i; + + /* + * if we are in log recovery, the quota subsystem has not been + * initialised so we have no quotainfo structure. In that case, we need + * to manually calculate the number of dquots in the buffer. + */ + if (mp->m_quotainfo) + ndquots = mp->m_quotainfo->qi_dqperchunk; + else + ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length); + + /* + * On the first read of the buffer, verify that each dquot is valid. + * We don't know what the id of the dquot is supposed to be, just that + * they should be increasing monotonically within the buffer. If the + * first id is corrupt, then it will fail on the second dquot in the + * buffer so corruptions could point to the wrong dquot in this case. + */ + for (i = 0; i < ndquots; i++) { + struct xfs_disk_dquot *ddq; + int error; + + ddq = &d[i].dd_diskdq; + + if (i == 0) + id = be32_to_cpu(ddq->d_id); + + error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, + "xfs_dquot_buf_verify"); + if (error) + return false; + } + return true; +} + +static void +xfs_dquot_buf_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +/* + * we don't calculate the CRC here as that is done when the dquot is flushed to + * the buffer after the update is done. This ensures that the dquot in the + * buffer always has an up-to-date CRC value. + */ +static void +xfs_dquot_buf_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify(mp, bp)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + return; + } +} + +const struct xfs_buf_ops xfs_dquot_buf_ops = { + .verify_read = xfs_dquot_buf_read_verify, + .verify_write = xfs_dquot_buf_write_verify, +}; + diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index e838d84b4e85..92e5f62eefc6 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -18,23 +18,19 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_quota.h" #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" +#include "xfs_log.h" static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) { diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 1123d93ff795..9995b807d627 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -16,16 +16,13 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" +#include "xfs_format.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" #include "xfs_error.h" #ifdef DEBUG @@ -159,7 +156,7 @@ xfs_error_report( { if (level <= xfs_error_level) { xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, - "Internal error %s at line %d of file %s. Caller 0x%p\n", + "Internal error %s at line %d of file %s. Caller 0x%p", tag, linenum, filename, ra); xfs_stack_trace(); diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 066df425c14f..1399e187d425 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -16,21 +16,21 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" +#include "xfs_da_format.h" #include "xfs_dir2.h" #include "xfs_export.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_log.h" /* * Note that we only accept fileids which are long enough rather than allow diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index e43708e2f080..fd22f69049d4 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -19,17 +19,18 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_alloc.h" -#include "xfs_inode.h" #include "xfs_extent_busy.h" #include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_log.h" void xfs_extent_busy_insert( diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index 985412d65ba5..bfff284d2dcc 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -20,6 +20,10 @@ #ifndef __XFS_EXTENT_BUSY_H__ #define __XFS_EXTENT_BUSY_H__ +struct xfs_mount; +struct xfs_trans; +struct xfs_alloc_arg; + /* * Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that * have been freed but whose transactions aren't committed to disk yet. diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index dc53e8febbbe..3680d04f973f 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -17,14 +17,14 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_trans.h" #include "xfs_trans_priv.h" +#include "xfs_buf_item.h" #include "xfs_extfree_item.h" diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 4c749ab543d0..52c91e143725 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -17,25 +17,27 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_trans.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_error.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_ioctl.h" #include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_dinode.h" #include <linux/aio.h> #include <linux/dcache.h> @@ -805,44 +807,64 @@ out: STATIC long xfs_file_fallocate( - struct file *file, - int mode, - loff_t offset, - loff_t len) + struct file *file, + int mode, + loff_t offset, + loff_t len) { - struct inode *inode = file_inode(file); - long error; - loff_t new_size = 0; - xfs_flock64_t bf; - xfs_inode_t *ip = XFS_I(inode); - int cmd = XFS_IOC_RESVSP; - int attr_flags = XFS_ATTR_NOLOCK; + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_trans *tp; + long error; + loff_t new_size = 0; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; - bf.l_whence = 0; - bf.l_start = offset; - bf.l_len = len; - xfs_ilock(ip, XFS_IOLOCK_EXCL); + if (mode & FALLOC_FL_PUNCH_HOLE) { + error = xfs_free_file_space(ip, offset, len); + if (error) + goto out_unlock; + } else { + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = -inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } - if (mode & FALLOC_FL_PUNCH_HOLE) - cmd = XFS_IOC_UNRESVSP; - - /* check the new inode size is valid before allocating */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { - new_size = offset + len; - error = inode_newsize_ok(inode, new_size); + error = xfs_alloc_file_space(ip, offset, len, + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } - if (file->f_flags & O_DSYNC) - attr_flags |= XFS_ATTR_SYNC; + tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); + error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + goto out_unlock; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + ip->i_d.di_mode &= ~S_ISUID; + if (ip->i_d.di_mode & S_IXGRP) + ip->i_d.di_mode &= ~S_ISGID; + + if (!(mode & FALLOC_FL_PUNCH_HOLE)) + ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; - error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + if (file->f_flags & O_DSYNC) + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); if (error) goto out_unlock; @@ -852,12 +874,12 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); + error = xfs_setattr_size(ip, &iattr); } out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; + return -error; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index ce78e654d37b..12b6e7701985 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -16,19 +16,19 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_log.h" -#include "xfs_bmap_btree.h" -#include "xfs_inum.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_ag.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_inum.h" +#include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_alloc.h" #include "xfs_mru_cache.h" +#include "xfs_dinode.h" #include "xfs_filestream.h" #include "xfs_trace.h" diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h index 35c08ff54ca0..b6ab5a3cfa12 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/xfs_format.h @@ -156,14 +156,259 @@ struct xfs_dsymlink_hdr { ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ sizeof(struct xfs_dsymlink_hdr) : 0)) -int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); -int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, - uint32_t size, struct xfs_buf *bp); -bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, - uint32_t size, struct xfs_buf *bp); -void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, - struct xfs_inode *ip, struct xfs_ifork *ifp); - -extern const struct xfs_buf_ops xfs_symlink_buf_ops; + +/* + * Allocation Btree format definitions + * + * There are two on-disk btrees, one sorted by blockno and one sorted + * by blockcount and blockno. All blocks look the same to make the code + * simpler; if we have time later, we'll make the optimizations. + */ +#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */ +#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */ +#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */ +#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */ + +/* + * Data record/key structure + */ +typedef struct xfs_alloc_rec { + __be32 ar_startblock; /* starting block number */ + __be32 ar_blockcount; /* count of free blocks */ +} xfs_alloc_rec_t, xfs_alloc_key_t; + +typedef struct xfs_alloc_rec_incore { + xfs_agblock_t ar_startblock; /* starting block number */ + xfs_extlen_t ar_blockcount; /* count of free blocks */ +} xfs_alloc_rec_incore_t; + +/* btree pointer type */ +typedef __be32 xfs_alloc_ptr_t; + +/* + * Block numbers in the AG: + * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. + */ +#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) +#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) + + +/* + * Inode Allocation Btree format definitions + * + * There is a btree for the inode map per allocation group. + */ +#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ +#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ + +typedef __uint64_t xfs_inofree_t; +#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) +#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) +#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) +#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) + +static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) +{ + return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; +} + +/* + * Data record structure + */ +typedef struct xfs_inobt_rec { + __be32 ir_startino; /* starting inode number */ + __be32 ir_freecount; /* count of free inodes (set bits) */ + __be64 ir_free; /* free inode mask */ +} xfs_inobt_rec_t; + +typedef struct xfs_inobt_rec_incore { + xfs_agino_t ir_startino; /* starting inode number */ + __int32_t ir_freecount; /* count of free inodes (set bits) */ + xfs_inofree_t ir_free; /* free inode mask */ +} xfs_inobt_rec_incore_t; + + +/* + * Key structure + */ +typedef struct xfs_inobt_key { + __be32 ir_startino; /* starting inode number */ +} xfs_inobt_key_t; + +/* btree pointer type */ +typedef __be32 xfs_inobt_ptr_t; + +/* + * block numbers in the AG. + */ +#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) +#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) + + + +/* + * BMAP Btree format definitions + * + * This includes both the root block definition that sits inside an inode fork + * and the record/pointer formats for the leaf/node in the blocks. + */ +#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ +#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */ + +/* + * Bmap root header, on-disk form only. + */ +typedef struct xfs_bmdr_block { + __be16 bb_level; /* 0 is a leaf */ + __be16 bb_numrecs; /* current # of data records */ +} xfs_bmdr_block_t; + +/* + * Bmap btree record and extent descriptor. + * l0:63 is an extent flag (value 1 indicates non-normal). + * l0:9-62 are startoff. + * l0:0-8 and l1:21-63 are startblock. + * l1:0-20 are blockcount. + */ +#define BMBT_EXNTFLAG_BITLEN 1 +#define BMBT_STARTOFF_BITLEN 54 +#define BMBT_STARTBLOCK_BITLEN 52 +#define BMBT_BLOCKCOUNT_BITLEN 21 + +typedef struct xfs_bmbt_rec { + __be64 l0, l1; +} xfs_bmbt_rec_t; + +typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ +typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; + +typedef struct xfs_bmbt_rec_host { + __uint64_t l0, l1; +} xfs_bmbt_rec_host_t; + +/* + * Values and macros for delayed-allocation startblock fields. + */ +#define STARTBLOCKVALBITS 17 +#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20) +#define DSTARTBLOCKMASKBITS (15 + 20) +#define STARTBLOCKMASK \ + (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) +#define DSTARTBLOCKMASK \ + (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) + +static inline int isnullstartblock(xfs_fsblock_t x) +{ + return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; +} + +static inline int isnulldstartblock(xfs_dfsbno_t x) +{ + return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; +} + +static inline xfs_fsblock_t nullstartblock(int k) +{ + ASSERT(k < (1 << STARTBLOCKVALBITS)); + return STARTBLOCKMASK | (k); +} + +static inline xfs_filblks_t startblockval(xfs_fsblock_t x) +{ + return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); +} + +/* + * Possible extent formats. + */ +typedef enum { + XFS_EXTFMT_NOSTATE = 0, + XFS_EXTFMT_HASSTATE +} xfs_exntfmt_t; + +/* + * Possible extent states. + */ +typedef enum { + XFS_EXT_NORM, XFS_EXT_UNWRITTEN, + XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID +} xfs_exntst_t; + +/* + * Incore version of above. + */ +typedef struct xfs_bmbt_irec +{ + xfs_fileoff_t br_startoff; /* starting file offset */ + xfs_fsblock_t br_startblock; /* starting block number */ + xfs_filblks_t br_blockcount; /* number of blocks */ + xfs_exntst_t br_state; /* extent state */ +} xfs_bmbt_irec_t; + +/* + * Key structure for non-leaf levels of the tree. + */ +typedef struct xfs_bmbt_key { + __be64 br_startoff; /* starting file offset */ +} xfs_bmbt_key_t, xfs_bmdr_key_t; + +/* btree pointer type */ +typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; + + +/* + * Generic Btree block format definitions + * + * This is a combination of the actual format used on disk for short and long + * format btrees. The first three fields are shared by both format, but the + * pointers are different and should be used with care. + * + * To get the size of the actual short or long form headers please use the size + * macros below. Never use sizeof(xfs_btree_block). + * + * The blkno, crc, lsn, owner and uuid fields are only available in filesystems + * with the crc feature bit, and all accesses to them must be conditional on + * that flag. + */ +struct xfs_btree_block { + __be32 bb_magic; /* magic number for block type */ + __be16 bb_level; /* 0 is a leaf */ + __be16 bb_numrecs; /* current # of data records */ + union { + struct { + __be32 bb_leftsib; + __be32 bb_rightsib; + + __be64 bb_blkno; + __be64 bb_lsn; + uuid_t bb_uuid; + __be32 bb_owner; + __le32 bb_crc; + } s; /* short form pointers */ + struct { + __be64 bb_leftsib; + __be64 bb_rightsib; + + __be64 bb_blkno; + __be64 bb_lsn; + uuid_t bb_uuid; + __be64 bb_owner; + __le32 bb_crc; + __be32 bb_pad; /* padding for alignment */ + } l; /* long form pointers */ + } bb_u; /* rest */ +}; + +#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ +#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ + +/* sizes of CRC enabled btree blocks */ +#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40) +#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48) + +#define XFS_BTREE_SBLOCK_CRC_OFF \ + offsetof(struct xfs_btree_block, bb_u.s.bb_crc) +#define XFS_BTREE_LBLOCK_CRC_OFF \ + offsetof(struct xfs_btree_block, bb_u.l.bb_crc) #endif /* __XFS_FORMAT_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 18272c766a50..c5fc116dfaa3 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -233,11 +233,11 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ #define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ #define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ -#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ +#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ #define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ - +#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index e64ee5288b86..a6e54b3319bd 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -17,28 +17,29 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" -#include "xfs_btree.h" #include "xfs_error.h" +#include "xfs_btree.h" +#include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_fsops.h" #include "xfs_itable.h" #include "xfs_trans_space.h" #include "xfs_rtalloc.h" -#include "xfs_filestream.h" #include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_dinode.h" +#include "xfs_filestream.h" /* * File system operations @@ -101,7 +102,9 @@ xfs_fs_geometry( (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) | (xfs_sb_version_hascrc(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_V5SB : 0); + XFS_FSOP_GEOM_FLAGS_V5SB : 0) | + (xfs_sb_version_hasftype(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_FTYPE : 0); geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? mp->m_sb.sb_logsectsize : BBSIZE; geo->rtsectsize = mp->m_sb.sb_blocksize; @@ -153,7 +156,7 @@ xfs_growfs_data_private( xfs_buf_t *bp; int bucket; int dpct; - int error; + int error, saved_error = 0; xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; xfs_rfsblock_t nb, nb_mod; @@ -496,29 +499,33 @@ xfs_growfs_data_private( error = ENOMEM; } + /* + * If we get an error reading or writing alternate superblocks, + * continue. xfs_repair chooses the "best" superblock based + * on most matches; if we break early, we'll leave more + * superblocks un-updated than updated, and xfs_repair may + * pick them over the properly-updated primary. + */ if (error) { xfs_warn(mp, "error %d reading secondary superblock for ag %d", error, agno); - break; + saved_error = error; + continue; } xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); - /* - * If we get an error writing out the alternate superblocks, - * just issue a warning and continue. The real work is - * already done and committed. - */ error = xfs_bwrite(bp); xfs_buf_relse(bp); if (error) { xfs_warn(mp, "write error %d updating secondary superblock for ag %d", error, agno); - break; /* no point in continuing */ + saved_error = error; + continue; } } - return error; + return saved_error ? saved_error : error; error0: xfs_trans_cancel(tp, XFS_TRANS_ABORT); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index ccf2fb143962..e87719c5bebe 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -17,29 +17,30 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" #include "xfs_cksum.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_icreate_item.h" #include "xfs_icache.h" +#include "xfs_dinode.h" +#include "xfs_trace.h" /* @@ -1627,8 +1628,9 @@ xfs_read_agi( { int error; - ASSERT(agno != NULLAGNUMBER); + trace_xfs_read_agi(mp, agno); + ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); @@ -1651,6 +1653,8 @@ xfs_ialloc_read_agi( struct xfs_perag *pag; /* per allocation group data */ int error; + trace_xfs_ialloc_read_agi(mp, agno); + error = xfs_read_agi(mp, tp, agno, bpp); if (error) return error; diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 68c07320f096..a8f76a5ff418 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -23,6 +23,7 @@ struct xfs_dinode; struct xfs_imap; struct xfs_mount; struct xfs_trans; +struct xfs_btree_cur; /* * Allocation parameters for inode allocation. @@ -42,7 +43,7 @@ struct xfs_trans; static inline struct xfs_dinode * xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) { - return (xfs_dinode_t *) + return (struct xfs_dinode *) (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog)); } @@ -158,6 +159,4 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, unsigned int gen); -extern const struct xfs_buf_ops xfs_agi_buf_ops; - #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 5448eb6b8c12..c8fa5bbb36de 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -17,24 +17,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_trans.h" STATIC int diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 3ac36b7642e9..f38b22011c4e 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -27,55 +27,6 @@ struct xfs_btree_cur; struct xfs_mount; /* - * There is a btree for the inode map per allocation group. - */ -#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ -#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ - -typedef __uint64_t xfs_inofree_t; -#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) -#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) -#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) -#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) - -static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) -{ - return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; -} - -/* - * Data record structure - */ -typedef struct xfs_inobt_rec { - __be32 ir_startino; /* starting inode number */ - __be32 ir_freecount; /* count of free inodes (set bits) */ - __be64 ir_free; /* free inode mask */ -} xfs_inobt_rec_t; - -typedef struct xfs_inobt_rec_incore { - xfs_agino_t ir_startino; /* starting inode number */ - __int32_t ir_freecount; /* count of free inodes (set bits) */ - xfs_inofree_t ir_free; /* free inode mask */ -} xfs_inobt_rec_incore_t; - - -/* - * Key structure - */ -typedef struct xfs_inobt_key { - __be32 ir_startino; /* starting inode number */ -} xfs_inobt_key_t; - -/* btree pointer type */ -typedef __be32 xfs_inobt_ptr_t; - -/* - * block numbers in the AG. - */ -#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) -#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) - -/* * Btree block header size depends on a superblock flag. */ #define XFS_INOBT_BLOCK_LEN(mp) \ @@ -110,6 +61,4 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); -extern const struct xfs_buf_ops xfs_inobt_buf_ops; - #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 474807a401c8..98d35244eecc 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -18,24 +18,19 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_log_priv.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_dinode.h" #include "xfs_error.h" -#include "xfs_filestream.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_inode_item.h" #include "xfs_quota.h" #include "xfs_trace.h" -#include "xfs_fsops.h" #include "xfs_icache.h" #include "xfs_bmap_util.h" @@ -500,11 +495,6 @@ xfs_inode_ag_walk_grab( if (!igrab(inode)) return ENOENT; - if (is_bad_inode(inode)) { - IRELE(ip); - return ENOENT; - } - /* inode is valid */ return 0; @@ -918,8 +908,6 @@ restart: xfs_iflock(ip); } - if (is_bad_inode(VFS_I(ip))) - goto reclaim; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_iunpin_wait(ip); xfs_iflush_abort(ip, false); diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 5a5a593994d4..d2eaccfa73f4 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -17,13 +17,14 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_error.h" #include "xfs_icreate_item.h" diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e3d75385aa76..001aa893ed59 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -19,39 +19,38 @@ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_space.h" -#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_attr_sf.h" #include "xfs_attr.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" +#include "xfs_trans_space.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_inode_item.h" -#include "xfs_btree.h" -#include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_error.h" #include "xfs_quota.h" +#include "xfs_dinode.h" #include "xfs_filestream.h" #include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" +#include "xfs_bmap_btree.h" kmem_zone_t *xfs_inode_zone; @@ -1663,6 +1662,126 @@ xfs_release( } /* + * xfs_inactive_truncate + * + * Called to perform a truncate when an inode becomes unlinked. + */ +STATIC int +xfs_inactive_truncate( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); + if (error) { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + /* + * Log the inode size first to prevent stale data exposure in the event + * of a system crash before the truncate completes. See the related + * comment in xfs_setattr_size() for details. + */ + ip->i_d.di_size = 0; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); + if (error) + goto error_trans_cancel; + + ASSERT(ip->i_d.di_nextents == 0); + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) + goto error_unlock; + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; + +error_trans_cancel: + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); +error_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + +/* + * xfs_inactive_ifree() + * + * Perform the inode free when an inode is unlinked. + */ +STATIC int +xfs_inactive_ifree( + struct xfs_inode *ip) +{ + xfs_bmap_free_t free_list; + xfs_fsblock_t first_block; + int committed; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); + if (error) { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + xfs_bmap_init(&free_list, &first_block); + error = xfs_ifree(tp, ip, &free_list); + if (error) { + /* + * If we fail to free the inode, shut down. The cancel + * might do that, we need to make sure. Otherwise the + * inode might be lost for a long time or forever. + */ + if (!XFS_FORCED_SHUTDOWN(mp)) { + xfs_notice(mp, "%s: xfs_ifree returned error %d", + __func__, error); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + } + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + } + + /* + * Credit the quota account(s). The inode is gone. + */ + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); + + /* + * Just ignore errors at this point. There is nothing we can + * do except to try to keep going. Make sure it's not a silent + * error. + */ + error = xfs_bmap_finish(&tp, &free_list, &committed); + if (error) + xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", + __func__, error); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) + xfs_notice(mp, "%s: xfs_trans_commit returned error %d", + __func__, error); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; +} + +/* * xfs_inactive * * This is called when the vnode reference count for the vnode @@ -1670,16 +1789,11 @@ xfs_release( * now be truncated. Also, we clear all of the read-ahead state * kept for the inode here since the file is now closed. */ -int +void xfs_inactive( xfs_inode_t *ip) { - xfs_bmap_free_t free_list; - xfs_fsblock_t first_block; - int committed; - struct xfs_trans *tp; struct xfs_mount *mp; - struct xfs_trans_res *resp; int error; int truncate = 0; @@ -1687,19 +1801,17 @@ xfs_inactive( * If the inode is already free, then there can be nothing * to clean up here. */ - if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) { + if (ip->i_d.di_mode == 0) { ASSERT(ip->i_df.if_real_bytes == 0); ASSERT(ip->i_df.if_broot_bytes == 0); - return VN_INACTIVE_CACHE; + return; } mp = ip->i_mount; - error = 0; - /* If this is a read-only mount, don't do this (would generate I/O) */ if (mp->m_flags & XFS_MOUNT_RDONLY) - goto out; + return; if (ip->i_d.di_nlink != 0) { /* @@ -1707,12 +1819,10 @@ xfs_inactive( * cache. Post-eof blocks must be freed, lest we end up with * broken free space accounting. */ - if (xfs_can_free_eofblocks(ip, true)) { - error = xfs_free_eofblocks(mp, ip, false); - if (error) - return VN_INACTIVE_CACHE; - } - goto out; + if (xfs_can_free_eofblocks(ip, true)) + xfs_free_eofblocks(mp, ip, false); + + return; } if (S_ISREG(ip->i_d.di_mode) && @@ -1722,36 +1832,14 @@ xfs_inactive( error = xfs_qm_dqattach(ip, 0); if (error) - return VN_INACTIVE_CACHE; + return; - tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - resp = (truncate || S_ISLNK(ip->i_d.di_mode)) ? - &M_RES(mp)->tr_itruncate : &M_RES(mp)->tr_ifree; - - error = xfs_trans_reserve(tp, resp, 0, 0); - if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); - return VN_INACTIVE_CACHE; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); - - if (S_ISLNK(ip->i_d.di_mode)) { - error = xfs_inactive_symlink(ip, &tp); - if (error) - goto out_cancel; - } else if (truncate) { - ip->i_d.di_size = 0; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); - if (error) - goto out_cancel; - - ASSERT(ip->i_d.di_nextents == 0); - } + if (S_ISLNK(ip->i_d.di_mode)) + error = xfs_inactive_symlink(ip); + else if (truncate) + error = xfs_inactive_truncate(ip); + if (error) + return; /* * If there are attributes associated with the file then blow them away @@ -1762,25 +1850,9 @@ xfs_inactive( if (ip->i_d.di_anextents > 0) { ASSERT(ip->i_d.di_forkoff != 0); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) - goto out_unlock; - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - error = xfs_attr_inactive(ip); if (error) - goto out; - - tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - goto out; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); + return; } if (ip->i_afp) @@ -1791,52 +1863,14 @@ xfs_inactive( /* * Free the inode. */ - xfs_bmap_init(&free_list, &first_block); - error = xfs_ifree(tp, ip, &free_list); - if (error) { - /* - * If we fail to free the inode, shut down. The cancel - * might do that, we need to make sure. Otherwise the - * inode might be lost for a long time or forever. - */ - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_notice(mp, "%s: xfs_ifree returned error %d", - __func__, error); - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - } - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - } else { - /* - * Credit the quota account(s). The inode is gone. - */ - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); - - /* - * Just ignore errors at this point. There is nothing we can - * do except to try to keep going. Make sure it's not a silent - * error. - */ - error = xfs_bmap_finish(&tp, &free_list, &committed); - if (error) - xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", - __func__, error); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) - xfs_notice(mp, "%s: xfs_trans_commit returned error %d", - __func__, error); - } + error = xfs_inactive_ifree(ip); + if (error) + return; /* * Release the dquots held by inode, if any. */ xfs_qm_dqdetach(ip); -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); -out: - return VN_INACTIVE_CACHE; -out_cancel: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); - goto out_unlock; } /* @@ -2370,6 +2404,33 @@ xfs_iunpin_wait( __xfs_iunpin_wait(ip); } +/* + * Removing an inode from the namespace involves removing the directory entry + * and dropping the link count on the inode. Removing the directory entry can + * result in locking an AGF (directory blocks were freed) and removing a link + * count can result in placing the inode on an unlinked list which results in + * locking an AGI. + * + * The big problem here is that we have an ordering constraint on AGF and AGI + * locking - inode allocation locks the AGI, then can allocate a new extent for + * new inodes, locking the AGF after the AGI. Similarly, freeing the inode + * removes the inode from the unlinked list, requiring that we lock the AGI + * first, and then freeing the inode can result in an inode chunk being freed + * and hence freeing disk space requiring that we lock an AGF. + * + * Hence the ordering that is imposed by other parts of the code is AGI before + * AGF. This means we cannot remove the directory entry before we drop the inode + * reference count and put it on the unlinked list as this results in a lock + * order of AGF then AGI, and this can deadlock against inode allocation and + * freeing. Therefore we must drop the link counts before we remove the + * directory entry. + * + * This is still safe from a transactional point of view - it is not until we + * get to xfs_bmap_finish() that we have the possibility of multiple + * transactions in this operation. Hence as long as we remove the directory + * entry and drop the link count in the first transaction of the remove + * operation, there are no transactional constraints on the ordering here. + */ int xfs_remove( xfs_inode_t *dp, @@ -2439,6 +2500,7 @@ xfs_remove( /* * If we're removing a directory perform some additional validation. */ + cancel_flags |= XFS_TRANS_ABORT; if (is_dir) { ASSERT(ip->i_d.di_nlink >= 2); if (ip->i_d.di_nlink != 2) { @@ -2449,31 +2511,16 @@ xfs_remove( error = XFS_ERROR(ENOTEMPTY); goto out_trans_cancel; } - } - xfs_bmap_init(&free_list, &first_block); - error = xfs_dir_removename(tp, dp, name, ip->i_ino, - &first_block, &free_list, resblks); - if (error) { - ASSERT(error != ENOENT); - goto out_bmap_cancel; - } - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - - if (is_dir) { - /* - * Drop the link from ip's "..". - */ + /* Drop the link from ip's "..". */ error = xfs_droplink(tp, dp); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; - /* - * Drop the "." link from ip to self. - */ + /* Drop the "." link from ip to self. */ error = xfs_droplink(tp, ip); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; } else { /* * When removing a non-directory we need to log the parent @@ -2482,20 +2529,24 @@ xfs_remove( */ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Drop the link from dp to ip. - */ + /* Drop the link from dp to ip. */ error = xfs_droplink(tp, ip); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; - /* - * Determine if this is the last link while - * we are in the transaction. - */ + /* Determine if this is the last link while the inode is locked */ link_zero = (ip->i_d.di_nlink == 0); + xfs_bmap_init(&free_list, &first_block); + error = xfs_dir_removename(tp, dp, name, ip->i_ino, + &first_block, &free_list, resblks); + if (error) { + ASSERT(error != ENOENT); + goto out_bmap_cancel; + } + /* * If this is a synchronous mount, make sure that the * remove transaction goes to disk before returning to @@ -2525,7 +2576,6 @@ xfs_remove( out_bmap_cancel: xfs_bmap_cancel(&free_list); - cancel_flags |= XFS_TRANS_ABORT; out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); std_return: diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 4a91358c1470..9e6efccbae04 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -24,7 +24,6 @@ /* * Kernel only inode definitions */ - struct xfs_dinode; struct xfs_inode; struct xfs_buf; @@ -50,6 +49,9 @@ typedef struct xfs_inode { xfs_ifork_t *i_afp; /* attribute fork pointer */ xfs_ifork_t i_df; /* data fork */ + /* operations vectors */ + const struct xfs_dir_ops *d_ops; /* directory ops vector */ + /* Transaction and locking information. */ struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ @@ -316,7 +318,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) int xfs_release(struct xfs_inode *ip); -int xfs_inactive(struct xfs_inode *ip); +void xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index 63382d37f565..4fc9f39dd89e 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c @@ -17,20 +17,20 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_error.h" #include "xfs_cksum.h" #include "xfs_icache.h" +#include "xfs_trans.h" #include "xfs_ialloc.h" +#include "xfs_dinode.h" /* * Check that none of the inode's in the buffer have a next diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h index abba0ae8cf2d..9308c47f2a52 100644 --- a/fs/xfs/xfs_inode_buf.h +++ b/fs/xfs/xfs_inode_buf.h @@ -47,7 +47,4 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ -extern const struct xfs_buf_ops xfs_inode_buf_ops; -extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; - #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c index 02f1083955bb..cfee14a83cfe 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/xfs_inode_fork.c @@ -20,31 +20,21 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" -#include "xfs_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" +#include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_error.h" -#include "xfs_quota.h" -#include "xfs_filestream.h" -#include "xfs_cksum.h" #include "xfs_trace.h" -#include "xfs_icache.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" kmem_zone_t *xfs_ifork_zone; @@ -1031,15 +1021,14 @@ xfs_iext_add( * the next index needed in the indirection array. */ else { - int count = ext_diff; + uint count = ext_diff; while (count) { erp = xfs_iext_irec_new(ifp, erp_idx); - erp->er_extcount = count; - count -= MIN(count, (int)XFS_LINEAR_EXTS); - if (count) { + erp->er_extcount = min(count, XFS_LINEAR_EXTS); + count -= erp->er_extcount; + if (count) erp_idx++; - } } } } @@ -1359,7 +1348,7 @@ xfs_iext_remove_indirect( void xfs_iext_realloc_direct( xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* new size of extents */ + int new_size) /* new size of extents after adding */ { int rnew_size; /* real new size of extents */ @@ -1397,13 +1386,8 @@ xfs_iext_realloc_direct( rnew_size - ifp->if_real_bytes); } } - /* - * Switch from the inline extent buffer to a direct - * extent list. Be sure to include the inline extent - * bytes in new_size. - */ + /* Switch from the inline extent buffer to a direct extent list */ else { - new_size += ifp->if_bytes; if (!is_power_of_2(new_size)) { rnew_size = roundup_pow_of_two(new_size); } diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h index 28661a0d9058..eb329a1ea888 100644 --- a/fs/xfs/xfs_inode_fork.h +++ b/fs/xfs/xfs_inode_fork.h @@ -19,6 +19,7 @@ #define __XFS_INODE_FORK_H__ struct xfs_inode_log_item; +struct xfs_dinode; /* * The following xfs_ext_irec_t struct introduces a second (top) level diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 378081109844..7c0d391f9a6e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -17,19 +17,19 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_trans_priv.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_error.h" #include "xfs_trace.h" +#include "xfs_trans_priv.h" +#include "xfs_dinode.h" kmem_zone_t *xfs_ili_zone; /* inode log item zone */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 668e8f4ccf5e..4d613401a5e0 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -17,32 +17,31 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ioctl.h" +#include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_buf_item.h" #include "xfs_fsops.h" #include "xfs_discard.h" #include "xfs_quota.h" -#include "xfs_inode_item.h" #include "xfs_export.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" +#include "xfs_dinode.h" +#include "xfs_trans.h" #include <linux/capability.h> #include <linux/dcache.h> @@ -641,7 +640,11 @@ xfs_ioc_space( unsigned int cmd, xfs_flock64_t *bf) { - int attr_flags = 0; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + struct iattr iattr; + bool setprealloc = false; + bool clrprealloc = false; int error; /* @@ -661,19 +664,128 @@ xfs_ioc_space( if (!S_ISREG(inode->i_mode)) return -XFS_ERROR(EINVAL); - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - attr_flags |= XFS_ATTR_NONBLOCK; + error = mnt_want_write_file(filp); + if (error) + return error; - if (filp->f_flags & O_DSYNC) - attr_flags |= XFS_ATTR_SYNC; + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + switch (bf->l_whence) { + case 0: /*SEEK_SET*/ + break; + case 1: /*SEEK_CUR*/ + bf->l_start += filp->f_pos; + break; + case 2: /*SEEK_END*/ + bf->l_start += XFS_ISIZE(ip); + break; + default: + error = XFS_ERROR(EINVAL); + goto out_unlock; + } - if (ioflags & IO_INVIS) - attr_flags |= XFS_ATTR_DMI; + /* + * length of <= 0 for resv/unresv/zero is invalid. length for + * alloc/free is ignored completely and we have no idea what userspace + * might have set it to, so set it to zero to allow range + * checks to pass. + */ + switch (cmd) { + case XFS_IOC_ZERO_RANGE: + case XFS_IOC_RESVSP: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP: + case XFS_IOC_UNRESVSP64: + if (bf->l_len <= 0) { + error = XFS_ERROR(EINVAL); + goto out_unlock; + } + break; + default: + bf->l_len = 0; + break; + } + + if (bf->l_start < 0 || + bf->l_start > mp->m_super->s_maxbytes || + bf->l_start + bf->l_len < 0 || + bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { + error = XFS_ERROR(EINVAL); + goto out_unlock; + } + + switch (cmd) { + case XFS_IOC_ZERO_RANGE: + error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); + if (!error) + setprealloc = true; + break; + case XFS_IOC_RESVSP: + case XFS_IOC_RESVSP64: + error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, + XFS_BMAPI_PREALLOC); + if (!error) + setprealloc = true; + break; + case XFS_IOC_UNRESVSP: + case XFS_IOC_UNRESVSP64: + error = xfs_free_file_space(ip, bf->l_start, bf->l_len); + break; + case XFS_IOC_ALLOCSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP: + case XFS_IOC_FREESP64: + if (bf->l_start > XFS_ISIZE(ip)) { + error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), + bf->l_start - XFS_ISIZE(ip), 0); + if (error) + goto out_unlock; + } + + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = bf->l_start; + + error = xfs_setattr_size(ip, &iattr); + if (!error) + clrprealloc = true; + break; + default: + ASSERT(0); + error = XFS_ERROR(EINVAL); + } - error = mnt_want_write_file(filp); if (error) - return error; - error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); + goto out_unlock; + + tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + goto out_unlock; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + if (!(ioflags & IO_INVIS)) { + ip->i_d.di_mode &= ~S_ISUID; + if (ip->i_d.di_mode & S_IXGRP) + ip->i_d.di_mode &= ~S_ISGID; + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + } + + if (setprealloc) + ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; + else if (clrprealloc) + ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + if (filp->f_flags & O_DSYNC) + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + +out_unlock: + xfs_iunlock(ip, XFS_IOLOCK_EXCL); mnt_drop_write_file(filp); return -error; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index f671f7e472ac..e8fb1231db81 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -22,14 +22,13 @@ #include <asm/uaccess.h> #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_vnode.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_itable.h" #include "xfs_error.h" diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 8d4d49b6fbf3..22d1cbea283d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -17,34 +17,28 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" #include "xfs_trans_space.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_quota.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" +#include "xfs_dinode.h" #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ @@ -110,7 +104,7 @@ xfs_alert_fsblock_zero( xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x\n", + "blkcnt: %llx extent-state: %x", (unsigned long long)ip->i_ino, (unsigned long long)imap->br_startblock, (unsigned long long)imap->br_startoff, @@ -655,7 +649,6 @@ int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, - size_t count, xfs_bmbt_irec_t *imap) { xfs_mount_t *mp = ip->i_mount; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 80615760959a..411fbb8919ef 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -21,12 +21,12 @@ struct xfs_inode; struct xfs_bmbt_irec; -extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, +int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *, int); -extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, +int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *); -extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, +int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, struct xfs_bmbt_irec *); -extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); +int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2b8952d9199b..27e0e544e963 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -17,32 +17,28 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_acl.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h" +#include "xfs_acl.h" +#include "xfs_quota.h" #include "xfs_error.h" -#include "xfs_itable.h" #include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_inode_item.h" +#include "xfs_trans.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" #include "xfs_da_btree.h" -#include "xfs_dir2_format.h" #include "xfs_dir2_priv.h" +#include "xfs_dinode.h" #include <linux/capability.h> #include <linux/xattr.h> @@ -709,8 +705,7 @@ out_dqrele: int xfs_setattr_size( struct xfs_inode *ip, - struct iattr *iattr, - int flags) + struct iattr *iattr) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -733,15 +728,11 @@ xfs_setattr_size( if (error) return XFS_ERROR(error); + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); - if (!(flags & XFS_ATTR_NOLOCK)) { - lock_flags |= XFS_IOLOCK_EXCL; - xfs_ilock(ip, lock_flags); - } - oldsize = inode->i_size; newsize = iattr->ia_size; @@ -750,12 +741,11 @@ xfs_setattr_size( */ if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { if (!(mask & (ATTR_CTIME|ATTR_MTIME))) - goto out_unlock; + return 0; /* * Use the regular setattr path to update the timestamps. */ - xfs_iunlock(ip, lock_flags); iattr->ia_valid &= ~ATTR_SIZE; return xfs_setattr_nonsize(ip, iattr, 0); } @@ -765,7 +755,7 @@ xfs_setattr_size( */ error = xfs_qm_dqattach(ip, 0); if (error) - goto out_unlock; + return error; /* * Now we can make the changes. Before we join the inode to the @@ -783,7 +773,7 @@ xfs_setattr_size( */ error = xfs_zero_eof(ip, newsize, oldsize); if (error) - goto out_unlock; + return error; } /* @@ -802,7 +792,7 @@ xfs_setattr_size( error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ip->i_d.di_size, newsize); if (error) - goto out_unlock; + return error; } /* @@ -812,7 +802,7 @@ xfs_setattr_size( error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) - goto out_unlock; + return error; tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); @@ -916,12 +906,21 @@ out_trans_cancel: STATIC int xfs_vn_setattr( - struct dentry *dentry, - struct iattr *iattr) + struct dentry *dentry, + struct iattr *iattr) { - if (iattr->ia_valid & ATTR_SIZE) - return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); - return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); + struct xfs_inode *ip = XFS_I(dentry->d_inode); + int error; + + if (iattr->ia_valid & ATTR_SIZE) { + xfs_ilock(ip, XFS_IOLOCK_EXCL); + error = xfs_setattr_size(ip, iattr); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + } else { + error = xfs_setattr_nonsize(ip, iattr, 0); + } + + return -error; } STATIC int @@ -1169,6 +1168,7 @@ xfs_setup_inode( struct xfs_inode *ip) { struct inode *inode = &ip->i_vnode; + gfp_t gfp_mask; inode->i_ino = ip->i_ino; inode->i_state = I_NEW; @@ -1204,6 +1204,7 @@ xfs_setup_inode( inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); + ip->d_ops = ip->i_mount->m_nondir_inode_ops; switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; @@ -1216,6 +1217,7 @@ xfs_setup_inode( else inode->i_op = &xfs_dir_inode_operations; inode->i_fop = &xfs_dir_file_operations; + ip->d_ops = ip->i_mount->m_dir_inode_ops; break; case S_IFLNK: inode->i_op = &xfs_symlink_inode_operations; @@ -1229,6 +1231,14 @@ xfs_setup_inode( } /* + * Ensure all page cache allocations are done from GFP_NOFS context to + * prevent direct reclaim recursion back into the filesystem and blowing + * stacks or deadlocking. + */ + gfp_mask = mapping_gfp_mask(inode->i_mapping); + mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + + /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index d81fb41205ec..d2c5057b5cc4 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -30,14 +30,10 @@ extern void xfs_setup_inode(struct xfs_inode *); /* * Internal setattr interfaces. */ -#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ -#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if op would block */ -#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ -#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ -#define XFS_ATTR_SYNC 0x10 /* synchronous operation required */ +#define XFS_ATTR_NOACL 0x01 /* Don't call xfs_acl_chmod */ extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); -extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags); +extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 084b3e1741fd..c237ad15d500 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -17,24 +17,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_btree.h" #include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" #include "xfs_itable.h" #include "xfs_error.h" -#include "xfs_btree.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_dinode.h" STATIC int xfs_internal_inum( diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a2dea108071a..8497a00e399d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -17,21 +17,19 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_error.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_buf_item.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_log_recover.h" -#include "xfs_trans_priv.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_trace.h" #include "xfs_fsops.h" @@ -1000,27 +998,34 @@ xfs_log_space_wake( } /* - * Determine if we have a transaction that has gone to disk - * that needs to be covered. To begin the transition to the idle state - * firstly the log needs to be idle (no AIL and nothing in the iclogs). - * If we are then in a state where covering is needed, the caller is informed - * that dummy transactions are required to move the log into the idle state. + * Determine if we have a transaction that has gone to disk that needs to be + * covered. To begin the transition to the idle state firstly the log needs to + * be idle. That means the CIL, the AIL and the iclogs needs to be empty before + * we start attempting to cover the log. * - * Because this is called as part of the sync process, we should also indicate - * that dummy transactions should be issued in anything but the covered or - * idle states. This ensures that the log tail is accurately reflected in - * the log at the end of the sync, hence if a crash occurrs avoids replay - * of transactions where the metadata is already on disk. + * Only if we are then in a state where covering is needed, the caller is + * informed that dummy transactions are required to move the log into the idle + * state. + * + * If there are any items in the AIl or CIL, then we do not want to attempt to + * cover the log as we may be in a situation where there isn't log space + * available to run a dummy transaction and this can lead to deadlocks when the + * tail of the log is pinned by an item that is modified in the CIL. Hence + * there's no point in running a dummy transaction at this point because we + * can't start trying to idle the log until both the CIL and AIL are empty. */ int xfs_log_need_covered(xfs_mount_t *mp) { - int needed = 0; struct xlog *log = mp->m_log; + int needed = 0; if (!xfs_fs_writable(mp)) return 0; + if (!xlog_cil_empty(log)) + return 0; + spin_lock(&log->l_icloglock); switch (log->l_covered_state) { case XLOG_STATE_COVER_DONE: @@ -1029,14 +1034,17 @@ xfs_log_need_covered(xfs_mount_t *mp) break; case XLOG_STATE_COVER_NEED: case XLOG_STATE_COVER_NEED2: - if (!xfs_ail_min_lsn(log->l_ailp) && - xlog_iclogs_empty(log)) { - if (log->l_covered_state == XLOG_STATE_COVER_NEED) - log->l_covered_state = XLOG_STATE_COVER_DONE; - else - log->l_covered_state = XLOG_STATE_COVER_DONE2; - } - /* FALLTHRU */ + if (xfs_ail_min_lsn(log->l_ailp)) + break; + if (!xlog_iclogs_empty(log)) + break; + + needed = 1; + if (log->l_covered_state == XLOG_STATE_COVER_NEED) + log->l_covered_state = XLOG_STATE_COVER_DONE; + else + log->l_covered_state = XLOG_STATE_COVER_DONE2; + break; default: needed = 1; break; @@ -1068,6 +1076,7 @@ xlog_assign_tail_lsn_locked( tail_lsn = lip->li_lsn; else tail_lsn = atomic64_read(&log->l_last_sync_lsn); + trace_xfs_log_assign_tail_lsn(log, tail_lsn); atomic64_set(&log->l_tail_lsn, tail_lsn); return tail_lsn; } @@ -1979,7 +1988,7 @@ xlog_print_tic_res( for (i = 0; i < ticket->t_res_num; i++) { uint r_type = ticket->t_res_arr[i].r_type; - xfs_warn(mp, "region[%u]: %s - %u bytes\n", i, + xfs_warn(mp, "region[%u]: %s - %u bytes", i, ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? "bad-rtype" : res_type_str[r_type-1]), ticket->t_res_arr[i].r_len); @@ -3702,11 +3711,9 @@ xlog_verify_iclog( /* check validity of iclog pointers */ spin_lock(&log->l_icloglock); icptr = log->l_iclog; - for (i=0; i < log->l_iclog_bufs; i++) { - if (icptr == NULL) - xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); - icptr = icptr->ic_next; - } + for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next) + ASSERT(icptr); + if (icptr != log->l_iclog) xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__); spin_unlock(&log->l_icloglock); diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 1c458487f000..e148719e0a5d 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -18,8 +18,6 @@ #ifndef __XFS_LOG_H__ #define __XFS_LOG_H__ -#include "xfs_log_format.h" - struct xfs_log_vec { struct xfs_log_vec *lv_next; /* next lv in build list */ int lv_niovecs; /* number of iovecs in lv */ @@ -82,11 +80,7 @@ struct xlog_ticket; struct xfs_log_item; struct xfs_item_ops; struct xfs_trans; - -void xfs_log_item_init(struct xfs_mount *mp, - struct xfs_log_item *item, - int type, - const struct xfs_item_ops *ops); +struct xfs_log_callback; xfs_lsn_t xfs_log_done(struct xfs_mount *mp, struct xlog_ticket *ticket, @@ -114,7 +108,7 @@ xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); void xfs_log_space_wake(struct xfs_mount *mp); int xfs_log_notify(struct xfs_mount *mp, struct xlog_in_core *iclog, - xfs_log_callback_t *callback_entry); + struct xfs_log_callback *callback_entry); int xfs_log_release_iclog(struct xfs_mount *mp, struct xlog_in_core *iclog); int xfs_log_reserve(struct xfs_mount *mp, diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index cfe97973ba36..5eb51fc5eb84 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -17,11 +17,9 @@ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_log_priv.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" @@ -29,6 +27,10 @@ #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_discard.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" +#include "xfs_log_priv.h" /* * Allocate a new ticket. Failing to get a new ticket makes it really hard to @@ -711,6 +713,20 @@ xlog_cil_push_foreground( xlog_cil_push(log); } +bool +xlog_cil_empty( + struct xlog *log) +{ + struct xfs_cil *cil = log->l_cilp; + bool empty = false; + + spin_lock(&cil->xc_push_lock); + if (list_empty(&cil->xc_cil)) + empty = true; + spin_unlock(&cil->xc_push_lock); + return empty; +} + /* * Commit a transaction with the given vector to the Committed Item List. * diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/xfs_log_format.h index ca7e28a8ed31..f0969c77bdbe 100644 --- a/fs/xfs/xfs_log_format.h +++ b/fs/xfs/xfs_log_format.h @@ -234,178 +234,6 @@ typedef struct xfs_trans_header { { XFS_LI_ICREATE, "XFS_LI_ICREATE" } /* - * Transaction types. Used to distinguish types of buffers. - */ -#define XFS_TRANS_SETATTR_NOT_SIZE 1 -#define XFS_TRANS_SETATTR_SIZE 2 -#define XFS_TRANS_INACTIVE 3 -#define XFS_TRANS_CREATE 4 -#define XFS_TRANS_CREATE_TRUNC 5 -#define XFS_TRANS_TRUNCATE_FILE 6 -#define XFS_TRANS_REMOVE 7 -#define XFS_TRANS_LINK 8 -#define XFS_TRANS_RENAME 9 -#define XFS_TRANS_MKDIR 10 -#define XFS_TRANS_RMDIR 11 -#define XFS_TRANS_SYMLINK 12 -#define XFS_TRANS_SET_DMATTRS 13 -#define XFS_TRANS_GROWFS 14 -#define XFS_TRANS_STRAT_WRITE 15 -#define XFS_TRANS_DIOSTRAT 16 -/* 17 was XFS_TRANS_WRITE_SYNC */ -#define XFS_TRANS_WRITEID 18 -#define XFS_TRANS_ADDAFORK 19 -#define XFS_TRANS_ATTRINVAL 20 -#define XFS_TRANS_ATRUNCATE 21 -#define XFS_TRANS_ATTR_SET 22 -#define XFS_TRANS_ATTR_RM 23 -#define XFS_TRANS_ATTR_FLAG 24 -#define XFS_TRANS_CLEAR_AGI_BUCKET 25 -#define XFS_TRANS_QM_SBCHANGE 26 -/* - * Dummy entries since we use the transaction type to index into the - * trans_type[] in xlog_recover_print_trans_head() - */ -#define XFS_TRANS_DUMMY1 27 -#define XFS_TRANS_DUMMY2 28 -#define XFS_TRANS_QM_QUOTAOFF 29 -#define XFS_TRANS_QM_DQALLOC 30 -#define XFS_TRANS_QM_SETQLIM 31 -#define XFS_TRANS_QM_DQCLUSTER 32 -#define XFS_TRANS_QM_QINOCREATE 33 -#define XFS_TRANS_QM_QUOTAOFF_END 34 -#define XFS_TRANS_SB_UNIT 35 -#define XFS_TRANS_FSYNC_TS 36 -#define XFS_TRANS_GROWFSRT_ALLOC 37 -#define XFS_TRANS_GROWFSRT_ZERO 38 -#define XFS_TRANS_GROWFSRT_FREE 39 -#define XFS_TRANS_SWAPEXT 40 -#define XFS_TRANS_SB_COUNT 41 -#define XFS_TRANS_CHECKPOINT 42 -#define XFS_TRANS_ICREATE 43 -#define XFS_TRANS_TYPE_MAX 43 -/* new transaction types need to be reflected in xfs_logprint(8) */ - -#define XFS_TRANS_TYPES \ - { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \ - { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ - { XFS_TRANS_INACTIVE, "INACTIVE" }, \ - { XFS_TRANS_CREATE, "CREATE" }, \ - { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ - { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ - { XFS_TRANS_REMOVE, "REMOVE" }, \ - { XFS_TRANS_LINK, "LINK" }, \ - { XFS_TRANS_RENAME, "RENAME" }, \ - { XFS_TRANS_MKDIR, "MKDIR" }, \ - { XFS_TRANS_RMDIR, "RMDIR" }, \ - { XFS_TRANS_SYMLINK, "SYMLINK" }, \ - { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \ - { XFS_TRANS_GROWFS, "GROWFS" }, \ - { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \ - { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \ - { XFS_TRANS_WRITEID, "WRITEID" }, \ - { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \ - { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \ - { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \ - { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \ - { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ - { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ - { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ - { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ - { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ - { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ - { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ - { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ - { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ - { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ - { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \ - { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ - { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ - { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ - { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ - { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ - { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ - { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ - { XFS_TRANS_DUMMY1, "DUMMY1" }, \ - { XFS_TRANS_DUMMY2, "DUMMY2" }, \ - { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } - -/* - * This structure is used to track log items associated with - * a transaction. It points to the log item and keeps some - * flags to track the state of the log item. It also tracks - * the amount of space needed to log the item it describes - * once we get to commit processing (see xfs_trans_commit()). - */ -struct xfs_log_item_desc { - struct xfs_log_item *lid_item; - struct list_head lid_trans; - unsigned char lid_flags; -}; - -#define XFS_LID_DIRTY 0x1 - -/* - * Values for t_flags. - */ -#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */ -#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ -#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ -#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ -#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ -#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ -#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer - count in superblock */ - -/* - * Values for call flags parameter. - */ -#define XFS_TRANS_RELEASE_LOG_RES 0x4 -#define XFS_TRANS_ABORT 0x8 - -/* - * Field values for xfs_trans_mod_sb. - */ -#define XFS_TRANS_SB_ICOUNT 0x00000001 -#define XFS_TRANS_SB_IFREE 0x00000002 -#define XFS_TRANS_SB_FDBLOCKS 0x00000004 -#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008 -#define XFS_TRANS_SB_FREXTENTS 0x00000010 -#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020 -#define XFS_TRANS_SB_DBLOCKS 0x00000040 -#define XFS_TRANS_SB_AGCOUNT 0x00000080 -#define XFS_TRANS_SB_IMAXPCT 0x00000100 -#define XFS_TRANS_SB_REXTSIZE 0x00000200 -#define XFS_TRANS_SB_RBMBLOCKS 0x00000400 -#define XFS_TRANS_SB_RBLOCKS 0x00000800 -#define XFS_TRANS_SB_REXTENTS 0x00001000 -#define XFS_TRANS_SB_REXTSLOG 0x00002000 - -/* - * Here we centralize the specification of XFS meta-data buffer - * reference count values. This determine how hard the buffer - * cache tries to hold onto the buffer. - */ -#define XFS_AGF_REF 4 -#define XFS_AGI_REF 4 -#define XFS_AGFL_REF 3 -#define XFS_INO_BTREE_REF 3 -#define XFS_ALLOC_BTREE_REF 2 -#define XFS_BMAP_BTREE_REF 2 -#define XFS_DIR_BTREE_REF 2 -#define XFS_INO_REF 2 -#define XFS_ATTR_BTREE_REF 1 -#define XFS_DQUOT_REF 1 - -/* - * Flags for xfs_trans_ichgtime(). - */ -#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ -#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ -#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ - - -/* * Inode Log Item Format definitions. * * This is the structure used to lay out an inode log item in the @@ -797,7 +625,6 @@ typedef struct xfs_qoff_logformat { char qf_pad[12]; /* padding for future */ } xfs_qoff_logformat_t; - /* * Disk quotas status in m_qflags, and also sb_qflags. 16 bits. */ @@ -849,8 +676,4 @@ struct xfs_icreate_log { __be32 icl_gen; /* inode generation number to use */ }; -int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); -int xfs_log_calc_minimum_size(struct xfs_mount *); - - #endif /* __XFS_LOG_FORMAT_H__ */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 136654b9400d..9bc403a9e54f 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -22,6 +22,7 @@ struct xfs_buf; struct xlog; struct xlog_ticket; struct xfs_mount; +struct xfs_log_callback; /* * Flags for log structure @@ -227,8 +228,8 @@ typedef struct xlog_in_core { /* Callback structures need their own cacheline */ spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; - xfs_log_callback_t *ic_callback; - xfs_log_callback_t **ic_callback_tail; + struct xfs_log_callback *ic_callback; + struct xfs_log_callback **ic_callback_tail; /* reference counts need their own cacheline */ atomic_t ic_refcnt ____cacheline_aligned_in_smp; @@ -254,7 +255,7 @@ struct xfs_cil_ctx { int space_used; /* aggregate size of regions */ struct list_head busy_extents; /* busy extents in chkpt */ struct xfs_log_vec *lv_chain; /* logvecs being pushed */ - xfs_log_callback_t log_cb; /* completion callback hook. */ + struct xfs_log_callback log_cb; /* completion callback hook. */ struct list_head committing; /* ctx committing list */ }; @@ -514,12 +515,10 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space) /* * Committed Item List interfaces */ -int -xlog_cil_init(struct xlog *log); -void -xlog_cil_init_post_recovery(struct xlog *log); -void -xlog_cil_destroy(struct xlog *log); +int xlog_cil_init(struct xlog *log); +void xlog_cil_init_post_recovery(struct xlog *log); +void xlog_cil_destroy(struct xlog *log); +bool xlog_cil_empty(struct xlog *log); /* * CIL force routines diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 39797490a1f1..b6b669df40f3 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -17,42 +17,34 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_error.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" +#include "xfs_trans.h" +#include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_buf_item.h" #include "xfs_log_recover.h" +#include "xfs_inode_item.h" #include "xfs_extfree_item.h" #include "xfs_trans_priv.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" #include "xfs_quota.h" #include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_icreate_item.h" - -/* Need all the magic numbers and buffer ops structures from these headers */ -#include "xfs_symlink.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_error.h" #include "xfs_dir2.h" -#include "xfs_attr_leaf.h" -#include "xfs_attr_remote.h" #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) @@ -305,9 +297,9 @@ xlog_header_check_dump( xfs_mount_t *mp, xlog_rec_header_t *head) { - xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n", + xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d", __func__, &mp->m_sb.sb_uuid, XLOG_FMT); - xfs_debug(mp, " log : uuid = %pU, fmt = %d\n", + xfs_debug(mp, " log : uuid = %pU, fmt = %d", &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); } #else @@ -2362,7 +2354,7 @@ xlog_recover_do_reg_buffer( item->ri_buf[i].i_len, __func__); goto next; } - error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, + error = xfs_dqcheck(mp, item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); if (error) @@ -2394,133 +2386,6 @@ xlog_recover_do_reg_buffer( } /* - * Do some primitive error checking on ondisk dquot data structures. - */ -int -xfs_qm_dqcheck( - struct xfs_mount *mp, - xfs_disk_dquot_t *ddq, - xfs_dqid_t id, - uint type, /* used only when IO_dorepair is true */ - uint flags, - char *str) -{ - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; - int errs = 0; - - /* - * We can encounter an uninitialized dquot buffer for 2 reasons: - * 1. If we crash while deleting the quotainode(s), and those blks got - * used for user data. This is because we take the path of regular - * file deletion; however, the size field of quotainodes is never - * updated, so all the tricks that we play in itruncate_finish - * don't quite matter. - * - * 2. We don't play the quota buffers when there's a quotaoff logitem. - * But the allocation will be replayed so we'll end up with an - * uninitialized quota block. - * - * This is all fine; things are still consistent, and we haven't lost - * any quota information. Just don't complain about bad dquot blks. - */ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); - errs++; - } - if (ddq->d_version != XFS_DQUOT_VERSION) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", - str, id, ddq->d_version, XFS_DQUOT_VERSION); - errs++; - } - - if (ddq->d_flags != XFS_DQ_USER && - ddq->d_flags != XFS_DQ_PROJ && - ddq->d_flags != XFS_DQ_GROUP) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", - str, id, ddq->d_flags); - errs++; - } - - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : ondisk-dquot 0x%p, ID mismatch: " - "0x%x expected, found id 0x%x", - str, ddq, id, be32_to_cpu(ddq->d_id)); - errs++; - } - - if (!errs && ddq->d_id) { - if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) > - be64_to_cpu(ddq->d_blk_softlimit)) { - if (!ddq->d_btimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) > - be64_to_cpu(ddq->d_ino_softlimit)) { - if (!ddq->d_itimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) > - be64_to_cpu(ddq->d_rtb_softlimit)) { - if (!ddq->d_rtbtimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - } - - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) - return errs; - - if (flags & XFS_QMOPT_DOWARN) - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); - - /* - * Typically, a repair is only requested by quotacheck. - */ - ASSERT(id != -1); - ASSERT(flags & XFS_QMOPT_DQREPAIR); - memset(d, 0, sizeof(xfs_dqblk_t)); - - d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); - d->dd_diskdq.d_version = XFS_DQUOT_VERSION; - d->dd_diskdq.d_flags = type; - d->dd_diskdq.d_id = cpu_to_be32(id); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); - xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), - XFS_DQUOT_CRC_OFF); - } - - return errs; -} - -/* * Perform a dquot buffer recovery. * Simple algorithm: if we have found a QUOTAOFF log item of the same type * (ie. USR or GRP), then just toss this buffer away; don't recover it. @@ -3125,7 +2990,7 @@ xlog_recover_dquot_pass2( */ dq_f = item->ri_buf[0].i_addr; ASSERT(dq_f); - error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, + error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, "xlog_recover_dquot_pass2 (log copy)"); if (error) return XFS_ERROR(EIO); @@ -3145,7 +3010,7 @@ xlog_recover_dquot_pass2( * was among a chunk of dquots created earlier, and we did some * minimal initialization then. */ - error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, + error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, "xlog_recover_dquot_pass2"); if (error) { xfs_buf_relse(bp); @@ -4077,7 +3942,7 @@ xlog_unpack_data_crc( if (crc != rhead->h_crc) { if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { xfs_alert(log->l_mp, - "log record CRC mismatch: found 0x%x, expected 0x%x.\n", + "log record CRC mismatch: found 0x%x, expected 0x%x.", le32_to_cpu(rhead->h_crc), le32_to_cpu(crc)); xfs_hex_dump(dp, 32); diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c index bbcec0bbc12d..2af1a0a4d0f1 100644 --- a/fs/xfs/xfs_log_rlimit.c +++ b/fs/xfs/xfs_log_rlimit.c @@ -17,16 +17,19 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_ag.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_trans_space.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_da_btree.h" #include "xfs_attr_leaf.h" +#include "xfs_bmap_btree.h" /* * Calculate the maximum length in bytes that would be required for a local diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 9163dc140532..63ca2f0420b1 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -17,9 +17,8 @@ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5dcc68019d1b..02df7b408a26 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -17,35 +17,31 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" #include "xfs_inode.h" -#include "xfs_btree.h" +#include "xfs_dir2.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_bmap.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_fsops.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_cksum.h" -#include "xfs_buf_item.h" +#include "xfs_dinode.h" #ifdef HAVE_PERCPU_SB @@ -723,8 +719,22 @@ xfs_mountfs( * Set the inode cluster size. * This may still be overridden by the file system * block size if it is larger than the chosen cluster size. + * + * For v5 filesystems, scale the cluster size with the inode size to + * keep a constant ratio of inode per cluster buffer, but only if mkfs + * has set the inode alignment value appropriately for larger cluster + * sizes. */ mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + int new_size = mp->m_inode_cluster_size; + + new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; + if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) + mp->m_inode_cluster_size = new_size; + xfs_info(mp, "Using inode cluster size of %d bytes", + mp->m_inode_cluster_size); + } /* * Set inode alignment fields diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1fa0584b5627..a466c5e5826e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -26,6 +26,7 @@ struct xfs_mru_cache; struct xfs_nameops; struct xfs_ail; struct xfs_quotainfo; +struct xfs_dir_ops; #ifdef HAVE_PERCPU_SB @@ -111,7 +112,7 @@ typedef struct xfs_mount { __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ __uint8_t m_agno_log; /* log #ag's */ __uint8_t m_agino_log; /* #bits for agino in inum */ - __uint16_t m_inode_cluster_size;/* min inode buf size */ + uint m_inode_cluster_size;/* min inode buf size */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ @@ -148,6 +149,8 @@ typedef struct xfs_mount { int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ + const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ + const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ int m_dirblksize; /* directory block sz--bytes */ int m_dirblkfsbs; /* directory block sz--fsbs */ xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 3e6c2e6c9cd2..14a4996cfec6 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -17,31 +17,28 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_itable.h" -#include "xfs_rtalloc.h" +#include "xfs_quota.h" #include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans.h" #include "xfs_trans_space.h" #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_cksum.h" +#include "xfs_dinode.h" /* * The global quota manager. There is only one of these for the entire @@ -664,20 +661,6 @@ xfs_qm_dqdetach( } } -int -xfs_qm_calc_dquots_per_chunk( - struct xfs_mount *mp, - unsigned int nbblks) /* basic block units */ -{ - unsigned int ndquots; - - ASSERT(nbblks > 0); - ndquots = BBTOB(nbblks); - do_div(ndquots, sizeof(xfs_dqblk_t)); - - return ndquots; -} - struct xfs_qm_isolate { struct list_head buffers; struct list_head dispose; @@ -858,7 +841,7 @@ xfs_qm_init_quotainfo( /* Precalc some constants */ qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp, + qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp, qinf->qi_dqchunklen); mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); @@ -1092,10 +1075,10 @@ xfs_qm_reset_dqcounts( /* * Do a sanity check, and if needed, repair the dqblk. Don't * output any warnings because it's perfectly possible to - * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. + * find uninitialised dquot blks. See comment in xfs_dqcheck. */ - (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, - "xfs_quotacheck"); + xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, + "xfs_quotacheck"); ddq->d_bcount = 0; ddq->d_icount = 0; ddq->d_rtbcount = 0; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 2b602df9c242..a788b66a5cb1 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -103,8 +103,6 @@ xfs_dq_to_quota_inode(struct xfs_dquot *dqp) return NULL; } -extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, - unsigned int nbblks); extern void xfs_trans_mod_dquot(struct xfs_trans *, struct xfs_dquot *, uint, long); extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 3af50ccdfac1..e9be63abd8d2 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -18,21 +18,15 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" #include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" #include "xfs_qm.h" diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 8174aad0b388..437c9198031a 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -20,24 +20,18 @@ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_trans.h" #include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_quota.h" #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" @@ -287,7 +281,7 @@ xfs_qm_scall_trunc_qfiles( int error = 0, error2 = 0; if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { - xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", + xfs_debug(mp, "%s: flags=%x m_qflags=%x", __func__, flags, mp->m_qflags); return XFS_ERROR(EINVAL); } @@ -325,7 +319,7 @@ xfs_qm_scall_quotaon( sbflags = 0; if (flags == 0) { - xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", + xfs_debug(mp, "%s: zero flags, m_qflags=%x", __func__, mp->m_qflags); return XFS_ERROR(EINVAL); } @@ -348,7 +342,7 @@ xfs_qm_scall_quotaon( (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && (flags & XFS_PQUOTA_ENFD))) { xfs_debug(mp, - "%s: Can't enforce without acct, flags=%x sbflags=%x\n", + "%s: Can't enforce without acct, flags=%x sbflags=%x", __func__, flags, mp->m_sb.sb_qflags); return XFS_ERROR(EINVAL); } @@ -648,7 +642,7 @@ xfs_qm_scall_setqlim( q->qi_bsoftlimit = soft; } } else { - xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); + xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : @@ -664,7 +658,7 @@ xfs_qm_scall_setqlim( q->qi_rtbsoftlimit = soft; } } else { - xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); + xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? @@ -681,7 +675,7 @@ xfs_qm_scall_setqlim( q->qi_isoftlimit = soft; } } else { - xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); + xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft); } /* diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index e7d84d2d8683..5376dd406ba2 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -150,10 +150,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, pd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) -extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, - xfs_dqid_t, uint, uint, char *); extern int xfs_mount_reset_sbqflags(struct xfs_mount *); -extern const struct xfs_buf_ops xfs_dquot_buf_ops; - #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h index e6b0d6e1f4f2..b3b2b1065c0f 100644 --- a/fs/xfs/xfs_quota_defs.h +++ b/fs/xfs/xfs_quota_defs.h @@ -154,4 +154,8 @@ typedef __uint16_t xfs_qwarncnt_t; (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) +extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, + xfs_dqid_t id, uint type, uint flags, char *str); +extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks); + #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 1326d81596c2..af33cafe69b6 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -17,15 +17,14 @@ */ #include "xfs.h" #include "xfs_format.h" +#include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_log.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_trans.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" #include "xfs_qm.h" #include <linux/quota.h> diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6f9e63c9fc26..a6a76b2b6a85 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -17,172 +17,260 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h" -#include "xfs_fsops.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h" #include "xfs_error.h" -#include "xfs_inode_item.h" +#include "xfs_trans.h" #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_buf.h" #include "xfs_icache.h" +#include "xfs_dinode.h" +#include "xfs_rtalloc.h" /* - * Prototypes for internal functions. + * Read and return the summary information for a given extent size, + * bitmap block combination. + * Keeps track of a current summary block, so we don't keep reading + * it from the buffer cache. */ +STATIC int /* error */ +xfs_rtget_summary( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + int log, /* log2 of extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb, /* in/out: summary block number */ + xfs_suminfo_t *sum) /* out: summary info for this block */ +{ + xfs_buf_t *bp; /* buffer for summary block */ + int error; /* error value */ + xfs_fsblock_t sb; /* summary fsblock */ + int so; /* index into the summary file */ + xfs_suminfo_t *sp; /* pointer to returned data */ + /* + * Compute entry number in the summary file. + */ + so = XFS_SUMOFFS(mp, log, bbno); + /* + * Compute the block number in the summary file. + */ + sb = XFS_SUMOFFSTOBLOCK(mp, so); + /* + * If we have an old buffer, and the block number matches, use that. + */ + if (rbpp && *rbpp && *rsb == sb) + bp = *rbpp; + /* + * Otherwise we have to get the buffer. + */ + else { + /* + * If there was an old one, get rid of it first. + */ + if (rbpp && *rbpp) + xfs_trans_brelse(tp, *rbpp); + error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); + if (error) { + return error; + } + /* + * Remember this buffer and block for the next call. + */ + if (rbpp) { + *rbpp = bp; + *rsb = sb; + } + } + /* + * Point to the summary information & copy it out. + */ + sp = XFS_SUMPTR(mp, bp, so); + *sum = *sp; + /* + * Drop the buffer if we're not asked to remember it. + */ + if (!rbpp) + xfs_trans_brelse(tp, bp); + return 0; +} -STATIC int xfs_rtallocate_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, - xfs_extlen_t, xfs_buf_t **, xfs_fsblock_t *); -STATIC int xfs_rtany_summary(xfs_mount_t *, xfs_trans_t *, int, int, - xfs_rtblock_t, xfs_buf_t **, xfs_fsblock_t *, int *); -STATIC int xfs_rtcheck_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, - xfs_extlen_t, int, xfs_rtblock_t *, int *); -STATIC int xfs_rtfind_back(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, - xfs_rtblock_t, xfs_rtblock_t *); -STATIC int xfs_rtfind_forw(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, - xfs_rtblock_t, xfs_rtblock_t *); -STATIC int xfs_rtget_summary( xfs_mount_t *, xfs_trans_t *, int, - xfs_rtblock_t, xfs_buf_t **, xfs_fsblock_t *, xfs_suminfo_t *); -STATIC int xfs_rtmodify_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, - xfs_extlen_t, int); -STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int, - xfs_rtblock_t, int, xfs_buf_t **, xfs_fsblock_t *); - -/* - * Internal functions. - */ /* - * Allocate space to the bitmap or summary file, and zero it, for growfs. + * Return whether there are any free extents in the size range given + * by low and high, for the bitmap block bbno. */ STATIC int /* error */ -xfs_growfs_rt_alloc( - xfs_mount_t *mp, /* file system mount point */ - xfs_extlen_t oblocks, /* old count of blocks */ - xfs_extlen_t nblocks, /* new count of blocks */ - xfs_inode_t *ip) /* inode (bitmap/summary) */ +xfs_rtany_summary( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + int low, /* low log2 extent size */ + int high, /* high log2 extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb, /* in/out: summary block number */ + int *stat) /* out: any good extents here? */ { - xfs_fileoff_t bno; /* block number in file */ - xfs_buf_t *bp; /* temporary buffer for zeroing */ - int committed; /* transaction committed flag */ - xfs_daddr_t d; /* disk block address */ - int error; /* error return value */ - xfs_fsblock_t firstblock; /* first block allocated in xaction */ - xfs_bmap_free_t flist; /* list of freed blocks */ - xfs_fsblock_t fsbno; /* filesystem block for bno */ - xfs_bmbt_irec_t map; /* block map output */ - int nmap; /* number of block maps */ - int resblks; /* space reservation */ + int error; /* error value */ + int log; /* loop counter, log2 of ext. size */ + xfs_suminfo_t sum; /* summary data */ /* - * Allocate space to the file, as necessary. + * Loop over logs of extent sizes. Order is irrelevant. */ - while (oblocks < nblocks) { - int cancelflags = 0; - xfs_trans_t *tp; - - tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); - resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); + for (log = low; log <= high; log++) { /* - * Reserve space & log for one extent added to the file. + * Get one summary datum. */ - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, - resblks, 0); - if (error) - goto error_cancel; - cancelflags = XFS_TRANS_RELEASE_LOG_RES; + error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum); + if (error) { + return error; + } /* - * Lock the inode. + * If there are any, return success. */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + if (sum) { + *stat = 1; + return 0; + } + } + /* + * Found nothing, return failure. + */ + *stat = 0; + return 0; +} - xfs_bmap_init(&flist, &firstblock); - /* - * Allocate blocks to the bitmap file. - */ - nmap = 1; - cancelflags |= XFS_TRANS_ABORT; - error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, - XFS_BMAPI_METADATA, &firstblock, - resblks, &map, &nmap, &flist); - if (!error && nmap < 1) - error = XFS_ERROR(ENOSPC); - if (error) - goto error_cancel; - /* - * Free any blocks freed up in the transaction, then commit. - */ - error = xfs_bmap_finish(&tp, &flist, &committed); - if (error) - goto error_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) - goto error; - /* - * Now we need to clear the allocated blocks. - * Do this one block per transaction, to keep it simple. - */ - cancelflags = 0; - for (bno = map.br_startoff, fsbno = map.br_startblock; - bno < map.br_startoff + map.br_blockcount; - bno++, fsbno++) { - tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO); - /* - * Reserve log for one block zeroing. - */ - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, - 0, 0); + +/* + * Copy and transform the summary file, given the old and new + * parameters in the mount structures. + */ +STATIC int /* error */ +xfs_rtcopy_summary( + xfs_mount_t *omp, /* old file system mount point */ + xfs_mount_t *nmp, /* new file system mount point */ + xfs_trans_t *tp) /* transaction pointer */ +{ + xfs_rtblock_t bbno; /* bitmap block number */ + xfs_buf_t *bp; /* summary buffer */ + int error; /* error return value */ + int log; /* summary level number (log length) */ + xfs_suminfo_t sum; /* summary data */ + xfs_fsblock_t sumbno; /* summary block number */ + + bp = NULL; + for (log = omp->m_rsumlevels - 1; log >= 0; log--) { + for (bbno = omp->m_sb.sb_rbmblocks - 1; + (xfs_srtblock_t)bbno >= 0; + bbno--) { + error = xfs_rtget_summary(omp, tp, log, bbno, &bp, + &sumbno, &sum); if (error) - goto error_cancel; - /* - * Lock the bitmap inode. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - /* - * Get a buffer for the block. - */ - d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize, 0); - if (bp == NULL) { - error = XFS_ERROR(EIO); -error_cancel: - xfs_trans_cancel(tp, cancelflags); - goto error; - } - memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); - xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); - /* - * Commit the transaction. - */ - error = xfs_trans_commit(tp, 0); + return error; + if (sum == 0) + continue; + error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum, + &bp, &sumbno); if (error) - goto error; + return error; + error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum, + &bp, &sumbno); + if (error) + return error; + ASSERT(sum > 0); } - /* - * Go on to the next extent, if any. - */ - oblocks = map.br_startoff + map.br_blockcount; } return 0; +} +/* + * Mark an extent specified by start and len allocated. + * Updates all the summary information as well as the bitmap. + */ +STATIC int /* error */ +xfs_rtallocate_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* start block to allocate */ + xfs_extlen_t len, /* length to allocate */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_rtblock_t end; /* end of the allocated extent */ + int error; /* error value */ + xfs_rtblock_t postblock = 0; /* first block allocated > end */ + xfs_rtblock_t preblock = 0; /* first block allocated < start */ -error: + end = start + len - 1; + /* + * Assume we're allocating out of the middle of a free extent. + * We need to find the beginning and end of the extent so we can + * properly update the summary. + */ + error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + if (error) { + return error; + } + /* + * Find the next allocated block (end of free extent). + */ + error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, + &postblock); + if (error) { + return error; + } + /* + * Decrement the summary information corresponding to the entire + * (old) free extent. + */ + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + if (error) { + return error; + } + /* + * If there are blocks not being allocated at the front of the + * old extent, add summary data for them to be free. + */ + if (preblock < start) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(start - preblock), + XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * If there are blocks not being allocated at the end of the + * old extent, add summary data for them to be free. + */ + if (postblock > end) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock - end), + XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * Modify the bitmap to mark this extent allocated. + */ + error = xfs_rtmodify_range(mp, tp, start, len, 0); return error; } @@ -721,1112 +809,126 @@ xfs_rtallocate_extent_size( } /* - * Mark an extent specified by start and len allocated. - * Updates all the summary information as well as the bitmap. + * Allocate space to the bitmap or summary file, and zero it, for growfs. */ STATIC int /* error */ -xfs_rtallocate_range( +xfs_growfs_rt_alloc( xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* start block to allocate */ - xfs_extlen_t len, /* length to allocate */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ + xfs_extlen_t oblocks, /* old count of blocks */ + xfs_extlen_t nblocks, /* new count of blocks */ + xfs_inode_t *ip) /* inode (bitmap/summary) */ { - xfs_rtblock_t end; /* end of the allocated extent */ - int error; /* error value */ - xfs_rtblock_t postblock = 0; /* first block allocated > end */ - xfs_rtblock_t preblock = 0; /* first block allocated < start */ + xfs_fileoff_t bno; /* block number in file */ + xfs_buf_t *bp; /* temporary buffer for zeroing */ + int committed; /* transaction committed flag */ + xfs_daddr_t d; /* disk block address */ + int error; /* error return value */ + xfs_fsblock_t firstblock; /* first block allocated in xaction */ + xfs_bmap_free_t flist; /* list of freed blocks */ + xfs_fsblock_t fsbno; /* filesystem block for bno */ + xfs_bmbt_irec_t map; /* block map output */ + int nmap; /* number of block maps */ + int resblks; /* space reservation */ - end = start + len - 1; - /* - * Assume we're allocating out of the middle of a free extent. - * We need to find the beginning and end of the extent so we can - * properly update the summary. - */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); - if (error) { - return error; - } - /* - * Find the next allocated block (end of free extent). - */ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); - if (error) { - return error; - } - /* - * Decrement the summary information corresponding to the entire - * (old) free extent. - */ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); - if (error) { - return error; - } - /* - * If there are blocks not being allocated at the front of the - * old extent, add summary data for them to be free. - */ - if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * If there are blocks not being allocated at the end of the - * old extent, add summary data for them to be free. - */ - if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb); - if (error) { - return error; - } - } /* - * Modify the bitmap to mark this extent allocated. + * Allocate space to the file, as necessary. */ - error = xfs_rtmodify_range(mp, tp, start, len, 0); - return error; -} - -/* - * Return whether there are any free extents in the size range given - * by low and high, for the bitmap block bbno. - */ -STATIC int /* error */ -xfs_rtany_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int low, /* low log2 extent size */ - int high, /* high log2 extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - int *stat) /* out: any good extents here? */ -{ - int error; /* error value */ - int log; /* loop counter, log2 of ext. size */ - xfs_suminfo_t sum; /* summary data */ + while (oblocks < nblocks) { + int cancelflags = 0; + xfs_trans_t *tp; - /* - * Loop over logs of extent sizes. Order is irrelevant. - */ - for (log = low; log <= high; log++) { + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); + resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); /* - * Get one summary datum. + * Reserve space & log for one extent added to the file. */ - error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum); - if (error) { - return error; - } + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, + resblks, 0); + if (error) + goto error_cancel; + cancelflags = XFS_TRANS_RELEASE_LOG_RES; /* - * If there are any, return success. + * Lock the inode. */ - if (sum) { - *stat = 1; - return 0; - } - } - /* - * Found nothing, return failure. - */ - *stat = 0; - return 0; -} - -/* - * Get a buffer for the bitmap or summary file block specified. - * The buffer is returned read and locked. - */ -STATIC int /* error */ -xfs_rtbuf_get( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t block, /* block number in bitmap or summary */ - int issum, /* is summary not bitmap */ - xfs_buf_t **bpp) /* output: buffer for the block */ -{ - xfs_buf_t *bp; /* block buffer, result */ - xfs_inode_t *ip; /* bitmap or summary inode */ - xfs_bmbt_irec_t map; - int nmap = 1; - int error; /* error value */ - - ip = issum ? mp->m_rsumip : mp->m_rbmip; - - error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); - if (error) - return error; - - ASSERT(map.br_startblock != NULLFSBLOCK); - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, map.br_startblock), - mp->m_bsize, 0, &bp, NULL); - if (error) - return error; - ASSERT(!xfs_buf_geterror(bp)); - *bpp = bp; - return 0; -} - -#ifdef DEBUG -/* - * Check that the given extent (block range) is allocated already. - */ -STATIC int /* error */ -xfs_rtcheck_alloc_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* out: 1 for allocated, 0 for not */ -{ - xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ - - return xfs_rtcheck_range(mp, tp, bno, len, 0, &new, stat); -} -#endif - -/* - * Check that the given range is either all allocated (val = 0) or - * all free (val = 1). - */ -STATIC int /* error */ -xfs_rtcheck_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block number of extent */ - xfs_extlen_t len, /* length of extent */ - int val, /* 1 for free, 0 for allocated */ - xfs_rtblock_t *new, /* out: first block not matching */ - int *stat) /* out: 1 for matches, 0 for not */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - /* - * Compute starting bitmap block number - */ - block = XFS_BITTOBLOCK(mp, start); - /* - * Read the bitmap block. - */ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Compute the starting word's address, and starting bit. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - /* - * 0 (allocated) => all zero's; 1 (free) => all one's. - */ - val = -val; - /* - * If not starting on a word boundary, deal with the first - * (partial) word. - */ - if (bit) { - /* - * Compute first bit not examined. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - /* - * Mask of relevant bits. - */ - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ val) & mask)) { - /* - * Different, compute first wrong bit and return. - */ - xfs_trans_brelse(tp, bp); - i = XFS_RTLOBIT(wdiff) - bit; - *new = start + i; - *stat = 0; - return 0; - } - i = lastbit - bit; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the next one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ val)) { - /* - * Different, compute first wrong bit and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *new = start + i; - *stat = 0; - return 0; - } - i += XFS_NBWORD; + xfs_bmap_init(&flist, &firstblock); /* - * Go on to next block if that's where the next word is - * and we need the next word. + * Allocate blocks to the bitmap file. */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the next one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { + nmap = 1; + cancelflags |= XFS_TRANS_ABORT; + error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, + XFS_BMAPI_METADATA, &firstblock, + resblks, &map, &nmap, &flist); + if (!error && nmap < 1) + error = XFS_ERROR(ENOSPC); + if (error) + goto error_cancel; /* - * Mask of relevant bits. + * Free any blocks freed up in the transaction, then commit. */ - mask = ((xfs_rtword_t)1 << lastbit) - 1; + error = xfs_bmap_finish(&tp, &flist, &committed); + if (error) + goto error_cancel; + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) + goto error; /* - * Compute difference between actual and desired value. + * Now we need to clear the allocated blocks. + * Do this one block per transaction, to keep it simple. */ - if ((wdiff = (*b ^ val) & mask)) { + cancelflags = 0; + for (bno = map.br_startoff, fsbno = map.br_startblock; + bno < map.br_startoff + map.br_blockcount; + bno++, fsbno++) { + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO); /* - * Different, compute first wrong bit and return. + * Reserve log for one block zeroing. */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *new = start + i; - *stat = 0; - return 0; - } else - i = len; - } - /* - * Successful, return. - */ - xfs_trans_brelse(tp, bp); - *new = start + i; - *stat = 1; - return 0; -} - -/* - * Copy and transform the summary file, given the old and new - * parameters in the mount structures. - */ -STATIC int /* error */ -xfs_rtcopy_summary( - xfs_mount_t *omp, /* old file system mount point */ - xfs_mount_t *nmp, /* new file system mount point */ - xfs_trans_t *tp) /* transaction pointer */ -{ - xfs_rtblock_t bbno; /* bitmap block number */ - xfs_buf_t *bp; /* summary buffer */ - int error; /* error return value */ - int log; /* summary level number (log length) */ - xfs_suminfo_t sum; /* summary data */ - xfs_fsblock_t sumbno; /* summary block number */ - - bp = NULL; - for (log = omp->m_rsumlevels - 1; log >= 0; log--) { - for (bbno = omp->m_sb.sb_rbmblocks - 1; - (xfs_srtblock_t)bbno >= 0; - bbno--) { - error = xfs_rtget_summary(omp, tp, log, bbno, &bp, - &sumbno, &sum); - if (error) - return error; - if (sum == 0) - continue; - error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum, - &bp, &sumbno); - if (error) - return error; - error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum, - &bp, &sumbno); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, + 0, 0); if (error) - return error; - ASSERT(sum > 0); - } - } - return 0; -} - -/* - * Searching backward from start to limit, find the first block whose - * allocated/free state is different from start's. - */ -STATIC int /* error */ -xfs_rtfind_back( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t firstbit; /* first useful bit in the word */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ - - /* - * Compute and read in starting bitmap block for starting block. - */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Get the first word's index & point to it. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - len = start - limit + 1; - /* - * Compute match value, based on the bit at start: if 1 (free) - * then all-ones, else all-zeroes. - */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; - /* - * If the starting position is not word-aligned, deal with the - * partial word. - */ - if (bit < XFS_NBWORD - 1) { - /* - * Calculate first (leftmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); - mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << - firstbit; - /* - * Calculate the difference between the value there - * and what we're looking for. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different. Mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i = bit - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } - i = bit - firstbit + 1; - /* - * Go on to previous block if that's where the previous word is - * and we need the previous word. - */ - if (--word == -1 && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the previous one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ want)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } - i += XFS_NBWORD; - /* - * Go on to previous block if that's where the previous word is - * and we need the previous word. - */ - if (--word == -1 && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if (len - i) { - /* - * Calculate first (leftmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - firstbit = XFS_NBWORD - (len - i); - mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } else - i = len; - } - /* - * No match, return that we scanned the whole area. - */ - xfs_trans_brelse(tp, bp); - *rtblock = start - i + 1; - return 0; -} - -/* - * Searching forward from start to limit, find the first block whose - * allocated/free state is different from start's. - */ -STATIC int /* error */ -xfs_rtfind_forw( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in the word */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ - - /* - * Compute and read in starting bitmap block for starting block. - */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Get the first word's index & point to it. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - len = limit - start + 1; - /* - * Compute match value, based on the bit at start: if 1 (free) - * then all-ones, else all-zeroes. - */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; - /* - * If the starting position is not word-aligned, deal with the - * partial word. - */ - if (bit) { - /* - * Calculate last (rightmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Calculate the difference between the value there - * and what we're looking for. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different. Mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i = XFS_RTLOBIT(wdiff) - bit; - *rtblock = start + i - 1; - return 0; - } - i = lastbit - bit; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the previous word in the buffer. - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ want)) { + goto error_cancel; /* - * Different, mark where we are and return. + * Lock the bitmap inode. */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; - return 0; - } - i += XFS_NBWORD; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); /* - * If done with this block, get the next one. + * Get a buffer for the block. */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; + d = XFS_FSB_TO_DADDR(mp, fsbno); + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + mp->m_bsize, 0); + if (bp == NULL) { + error = XFS_ERROR(EIO); +error_cancel: + xfs_trans_cancel(tp, cancelflags); + goto error; } - b = bufp = bp->b_addr; - word = 0; - } else { + memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); + xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); /* - * Go on to the next word in the buffer. + * Commit the transaction. */ - b++; + error = xfs_trans_commit(tp, 0); + if (error) + goto error; } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { - /* - * Calculate mask for all the relevant bits in this word. - */ - mask = ((xfs_rtword_t)1 << lastbit) - 1; /* - * Compute difference between actual and desired value. + * Go on to the next extent, if any. */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; - return 0; - } else - i = len; + oblocks = map.br_startoff + map.br_blockcount; } - /* - * No match, return that we scanned the whole area. - */ - xfs_trans_brelse(tp, bp); - *rtblock = start + i - 1; return 0; -} -/* - * Mark an extent specified by start and len freed. - * Updates all the summary information as well as the bitmap. - */ -STATIC int /* error */ -xfs_rtfree_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to free */ - xfs_extlen_t len, /* length to free */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ -{ - xfs_rtblock_t end; /* end of the freed extent */ - int error; /* error value */ - xfs_rtblock_t postblock; /* first block freed > end */ - xfs_rtblock_t preblock; /* first block freed < start */ - - end = start + len - 1; - /* - * Modify the bitmap to mark this extent freed. - */ - error = xfs_rtmodify_range(mp, tp, start, len, 1); - if (error) { - return error; - } - /* - * Assume we're freeing out of the middle of an allocated extent. - * We need to find the beginning and end of the extent so we can - * properly update the summary. - */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); - if (error) { - return error; - } - /* - * Find the next allocated block (end of allocated extent). - */ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); - if (error) - return error; - /* - * If there are blocks not being freed at the front of the - * old extent, add summary data for them to be allocated. - */ - if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * If there are blocks not being freed at the end of the - * old extent, add summary data for them to be allocated. - */ - if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * Increment the summary information corresponding to the entire - * (new) free extent. - */ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); +error: return error; } /* - * Read and return the summary information for a given extent size, - * bitmap block combination. - * Keeps track of a current summary block, so we don't keep reading - * it from the buffer cache. - */ -STATIC int /* error */ -xfs_rtget_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_suminfo_t *sum) /* out: summary info for this block */ -{ - xfs_buf_t *bp; /* buffer for summary block */ - int error; /* error value */ - xfs_fsblock_t sb; /* summary fsblock */ - int so; /* index into the summary file */ - xfs_suminfo_t *sp; /* pointer to returned data */ - - /* - * Compute entry number in the summary file. - */ - so = XFS_SUMOFFS(mp, log, bbno); - /* - * Compute the block number in the summary file. - */ - sb = XFS_SUMOFFSTOBLOCK(mp, so); - /* - * If we have an old buffer, and the block number matches, use that. - */ - if (rbpp && *rbpp && *rsb == sb) - bp = *rbpp; - /* - * Otherwise we have to get the buffer. - */ - else { - /* - * If there was an old one, get rid of it first. - */ - if (rbpp && *rbpp) - xfs_trans_brelse(tp, *rbpp); - error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); - if (error) { - return error; - } - /* - * Remember this buffer and block for the next call. - */ - if (rbpp) { - *rbpp = bp; - *rsb = sb; - } - } - /* - * Point to the summary information & copy it out. - */ - sp = XFS_SUMPTR(mp, bp, so); - *sum = *sp; - /* - * Drop the buffer if we're not asked to remember it. - */ - if (!rbpp) - xfs_trans_brelse(tp, bp); - return 0; -} - -/* - * Set the given range of bitmap bits to the given value. - * Do whatever I/O and logging is required. - */ -STATIC int /* error */ -xfs_rtmodify_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to modify */ - xfs_extlen_t len, /* length of extent to modify */ - int val) /* 1 for free, 0 for allocated */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtword_t *first; /* first used word in the buffer */ - int i; /* current bit number rel. to start */ - int lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask o frelevant bits for value */ - int word; /* word number in the buffer */ - - /* - * Compute starting bitmap block number. - */ - block = XFS_BITTOBLOCK(mp, start); - /* - * Read the bitmap block, and point to its data. - */ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Compute the starting word's address, and starting bit. - */ - word = XFS_BITTOWORD(mp, start); - first = b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - /* - * 0 (allocated) => all zeroes; 1 (free) => all ones. - */ - val = -val; - /* - * If not starting on a word boundary, deal with the first - * (partial) word. - */ - if (bit) { - /* - * Compute first bit not changed and mask of relevant bits. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Set/clear the active bits. - */ - if (val) - *b |= mask; - else - *b &= ~mask; - i = lastbit - bit; - /* - * Go on to the next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * Log the changed part of this block. - * Get the next one. - */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Set the word value correctly. - */ - *b = val; - i += XFS_NBWORD; - /* - * Go on to the next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * Log the changed part of this block. - * Get the next one. - */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { - /* - * Compute a mask of relevant bits. - */ - bit = 0; - mask = ((xfs_rtword_t)1 << lastbit) - 1; - /* - * Set/clear the active bits. - */ - if (val) - *b |= mask; - else - *b &= ~mask; - b++; - } - /* - * Log any remaining changed bytes. - */ - if (b > first) - xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp - 1)); - return 0; -} - -/* - * Read and modify the summary information for a given extent size, - * bitmap block combination. - * Keeps track of a current summary block, so we don't keep reading - * it from the buffer cache. - */ -STATIC int /* error */ -xfs_rtmodify_summary( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ -{ - xfs_buf_t *bp; /* buffer for the summary block */ - int error; /* error value */ - xfs_fsblock_t sb; /* summary fsblock */ - int so; /* index into the summary file */ - xfs_suminfo_t *sp; /* pointer to returned data */ - - /* - * Compute entry number in the summary file. - */ - so = XFS_SUMOFFS(mp, log, bbno); - /* - * Compute the block number in the summary file. - */ - sb = XFS_SUMOFFSTOBLOCK(mp, so); - /* - * If we have an old buffer, and the block number matches, use that. - */ - if (rbpp && *rbpp && *rsb == sb) - bp = *rbpp; - /* - * Otherwise we have to get the buffer. - */ - else { - /* - * If there was an old one, get rid of it first. - */ - if (rbpp && *rbpp) - xfs_trans_brelse(tp, *rbpp); - error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); - if (error) { - return error; - } - /* - * Remember this buffer and block for the next call. - */ - if (rbpp) { - *rbpp = bp; - *rsb = sb; - } - } - /* - * Point to the summary information, modify and log it. - */ - sp = XFS_SUMPTR(mp, bp, so); - *sp += delta; - xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), - (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); - return 0; -} - -/* * Visible (exported) functions. */ @@ -2129,66 +1231,6 @@ xfs_rtallocate_extent( } /* - * Free an extent in the realtime subvolume. Length is expressed in - * realtime extents, as is the block number. - */ -int /* error */ -xfs_rtfree_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to free */ - xfs_extlen_t len) /* length of extent freed */ -{ - int error; /* error value */ - xfs_mount_t *mp; /* file system mount structure */ - xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp; /* summary file block buffer */ - - mp = tp->t_mountp; - - ASSERT(mp->m_rbmip->i_itemp != NULL); - ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); - -#ifdef DEBUG - /* - * Check to see that this whole range is currently allocated. - */ - { - int stat; /* result from checking range */ - - error = xfs_rtcheck_alloc_range(mp, tp, bno, len, &stat); - if (error) { - return error; - } - ASSERT(stat); - } -#endif - sumbp = NULL; - /* - * Free the range of realtime blocks. - */ - error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); - if (error) { - return error; - } - /* - * Mark more blocks free in the superblock. - */ - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); - /* - * If we've now freed all the blocks, reset the file sequence - * number to 0. - */ - if (tp->t_frextents_delta + mp->m_sb.sb_frextents == - mp->m_sb.sb_rextents) { - if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) - mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; - *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; - xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); - } - return 0; -} - -/* * Initialize realtime fields in the mount structure. */ int /* error */ diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index b2a1a24c0e2f..752b63d10300 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -95,6 +95,30 @@ xfs_growfs_rt( struct xfs_mount *mp, /* file system mount structure */ xfs_growfs_rt_t *in); /* user supplied growfs struct */ +/* + * From xfs_rtbitmap.c + */ +int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t block, int issum, struct xfs_buf **bpp); +int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, int val, + xfs_rtblock_t *new, int *stat); +int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_rtblock_t limit, + xfs_rtblock_t *rtblock); +int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_rtblock_t limit, + xfs_rtblock_t *rtblock); +int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, int val); +int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, + xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, + xfs_fsblock_t *rsb); +int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, + struct xfs_buf **rbpp, xfs_fsblock_t *rsb); + + #else # define xfs_rtallocate_extent(t,b,min,max,l,a,f,p,rb) (ENOSYS) # define xfs_rtfree_extent(t,b,l) (ENOSYS) diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c new file mode 100644 index 000000000000..b1f2fe8af4a8 --- /dev/null +++ b/fs/xfs/xfs_rtbitmap.c @@ -0,0 +1,974 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_trans.h" +#include "xfs_trans_space.h" +#include "xfs_trace.h" +#include "xfs_buf.h" +#include "xfs_icache.h" +#include "xfs_dinode.h" +#include "xfs_rtalloc.h" + + +/* + * Realtime allocator bitmap functions shared with userspace. + */ + +/* + * Get a buffer for the bitmap or summary file block specified. + * The buffer is returned read and locked. + */ +int +xfs_rtbuf_get( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t block, /* block number in bitmap or summary */ + int issum, /* is summary not bitmap */ + xfs_buf_t **bpp) /* output: buffer for the block */ +{ + xfs_buf_t *bp; /* block buffer, result */ + xfs_inode_t *ip; /* bitmap or summary inode */ + xfs_bmbt_irec_t map; + int nmap = 1; + int error; /* error value */ + + ip = issum ? mp->m_rsumip : mp->m_rbmip; + + error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); + if (error) + return error; + + ASSERT(map.br_startblock != NULLFSBLOCK); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, map.br_startblock), + mp->m_bsize, 0, &bp, NULL); + if (error) + return error; + ASSERT(!xfs_buf_geterror(bp)); + *bpp = bp; + return 0; +} + +/* + * Searching backward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +int +xfs_rtfind_back( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to look at */ + xfs_rtblock_t limit, /* last block to look at */ + xfs_rtblock_t *rtblock) /* out: start block found */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t firstbit; /* first useful bit in the word */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute and read in starting bitmap block for starting block. + */ + block = XFS_BITTOBLOCK(mp, start); + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Get the first word's index & point to it. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + len = start - limit + 1; + /* + * Compute match value, based on the bit at start: if 1 (free) + * then all-ones, else all-zeroes. + */ + want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + /* + * If the starting position is not word-aligned, deal with the + * partial word. + */ + if (bit < XFS_NBWORD - 1) { + /* + * Calculate first (leftmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); + mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << + firstbit; + /* + * Calculate the difference between the value there + * and what we're looking for. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different. Mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i = bit - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } + i = bit - firstbit + 1; + /* + * Go on to previous block if that's where the previous word is + * and we need the previous word. + */ + if (--word == -1 && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + word = XFS_BLOCKWMASK(mp); + b = &bufp[word]; + } else { + /* + * Go on to the previous word in the buffer. + */ + b--; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the previous one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ want)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to previous block if that's where the previous word is + * and we need the previous word. + */ + if (--word == -1 && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + word = XFS_BLOCKWMASK(mp); + b = &bufp[word]; + } else { + /* + * Go on to the previous word in the buffer. + */ + b--; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if (len - i) { + /* + * Calculate first (leftmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + firstbit = XFS_NBWORD - (len - i); + mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } else + i = len; + } + /* + * No match, return that we scanned the whole area. + */ + xfs_trans_brelse(tp, bp); + *rtblock = start - i + 1; + return 0; +} + +/* + * Searching forward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +int +xfs_rtfind_forw( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to look at */ + xfs_rtblock_t limit, /* last block to look at */ + xfs_rtblock_t *rtblock) /* out: start block found */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t lastbit; /* last useful bit in the word */ + xfs_rtblock_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute and read in starting bitmap block for starting block. + */ + block = XFS_BITTOBLOCK(mp, start); + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Get the first word's index & point to it. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + len = limit - start + 1; + /* + * Compute match value, based on the bit at start: if 1 (free) + * then all-ones, else all-zeroes. + */ + want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + /* + * If the starting position is not word-aligned, deal with the + * partial word. + */ + if (bit) { + /* + * Calculate last (rightmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Calculate the difference between the value there + * and what we're looking for. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different. Mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i = XFS_RTLOBIT(wdiff) - bit; + *rtblock = start + i - 1; + return 0; + } + i = lastbit - bit; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the previous word in the buffer. + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ want)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *rtblock = start + i - 1; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer. + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Calculate mask for all the relevant bits in this word. + */ + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *rtblock = start + i - 1; + return 0; + } else + i = len; + } + /* + * No match, return that we scanned the whole area. + */ + xfs_trans_brelse(tp, bp); + *rtblock = start + i - 1; + return 0; +} + +/* + * Read and modify the summary information for a given extent size, + * bitmap block combination. + * Keeps track of a current summary block, so we don't keep reading + * it from the buffer cache. + */ +int +xfs_rtmodify_summary( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + int log, /* log2 of extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + int delta, /* change to make to summary info */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_buf_t *bp; /* buffer for the summary block */ + int error; /* error value */ + xfs_fsblock_t sb; /* summary fsblock */ + int so; /* index into the summary file */ + xfs_suminfo_t *sp; /* pointer to returned data */ + + /* + * Compute entry number in the summary file. + */ + so = XFS_SUMOFFS(mp, log, bbno); + /* + * Compute the block number in the summary file. + */ + sb = XFS_SUMOFFSTOBLOCK(mp, so); + /* + * If we have an old buffer, and the block number matches, use that. + */ + if (rbpp && *rbpp && *rsb == sb) + bp = *rbpp; + /* + * Otherwise we have to get the buffer. + */ + else { + /* + * If there was an old one, get rid of it first. + */ + if (rbpp && *rbpp) + xfs_trans_brelse(tp, *rbpp); + error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); + if (error) { + return error; + } + /* + * Remember this buffer and block for the next call. + */ + if (rbpp) { + *rbpp = bp; + *rsb = sb; + } + } + /* + * Point to the summary information, modify and log it. + */ + sp = XFS_SUMPTR(mp, bp, so); + *sp += delta; + xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), + (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); + return 0; +} + +/* + * Set the given range of bitmap bits to the given value. + * Do whatever I/O and logging is required. + */ +int +xfs_rtmodify_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to modify */ + xfs_extlen_t len, /* length of extent to modify */ + int val) /* 1 for free, 0 for allocated */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtword_t *first; /* first used word in the buffer */ + int i; /* current bit number rel. to start */ + int lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask o frelevant bits for value */ + int word; /* word number in the buffer */ + + /* + * Compute starting bitmap block number. + */ + block = XFS_BITTOBLOCK(mp, start); + /* + * Read the bitmap block, and point to its data. + */ + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Compute the starting word's address, and starting bit. + */ + word = XFS_BITTOWORD(mp, start); + first = b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + /* + * 0 (allocated) => all zeroes; 1 (free) => all ones. + */ + val = -val; + /* + * If not starting on a word boundary, deal with the first + * (partial) word. + */ + if (bit) { + /* + * Compute first bit not changed and mask of relevant bits. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Set/clear the active bits. + */ + if (val) + *b |= mask; + else + *b &= ~mask; + i = lastbit - bit; + /* + * Go on to the next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * Log the changed part of this block. + * Get the next one. + */ + xfs_trans_log_buf(tp, bp, + (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp)); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + first = b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Set the word value correctly. + */ + *b = val; + i += XFS_NBWORD; + /* + * Go on to the next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * Log the changed part of this block. + * Get the next one. + */ + xfs_trans_log_buf(tp, bp, + (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp)); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + first = b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Compute a mask of relevant bits. + */ + bit = 0; + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Set/clear the active bits. + */ + if (val) + *b |= mask; + else + *b &= ~mask; + b++; + } + /* + * Log any remaining changed bytes. + */ + if (b > first) + xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp - 1)); + return 0; +} + +/* + * Mark an extent specified by start and len freed. + * Updates all the summary information as well as the bitmap. + */ +int +xfs_rtfree_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to free */ + xfs_extlen_t len, /* length to free */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_rtblock_t end; /* end of the freed extent */ + int error; /* error value */ + xfs_rtblock_t postblock; /* first block freed > end */ + xfs_rtblock_t preblock; /* first block freed < start */ + + end = start + len - 1; + /* + * Modify the bitmap to mark this extent freed. + */ + error = xfs_rtmodify_range(mp, tp, start, len, 1); + if (error) { + return error; + } + /* + * Assume we're freeing out of the middle of an allocated extent. + * We need to find the beginning and end of the extent so we can + * properly update the summary. + */ + error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + if (error) { + return error; + } + /* + * Find the next allocated block (end of allocated extent). + */ + error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, + &postblock); + if (error) + return error; + /* + * If there are blocks not being freed at the front of the + * old extent, add summary data for them to be allocated. + */ + if (preblock < start) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(start - preblock), + XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * If there are blocks not being freed at the end of the + * old extent, add summary data for them to be allocated. + */ + if (postblock > end) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock - end), + XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * Increment the summary information corresponding to the entire + * (new) free extent. + */ + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); + return error; +} + +/* + * Check that the given range is either all allocated (val = 0) or + * all free (val = 1). + */ +int +xfs_rtcheck_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block number of extent */ + xfs_extlen_t len, /* length of extent */ + int val, /* 1 for free, 0 for allocated */ + xfs_rtblock_t *new, /* out: first block not matching */ + int *stat) /* out: 1 for matches, 0 for not */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute starting bitmap block number + */ + block = XFS_BITTOBLOCK(mp, start); + /* + * Read the bitmap block. + */ + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Compute the starting word's address, and starting bit. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + /* + * 0 (allocated) => all zero's; 1 (free) => all one's. + */ + val = -val; + /* + * If not starting on a word boundary, deal with the first + * (partial) word. + */ + if (bit) { + /* + * Compute first bit not examined. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + /* + * Mask of relevant bits. + */ + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ val) & mask)) { + /* + * Different, compute first wrong bit and return. + */ + xfs_trans_brelse(tp, bp); + i = XFS_RTLOBIT(wdiff) - bit; + *new = start + i; + *stat = 0; + return 0; + } + i = lastbit - bit; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer. + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ val)) { + /* + * Different, compute first wrong bit and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *new = start + i; + *stat = 0; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer. + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Mask of relevant bits. + */ + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ val) & mask)) { + /* + * Different, compute first wrong bit and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *new = start + i; + *stat = 0; + return 0; + } else + i = len; + } + /* + * Successful, return. + */ + xfs_trans_brelse(tp, bp); + *new = start + i; + *stat = 1; + return 0; +} + +#ifdef DEBUG +/* + * Check that the given extent (block range) is allocated already. + */ +STATIC int /* error */ +xfs_rtcheck_alloc_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number of extent */ + xfs_extlen_t len) /* length of extent */ +{ + xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ + int stat; + int error; + + error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat); + if (error) + return error; + ASSERT(stat); + return 0; +} +#else +#define xfs_rtcheck_alloc_range(m,t,b,l) (0) +#endif +/* + * Free an extent in the realtime subvolume. Length is expressed in + * realtime extents, as is the block number. + */ +int /* error */ +xfs_rtfree_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number to free */ + xfs_extlen_t len) /* length of extent freed */ +{ + int error; /* error value */ + xfs_mount_t *mp; /* file system mount structure */ + xfs_fsblock_t sb; /* summary file block number */ + xfs_buf_t *sumbp = NULL; /* summary file block buffer */ + + mp = tp->t_mountp; + + ASSERT(mp->m_rbmip->i_itemp != NULL); + ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + + error = xfs_rtcheck_alloc_range(mp, tp, bno, len); + if (error) + return error; + + /* + * Free the range of realtime blocks. + */ + error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); + if (error) { + return error; + } + /* + * Mark more blocks free in the superblock. + */ + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); + /* + * If we've now freed all the blocks, reset the file sequence + * number to 0. + */ + if (tp->t_frextents_delta + mp->m_sb.sb_frextents == + mp->m_sb.sb_rextents) { + if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) + mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; + *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; + xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); + } + return 0; +} + diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index a5b59d92eb70..b7c9aea77f8f 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -17,34 +17,26 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" -#include "xfs_rtalloc.h" -#include "xfs_bmap.h" #include "xfs_error.h" -#include "xfs_quota.h" -#include "xfs_fsops.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" +#include "xfs_dinode.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -249,13 +241,13 @@ xfs_mount_validate_sb( if (xfs_sb_version_has_pquotino(sbp)) { if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { xfs_notice(mp, - "Version 5 of Super block has XFS_OQUOTA bits.\n"); + "Version 5 of Super block has XFS_OQUOTA bits."); return XFS_ERROR(EFSCORRUPTED); } } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { xfs_notice(mp, -"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.\n"); +"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); return XFS_ERROR(EFSCORRUPTED); } @@ -596,6 +588,11 @@ xfs_sb_verify( * single bit error could clear the feature bit and unused parts of the * superblock are supposed to be zero. Hence a non-null crc field indicates that * we've potentially lost a feature bit and we should check it anyway. + * + * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the + * last field in V4 secondary superblocks. So for secondary superblocks, + * we are more forgiving, and ignore CRC failures if the primary doesn't + * indicate that the fs version is V5. */ static void xfs_sb_read_verify( @@ -616,16 +613,21 @@ xfs_sb_read_verify( if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), offsetof(struct xfs_sb, sb_crc))) { - error = EFSCORRUPTED; - goto out_error; + /* Only fail bad secondaries on a known V5 filesystem */ + if (bp->b_bn != XFS_SB_DADDR && + xfs_sb_version_hascrc(&mp->m_sb)) { + error = EFSCORRUPTED; + goto out_error; + } } } error = xfs_sb_verify(bp, true); out_error: if (error) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - mp, bp->b_addr); + if (error != EWRONGFS) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, + mp, bp->b_addr); xfs_buf_ioerror(bp, error); } } diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 6835b44f850e..35061d4b614c 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -699,7 +699,4 @@ extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); -extern const struct xfs_buf_ops xfs_sb_buf_ops; -extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; - #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h new file mode 100644 index 000000000000..8c5035a13df1 --- /dev/null +++ b/fs/xfs/xfs_shared.h @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SHARED_H__ +#define __XFS_SHARED_H__ + +/* + * Definitions shared between kernel and userspace that don't fit into any other + * header file that is shared with userspace. + */ +struct xfs_ifork; +struct xfs_buf; +struct xfs_buf_ops; +struct xfs_mount; +struct xfs_trans; +struct xfs_inode; + +/* + * Buffer verifier operations are widely used, including userspace tools + */ +extern const struct xfs_buf_ops xfs_agf_buf_ops; +extern const struct xfs_buf_ops xfs_agi_buf_ops; +extern const struct xfs_buf_ops xfs_agf_buf_ops; +extern const struct xfs_buf_ops xfs_agfl_buf_ops; +extern const struct xfs_buf_ops xfs_allocbt_buf_ops; +extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; +extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; +extern const struct xfs_buf_ops xfs_bmbt_buf_ops; +extern const struct xfs_buf_ops xfs_da3_node_buf_ops; +extern const struct xfs_buf_ops xfs_dquot_buf_ops; +extern const struct xfs_buf_ops xfs_symlink_buf_ops; +extern const struct xfs_buf_ops xfs_agi_buf_ops; +extern const struct xfs_buf_ops xfs_inobt_buf_ops; +extern const struct xfs_buf_ops xfs_inode_buf_ops; +extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; +extern const struct xfs_buf_ops xfs_dquot_buf_ops; +extern const struct xfs_buf_ops xfs_sb_buf_ops; +extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; +extern const struct xfs_buf_ops xfs_symlink_buf_ops; + +/* + * Transaction types. Used to distinguish types of buffers. These never reach + * the log. + */ +#define XFS_TRANS_SETATTR_NOT_SIZE 1 +#define XFS_TRANS_SETATTR_SIZE 2 +#define XFS_TRANS_INACTIVE 3 +#define XFS_TRANS_CREATE 4 +#define XFS_TRANS_CREATE_TRUNC 5 +#define XFS_TRANS_TRUNCATE_FILE 6 +#define XFS_TRANS_REMOVE 7 +#define XFS_TRANS_LINK 8 +#define XFS_TRANS_RENAME 9 +#define XFS_TRANS_MKDIR 10 +#define XFS_TRANS_RMDIR 11 +#define XFS_TRANS_SYMLINK 12 +#define XFS_TRANS_SET_DMATTRS 13 +#define XFS_TRANS_GROWFS 14 +#define XFS_TRANS_STRAT_WRITE 15 +#define XFS_TRANS_DIOSTRAT 16 +/* 17 was XFS_TRANS_WRITE_SYNC */ +#define XFS_TRANS_WRITEID 18 +#define XFS_TRANS_ADDAFORK 19 +#define XFS_TRANS_ATTRINVAL 20 +#define XFS_TRANS_ATRUNCATE 21 +#define XFS_TRANS_ATTR_SET 22 +#define XFS_TRANS_ATTR_RM 23 +#define XFS_TRANS_ATTR_FLAG 24 +#define XFS_TRANS_CLEAR_AGI_BUCKET 25 +#define XFS_TRANS_QM_SBCHANGE 26 +/* + * Dummy entries since we use the transaction type to index into the + * trans_type[] in xlog_recover_print_trans_head() + */ +#define XFS_TRANS_DUMMY1 27 +#define XFS_TRANS_DUMMY2 28 +#define XFS_TRANS_QM_QUOTAOFF 29 +#define XFS_TRANS_QM_DQALLOC 30 +#define XFS_TRANS_QM_SETQLIM 31 +#define XFS_TRANS_QM_DQCLUSTER 32 +#define XFS_TRANS_QM_QINOCREATE 33 +#define XFS_TRANS_QM_QUOTAOFF_END 34 +#define XFS_TRANS_SB_UNIT 35 +#define XFS_TRANS_FSYNC_TS 36 +#define XFS_TRANS_GROWFSRT_ALLOC 37 +#define XFS_TRANS_GROWFSRT_ZERO 38 +#define XFS_TRANS_GROWFSRT_FREE 39 +#define XFS_TRANS_SWAPEXT 40 +#define XFS_TRANS_SB_COUNT 41 +#define XFS_TRANS_CHECKPOINT 42 +#define XFS_TRANS_ICREATE 43 +#define XFS_TRANS_TYPE_MAX 43 +/* new transaction types need to be reflected in xfs_logprint(8) */ + +#define XFS_TRANS_TYPES \ + { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \ + { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ + { XFS_TRANS_INACTIVE, "INACTIVE" }, \ + { XFS_TRANS_CREATE, "CREATE" }, \ + { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ + { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ + { XFS_TRANS_REMOVE, "REMOVE" }, \ + { XFS_TRANS_LINK, "LINK" }, \ + { XFS_TRANS_RENAME, "RENAME" }, \ + { XFS_TRANS_MKDIR, "MKDIR" }, \ + { XFS_TRANS_RMDIR, "RMDIR" }, \ + { XFS_TRANS_SYMLINK, "SYMLINK" }, \ + { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \ + { XFS_TRANS_GROWFS, "GROWFS" }, \ + { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \ + { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \ + { XFS_TRANS_WRITEID, "WRITEID" }, \ + { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \ + { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \ + { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \ + { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \ + { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ + { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ + { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ + { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ + { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ + { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ + { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ + { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ + { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ + { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ + { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \ + { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ + { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ + { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ + { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ + { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ + { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ + { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ + { XFS_TRANS_DUMMY1, "DUMMY1" }, \ + { XFS_TRANS_DUMMY2, "DUMMY2" }, \ + { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } + +/* + * This structure is used to track log items associated with + * a transaction. It points to the log item and keeps some + * flags to track the state of the log item. It also tracks + * the amount of space needed to log the item it describes + * once we get to commit processing (see xfs_trans_commit()). + */ +struct xfs_log_item_desc { + struct xfs_log_item *lid_item; + struct list_head lid_trans; + unsigned char lid_flags; +}; + +#define XFS_LID_DIRTY 0x1 + +/* log size calculation functions */ +int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); +int xfs_log_calc_minimum_size(struct xfs_mount *); + + +/* + * Values for t_flags. + */ +#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */ +#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ +#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ +#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ +#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ +#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ +#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer + count in superblock */ +/* + * Values for call flags parameter. + */ +#define XFS_TRANS_RELEASE_LOG_RES 0x4 +#define XFS_TRANS_ABORT 0x8 + +/* + * Field values for xfs_trans_mod_sb. + */ +#define XFS_TRANS_SB_ICOUNT 0x00000001 +#define XFS_TRANS_SB_IFREE 0x00000002 +#define XFS_TRANS_SB_FDBLOCKS 0x00000004 +#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008 +#define XFS_TRANS_SB_FREXTENTS 0x00000010 +#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020 +#define XFS_TRANS_SB_DBLOCKS 0x00000040 +#define XFS_TRANS_SB_AGCOUNT 0x00000080 +#define XFS_TRANS_SB_IMAXPCT 0x00000100 +#define XFS_TRANS_SB_REXTSIZE 0x00000200 +#define XFS_TRANS_SB_RBMBLOCKS 0x00000400 +#define XFS_TRANS_SB_RBLOCKS 0x00000800 +#define XFS_TRANS_SB_REXTENTS 0x00001000 +#define XFS_TRANS_SB_REXTSLOG 0x00002000 + +/* + * Here we centralize the specification of XFS meta-data buffer reference count + * values. This determines how hard the buffer cache tries to hold onto the + * buffer. + */ +#define XFS_AGF_REF 4 +#define XFS_AGI_REF 4 +#define XFS_AGFL_REF 3 +#define XFS_INO_BTREE_REF 3 +#define XFS_ALLOC_BTREE_REF 2 +#define XFS_BMAP_BTREE_REF 2 +#define XFS_DIR_BTREE_REF 2 +#define XFS_INO_REF 2 +#define XFS_ATTR_BTREE_REF 1 +#define XFS_DQUOT_REF 1 + +/* + * Flags for xfs_trans_ichgtime(). + */ +#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ +#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ +#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ + + +/* + * Symlink decoding/encoding functions + */ +int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); +int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, + uint32_t size, struct xfs_buf *bp); +bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, + uint32_t size, struct xfs_buf *bp); +void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_inode *ip, struct xfs_ifork *ifp); + +#endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8968f5036fa1..f317488263dd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -17,34 +17,26 @@ */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_ialloc.h" #include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_alloc.h" #include "xfs_error.h" -#include "xfs_itable.h" #include "xfs_fsops.h" -#include "xfs_attr.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" +#include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_trans_priv.h" -#include "xfs_filestream.h" #include "xfs_da_btree.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" @@ -52,6 +44,9 @@ #include "xfs_icache.h" #include "xfs_trace.h" #include "xfs_icreate_item.h" +#include "xfs_dinode.h" +#include "xfs_filestream.h" +#include "xfs_quota.h" #include <linux/namei.h> #include <linux/init.h> @@ -946,10 +941,6 @@ xfs_fs_destroy_inode( XFS_STATS_INC(vn_reclaim); - /* bad inode, get out here ASAP */ - if (is_bad_inode(inode)) - goto out_reclaim; - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); /* @@ -965,7 +956,6 @@ xfs_fs_destroy_inode( * this more efficiently than we can here, so simply let background * reclaim tear down all inodes. */ -out_reclaim: xfs_inode_set_reclaim_tag(ip); } @@ -1165,7 +1155,7 @@ xfs_restore_resvblks(struct xfs_mount *mp) * Note: xfs_log_quiesce() stops background log work - the callers must ensure * it is started again when appropriate. */ -void +static void xfs_quiesce_attr( struct xfs_mount *mp) { @@ -1246,7 +1236,7 @@ xfs_fs_remount( */ #if 0 xfs_info(mp, - "mount option \"%s\" not supported for remount\n", p); + "mount option \"%s\" not supported for remount", p); return -EINVAL; #else break; @@ -1491,10 +1481,6 @@ xfs_fs_fill_super( error = ENOENT; goto out_unmount; } - if (is_bad_inode(root)) { - error = EINVAL; - goto out_unmount; - } sb->s_root = d_make_root(root); if (!sb->s_root) { error = ENOMEM; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index f622a97a7e33..14e58f2c96bd 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -17,31 +17,31 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_fs.h" #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" +#include "xfs_da_format.h" #include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_bmap.h" +#include "xfs_bmap_btree.h" #include "xfs_bmap_util.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_symlink.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" +#include "xfs_log.h" +#include "xfs_dinode.h" /* ----- Kernel only functions below ----- */ STATIC int @@ -424,8 +424,7 @@ xfs_symlink( */ STATIC int xfs_inactive_symlink_rmt( - xfs_inode_t *ip, - xfs_trans_t **tpp) + struct xfs_inode *ip) { xfs_buf_t *bp; int committed; @@ -437,11 +436,9 @@ xfs_inactive_symlink_rmt( xfs_mount_t *mp; xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; int nmaps; - xfs_trans_t *ntp; int size; xfs_trans_t *tp; - tp = *tpp; mp = ip->i_mount; ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS); /* @@ -453,6 +450,16 @@ xfs_inactive_symlink_rmt( */ ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); + tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + /* * Lock the inode, fix the size, and join it to the transaction. * Hold it so in the normal path, we still have it locked for @@ -471,7 +478,7 @@ xfs_inactive_symlink_rmt( error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), mval, &nmaps, 0); if (error) - goto error0; + goto error_trans_cancel; /* * Invalidate the block(s). No validation is done. */ @@ -481,22 +488,24 @@ xfs_inactive_symlink_rmt( XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); if (!bp) { error = ENOMEM; - goto error1; + goto error_bmap_cancel; } xfs_trans_binval(tp, bp); } /* * Unmap the dead block(s) to the free_list. */ - if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, - &first_block, &free_list, &done))) - goto error1; + error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, + &first_block, &free_list, &done); + if (error) + goto error_bmap_cancel; ASSERT(done); /* * Commit the first transaction. This logs the EFI and the inode. */ - if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) - goto error1; + error = xfs_bmap_finish(&tp, &free_list, &committed); + if (error) + goto error_bmap_cancel; /* * The transaction must have been committed, since there were * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. @@ -511,26 +520,13 @@ xfs_inactive_symlink_rmt( xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* - * Get a new, empty transaction to return to our caller. - */ - ntp = xfs_trans_dup(tp); - /* * Commit the transaction containing extent freeing and EFDs. - * If we get an error on the commit here or on the reserve below, - * we need to unlock the inode since the new transaction doesn't - * have the inode attached. */ - error = xfs_trans_commit(tp, 0); - tp = ntp; + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - goto error0; + goto error_unlock; } - /* - * transaction commit worked ok so we can drop the extra ticket - * reference that we gained in xfs_trans_dup() - */ - xfs_log_ticket_put(tp->t_ticket); /* * Remove the memory for extent descriptions (just bookkeeping). @@ -538,23 +534,16 @@ xfs_inactive_symlink_rmt( if (ip->i_df.if_bytes) xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); ASSERT(ip->i_df.if_bytes == 0); - /* - * Put an itruncate log reservation in the new transaction - * for our caller. - */ - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); - if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - goto error0; - } - xfs_trans_ijoin(tp, ip, 0); - *tpp = tp; + xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; - error1: +error_bmap_cancel: xfs_bmap_cancel(&free_list); - error0: +error_trans_cancel: + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); +error_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -563,41 +552,46 @@ xfs_inactive_symlink_rmt( */ int xfs_inactive_symlink( - struct xfs_inode *ip, - struct xfs_trans **tp) + struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; int pathlen; trace_xfs_inactive_symlink(ip); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); + xfs_ilock(ip, XFS_ILOCK_EXCL); + /* * Zero length symlinks _can_ exist. */ pathlen = (int)ip->i_d.di_size; - if (!pathlen) + if (!pathlen) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; + } if (pathlen < 0 || pathlen > MAXPATHLEN) { xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)", __func__, (unsigned long long)ip->i_ino, pathlen); + xfs_iunlock(ip, XFS_ILOCK_EXCL); ASSERT(0); return XFS_ERROR(EFSCORRUPTED); } if (ip->i_df.if_flags & XFS_IFINLINE) { - if (ip->i_df.if_bytes > 0) + if (ip->i_df.if_bytes > 0) xfs_idata_realloc(ip, -(ip->i_df.if_bytes), XFS_DATA_FORK); + xfs_iunlock(ip, XFS_ILOCK_EXCL); ASSERT(ip->i_df.if_bytes == 0); return 0; } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* remove the remote symlink */ - return xfs_inactive_symlink_rmt(ip, tp); + return xfs_inactive_symlink_rmt(ip); } diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index 99338ba666ac..e75245d09116 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -22,6 +22,6 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, umode_t mode, struct xfs_inode **ipp); int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp); +int xfs_inactive_symlink(struct xfs_inode *ip); #endif /* __XFS_SYMLINK_H */ diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c index 01c85e3f6470..bf59a2b45f8c 100644 --- a/fs/xfs/xfs_symlink_remote.c +++ b/fs/xfs/xfs_symlink_remote.c @@ -19,8 +19,9 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" #include "xfs_ag.h" #include "xfs_sb.h" #include "xfs_mount.h" @@ -30,6 +31,7 @@ #include "xfs_trace.h" #include "xfs_symlink.h" #include "xfs_cksum.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 5d7b3e40705f..dee3279c095e 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -17,19 +17,16 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_ialloc.h" #include "xfs_itable.h" @@ -37,6 +34,8 @@ #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" +#include "xfs_trans.h" +#include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_buf_item.h" #include "xfs_quota.h" @@ -46,6 +45,7 @@ #include "xfs_dquot.h" #include "xfs_log_recover.h" #include "xfs_inode_item.h" +#include "xfs_bmap_btree.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 47910e638c18..425dfa45b9a0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -31,8 +31,8 @@ struct xfs_da_args; struct xfs_da_node_entry; struct xfs_dquot; struct xfs_log_item; -struct xlog_ticket; struct xlog; +struct xlog_ticket; struct xlog_recover; struct xlog_recover_item; struct xfs_buf_log_format; @@ -135,6 +135,31 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks); +DECLARE_EVENT_CLASS(xfs_ag_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), + TP_ARGS(mp, agno), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + ), + TP_printk("dev %d:%d agno %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno) +); +#define DEFINE_AG_EVENT(name) \ +DEFINE_EVENT(xfs_ag_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), \ + TP_ARGS(mp, agno)) + +DEFINE_AG_EVENT(xfs_read_agf); +DEFINE_AG_EVENT(xfs_alloc_read_agf); +DEFINE_AG_EVENT(xfs_read_agi); +DEFINE_AG_EVENT(xfs_ialloc_read_agi); + TRACE_EVENT(xfs_attr_list_node_descend, TP_PROTO(struct xfs_attr_list_context *ctx, struct xfs_da_node_entry *btree), @@ -938,6 +963,63 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); +DECLARE_EVENT_CLASS(xfs_ail_class, + TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), + TP_ARGS(lip, old_lsn, new_lsn), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(void *, lip) + __field(uint, type) + __field(uint, flags) + __field(xfs_lsn_t, old_lsn) + __field(xfs_lsn_t, new_lsn) + ), + TP_fast_assign( + __entry->dev = lip->li_mountp->m_super->s_dev; + __entry->lip = lip; + __entry->type = lip->li_type; + __entry->flags = lip->li_flags; + __entry->old_lsn = old_lsn; + __entry->new_lsn = new_lsn; + ), + TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->lip, + CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), + CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_AIL_EVENT(name) \ +DEFINE_EVENT(xfs_ail_class, name, \ + TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), \ + TP_ARGS(lip, old_lsn, new_lsn)) +DEFINE_AIL_EVENT(xfs_ail_insert); +DEFINE_AIL_EVENT(xfs_ail_move); +DEFINE_AIL_EVENT(xfs_ail_delete); + +TRACE_EVENT(xfs_log_assign_tail_lsn, + TP_PROTO(struct xlog *log, xfs_lsn_t new_lsn), + TP_ARGS(log, new_lsn), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_lsn_t, new_lsn) + __field(xfs_lsn_t, old_lsn) + __field(xfs_lsn_t, last_sync_lsn) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->new_lsn = new_lsn; + __entry->old_lsn = atomic64_read(&log->l_tail_lsn); + __entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); + ), + TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), + CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), + CYCLE_LSN(__entry->last_sync_lsn), BLOCK_LSN(__entry->last_sync_lsn)) +) DECLARE_EVENT_CLASS(xfs_file_class, TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 5411e01ab452..c812c5c060de 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -18,32 +18,21 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_error.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_alloc.h" #include "xfs_extent_busy.h" -#include "xfs_bmap.h" #include "xfs_quota.h" -#include "xfs_qm.h" +#include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_trans_space.h" -#include "xfs_inode_item.h" -#include "xfs_log_priv.h" -#include "xfs_buf_item.h" +#include "xfs_log.h" #include "xfs_trace.h" +#include "xfs_error.h" kmem_zone_t *xfs_trans_zone; kmem_zone_t *xfs_log_item_desc_zone; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 09cf40b89e8c..9b96d35e483d 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -18,10 +18,6 @@ #ifndef __XFS_TRANS_H__ #define __XFS_TRANS_H__ -struct xfs_log_item; - -#include "xfs_trans_resv.h" - /* kernel only transaction subsystem defines */ struct xfs_buf; @@ -77,6 +73,9 @@ struct xfs_item_ops { void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); }; +void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, + int type, const struct xfs_item_ops *ops); + /* * Return values for the iop_push() routines. */ @@ -85,18 +84,12 @@ struct xfs_item_ops { #define XFS_ITEM_LOCKED 2 #define XFS_ITEM_FLUSHING 3 -/* - * This is the type of function which can be given to xfs_trans_callback() - * to be called upon the transaction's commit to disk. - */ -typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *); /* * This is the structure maintained for every active transaction. */ typedef struct xfs_trans { unsigned int t_magic; /* magic number */ - xfs_log_callback_t t_logcb; /* log callback struct */ unsigned int t_type; /* transaction type */ unsigned int t_log_res; /* amt of log space resvd */ unsigned int t_log_count; /* count for perm log res */ @@ -132,7 +125,6 @@ typedef struct xfs_trans { int64_t t_rextents_delta;/* superblocks rextents chg */ int64_t t_rextslog_delta;/* superblocks rextslog chg */ struct list_head t_items; /* log item descriptors */ - xfs_trans_header_t t_header; /* header for in-log trans */ struct list_head t_busy; /* list of busy extents */ unsigned long t_pflags; /* saved process flags state */ } xfs_trans_t; @@ -237,10 +229,16 @@ void xfs_trans_log_efd_extent(xfs_trans_t *, xfs_fsblock_t, xfs_extlen_t); int xfs_trans_commit(xfs_trans_t *, uint flags); +int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); void xfs_trans_cancel(xfs_trans_t *, int); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); +void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, + enum xfs_blft); +void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, + struct xfs_buf *src_bp); + extern kmem_zone_t *xfs_trans_zone; extern kmem_zone_t *xfs_log_item_desc_zone; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 21c6d7ddbc06..a7287354e535 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -18,15 +18,16 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_trace.h" #include "xfs_error.h" +#include "xfs_log.h" #ifdef DEBUG /* @@ -658,11 +659,13 @@ xfs_trans_ail_update_bulk( if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0) continue; + trace_xfs_ail_move(lip, lip->li_lsn, lsn); xfs_ail_delete(ailp, lip); if (mlip == lip) mlip_changed = 1; } else { lip->li_flags |= XFS_LI_IN_AIL; + trace_xfs_ail_insert(lip, 0, lsn); } lip->li_lsn = lsn; list_add(&lip->li_ail, &tmp); @@ -731,6 +734,7 @@ xfs_trans_ail_delete_bulk( return; } + trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); xfs_ail_delete(ailp, lip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8c75b8f67270..c035d11b7734 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -17,17 +17,15 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_error.h" diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 54ee3c5dee76..cd2a10e15d3a 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -17,23 +17,18 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" #include "xfs_trans_priv.h" +#include "xfs_quota.h" #include "xfs_qm.h" STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 8d71b16eccae..47978ba89dae 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -17,12 +17,13 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_extfree_item.h" diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 53dfe46f3680..50c3f5614288 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -17,18 +17,15 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" +#include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" #include "xfs_trace.h" @@ -114,12 +111,14 @@ xfs_trans_log_inode( /* * First time we log the inode in a transaction, bump the inode change - * counter if it is configured for this to occur. + * counter if it is configured for this to occur. We don't use + * inode_inc_version() because there is no need for extra locking around + * i_version as we already hold the inode locked exclusively for + * metadata modification. */ if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && IS_I_VERSION(VFS_I(ip))) { - inode_inc_iversion(VFS_I(ip)); - ip->i_d.di_changecount = VFS_I(ip)->i_version; + ip->i_d.di_changecount = ++VFS_I(ip)->i_version; flags |= XFS_ILOG_CORE; } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index c52def0b441c..12e86af9d9b9 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -27,7 +27,6 @@ struct xfs_log_vec; void xfs_trans_init(struct xfs_mount *); -int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index a65a3cc40610..2fd59c0dae66 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -18,27 +18,19 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log.h" +#include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_error.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" #include "xfs_inode.h" -#include "xfs_btree.h" +#include "xfs_bmap_btree.h" #include "xfs_ialloc.h" -#include "xfs_alloc.h" -#include "xfs_extent_busy.h" -#include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_quota.h" +#include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_trans_space.h" #include "xfs_trace.h" @@ -393,8 +385,7 @@ xfs_calc_ifree_reservation( xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + - MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), - XFS_INODE_CLUSTER_SIZE(mp)) + + max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + xfs_calc_buf_res(1, 0) + xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels, 0) + diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h index db14d0c08682..3e8e797c6d11 100644 --- a/fs/xfs/xfs_vnode.h +++ b/fs/xfs/xfs_vnode.h @@ -25,14 +25,6 @@ struct xfs_inode; struct attrlist_cursor_kern; /* - * Return values for xfs_inactive. A return value of - * VN_INACTIVE_NOCACHE implies that the file system behavior - * has disassociated its state and bhv_desc_t from the vnode. - */ -#define VN_INACTIVE_CACHE 0 -#define VN_INACTIVE_NOCACHE 1 - -/* * Flags for read/write calls - same values as IRIX */ #define IO_ISDIRECT 0x00004 /* bypass page cache */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index e01f35ea76ba..9d479073ba41 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -17,9 +17,13 @@ */ #include "xfs.h" +#include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" |