diff options
Diffstat (limited to 'fs')
291 files changed, 8141 insertions, 2493 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 1964f98e74be..b85efa773949 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -594,21 +594,21 @@ static int __init init_v9fs(void) int err; pr_info("Installing v9fs 9p2000 file system support\n"); /* TODO: Setup list of registered trasnport modules */ - err = register_filesystem(&v9fs_fs_type); - if (err < 0) { - pr_err("Failed to register filesystem\n"); - return err; - } err = v9fs_cache_register(); if (err < 0) { pr_err("Failed to register v9fs for caching\n"); - goto out_fs_unreg; + return err; } err = v9fs_sysfs_init(); if (err < 0) { pr_err("Failed to register with sysfs\n"); + goto out_cache; + } + err = register_filesystem(&v9fs_fs_type); + if (err < 0) { + pr_err("Failed to register filesystem\n"); goto out_sysfs_cleanup; } @@ -617,8 +617,8 @@ static int __init init_v9fs(void) out_sysfs_cleanup: v9fs_sysfs_cleanup(); -out_fs_unreg: - unregister_filesystem(&v9fs_fs_type); +out_cache: + v9fs_cache_unregister(); return err; } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 7b0cd87b07c2..10b7d3c9dba8 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -155,9 +155,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, goto release_sb; } - root = d_alloc_root(inode); + root = d_make_root(inode); if (!root) { - iput(inode); retval = -ENOMEM; goto release_sb; } diff --git a/fs/Kconfig b/fs/Kconfig index d621f02a3f9e..f95ae3a027f3 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -4,6 +4,10 @@ menu "File systems" +# Use unaligned word dcache accesses +config DCACHE_WORD_ACCESS + bool + if BLOCK source "fs/ext2/Kconfig" @@ -210,6 +214,7 @@ source "fs/minix/Kconfig" source "fs/omfs/Kconfig" source "fs/hpfs/Kconfig" source "fs/qnx4/Kconfig" +source "fs/qnx6/Kconfig" source "fs/romfs/Kconfig" source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 93804d4d66e1..2fb977934673 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/ obj-$(CONFIG_AFFS_FS) += affs/ obj-$(CONFIG_ROMFS_FS) += romfs/ obj-$(CONFIG_QNX4FS_FS) += qnx4/ +obj-$(CONFIG_QNX6FS_FS) += qnx6/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 8e3b36ace305..06fdcc9382c4 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -483,10 +483,9 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_d_op = &adfs_dentry_operations; root = adfs_iget(sb, &root_obj); - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { int i; - iput(root); for (i = 0; i < asb->s_map_size; i++) brelse(asb->s_map[i].dm_bh); kfree(asb->s_map); diff --git a/fs/affs/super.c b/fs/affs/super.c index 8ba73fed7964..0782653a05a2 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -473,7 +473,7 @@ got_root: root_inode = affs_iget(sb, root_block); if (IS_ERR(root_inode)) { ret = PTR_ERR(root_inode); - goto out_error_noinode; + goto out_error; } if (AFFS_SB(sb)->s_flags & SF_INTL) @@ -481,7 +481,7 @@ got_root: else sb->s_d_op = &affs_dentry_operations; - sb->s_root = d_alloc_root(root_inode); + sb->s_root = d_make_root(root_inode); if (!sb->s_root) { printk(KERN_ERR "AFFS: Get root inode failed\n"); goto out_error; @@ -494,9 +494,6 @@ got_root: * Begin the cascaded cleanup ... */ out_error: - if (root_inode) - iput(root_inode); -out_error_noinode: kfree(sbi->s_bitmap); affs_brelse(root_bh); kfree(sbi->s_prefix); diff --git a/fs/afs/file.c b/fs/afs/file.c index 14d89fa58fee..8f6e9234d565 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping, ASSERT(key != NULL); vnode = AFS_FS_I(mapping->host); - if (vnode->flags & AFS_VNODE_DELETED) { + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { _leave(" = -ESTALE"); return -ESTALE; } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 2f213d109c21..b960ff05ea0b 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -365,10 +365,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, _debug("extract data"); if (call->count > 0) { page = call->reply3; - buffer = kmap_atomic(page, KM_USER0); + buffer = kmap_atomic(page); ret = afs_extract_data(call, skb, last, buffer, call->count); - kunmap_atomic(buffer, KM_USER0); + kunmap_atomic(buffer); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -411,9 +411,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, if (call->count < PAGE_SIZE) { _debug("clear"); page = call->reply3; - buffer = kmap_atomic(page, KM_USER0); + buffer = kmap_atomic(page); memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap_atomic(buffer, KM_USER0); + kunmap_atomic(buffer); } _leave(" = 0 [done]"); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d2b0888126d4..a306bb6d88d9 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -109,7 +109,7 @@ struct afs_call { unsigned reply_size; /* current size of reply */ unsigned first_offset; /* offset into mapping[first] */ unsigned last_to; /* amount of mapping[last] */ - unsigned short offset; /* offset into received data store */ + unsigned offset; /* offset into received data store */ unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 8f4ce2658b7d..298cf8919ec7 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -200,9 +200,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) if (PageError(page)) goto error; - buf = kmap_atomic(page, KM_USER0); + buf = kmap_atomic(page); memcpy(devname, buf, size); - kunmap_atomic(buf, KM_USER0); + kunmap_atomic(buf); page_cache_release(page); page = NULL; } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index e45a323aebb4..8ad8c2a0703a 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct msghdr msg; struct kvec iov[1]; int ret; + struct sk_buff *skb; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, error_do_abort: rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); + while ((skb = skb_dequeue(&call->rx_queue))) + afs_free_skb(skb); rxrpc_kernel_end_call(rxcall); call->rxcall = NULL; error_kill_call: diff --git a/fs/afs/super.c b/fs/afs/super.c index 983ec59fc80d..f02b31e7e648 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -301,7 +301,6 @@ static int afs_fill_super(struct super_block *sb, { struct afs_super_info *as = sb->s_fs_info; struct afs_fid fid; - struct dentry *root = NULL; struct inode *inode = NULL; int ret; @@ -327,18 +326,16 @@ static int afs_fill_super(struct super_block *sb, set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); ret = -ENOMEM; - root = d_alloc_root(inode); - if (!root) + sb->s_root = d_make_root(inode); + if (!sb->s_root) goto error; sb->s_d_op = &afs_fs_dentry_operations; - sb->s_root = root; _leave(" = 0"); return 0; error: - iput(inode); _leave(" = %d", ret); return ret; } @@ -160,7 +160,7 @@ static int aio_setup_ring(struct kioctx *ctx) info->nr = nr_events; /* trusted copy */ - ring = kmap_atomic(info->ring_pages[0], KM_USER0); + ring = kmap_atomic(info->ring_pages[0]); ring->nr = nr_events; /* user copy */ ring->id = ctx->user_id; ring->head = ring->tail = 0; @@ -168,47 +168,38 @@ static int aio_setup_ring(struct kioctx *ctx) ring->compat_features = AIO_RING_COMPAT_FEATURES; ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->header_length = sizeof(struct aio_ring); - kunmap_atomic(ring, KM_USER0); + kunmap_atomic(ring); return 0; } /* aio_ring_event: returns a pointer to the event at the given index from - * kmap_atomic(, km). Release the pointer with put_aio_ring_event(); + * kmap_atomic(). Release the pointer with put_aio_ring_event(); */ #define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event)) #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) -#define aio_ring_event(info, nr, km) ({ \ +#define aio_ring_event(info, nr) ({ \ unsigned pos = (nr) + AIO_EVENTS_OFFSET; \ struct io_event *__event; \ __event = kmap_atomic( \ - (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \ + (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \ __event += pos % AIO_EVENTS_PER_PAGE; \ __event; \ }) -#define put_aio_ring_event(event, km) do { \ +#define put_aio_ring_event(event) do { \ struct io_event *__event = (event); \ (void)__event; \ - kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ + kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \ } while(0) static void ctx_rcu_free(struct rcu_head *head) { struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); - unsigned nr_events = ctx->max_reqs; - kmem_cache_free(kioctx_cachep, ctx); - - if (nr_events) { - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - nr_events > aio_nr); - aio_nr -= nr_events; - spin_unlock(&aio_nr_lock); - } } /* __put_ioctx @@ -217,23 +208,23 @@ static void ctx_rcu_free(struct rcu_head *head) */ static void __put_ioctx(struct kioctx *ctx) { + unsigned nr_events = ctx->max_reqs; BUG_ON(ctx->reqs_active); - cancel_delayed_work(&ctx->wq); - cancel_work_sync(&ctx->wq.work); + cancel_delayed_work_sync(&ctx->wq); aio_free_ring(ctx); mmdrop(ctx->mm); ctx->mm = NULL; + if (nr_events) { + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - nr_events > aio_nr); + aio_nr -= nr_events; + spin_unlock(&aio_nr_lock); + } pr_debug("__put_ioctx: freeing %p\n", ctx); call_rcu(&ctx->rcu_head, ctx_rcu_free); } -static inline void get_ioctx(struct kioctx *kioctx) -{ - BUG_ON(atomic_read(&kioctx->users) <= 0); - atomic_inc(&kioctx->users); -} - static inline int try_get_ioctx(struct kioctx *kioctx) { return atomic_inc_not_zero(&kioctx->users); @@ -253,7 +244,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) { struct mm_struct *mm; struct kioctx *ctx; - int did_sync = 0; + int err = -ENOMEM; /* Prevent overflows */ if ((nr_events > (0x10000000U / sizeof(struct io_event))) || @@ -262,7 +253,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) return ERR_PTR(-EINVAL); } - if ((unsigned long)nr_events > aio_max_nr) + if (!nr_events || (unsigned long)nr_events > aio_max_nr) return ERR_PTR(-EAGAIN); ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL); @@ -273,7 +264,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) mm = ctx->mm = current->mm; atomic_inc(&mm->mm_count); - atomic_set(&ctx->users, 1); + atomic_set(&ctx->users, 2); spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->ring_info.ring_lock); init_waitqueue_head(&ctx->wait); @@ -286,25 +277,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) goto out_freectx; /* limit the number of system wide aios */ - do { - spin_lock_bh(&aio_nr_lock); - if (aio_nr + nr_events > aio_max_nr || - aio_nr + nr_events < aio_nr) - ctx->max_reqs = 0; - else - aio_nr += ctx->max_reqs; - spin_unlock_bh(&aio_nr_lock); - if (ctx->max_reqs || did_sync) - break; - - /* wait for rcu callbacks to have completed before giving up */ - synchronize_rcu(); - did_sync = 1; - ctx->max_reqs = nr_events; - } while (1); - - if (ctx->max_reqs == 0) + spin_lock(&aio_nr_lock); + if (aio_nr + nr_events > aio_max_nr || + aio_nr + nr_events < aio_nr) { + spin_unlock(&aio_nr_lock); goto out_cleanup; + } + aio_nr += ctx->max_reqs; + spin_unlock(&aio_nr_lock); /* now link into global list. */ spin_lock(&mm->ioctx_lock); @@ -316,16 +296,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) return ctx; out_cleanup: - __put_ioctx(ctx); - return ERR_PTR(-EAGAIN); - + err = -EAGAIN; + aio_free_ring(ctx); out_freectx: mmdrop(mm); kmem_cache_free(kioctx_cachep, ctx); - ctx = ERR_PTR(-ENOMEM); - - dprintk("aio: error allocating ioctx %p\n", ctx); - return ctx; + dprintk("aio: error allocating ioctx %d\n", err); + return ERR_PTR(err); } /* aio_cancel_all @@ -413,10 +390,6 @@ void exit_aio(struct mm_struct *mm) aio_cancel_all(ctx); wait_for_all_aios(ctx); - /* - * Ensure we don't leave the ctx on the aio_wq - */ - cancel_work_sync(&ctx->wq.work); if (1 != atomic_read(&ctx->users)) printk(KERN_DEBUG @@ -490,6 +463,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) kmem_cache_free(kiocb_cachep, req); ctx->reqs_active--; } + if (unlikely(!ctx->reqs_active && ctx->dead)) + wake_up_all(&ctx->wait); spin_unlock_irq(&ctx->ctx_lock); } @@ -607,11 +582,16 @@ static void aio_fput_routine(struct work_struct *data) fput(req->ki_filp); /* Link the iocb into the context's free list */ + rcu_read_lock(); spin_lock_irq(&ctx->ctx_lock); really_put_req(ctx, req); + /* + * at that point ctx might've been killed, but actual + * freeing is RCU'd + */ spin_unlock_irq(&ctx->ctx_lock); + rcu_read_unlock(); - put_ioctx(ctx); spin_lock_irq(&fput_lock); } spin_unlock_irq(&fput_lock); @@ -642,7 +622,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * this function will be executed w/out any aio kthread wakeup. */ if (unlikely(!fput_atomic(req->ki_filp))) { - get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); spin_unlock(&fput_lock); @@ -920,7 +899,7 @@ static void aio_kick_handler(struct work_struct *work) unuse_mm(mm); set_fs(oldfs); /* - * we're in a worker thread already, don't use queue_delayed_work, + * we're in a worker thread already; no point using non-zero delay */ if (requeue) queue_delayed_work(aio_wq, &ctx->wq, 0); @@ -1019,10 +998,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2) if (kiocbIsCancelled(iocb)) goto put_rq; - ring = kmap_atomic(info->ring_pages[0], KM_IRQ1); + ring = kmap_atomic(info->ring_pages[0]); tail = info->tail; - event = aio_ring_event(info, tail, KM_IRQ0); + event = aio_ring_event(info, tail); if (++tail >= info->nr) tail = 0; @@ -1043,8 +1022,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2) info->tail = tail; ring->tail = tail; - put_aio_ring_event(event, KM_IRQ0); - kunmap_atomic(ring, KM_IRQ1); + put_aio_ring_event(event); + kunmap_atomic(ring); pr_debug("added to ring %p at [%lu]\n", iocb, tail); @@ -1089,7 +1068,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) unsigned long head; int ret = 0; - ring = kmap_atomic(info->ring_pages[0], KM_USER0); + ring = kmap_atomic(info->ring_pages[0]); dprintk("in aio_read_evt h%lu t%lu m%lu\n", (unsigned long)ring->head, (unsigned long)ring->tail, (unsigned long)ring->nr); @@ -1101,18 +1080,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) head = ring->head % info->nr; if (head != ring->tail) { - struct io_event *evp = aio_ring_event(info, head, KM_USER1); + struct io_event *evp = aio_ring_event(info, head); *ent = *evp; head = (head + 1) % info->nr; smp_mb(); /* finish reading the event before updatng the head */ ring->head = head; ret = 1; - put_aio_ring_event(evp, KM_USER1); + put_aio_ring_event(evp); } spin_unlock(&info->ring_lock); out: - kunmap_atomic(ring, KM_USER0); + kunmap_atomic(ring); dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret, (unsigned long)ring->head, (unsigned long)ring->tail); return ret; @@ -1336,10 +1315,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) ret = PTR_ERR(ioctx); if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); - if (!ret) + if (!ret) { + put_ioctx(ioctx); return 0; - - get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ + } io_destroy(ioctx); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index f11e43ed907d..28d39fb84ae3 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -39,19 +39,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { .d_dname = anon_inodefs_dname, }; -static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_pseudo(fs_type, "anon_inode:", NULL, - &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); -} - -static struct file_system_type anon_inode_fs_type = { - .name = "anon_inodefs", - .mount = anon_inodefs_mount, - .kill_sb = kill_anon_super, -}; - /* * nop .set_page_dirty method so that people can use .page_mkwrite on * anon inodes. @@ -65,6 +52,62 @@ static const struct address_space_operations anon_aops = { .set_page_dirty = anon_set_page_dirty, }; +/* + * A single inode exists for all anon_inode files. Contrary to pipes, + * anon_inode inodes have no associated per-instance data, so we need + * only allocate one of them. + */ +static struct inode *anon_inode_mkinode(struct super_block *s) +{ + struct inode *inode = new_inode_pseudo(s); + + if (!inode) + return ERR_PTR(-ENOMEM); + + inode->i_ino = get_next_ino(); + inode->i_fop = &anon_inode_fops; + + inode->i_mapping->a_ops = &anon_aops; + + /* + * Mark the inode dirty from the very beginning, + * that way it will never be moved to the dirty + * list because mark_inode_dirty() will think + * that it already _is_ on the dirty list. + */ + inode->i_state = I_DIRTY; + inode->i_mode = S_IRUSR | S_IWUSR; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_flags |= S_PRIVATE; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + return inode; +} + +static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + struct dentry *root; + root = mount_pseudo(fs_type, "anon_inode:", NULL, + &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); + if (!IS_ERR(root)) { + struct super_block *s = root->d_sb; + anon_inode_inode = anon_inode_mkinode(s); + if (IS_ERR(anon_inode_inode)) { + dput(root); + deactivate_locked_super(s); + root = ERR_CAST(anon_inode_inode); + } + } + return root; +} + +static struct file_system_type anon_inode_fs_type = { + .name = "anon_inodefs", + .mount = anon_inodefs_mount, + .kill_sb = kill_anon_super, +}; + /** * anon_inode_getfile - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" @@ -180,38 +223,6 @@ err_put_unused_fd: } EXPORT_SYMBOL_GPL(anon_inode_getfd); -/* - * A single inode exists for all anon_inode files. Contrary to pipes, - * anon_inode inodes have no associated per-instance data, so we need - * only allocate one of them. - */ -static struct inode *anon_inode_mkinode(void) -{ - struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb); - - if (!inode) - return ERR_PTR(-ENOMEM); - - inode->i_ino = get_next_ino(); - inode->i_fop = &anon_inode_fops; - - inode->i_mapping->a_ops = &anon_aops; - - /* - * Mark the inode dirty from the very beginning, - * that way it will never be moved to the dirty - * list because mark_inode_dirty() will think - * that it already _is_ on the dirty list. - */ - inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_flags |= S_PRIVATE; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - return inode; -} - static int __init anon_inode_init(void) { int error; @@ -224,16 +235,8 @@ static int __init anon_inode_init(void) error = PTR_ERR(anon_inode_mnt); goto err_unregister_filesystem; } - anon_inode_inode = anon_inode_mkinode(); - if (IS_ERR(anon_inode_inode)) { - error = PTR_ERR(anon_inode_inode); - goto err_mntput; - } - return 0; -err_mntput: - kern_unmount(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d8d8e7ba6a1e..eb1cc92cd67d 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -110,6 +110,7 @@ struct autofs_sb_info { int sub_version; int min_proto; int max_proto; + int compat_daemon; unsigned long exp_timeout; unsigned int type; int reghost_enabled; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 76741d8d7786..85f1fcdb30e7 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -385,6 +385,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, sbi->pipefd = pipefd; sbi->pipe = pipe; sbi->catatonic = 0; + sbi->compat_daemon = is_compat_task(); } out: mutex_unlock(&sbi->wq_mutex); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 450f529a4eae..1feb68ecef95 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -124,6 +124,7 @@ start: /* Negative dentry - try next */ if (!simple_positive(q)) { spin_unlock(&p->d_lock); + lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_); p = q; goto again; } @@ -186,6 +187,7 @@ again: /* Negative dentry - try next */ if (!simple_positive(ret)) { spin_unlock(&p->d_lock); + lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_); p = ret; goto again; } diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index c038727b4050..cddc74b9cdb2 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -31,11 +31,11 @@ static int __init init_autofs4_fs(void) { int err; + autofs_dev_ioctl_init(); + err = register_filesystem(&autofs_fs_type); if (err) - return err; - - autofs_dev_ioctl_init(); + autofs_dev_ioctl_exit(); return err; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index e16980b00b8d..d8dc002e9cc3 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -19,6 +19,7 @@ #include <linux/parser.h> #include <linux/bitops.h> #include <linux/magic.h> +#include <linux/compat.h> #include "autofs_i.h" #include <linux/module.h> @@ -224,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) set_autofs_type_indirect(&sbi->type); sbi->min_proto = 0; sbi->max_proto = 0; + sbi->compat_daemon = is_compat_task(); mutex_init(&sbi->wq_mutex); mutex_init(&sbi->pipe_mutex); spin_lock_init(&sbi->fs_lock); @@ -245,12 +247,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) if (!ino) goto fail_free; root_inode = autofs4_get_inode(s, S_IFDIR | 0755); - if (!root_inode) - goto fail_ino; - - root = d_alloc_root(root_inode); + root = d_make_root(root_inode); if (!root) - goto fail_iput; + goto fail_ino; pipe = NULL; root->d_fsdata = ino; @@ -315,9 +314,6 @@ fail_fput: fail_dput: dput(root); goto fail_free; -fail_iput: - printk("autofs: get root dentry failed\n"); - iput(root_inode); fail_ino: kfree(ino); fail_free: diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index da8876d38a7b..9c098db43344 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -91,7 +91,24 @@ static int autofs4_write(struct autofs_sb_info *sbi, return (bytes > 0); } - + +/* + * The autofs_v5 packet was misdesigned. + * + * The packets are identical on x86-32 and x86-64, but have different + * alignment. Which means that 'sizeof()' will give different results. + * Fix it up for the case of running 32-bit user mode on a 64-bit kernel. + */ +static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi) +{ + size_t pktsz = sizeof(struct autofs_v5_packet); +#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT) + if (sbi->compat_daemon > 0) + pktsz -= 4; +#endif + return pktsz; +} + static void autofs4_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq, int type) @@ -155,8 +172,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, { struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; - pktsz = sizeof(*packet); - + pktsz = autofs_v5_packet_size(sbi); packet->wait_queue_token = wq->wait_queue_token; packet->len = wq->name.len; memcpy(packet->name, wq->name.name, wq->name.len); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 6e6d536767fe..e18da23d42b5 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -852,9 +852,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent) ret = PTR_ERR(root); goto unacquire_priv_sbp; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { - iput(root); befs_error(sb, "get root inode failed"); goto unacquire_priv_sbp; } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index b0391bc402b1..e23dc7c8b884 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -367,9 +367,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) ret = PTR_ERR(inode); goto out2; } - s->s_root = d_alloc_root(inode); + s->s_root = d_make_root(inode); if (!s->s_root) { - iput(inode); ret = -ENOMEM; goto out2; } diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index a6395bdb26ae..4d5e6d26578c 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -259,8 +259,14 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->free_area_cache = current->mm->mmap_base; current->mm->cached_hole_size = 0; + retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); + return retval; + } + install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; @@ -352,13 +358,6 @@ beyond_if: return retval; } - retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); - return retval; - } - current->mm->start_stack = (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); #ifdef __alpha__ @@ -454,7 +453,8 @@ out: static int __init init_aout_binfmt(void) { - return register_binfmt(&aout_format); + register_binfmt(&aout_format); + return 0; } static void __exit exit_aout_binfmt(void) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bcb884e2d613..81878b78c9d4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -712,7 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto out_free_dentry; /* OK, This is the point of no return */ - current->flags &= ~PF_FORKNOEXEC; current->mm->def_flags = def_flags; /* Do this immediately, since STACK_TOP as used in setup_arg_pages @@ -934,7 +933,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); if (retval < 0) { @@ -1421,7 +1419,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, for (i = 1; i < view->n; ++i) { const struct user_regset *regset = &view->regsets[i]; do_thread_regset_writeback(t->task, regset); - if (regset->core_note_type && + if (regset->core_note_type && regset->get && (!regset->active || regset->active(t->task, regset))) { int ret; size_t size = regset->n * regset->size; @@ -2077,7 +2075,8 @@ out: static int __init init_elf_binfmt(void) { - return register_binfmt(&elf_format); + register_binfmt(&elf_format); + return 0; } static void __exit exit_elf_binfmt(void) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 30745f459faf..c64bf5ee2df4 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -91,7 +91,8 @@ static struct linux_binfmt elf_fdpic_format = { static int __init init_elf_fdpic_binfmt(void) { - return register_binfmt(&elf_fdpic_format); + register_binfmt(&elf_fdpic_format); + return 0; } static void __exit exit_elf_fdpic_binfmt(void) @@ -334,8 +335,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, current->mm->context.exec_fdpic_loadmap = 0; current->mm->context.interp_fdpic_loadmap = 0; - current->flags &= ~PF_FORKNOEXEC; - #ifdef CONFIG_MMU elf_fdpic_arch_lay_out_mm(&exec_params, &interp_params, @@ -413,7 +412,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, #endif install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) goto error_kill; diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index b8e8b0acf9bd..2790c7e1912e 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -100,7 +100,8 @@ static struct linux_binfmt em86_format = { static int __init init_em86_binfmt(void) { - return register_binfmt(&em86_format); + register_binfmt(&em86_format); + return 0; } static void __exit exit_em86_binfmt(void) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 1bffbe0ed778..04f61f0bdfde 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -902,7 +902,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) libinfo.lib_list[j].start_data:UNLOADED_LIB; install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; set_binfmt(&flat_format); @@ -950,7 +949,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) static int __init init_flat_binfmt(void) { - return register_binfmt(&flat_format); + register_binfmt(&flat_format); + return 0; } /****************************************************************************/ diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a9198dfd5f85..1ffb60355cae 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -726,11 +726,8 @@ static struct file_system_type bm_fs_type = { static int __init init_misc_binfmt(void) { int err = register_filesystem(&bm_fs_type); - if (!err) { - err = insert_binfmt(&misc_format); - if (err) - unregister_filesystem(&bm_fs_type); - } + if (!err) + insert_binfmt(&misc_format); return err; } diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 396a9884591f..d3b8c1f63155 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -105,7 +105,8 @@ static struct linux_binfmt script_format = { static int __init init_script_binfmt(void) { - return register_binfmt(&script_format); + register_binfmt(&script_format); + return 0; } static void __exit exit_script_binfmt(void) diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index cc8560f6c9b0..e4fc746629a7 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -225,7 +225,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto out_free; /* OK, This is the point of no return */ - current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; setup_new_exec(bprm); @@ -289,7 +288,8 @@ static int load_som_library(struct file *f) static int __init init_som_binfmt(void) { - return register_binfmt(&som_format); + register_binfmt(&som_format); + return 0; } static void __exit exit_som_binfmt(void) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index c2183f3917cd..e85c04b9f61c 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -357,7 +357,7 @@ static void bio_integrity_generate(struct bio *bio) bix.sector_size = bi->sector_size; bio_for_each_segment(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); + void *kaddr = kmap_atomic(bv->bv_page); bix.data_buf = kaddr + bv->bv_offset; bix.data_size = bv->bv_len; bix.prot_buf = prot_buf; @@ -371,7 +371,7 @@ static void bio_integrity_generate(struct bio *bio) total += sectors * bi->tuple_size; BUG_ON(total > bio->bi_integrity->bip_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } } @@ -498,7 +498,7 @@ static int bio_integrity_verify(struct bio *bio) bix.sector_size = bi->sector_size; bio_for_each_segment(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); + void *kaddr = kmap_atomic(bv->bv_page); bix.data_buf = kaddr + bv->bv_offset; bix.data_size = bv->bv_len; bix.prot_buf = prot_buf; @@ -507,7 +507,7 @@ static int bio_integrity_verify(struct bio *bio) ret = bi->verify_fn(&bix); if (ret) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return ret; } @@ -517,7 +517,7 @@ static int bio_integrity_verify(struct bio *bio) total += sectors * bi->tuple_size; BUG_ON(total > bio->bi_integrity->bip_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } return ret; @@ -505,13 +505,9 @@ EXPORT_SYMBOL(bio_clone); int bio_get_nr_vecs(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - int nr_pages; - - nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (nr_pages > queue_max_segments(q)) - nr_pages = queue_max_segments(q); - - return nr_pages; + return min_t(unsigned, + queue_max_segments(q), + queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1); } EXPORT_SYMBOL(bio_get_nr_vecs); diff --git a/fs/block_dev.c b/fs/block_dev.c index 0e575d1304b4..a9ff3000b83d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -109,7 +109,7 @@ void invalidate_bdev(struct block_device *bdev) /* 99% of the time, we don't need to flush the cleancache on the bdev. * But, for the strange corners, lets be cautious */ - cleancache_flush_inode(mapping); + cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(invalidate_bdev); @@ -1183,8 +1183,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) * The latter is necessary to prevent ghost * partitions on a removed medium. */ - if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) - rescan_partitions(disk, bdev); + if (bdev->bd_invalidated) { + if (!ret) + rescan_partitions(disk, bdev); + else if (ret == -ENOMEDIUM) + invalidate_partitions(disk, bdev); + } if (ret) goto out_clear; } else { @@ -1214,8 +1218,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ - if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) - rescan_partitions(bdev->bd_disk, bdev); + if (bdev->bd_invalidated) { + if (!ret) + rescan_partitions(bdev->bd_disk, bdev); + else if (ret == -ENOMEDIUM) + invalidate_partitions(bdev->bd_disk, bdev); + } if (ret) goto out_unlock_bdev; } diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 633c701a287d..0436c12da8c2 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -583,7 +583,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key info_key = { 0 }; struct btrfs_delayed_ref_root *delayed_refs = NULL; - struct btrfs_delayed_ref_head *head = NULL; + struct btrfs_delayed_ref_head *head; int info_level = 0; int ret; struct list_head prefs_delayed; @@ -607,6 +607,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, * at a specified point in time */ again: + head = NULL; + ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; @@ -635,8 +637,10 @@ again: goto again; } ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed); - if (ret) + if (ret) { + spin_unlock(&delayed_refs->lock); goto out; + } } spin_unlock(&delayed_refs->lock); @@ -892,6 +896,8 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, if (eb != eb_in) free_extent_buffer(eb); ret = inode_ref_info(parent, 0, fs_root, path, &found_key); + if (ret > 0) + ret = -ENOENT; if (ret) break; next_inum = found_key.offset; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index b669a7d8e499..c053e90f2006 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -89,7 +89,6 @@ #include "disk-io.h" #include "transaction.h" #include "extent_io.h" -#include "disk-io.h" #include "volumes.h" #include "print-tree.h" #include "locking.h" @@ -644,7 +643,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfs_fs_devices *fs_devices) { - int ret; + int ret = 0; struct btrfs_super_block *selected_super; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 14f1c5a0b2d2..b805afb37fa8 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -120,10 +120,10 @@ static int check_compressed_csum(struct inode *inode, page = cb->compressed_pages[i]; csum = ~(u32)0; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE); btrfs_csum_final(csum, (char *)&csum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (csum != *cb_sum) { printk(KERN_INFO "btrfs csum failed ino %llu " @@ -521,10 +521,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, if (zero_offset) { int zeros; zeros = PAGE_CACHE_SIZE - zero_offset; - userpage = kmap_atomic(page, KM_USER0); + userpage = kmap_atomic(page); memset(userpage + zero_offset, 0, zeros); flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); + kunmap_atomic(userpage); } } @@ -588,6 +588,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, page_offset(bio->bi_io_vec->bv_page), PAGE_CACHE_SIZE); read_unlock(&em_tree->lock); + if (!em) + return -EIO; compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); @@ -991,9 +993,9 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(PAGE_CACHE_SIZE - *pg_offset, PAGE_CACHE_SIZE - buf_offset); bytes = min(bytes, working_bytes); - kaddr = kmap_atomic(page_out, KM_USER0); + kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); flush_dcache_page(page_out); *pg_offset += bytes; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 27ebe61d3ccc..80b6486fd5e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -886,7 +886,7 @@ struct btrfs_block_rsv { u64 reserved; struct btrfs_space_info *space_info; spinlock_t lock; - unsigned int full:1; + unsigned int full; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 811d9f918b1c..534266fe505f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2260,6 +2260,12 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } + if (sectorsize < PAGE_SIZE) { + printk(KERN_WARNING "btrfs: Incompatible sector size " + "found on %s\n", sb->s_id); + goto fail_sb_buffer; + } + mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); @@ -2301,6 +2307,12 @@ int open_ctree(struct super_block *sb, btrfs_close_extra_devices(fs_devices); + if (!fs_devices->latest_bdev) { + printk(KERN_CRIT "btrfs: failed to read devices on %s\n", + sb->s_id); + goto fail_tree_roots; + } + retry_root_backup: blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 283af7a676a3..37e0a800d34e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3312,7 +3312,8 @@ commit_trans: } data_sinfo->bytes_may_use += bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - (u64)data_sinfo, bytes, 1); + (u64)(unsigned long)data_sinfo, + bytes, 1); spin_unlock(&data_sinfo->lock); return 0; @@ -3333,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - (u64)data_sinfo, bytes, 0); + (u64)(unsigned long)data_sinfo, + bytes, 0); spin_unlock(&data_sinfo->lock); } @@ -3611,12 +3613,15 @@ static int may_commit_transaction(struct btrfs_root *root, if (space_info != delayed_rsv->space_info) return -ENOSPC; + spin_lock(&space_info->lock); spin_lock(&delayed_rsv->lock); - if (delayed_rsv->size < bytes) { + if (space_info->bytes_pinned + delayed_rsv->size < bytes) { spin_unlock(&delayed_rsv->lock); + spin_unlock(&space_info->lock); return -ENOSPC; } spin_unlock(&delayed_rsv->lock); + spin_unlock(&space_info->lock); commit: trans = btrfs_join_transaction(root); @@ -3695,9 +3700,9 @@ again: if (used + orig_bytes <= space_info->total_bytes) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", - (u64)space_info, - orig_bytes, 1); + "space_info", + (u64)(unsigned long)space_info, + orig_bytes, 1); ret = 0; } else { /* @@ -3766,9 +3771,9 @@ again: if (used + num_bytes < space_info->total_bytes + avail) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", - (u64)space_info, - orig_bytes, 1); + "space_info", + (u64)(unsigned long)space_info, + orig_bytes, 1); ret = 0; } else { wait_ordered = true; @@ -3913,8 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, spin_lock(&space_info->lock); space_info->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)space_info, - num_bytes, 0); + (u64)(unsigned long)space_info, + num_bytes, 0); space_info->reservation_progress++; spin_unlock(&space_info->lock); } @@ -4105,7 +4110,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) num_bytes += div64_u64(data_used + meta_used, 50); if (num_bytes * 3 > meta_used) - num_bytes = div64_u64(meta_used, 3); + num_bytes = div64_u64(meta_used, 3) * 2; return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); } @@ -4132,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->reserved += num_bytes; sinfo->bytes_may_use += num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)sinfo, num_bytes, 1); + (u64)(unsigned long)sinfo, num_bytes, 1); } if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)sinfo, num_bytes, 0); + (u64)(unsigned long)sinfo, num_bytes, 0); sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; @@ -4192,7 +4197,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, if (!trans->bytes_reserved) return; - trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans, + trace_btrfs_space_reservation(root->fs_info, "transaction", + (u64)(unsigned long)trans, trans->bytes_reserved, 0); btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); trans->bytes_reserved = 0; @@ -4710,9 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, space_info->bytes_reserved += num_bytes; if (reserve == RESERVE_ALLOC) { trace_btrfs_space_reservation(cache->fs_info, - "space_info", - (u64)space_info, - num_bytes, 0); + "space_info", + (u64)(unsigned long)space_info, + num_bytes, 0); space_info->bytes_may_use -= num_bytes; } } @@ -7886,9 +7892,16 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; + u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret = 0; - cache = btrfs_lookup_block_group(fs_info, range->start); + /* + * try to trim all FS space, our block group may start from non-zero. + */ + if (range->len == total_bytes) + cache = btrfs_lookup_first_block_group(fs_info, range->start); + else + cache = btrfs_lookup_block_group(fs_info, range->start); while (cache) { if (cache->key.objectid >= (range->start + range->len)) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fcf77e1ded40..2862454bcdb3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -513,6 +513,15 @@ hit_next: WARN_ON(state->end < start); last_end = state->end; + if (state->end < end && !need_resched()) + next_node = rb_next(&state->rb_node); + else + next_node = NULL; + + /* the state doesn't have the wanted bits, go ahead */ + if (!(state->state & bits)) + goto next; + /* * | ---- desired range ---- | * | state | or @@ -565,20 +574,15 @@ hit_next: goto out; } - if (state->end < end && prealloc && !need_resched()) - next_node = rb_next(&state->rb_node); - else - next_node = NULL; - set |= clear_state_bit(tree, state, &bits, wake); +next: if (last_end == (u64)-1) goto out; start = last_end + 1; if (start <= end && next_node) { state = rb_entry(next_node, struct extent_state, rb_node); - if (state->start == start) - goto hit_next; + goto hit_next; } goto search_again; @@ -961,8 +965,6 @@ hit_next: set_state_bits(tree, state, &bits); clear_state_bit(tree, state, &clear_bits, 0); - - merge_state(tree, state); if (last_end == (u64)-1) goto out; @@ -1007,7 +1009,6 @@ hit_next: if (state->end <= end) { set_state_bits(tree, state, &bits); clear_state_bit(tree, state, &clear_bits, 0); - merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; @@ -1068,8 +1069,6 @@ hit_next: set_state_bits(tree, prealloc, &bits); clear_state_bit(tree, prealloc, &clear_bits, 0); - - merge_state(tree, prealloc); prealloc = NULL; goto out; } @@ -2154,13 +2153,46 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, failrec->this_mirror, num_copies, failrec->in_validation); - tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror, - failrec->bio_flags, 0); - return 0; + ret = tree->ops->submit_bio_hook(inode, read_mode, bio, + failrec->this_mirror, + failrec->bio_flags, 0); + return ret; } /* lots and lots of room for performance fixes in the end_bio funcs */ +int end_extent_writepage(struct page *page, int err, u64 start, u64 end) +{ + int uptodate = (err == 0); + struct extent_io_tree *tree; + int ret; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + + if (tree->ops && tree->ops->writepage_end_io_hook) { + ret = tree->ops->writepage_end_io_hook(page, start, + end, NULL, uptodate); + if (ret) + uptodate = 0; + } + + if (!uptodate && tree->ops && + tree->ops->writepage_io_failed_hook) { + ret = tree->ops->writepage_io_failed_hook(NULL, page, + start, end, NULL); + /* Writeback already completed */ + if (ret == 0) + return 1; + } + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); + ClearPageUptodate(page); + SetPageError(page); + } + return 0; +} + /* * after a writepage IO is done, we need to: * clear the uptodate bits on error @@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, */ static void end_bio_extent_writepage(struct bio *bio, int err) { - int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_io_tree *tree; u64 start; u64 end; int whole_page; - int ret; do { struct page *page = bvec->bv_page; @@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err) if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (tree->ops && tree->ops->writepage_end_io_hook) { - ret = tree->ops->writepage_end_io_hook(page, start, - end, NULL, uptodate); - if (ret) - uptodate = 0; - } - - if (!uptodate && tree->ops && - tree->ops->writepage_io_failed_hook) { - ret = tree->ops->writepage_io_failed_hook(bio, page, - start, end, NULL); - if (ret == 0) { - uptodate = (err == 0); - continue; - } - } - if (!uptodate) { - clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); - ClearPageUptodate(page); - SetPageError(page); - } + if (end_extent_writepage(page, err, start, end)) + continue; if (whole_page) end_page_writeback(page); @@ -2535,10 +2546,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree, if (zero_offset) { iosize = PAGE_CACHE_SIZE - zero_offset; - userpage = kmap_atomic(page, KM_USER0); + userpage = kmap_atomic(page); memset(userpage + zero_offset, 0, iosize); flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); + kunmap_atomic(userpage); } } while (cur <= end) { @@ -2547,10 +2558,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree, struct extent_state *cached = NULL; iosize = PAGE_CACHE_SIZE - pg_offset; - userpage = kmap_atomic(page, KM_USER0); + userpage = kmap_atomic(page); memset(userpage + pg_offset, 0, iosize); flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); + kunmap_atomic(userpage); set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); unlock_extent_cached(tree, cur, cur + iosize - 1, @@ -2596,10 +2607,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree, char *userpage; struct extent_state *cached = NULL; - userpage = kmap_atomic(page, KM_USER0); + userpage = kmap_atomic(page); memset(userpage + pg_offset, 0, iosize); flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); + kunmap_atomic(userpage); set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); @@ -2745,10 +2756,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (page->index == end_index) { char *userpage; - userpage = kmap_atomic(page, KM_USER0); + userpage = kmap_atomic(page); memset(userpage + pg_offset, 0, PAGE_CACHE_SIZE - pg_offset); - kunmap_atomic(userpage, KM_USER0); + kunmap_atomic(userpage); flush_dcache_page(page); } pg_offset = 0; @@ -2779,9 +2790,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = delalloc_end + 1; continue; } - tree->ops->fill_delalloc(inode, page, delalloc_start, - delalloc_end, &page_started, - &nr_written); + ret = tree->ops->fill_delalloc(inode, page, + delalloc_start, + delalloc_end, + &page_started, + &nr_written); + BUG_ON(ret); /* * delalloc_end is already one less than the total * length, so we don't subtract one from @@ -2818,8 +2832,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (tree->ops && tree->ops->writepage_start_hook) { ret = tree->ops->writepage_start_hook(page, start, page_end); - if (ret == -EAGAIN) { - redirty_page_for_writepage(wbc, page); + if (ret) { + /* Fixup worker will requeue */ + if (ret == -EBUSY) + wbc->pages_skipped++; + else + redirty_page_for_writepage(wbc, page); update_nr_written(page, wbc, nr_written); unlock_page(page); ret = 0; @@ -3289,7 +3307,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, len = end - start + 1; write_lock(&map->lock); em = lookup_extent_mapping(map, start, len); - if (IS_ERR_OR_NULL(em)) { + if (!em) { write_unlock(&map->lock); break; } @@ -3853,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - if (eb_straddles_pages(eb)) { - clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - cached_state, GFP_NOFS); - } + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + cached_state, GFP_NOFS); + for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (page) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bc6a042cb6fc..cecc3518c121 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -319,4 +319,5 @@ struct btrfs_mapping_tree; int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, u64 length, u64 logical, struct page *page, int mirror_num); +int end_extent_writepage(struct page *page, int err, u64 start, u64 end); #endif diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 33a7890b1f40..1195f09761fe 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -26,8 +26,8 @@ struct extent_map { unsigned long flags; struct block_device *bdev; atomic_t refs; - unsigned int in_tree:1; - unsigned int compress_type:4; + unsigned int in_tree; + unsigned int compress_type; }; struct extent_map_tree { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index c7fb3a4247d3..078b4fd54500 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -447,13 +447,13 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, sums->bytenr = ordered->start; } - data = kmap_atomic(bvec->bv_page, KM_USER0); + data = kmap_atomic(bvec->bv_page); sector_sum->sum = ~(u32)0; sector_sum->sum = btrfs_csum_data(root, data + bvec->bv_offset, sector_sum->sum, bvec->bv_len); - kunmap_atomic(data, KM_USER0); + kunmap_atomic(data); btrfs_csum_final(sector_sum->sum, (char *)§or_sum->sum); sector_sum->bytenr = disk_bytenr; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 859ba2dd8890..e8d06b6b9194 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1605,6 +1605,14 @@ static long btrfs_fallocate(struct file *file, int mode, return -EOPNOTSUPP; /* + * Make sure we have enough space before we do the + * allocation. + */ + ret = btrfs_check_data_free_space(inode, len); + if (ret) + return ret; + + /* * wait for ordered IO before we have any locks. We'll loop again * below with the locks held. */ @@ -1667,27 +1675,12 @@ static long btrfs_fallocate(struct file *file, int mode, if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { - - /* - * Make sure we have enough space before we do the - * allocation. - */ - ret = btrfs_check_data_free_space(inode, last_byte - - cur_offset); - if (ret) { - free_extent_map(em); - break; - } - ret = btrfs_prealloc_file_range(inode, mode, cur_offset, last_byte - cur_offset, 1 << inode->i_blkbits, offset + len, &alloc_hint); - /* Let go of our reservation. */ - btrfs_free_reserved_data_space(inode, last_byte - - cur_offset); if (ret < 0) { free_extent_map(em); break; @@ -1715,6 +1708,8 @@ static long btrfs_fallocate(struct file *file, int mode, &cached_state, GFP_NOFS); out: mutex_unlock(&inode->i_mutex); + /* Let go of our reservation. */ + btrfs_free_reserved_data_space(inode, len); return ret; } @@ -1761,7 +1756,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) start - root->sectorsize, root->sectorsize, 0); if (IS_ERR(em)) { - ret = -ENXIO; + ret = PTR_ERR(em); goto out; } last_end = em->start + em->len; @@ -1773,7 +1768,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) while (1) { em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { - ret = -ENXIO; + ret = PTR_ERR(em); break; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c2f20594c9f7..b02e379b14c7 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -777,6 +777,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, spin_lock(&block_group->lock); if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { spin_unlock(&block_group->lock); + btrfs_free_path(path); goto out; } spin_unlock(&block_group->lock); @@ -1067,7 +1068,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, spin_unlock(&block_group->lock); ret = 0; #ifdef DEBUG - printk(KERN_ERR "btrfs: failed to write free space cace " + printk(KERN_ERR "btrfs: failed to write free space cache " "for block group %llu\n", block_group->key.objectid); #endif } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 213ffa86ce1b..ee15d88b33d2 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -438,7 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root, trans->bytes_reserved); if (ret) goto out; - trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans, + trace_btrfs_space_reservation(root->fs_info, "ino_cache", + (u64)(unsigned long)trans, trans->bytes_reserved, 1); again: inode = lookup_free_ino_inode(root, path); @@ -500,7 +501,8 @@ again: out_put: iput(inode); out_release: - trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans, + trace_btrfs_space_reservation(root->fs_info, "ino_cache", + (u64)(unsigned long)trans, trans->bytes_reserved, 0); btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 32214fe0f7e3..3a0b5c1f9d31 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -173,9 +173,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, cur_size = min_t(unsigned long, compressed_size, PAGE_CACHE_SIZE); - kaddr = kmap_atomic(cpage, KM_USER0); + kaddr = kmap_atomic(cpage); write_extent_buffer(leaf, kaddr, ptr, cur_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); i++; ptr += cur_size; @@ -187,10 +187,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, page = find_get_page(inode->i_mapping, start >> PAGE_CACHE_SHIFT); btrfs_set_file_extent_compression(leaf, ei, 0); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); offset = start & (PAGE_CACHE_SIZE - 1); write_extent_buffer(leaf, kaddr + offset, ptr, size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); page_cache_release(page); } btrfs_mark_buffer_dirty(leaf); @@ -422,10 +422,10 @@ again: * sending it down to disk */ if (offset) { - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } will_compress = 1; } @@ -1555,6 +1555,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) struct inode *inode; u64 page_start; u64 page_end; + int ret; fixup = container_of(work, struct btrfs_writepage_fixup, work); page = fixup->page; @@ -1582,12 +1583,21 @@ again: page_end, &cached_state, GFP_NOFS); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); goto again; } - BUG(); + ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); + if (ret) { + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); + goto out; + } + btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); ClearPageChecked(page); + set_page_dirty(page); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state, GFP_NOFS); @@ -1630,7 +1640,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) fixup->work.func = btrfs_writepage_fixup_worker; fixup->page = page; btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); - return -EAGAIN; + return -EBUSY; } static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, @@ -1863,7 +1873,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, } else { ret = get_state_private(io_tree, start, &private); } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); if (ret) goto zeroit; @@ -1872,7 +1882,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, if (csum != private) goto zeroit; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); good: return 0; @@ -1884,7 +1894,7 @@ zeroit: (unsigned long long)private); memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (private == 0) return 0; return -EIO; @@ -4575,7 +4585,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, root, name, name_len, parent_inode, &key, btrfs_inode_type(inode), index); - BUG_ON(ret); + if (ret) + goto fail_dir_item; btrfs_i_size_write(parent_inode, parent_inode->i_size + name_len * 2); @@ -4583,6 +4594,23 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, parent_inode); } return ret; + +fail_dir_item: + if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { + u64 local_index; + int err; + err = btrfs_del_root_ref(trans, root->fs_info->tree_root, + key.objectid, root->root_key.objectid, + parent_ino, &local_index, name, name_len); + + } else if (add_backref) { + u64 local_index; + int err; + + err = btrfs_del_inode_ref(trans, root, name, name_len, + ino, parent_ino, &local_index); + } + return ret; } static int btrfs_add_nondir(struct btrfs_trans_handle *trans, @@ -4909,12 +4937,12 @@ static noinline int uncompress_inline(struct btrfs_path *path, ret = btrfs_decompress(compress_type, tmp, page, extent_offset, inline_size, max_size); if (ret) { - char *kaddr = kmap_atomic(page, KM_USER0); + char *kaddr = kmap_atomic(page); unsigned long copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, max_size - extent_offset); memset(kaddr + pg_offset, 0, copy_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } kfree(tmp); return 0; @@ -5691,11 +5719,11 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) unsigned long flags; local_irq_save(flags); - kaddr = kmap_atomic(page, KM_IRQ0); + kaddr = kmap_atomic(page); csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, csum, bvec->bv_len); btrfs_csum_final(csum, (char *)&csum); - kunmap_atomic(kaddr, KM_IRQ0); + kunmap_atomic(kaddr); local_irq_restore(flags); flush_dcache_page(bvec->bv_page); @@ -6696,8 +6724,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int err; u64 index = 0; - inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, - new_dirid, S_IFDIR | 0700, &index); + inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, + new_dirid, new_dirid, + S_IFDIR | (~current_umask() & S_IRWXUGO), + &index); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 03bb62a9ee24..d8b54715c2de 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -861,6 +861,7 @@ static int cluster_pages_for_defrag(struct inode *inode, int i_done; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; + struct extent_io_tree *tree; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); if (isize == 0) @@ -871,18 +872,34 @@ static int cluster_pages_for_defrag(struct inode *inode, num_pages << PAGE_CACHE_SHIFT); if (ret) return ret; -again: - ret = 0; i_done = 0; + tree = &BTRFS_I(inode)->io_tree; /* step one, lock all the pages */ for (i = 0; i < num_pages; i++) { struct page *page; +again: page = find_or_create_page(inode->i_mapping, - start_index + i, mask); + start_index + i, mask); if (!page) break; + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; + while (1) { + lock_extent(tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, + page_start); + unlock_extent(tree, page_start, page_end, GFP_NOFS); + if (!ordered) + break; + + unlock_page(page); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + lock_page(page); + } + if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -893,15 +910,22 @@ again: break; } } + isize = i_size_read(inode); file_end = (isize - 1) >> PAGE_CACHE_SHIFT; - if (!isize || page->index > file_end || - page->mapping != inode->i_mapping) { + if (!isize || page->index > file_end) { /* whoops, we blew past eof, skip this page */ unlock_page(page); page_cache_release(page); break; } + + if (page->mapping != inode->i_mapping) { + unlock_page(page); + page_cache_release(page); + goto again; + } + pages[i] = page; i_done++; } @@ -924,25 +948,6 @@ again: lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, 0, &cached_state, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1); - if (ordered && - ordered->file_offset + ordered->len > page_start && - ordered->file_offset < page_end) { - btrfs_put_ordered_extent(ordered); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, - &cached_state, GFP_NOFS); - for (i = 0; i < i_done; i++) { - unlock_page(pages[i]); - page_cache_release(pages[i]); - } - btrfs_wait_ordered_range(inode, page_start, - page_end - page_start); - goto again; - } - if (ordered) - btrfs_put_ordered_extent(ordered); - clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, @@ -1327,6 +1332,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, goto out; } + if (name[0] == '.' && + (namelen == 1 || (name[1] == '.' && namelen == 2))) { + ret = -EEXIST; + goto out; + } + if (subvol) { ret = btrfs_mksubvol(&file->f_path, name, namelen, NULL, transid, readonly); diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index a178f5ebea78..743b86fa4fcb 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -411,9 +411,9 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, bytes = min_t(unsigned long, destlen, out_len - start_byte); - kaddr = kmap_atomic(dest_page, KM_USER0); + kaddr = kmap_atomic(dest_page); memcpy(kaddr, workspace->buf + start_byte, bytes); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); out: return ret; } diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 2373b39a132b..22db04550f6a 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -305,7 +305,7 @@ again: spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, - (unsigned long)zone->end >> PAGE_CACHE_SHIFT, + (unsigned long)(zone->end >> PAGE_CACHE_SHIFT), zone); spin_unlock(&fs_info->reada_lock); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9770cc5bfb76..390e7102b0ff 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -591,7 +591,7 @@ static int scrub_fixup_check(struct scrub_bio *sbio, int ix) u64 flags = sbio->spag[ix].flags; page = sbio->bio->bi_io_vec[ix].bv_page; - buffer = kmap_atomic(page, KM_USER0); + buffer = kmap_atomic(page); if (flags & BTRFS_EXTENT_FLAG_DATA) { ret = scrub_checksum_data(sbio->sdev, sbio->spag + ix, buffer); @@ -603,7 +603,7 @@ static int scrub_fixup_check(struct scrub_bio *sbio, int ix) } else { WARN_ON(1); } - kunmap_atomic(buffer, KM_USER0); + kunmap_atomic(buffer); return ret; } @@ -792,7 +792,7 @@ static void scrub_checksum(struct btrfs_work *work) } for (i = 0; i < sbio->count; ++i) { page = sbio->bio->bi_io_vec[i].bv_page; - buffer = kmap_atomic(page, KM_USER0); + buffer = kmap_atomic(page); flags = sbio->spag[i].flags; logical = sbio->logical + i * PAGE_SIZE; ret = 0; @@ -807,7 +807,7 @@ static void scrub_checksum(struct btrfs_work *work) } else { WARN_ON(1); } - kunmap_atomic(buffer, KM_USER0); + kunmap_atomic(buffer); if (ret) { ret = scrub_recheck_error(sbio, i); if (!ret) { @@ -1367,7 +1367,8 @@ out: } static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, - u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length) + u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length, + u64 dev_offset) { struct btrfs_mapping_tree *map_tree = &sdev->dev->dev_root->fs_info->mapping_tree; @@ -1391,7 +1392,8 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, goto out; for (i = 0; i < map->num_stripes; ++i) { - if (map->stripes[i].dev == sdev->dev) { + if (map->stripes[i].dev == sdev->dev && + map->stripes[i].physical == dev_offset) { ret = scrub_stripe(sdev, map, i, chunk_offset, length); if (ret) goto out; @@ -1487,7 +1489,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) break; } ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, - chunk_offset, length); + chunk_offset, length, found_key.offset); btrfs_put_block_group(cache); if (ret) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3ce97b217cbe..81df3fec6a6d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -629,7 +629,6 @@ static int btrfs_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; - struct dentry *root_dentry; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_key key; int err; @@ -660,15 +659,12 @@ static int btrfs_fill_super(struct super_block *sb, goto fail_close; } - root_dentry = d_alloc_root(inode); - if (!root_dentry) { - iput(inode); + sb->s_root = d_make_root(inode); + if (!sb->s_root) { err = -ENOMEM; goto fail_close; } - sb->s_root = root_dentry; - save_mount_options(sb, data); cleancache_init_fs(sb); sb->s_flags |= MS_ACTIVE; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 287a6728b1ad..04b77e3ceb7a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -327,7 +327,8 @@ again: if (num_bytes) { trace_btrfs_space_reservation(root->fs_info, "transaction", - (u64)h, num_bytes, 1); + (u64)(unsigned long)h, + num_bytes, 1); h->block_rsv = &root->fs_info->trans_block_rsv; h->bytes_reserved = num_bytes; } @@ -915,7 +916,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, dentry->d_name.name, dentry->d_name.len, parent_inode, &key, BTRFS_FT_DIR, index); - BUG_ON(ret); + if (ret) { + pending->error = -EEXIST; + dput(parent); + goto fail; + } btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); @@ -993,12 +998,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; - int ret; - list_for_each_entry(pending, head, list) { - ret = create_pending_snapshot(trans, fs_info, pending); - BUG_ON(ret); - } + list_for_each_entry(pending, head, list) + create_pending_snapshot(trans, fs_info, pending); return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0b4e2af7954d..ef41f285a475 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -459,12 +459,23 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device, *next; + struct block_device *latest_bdev = NULL; + u64 latest_devid = 0; + u64 latest_transid = 0; + mutex_lock(&uuid_mutex); again: /* This is the initialized path, it is safe to release the devices. */ list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { - if (device->in_fs_metadata) + if (device->in_fs_metadata) { + if (!latest_transid || + device->generation > latest_transid) { + latest_devid = device->devid; + latest_transid = device->generation; + latest_bdev = device->bdev; + } continue; + } if (device->bdev) { blkdev_put(device->bdev, device->mode); @@ -487,6 +498,10 @@ again: goto again; } + fs_devices->latest_bdev = latest_bdev; + fs_devices->latest_devid = latest_devid; + fs_devices->latest_trans = latest_transid; + mutex_unlock(&uuid_mutex); return 0; } @@ -1953,7 +1968,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, em = lookup_extent_mapping(em_tree, chunk_offset, 1); read_unlock(&em_tree->lock); - BUG_ON(em->start > chunk_offset || + BUG_ON(!em || em->start > chunk_offset || em->start + em->len < chunk_offset); map = (struct map_lookup *)em->bdev; @@ -4356,6 +4371,20 @@ int btrfs_read_sys_array(struct btrfs_root *root) return -ENOMEM; btrfs_set_buffer_uptodate(sb); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); + /* + * The sb extent buffer is artifical and just used to read the system array. + * btrfs_set_buffer_uptodate() call does not properly mark all it's + * pages up-to-date when the page is larger: extent does not cover the + * whole page and consequently check_page_uptodate does not find all + * the page's extents up-to-date (the hole beyond sb), + * write_extent_buffer then triggers a WARN_ON. + * + * Regular short extents go through mark_extent_buffer_dirty/writeback cycle, + * but sb spans only this function. Add an explicit SetPageUptodate call + * to silence the warning eg. on PowerPC 64. + */ + if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE) + SetPageUptodate(sb->first_page); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index faccd47c6c46..92c20654cc55 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -370,9 +370,9 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, PAGE_CACHE_SIZE - buf_offset); bytes = min(bytes, bytes_left); - kaddr = kmap_atomic(dest_page, KM_USER0); + kaddr = kmap_atomic(dest_page); memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); pg_offset += bytes; bytes_left -= bytes; diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index a0358c2189cb..7f0771d3894e 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -646,7 +646,8 @@ lookup_again: * (this is used to keep track of culling, and atimes are only * updated by read, write and readdir but not lookup or * open) */ - touch_atime(cache->mnt, next); + path.dentry = next; + touch_atime(&path); } /* open a file interface onto a data file */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 00de2c9568cd..256f85221926 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -655,9 +655,8 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, dout("open_root_inode success\n"); if (ceph_ino(inode) == CEPH_INO_ROOT && fsc->sb->s_root == NULL) { - root = d_alloc_root(inode); + root = d_make_root(inode); if (!root) { - iput(inode); root = ERR_PTR(-ENOMEM); goto out; } diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 0554b00a7b33..2b243af70aa3 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -139,7 +139,7 @@ config CIFS_DFS_UPCALL points. If unsure, say N. config CIFS_FSCACHE - bool "Provide CIFS client caching support (EXPERIMENTAL)" + bool "Provide CIFS client caching support" depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y help Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data @@ -147,7 +147,7 @@ config CIFS_FSCACHE manager. If unsure, say N. config CIFS_ACL - bool "Provide CIFS ACL support (EXPERIMENTAL)" + bool "Provide CIFS ACL support" depends on CIFS_XATTR && KEYS help Allows to fetch CIFS/NTFS ACL from the server. The DACL blob diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index c1b254487388..3cc1b251ca08 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -556,6 +556,7 @@ init_cifs_idmap(void) /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; root_cred = cred; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index b1fd382d1952..418fc42fb8b2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -119,12 +119,10 @@ cifs_read_super(struct super_block *sb) if (IS_ERR(inode)) { rc = PTR_ERR(inode); - inode = NULL; goto out_no_root; } - sb->s_root = d_alloc_root(inode); - + sb->s_root = d_make_root(inode); if (!sb->s_root) { rc = -ENOMEM; goto out_no_root; @@ -147,9 +145,6 @@ cifs_read_super(struct super_block *sb) out_no_root: cERROR(1, "cifs_read_super: get root inode failed"); - if (inode) - iput(inode); - return rc; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 986709a8d903..602f77c304c9 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -773,10 +773,11 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dump_mem("Bad SMB: ", buf, min_t(unsigned int, server->total_read, 48)); - if (mid) - handle_mid(mid, server, smb_buffer, length); + if (!mid) + return length; - return length; + handle_mid(mid, server, smb_buffer, length); + return 0; } static int @@ -2125,7 +2126,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) down_read(&key->sem); upayload = key->payload.data; if (IS_ERR_OR_NULL(upayload)) { - rc = PTR_ERR(key); + rc = upayload ? PTR_ERR(upayload) : -EINVAL; goto out_key_put; } @@ -2142,14 +2143,14 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) len = delim - payload; if (len > MAX_USERNAME_SIZE || len <= 0) { - cFYI(1, "Bad value from username search (len=%ld)", len); + cFYI(1, "Bad value from username search (len=%zd)", len); rc = -EINVAL; goto out_key_put; } vol->username = kstrndup(payload, len, GFP_KERNEL); if (!vol->username) { - cFYI(1, "Unable to allocate %ld bytes for username", len); + cFYI(1, "Unable to allocate %zd bytes for username", len); rc = -ENOMEM; goto out_key_put; } @@ -2157,7 +2158,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) len = key->datalen - (len + 1); if (len > MAX_PASSWORD_SIZE || len <= 0) { - cFYI(1, "Bad len for password search (len=%ld)", len); + cFYI(1, "Bad len for password search (len=%zd)", len); rc = -EINVAL; kfree(vol->username); vol->username = NULL; @@ -2167,7 +2168,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) ++delim; vol->password = kstrndup(delim, len, GFP_KERNEL); if (!vol->password) { - cFYI(1, "Unable to allocate %ld bytes for password", len); + cFYI(1, "Unable to allocate %zd bytes for password", len); rc = -ENOMEM; kfree(vol->username); vol->username = NULL; @@ -3857,10 +3858,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) struct smb_vol *vol_info; vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); - if (vol_info == NULL) { - tcon = ERR_PTR(-ENOMEM); - goto out; - } + if (vol_info == NULL) + return ERR_PTR(-ENOMEM); vol_info->local_nls = cifs_sb->local_nls; vol_info->linux_uid = fsuid; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index df8fecb5b993..bc7e24420ac0 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - __u32 oplock = 0; + __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; @@ -584,10 +584,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, * If either that or op not supported returned, follow * the normal lookup. */ - if ((rc == 0) || (rc == -ENOENT)) + switch (rc) { + case 0: + /* + * The server may allow us to open things like + * FIFOs, but the client isn't set up to deal + * with that. If it's not a regular file, just + * close it and proceed as if it were a normal + * lookup. + */ + if (newInode && !S_ISREG(newInode->i_mode)) { + CIFSSMBClose(xid, pTcon, fileHandle); + break; + } + case -ENOENT: posix_open = true; - else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP)) + case -EOPNOTSUPP: + break; + default: pTcon->broken_posix_open = true; + } } if (!posix_open) rc = cifs_get_inode_info_unix(&newInode, full_path, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4dd9283885e7..5e64748a2917 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -920,16 +920,26 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) for (lockp = &inode->i_flock; *lockp != NULL; \ lockp = &(*lockp)->fl_next) +struct lock_to_push { + struct list_head llist; + __u64 offset; + __u64 length; + __u32 pid; + __u16 netfid; + __u8 type; +}; + static int cifs_push_posix_locks(struct cifsFileInfo *cfile) { struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct file_lock *flock, **before; - struct cifsLockInfo *lck, *tmp; + unsigned int count = 0, i = 0; int rc = 0, xid, type; + struct list_head locks_to_send, *el; + struct lock_to_push *lck, *tmp; __u64 length; - struct list_head locks_to_send; xid = GetXid(); @@ -940,29 +950,55 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) return rc; } + lock_flocks(); + cifs_for_each_lock(cfile->dentry->d_inode, before) { + if ((*before)->fl_flags & FL_POSIX) + count++; + } + unlock_flocks(); + INIT_LIST_HEAD(&locks_to_send); + /* + * Allocating count locks is enough because no locks can be added to + * the list while we are holding cinode->lock_mutex that protects + * locking operations of this inode. + */ + for (; i < count; i++) { + lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); + if (!lck) { + rc = -ENOMEM; + goto err_out; + } + list_add_tail(&lck->llist, &locks_to_send); + } + + i = 0; + el = locks_to_send.next; lock_flocks(); cifs_for_each_lock(cfile->dentry->d_inode, before) { + if (el == &locks_to_send) { + /* something is really wrong */ + cERROR(1, "Can't push all brlocks!"); + break; + } flock = *before; + if ((flock->fl_flags & FL_POSIX) == 0) + continue; length = 1 + flock->fl_end - flock->fl_start; if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) type = CIFS_RDLCK; else type = CIFS_WRLCK; - - lck = cifs_lock_init(flock->fl_start, length, type, - cfile->netfid); - if (!lck) { - rc = -ENOMEM; - goto send_locks; - } + lck = list_entry(el, struct lock_to_push, llist); lck->pid = flock->fl_pid; - - list_add_tail(&lck->llist, &locks_to_send); + lck->netfid = cfile->netfid; + lck->length = length; + lck->type = type; + lck->offset = flock->fl_start; + i++; + el = el->next; } - -send_locks: unlock_flocks(); list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { @@ -979,11 +1015,18 @@ send_locks: kfree(lck); } +out: cinode->can_cache_brlcks = false; mutex_unlock(&cinode->lock_mutex); FreeXid(xid); return rc; +err_out: + list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { + list_del(&lck->llist); + kfree(lck); + } + goto out; } static int diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a5f54b7d9822..745da3d0653e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -534,6 +534,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; + /* + * Server can return wrong NumberOfLinks value for directories + * when Unix extensions are disabled - fake it. + */ + fattr->cf_nlink = 2; } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; @@ -541,9 +546,9 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, /* clear write bits if ATTR_READONLY is set */ if (fattr->cf_cifsattrs & ATTR_READONLY) fattr->cf_mode &= ~(S_IWUGO); - } - fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); + fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); + } fattr->cf_uid = cifs_sb->mnt_uid; fattr->cf_gid = cifs_sb->mnt_gid; @@ -1322,7 +1327,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) } /*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need to set uid/gid */ - inc_nlink(inode); cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); cifs_fill_uniqueid(inode->i_sb, &fattr); @@ -1355,7 +1359,6 @@ mkdir_retry_old: d_drop(direntry); } else { mkdir_get_info: - inc_nlink(inode); if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); @@ -1436,6 +1439,11 @@ mkdir_get_info: } } mkdir_out: + /* + * Force revalidate to get parent dir info when needed since cached + * attributes are invalid now. + */ + CIFS_I(inode)->time = 0; kfree(full_path); FreeXid(xid); cifs_put_tlink(tlink); @@ -1475,7 +1483,6 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifs_put_tlink(tlink); if (!rc) { - drop_nlink(inode); spin_lock(&direntry->d_inode->i_lock); i_size_write(direntry->d_inode, 0); clear_nlink(direntry->d_inode); @@ -1483,12 +1490,15 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) } cifsInode = CIFS_I(direntry->d_inode); - cifsInode->time = 0; /* force revalidate to go get info when - needed */ + /* force revalidate to go get info when needed */ + cifsInode->time = 0; cifsInode = CIFS_I(inode); - cifsInode->time = 0; /* force revalidate to get parent dir info - since cached search results now invalid */ + /* + * Force revalidate to get parent dir info when needed since cached + * attributes are invalid now. + */ + cifsInode->time = 0; direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d85efad5765f..551d0c2b9736 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -246,16 +246,15 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* copy user */ /* BB what about null user mounts - check that we do this BB */ /* copy user */ - if (ses->user_name != NULL) + if (ses->user_name != NULL) { strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE); + bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); + } /* else null user mount */ - - bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); *bcc_ptr = 0; bcc_ptr++; /* account for null termination */ /* copy domain */ - if (ses->domainName != NULL) { strncpy(bcc_ptr, ses->domainName, 256); bcc_ptr += strnlen(ses->domainName, 256); @@ -395,6 +394,10 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); + if (tioffset > blob_len || tioffset + tilen > blob_len) { + cERROR(1, "tioffset + tilen too high %u + %u", tioffset, tilen); + return -EINVAL; + } if (tilen) { ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); if (!ses->auth_key.response) { diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 45f07c46f3ed..10d92cf57ab6 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -105,7 +105,6 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, struct cifs_tcon *pTcon; struct super_block *sb; char *full_path; - struct cifs_ntsd *pacl; if (direntry == NULL) return -EIO; @@ -164,23 +163,24 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL, strlen(CIFS_XATTR_CIFS_ACL)) == 0) { +#ifdef CONFIG_CIFS_ACL + struct cifs_ntsd *pacl; pacl = kmalloc(value_size, GFP_KERNEL); if (!pacl) { cFYI(1, "%s: Can't allocate memory for ACL", __func__); rc = -ENOMEM; } else { -#ifdef CONFIG_CIFS_ACL memcpy(pacl, ea_value, value_size); rc = set_cifs_acl(pacl, value_size, direntry->d_inode, full_path, CIFS_ACL_DACL); if (rc == 0) /* force revalidate of the inode */ CIFS_I(direntry->d_inode)->time = 0; kfree(pacl); + } #else cFYI(1, "Set CIFS ACL not supported yet"); #endif /* CONFIG_CIFS_ACL */ - } } else { int temp; temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 5e2e1b3f068d..05156c17b551 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -208,13 +208,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(root)) { error = PTR_ERR(root); printk("Failure of coda_cnode_make for root: error %d\n", error); - root = NULL; goto error; } printk("coda_read_super: rootinode is %ld dev %s\n", root->i_ino, root->i_sb->s_id); - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { error = -EINVAL; goto error; @@ -222,9 +221,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) return 0; error: - if (root) - iput(root); - mutex_lock(&vc->vc_mutex); bdi_destroy(&vc->bdi); vc->vc_sb = NULL; diff --git a/fs/compat.c b/fs/compat.c index fa9d721ecfee..07880bae28a9 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -131,41 +131,35 @@ asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_tim static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { - compat_ino_t ino = stat->ino; - typeof(ubuf->st_uid) uid = 0; - typeof(ubuf->st_gid) gid = 0; - int err; + struct compat_stat tmp; - SET_UID(uid, stat->uid); - SET_GID(gid, stat->gid); + if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) + return -EOVERFLOW; - if ((u64) stat->size > MAX_NON_LFS || - !old_valid_dev(stat->dev) || - !old_valid_dev(stat->rdev)) + memset(&tmp, 0, sizeof(tmp)); + tmp.st_dev = old_encode_dev(stat->dev); + tmp.st_ino = stat->ino; + if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; - if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) + tmp.st_mode = stat->mode; + tmp.st_nlink = stat->nlink; + if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; - - if (clear_user(ubuf, sizeof(*ubuf))) - return -EFAULT; - - err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); - err |= __put_user(ino, &ubuf->st_ino); - err |= __put_user(stat->mode, &ubuf->st_mode); - err |= __put_user(stat->nlink, &ubuf->st_nlink); - err |= __put_user(uid, &ubuf->st_uid); - err |= __put_user(gid, &ubuf->st_gid); - err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); - err |= __put_user(stat->size, &ubuf->st_size); - err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); - err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); - err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); - err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); - err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); - err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); - err |= __put_user(stat->blksize, &ubuf->st_blksize); - err |= __put_user(stat->blocks, &ubuf->st_blocks); - return err; + SET_UID(tmp.st_uid, stat->uid); + SET_GID(tmp.st_gid, stat->gid); + tmp.st_rdev = old_encode_dev(stat->rdev); + if ((u64) stat->size > MAX_NON_LFS) + return -EOVERFLOW; + tmp.st_size = stat->size; + tmp.st_atime = stat->atime.tv_sec; + tmp.st_atime_nsec = stat->atime.tv_nsec; + tmp.st_mtime = stat->mtime.tv_sec; + tmp.st_mtime_nsec = stat->mtime.tv_nsec; + tmp.st_ctime = stat->ctime.tv_sec; + tmp.st_ctime_nsec = stat->ctime.tv_nsec; + tmp.st_blocks = stat->blocks; + tmp.st_blksize = stat->blksize; + return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } asmlinkage long compat_sys_newstat(const char __user * filename, diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index a26bea10e81b..10d8cd90ca6f 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -34,7 +34,7 @@ #include <linux/fs.h> #include <linux/file.h> #include <linux/ppp_defs.h> -#include <linux/if_ppp.h> +#include <linux/ppp-ioctl.h> #include <linux/if_pppox.h> #include <linux/mtio.h> #include <linux/auto_fs.h> diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index ede857d20a04..b5f0a3b91f18 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -58,12 +58,11 @@ struct configfs_dirent { extern struct mutex configfs_symlink_mutex; extern spinlock_t configfs_dirent_lock; -extern struct vfsmount * configfs_mount; extern struct kmem_cache *configfs_dir_cachep; extern int configfs_is_root(struct config_item *item); -extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *); +extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *); extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *)); extern int configfs_inode_init(void); extern void configfs_inode_exit(void); @@ -80,15 +79,15 @@ extern const unsigned char * configfs_get_name(struct configfs_dirent *sd); extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr); -extern int configfs_pin_fs(void); +extern struct dentry *configfs_pin_fs(void); extern void configfs_release_fs(void); extern struct rw_semaphore configfs_rename_sem; -extern struct super_block * configfs_sb; extern const struct file_operations configfs_dir_operations; extern const struct file_operations configfs_file_operations; extern const struct file_operations bin_fops; extern const struct inode_operations configfs_dir_inode_operations; +extern const struct inode_operations configfs_root_inode_operations; extern const struct inode_operations configfs_symlink_inode_operations; extern const struct dentry_operations configfs_dentry_ops; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5ddd7ebd9dcd..7e6c52d8a207 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -264,11 +264,13 @@ static int init_symlink(struct inode * inode) return 0; } -static int create_dir(struct config_item * k, struct dentry * p, - struct dentry * d) +static int create_dir(struct config_item *k, struct dentry *d) { int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; + struct dentry *p = d->d_parent; + + BUG_ON(!k); error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); if (!error) @@ -304,19 +306,7 @@ static int create_dir(struct config_item * k, struct dentry * p, static int configfs_create_dir(struct config_item * item, struct dentry *dentry) { - struct dentry * parent; - int error = 0; - - BUG_ON(!item); - - if (item->ci_parent) - parent = item->ci_parent->ci_dentry; - else if (configfs_mount) - parent = configfs_mount->mnt_root; - else - return -EFAULT; - - error = create_dir(item,parent,dentry); + int error = create_dir(item, dentry); if (!error) item->ci_dentry = dentry; return error; @@ -1079,23 +1069,24 @@ int configfs_depend_item(struct configfs_subsystem *subsys, int ret; struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; struct config_item *s_item = &subsys->su_group.cg_item; + struct dentry *root; /* * Pin the configfs filesystem. This means we can safely access * the root of the configfs filesystem. */ - ret = configfs_pin_fs(); - if (ret) - return ret; + root = configfs_pin_fs(); + if (IS_ERR(root)) + return PTR_ERR(root); /* * Next, lock the root directory. We're going to check that the * subsystem is really registered, and so we need to lock out * configfs_[un]register_subsystem(). */ - mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); + mutex_lock(&root->d_inode->i_mutex); - root_sd = configfs_sb->s_root->d_fsdata; + root_sd = root->d_fsdata; list_for_each_entry(p, &root_sd->s_children, s_sibling) { if (p->s_type & CONFIGFS_DIR) { @@ -1129,7 +1120,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys, out_unlock_dirent_lock: spin_unlock(&configfs_dirent_lock); out_unlock_fs: - mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); + mutex_unlock(&root->d_inode->i_mutex); /* * If we succeeded, the fs is pinned via other methods. If not, @@ -1183,11 +1174,6 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct module *subsys_owner = NULL, *new_item_owner = NULL; char *name; - if (dentry->d_parent == configfs_sb->s_root) { - ret = -EPERM; - goto out; - } - sd = dentry->d_parent->d_fsdata; /* @@ -1359,9 +1345,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; - if (dentry->d_parent == configfs_sb->s_root) - return -EPERM; - sd = dentry->d_fsdata; if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; @@ -1459,6 +1442,11 @@ const struct inode_operations configfs_dir_inode_operations = { .setattr = configfs_setattr, }; +const struct inode_operations configfs_root_inode_operations = { + .lookup = configfs_lookup, + .setattr = configfs_setattr, +}; + #if 0 int configfs_rename_dir(struct config_item * item, const char *new_name) { @@ -1546,6 +1534,7 @@ static inline unsigned char dt_type(struct configfs_dirent *sd) static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir) { struct dentry *dentry = filp->f_path.dentry; + struct super_block *sb = dentry->d_sb; struct configfs_dirent * parent_sd = dentry->d_fsdata; struct configfs_dirent *cursor = filp->private_data; struct list_head *p, *q = &cursor->s_sibling; @@ -1608,7 +1597,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir ino = inode->i_ino; spin_unlock(&configfs_dirent_lock); if (!inode) - ino = iunique(configfs_sb, 2); + ino = iunique(sb, 2); if (filldir(dirent, name, len, filp->f_pos, ino, dt_type(next)) < 0) @@ -1680,27 +1669,27 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) struct config_group *group = &subsys->su_group; struct qstr name; struct dentry *dentry; + struct dentry *root; struct configfs_dirent *sd; - err = configfs_pin_fs(); - if (err) - return err; + root = configfs_pin_fs(); + if (IS_ERR(root)) + return PTR_ERR(root); if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; - sd = configfs_sb->s_root->d_fsdata; + sd = root->d_fsdata; link_group(to_config_group(sd->s_element), group); - mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, - I_MUTEX_PARENT); + mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT); name.name = group->cg_item.ci_name; name.len = strlen(name.name); name.hash = full_name_hash(name.name, name.len); err = -ENOMEM; - dentry = d_alloc(configfs_sb->s_root, &name); + dentry = d_alloc(root, &name); if (dentry) { d_add(dentry, NULL); @@ -1717,7 +1706,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) } } - mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); + mutex_unlock(&root->d_inode->i_mutex); if (err) { unlink_group(group); @@ -1731,13 +1720,14 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) { struct config_group *group = &subsys->su_group; struct dentry *dentry = group->cg_item.ci_dentry; + struct dentry *root = dentry->d_sb->s_root; - if (dentry->d_parent != configfs_sb->s_root) { + if (dentry->d_parent != root) { printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n"); return; } - mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, + mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); mutex_lock(&configfs_symlink_mutex); @@ -1754,7 +1744,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) d_delete(dentry); - mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); + mutex_unlock(&root->d_inode->i_mutex); dput(dentry); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 3ee36d418863..0074362d9f7f 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -44,8 +44,6 @@ static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; #endif -extern struct super_block * configfs_sb; - static const struct address_space_operations configfs_aops = { .readpage = simple_readpage, .write_begin = simple_write_begin, @@ -132,9 +130,10 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) inode->i_ctime = iattr->ia_ctime; } -struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd) +struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd, + struct super_block *s) { - struct inode * inode = new_inode(configfs_sb); + struct inode * inode = new_inode(s); if (inode) { inode->i_ino = get_next_ino(); inode->i_mapping->a_ops = &configfs_aops; @@ -188,36 +187,35 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd, int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *)) { int error = 0; - struct inode * inode = NULL; - if (dentry) { - if (!dentry->d_inode) { - struct configfs_dirent *sd = dentry->d_fsdata; - if ((inode = configfs_new_inode(mode, sd))) { - if (dentry->d_parent && dentry->d_parent->d_inode) { - struct inode *p_inode = dentry->d_parent->d_inode; - p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; - } - configfs_set_inode_lock_class(sd, inode); - goto Proceed; - } - else - error = -ENOMEM; - } else - error = -EEXIST; - } else - error = -ENOENT; - goto Done; + struct inode *inode = NULL; + struct configfs_dirent *sd; + struct inode *p_inode; + + if (!dentry) + return -ENOENT; + + if (dentry->d_inode) + return -EEXIST; - Proceed: - if (init) + sd = dentry->d_fsdata; + inode = configfs_new_inode(mode, sd, dentry->d_sb); + if (!inode) + return -ENOMEM; + + p_inode = dentry->d_parent->d_inode; + p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; + configfs_set_inode_lock_class(sd, inode); + + if (init) { error = init(inode); - if (!error) { - d_instantiate(dentry, inode); - if (S_ISDIR(mode) || S_ISLNK(mode)) - dget(dentry); /* pin link and directory dentries in core */ - } else - iput(inode); - Done: + if (error) { + iput(inode); + return error; + } + } + d_instantiate(dentry, inode); + if (S_ISDIR(mode) || S_ISLNK(mode)) + dget(dentry); /* pin link and directory dentries in core */ return error; } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 276e15cafd58..aee0a7ebbd8e 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -37,8 +37,7 @@ /* Random magic number */ #define CONFIGFS_MAGIC 0x62656570 -struct vfsmount * configfs_mount = NULL; -struct super_block * configfs_sb = NULL; +static struct vfsmount *configfs_mount = NULL; struct kmem_cache *configfs_dir_cachep; static int configfs_mnt_count = 0; @@ -77,12 +76,11 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = CONFIGFS_MAGIC; sb->s_op = &configfs_ops; sb->s_time_gran = 1; - configfs_sb = sb; inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, - &configfs_root); + &configfs_root, sb); if (inode) { - inode->i_op = &configfs_dir_inode_operations; + inode->i_op = &configfs_root_inode_operations; inode->i_fop = &configfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); @@ -91,10 +89,9 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; } - root = d_alloc_root(inode); + root = d_make_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n",__func__); - iput(inode); return -ENOMEM; } config_group_init(&configfs_root_group); @@ -118,10 +115,11 @@ static struct file_system_type configfs_fs_type = { .kill_sb = kill_litter_super, }; -int configfs_pin_fs(void) +struct dentry *configfs_pin_fs(void) { - return simple_pin_fs(&configfs_fs_type, &configfs_mount, + int err = simple_pin_fs(&configfs_fs_type, &configfs_mount, &configfs_mnt_count); + return err ? ERR_PTR(err) : configfs_mount->mnt_root; } void configfs_release_fs(void) diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 0f3eb41d9201..cc9f2546ea4a 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -110,13 +110,13 @@ out: static int get_target(const char *symname, struct path *path, - struct config_item **target) + struct config_item **target, struct super_block *sb) { int ret; ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path); if (!ret) { - if (path->dentry->d_sb == configfs_sb) { + if (path->dentry->d_sb == sb) { *target = configfs_get_config_item(path->dentry); if (!*target) { ret = -ENOENT; @@ -141,10 +141,6 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna struct config_item *target_item = NULL; struct config_item_type *type; - ret = -EPERM; /* What lack-of-symlink returns */ - if (dentry->d_parent == configfs_sb->s_root) - goto out; - sd = dentry->d_parent->d_fsdata; /* * Fake invisibility if dir belongs to a group/default groups hierarchy @@ -162,7 +158,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna !type->ct_item_ops->allow_link) goto out_put; - ret = get_target(symname, &path, &target_item); + ret = get_target(symname, &path, &target_item, dentry->d_sb); if (ret) goto out_put; @@ -198,8 +194,6 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry) if (!(sd->s_type & CONFIGFS_ITEM_LINK)) goto out; - BUG_ON(dentry->d_parent == configfs_sb->s_root); - sl = sd->s_element; parent_item = configfs_get_config_item(dentry->d_parent); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index a2ee8f9f5a38..d013c46402ed 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -257,10 +257,10 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) /* Do sanity checks on the superblock */ if (super.magic != CRAMFS_MAGIC) { - /* check for wrong endianess */ + /* check for wrong endianness */ if (super.magic == CRAMFS_MAGIC_WEND) { if (!silent) - printk(KERN_ERR "cramfs: wrong endianess\n"); + printk(KERN_ERR "cramfs: wrong endianness\n"); goto out; } @@ -270,7 +270,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) mutex_unlock(&read_mutex); if (super.magic != CRAMFS_MAGIC) { if (super.magic == CRAMFS_MAGIC_WEND && !silent) - printk(KERN_ERR "cramfs: wrong endianess\n"); + printk(KERN_ERR "cramfs: wrong endianness\n"); else if (!silent) printk(KERN_ERR "cramfs: wrong magic\n"); goto out; @@ -318,11 +318,9 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) root = get_cramfs_inode(sb, &super.root, 0); if (IS_ERR(root)) goto out; - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); + sb->s_root = d_make_root(root); + if (!sb->s_root) goto out; - } return 0; out: kfree(sbi); diff --git a/fs/dcache.c b/fs/dcache.c index 16a53cc2cc02..2b55bd0c1061 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -104,11 +104,11 @@ static unsigned int d_hash_shift __read_mostly; static struct hlist_bl_head *dentry_hashtable __read_mostly; -static inline struct hlist_bl_head *d_hash(struct dentry *parent, - unsigned long hash) +static inline struct hlist_bl_head *d_hash(const struct dentry *parent, + unsigned int hash) { - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + hash += (unsigned long) parent / L1_CACHE_BYTES; + hash = hash + (hash >> D_HASHBITS); return dentry_hashtable + (hash & D_HASHMASK); } @@ -137,6 +137,49 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, } #endif +/* + * Compare 2 name strings, return 0 if they match, otherwise non-zero. + * The strings are both count bytes long, and count is non-zero. + */ +static inline int dentry_cmp(const unsigned char *cs, size_t scount, + const unsigned char *ct, size_t tcount) +{ +#ifdef CONFIG_DCACHE_WORD_ACCESS + unsigned long a,b,mask; + + if (unlikely(scount != tcount)) + return 1; + + for (;;) { + a = *(unsigned long *)cs; + b = *(unsigned long *)ct; + if (tcount < sizeof(unsigned long)) + break; + if (unlikely(a != b)) + return 1; + cs += sizeof(unsigned long); + ct += sizeof(unsigned long); + tcount -= sizeof(unsigned long); + if (!tcount) + return 0; + } + mask = ~(~0ul << tcount*8); + return unlikely(!!((a ^ b) & mask)); +#else + if (scount != tcount) + return 1; + + do { + if (*cs != *ct) + return 1; + cs++; + ct++; + tcount--; + } while (tcount); + return 0; +#endif +} + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); @@ -1423,30 +1466,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) EXPORT_SYMBOL(d_instantiate_unique); -/** - * d_alloc_root - allocate root dentry - * @root_inode: inode to allocate the root for - * - * Allocate a root ("/") dentry for the inode given. The inode is - * instantiated and returned. %NULL is returned if there is insufficient - * memory or the inode passed is %NULL. - */ - -struct dentry * d_alloc_root(struct inode * root_inode) -{ - struct dentry *res = NULL; - - if (root_inode) { - static const struct qstr name = { .name = "/", .len = 1 }; - - res = __d_alloc(root_inode->i_sb, &name); - if (res) - d_instantiate(res, root_inode); - } - return res; -} -EXPORT_SYMBOL(d_alloc_root); - struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; @@ -1694,7 +1713,7 @@ EXPORT_SYMBOL(d_add_ci); * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry * @name: qstr of name we wish to find - * @seq: returns d_seq value at the point where the dentry was found + * @seqp: returns d_seq value at the point where the dentry was found * @inode: returns dentry->d_inode when the inode was found valid. * Returns: dentry, or NULL * @@ -1717,8 +1736,9 @@ EXPORT_SYMBOL(d_add_ci); * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. */ -struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, - unsigned *seq, struct inode **inode) +struct dentry *__d_lookup_rcu(const struct dentry *parent, + const struct qstr *name, + unsigned *seqp, struct inode **inode) { unsigned int len = name->len; unsigned int hash = name->hash; @@ -1748,6 +1768,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, * See Documentation/filesystems/path-lookup.txt for more details. */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { + unsigned seq; struct inode *i; const char *tname; int tlen; @@ -1756,7 +1777,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, continue; seqretry: - *seq = read_seqcount_begin(&dentry->d_seq); + seq = read_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) @@ -1771,7 +1792,7 @@ seqretry: * edge of memory when walking. If we could load this * atomically some other way, we could drop this check. */ - if (read_seqcount_retry(&dentry->d_seq, *seq)) + if (read_seqcount_retry(&dentry->d_seq, seq)) goto seqretry; if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { if (parent->d_op->d_compare(parent, *inode, @@ -1788,6 +1809,7 @@ seqretry: * order to do anything useful with the returned dentry * anyway. */ + *seqp = seq; *inode = i; return dentry; } @@ -2968,7 +2990,7 @@ __setup("dhash_entries=", set_dhash_entries); static void __init dcache_init_early(void) { - int loop; + unsigned int loop; /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. @@ -2986,13 +3008,13 @@ static void __init dcache_init_early(void) &d_hash_mask, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) + for (loop = 0; loop < (1U << d_hash_shift); loop++) INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } static void __init dcache_init(void) { - int loop; + unsigned int loop; /* * A constructor could be added for stable state like the lists, @@ -3016,7 +3038,7 @@ static void __init dcache_init(void) &d_hash_mask, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) + for (loop = 0; loop < (1U << d_hash_shift); loop++) INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index ef023eef0464..21e93605161c 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -611,7 +611,7 @@ static const struct file_operations fops_regset32 = { * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling * code. */ -struct dentry *debugfs_create_regset32(const char *name, mode_t mode, +struct dentry *debugfs_create_regset32(const char *name, umode_t mode, struct dentry *parent, struct debugfs_regset32 *regset) { diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 956d5ddddf6e..b80bc846a15a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -23,9 +23,13 @@ #include <linux/debugfs.h> #include <linux/fsnotify.h> #include <linux/string.h> +#include <linux/seq_file.h> +#include <linux/parser.h> #include <linux/magic.h> #include <linux/slab.h> +#define DEBUGFS_DEFAULT_MODE 0755 + static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; @@ -125,11 +129,154 @@ static inline int debugfs_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } +struct debugfs_mount_opts { + uid_t uid; + gid_t gid; + umode_t mode; +}; + +enum { + Opt_uid, + Opt_gid, + Opt_mode, + Opt_err +}; + +static const match_table_t tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_mode, "mode=%o"}, + {Opt_err, NULL} +}; + +struct debugfs_fs_info { + struct debugfs_mount_opts mount_opts; +}; + +static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts) +{ + substring_t args[MAX_OPT_ARGS]; + int option; + int token; + char *p; + + opts->mode = DEBUGFS_DEFAULT_MODE; + + while ((p = strsep(&data, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + opts->uid = option; + break; + case Opt_gid: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->gid = option; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->mode = option & S_IALLUGO; + break; + /* + * We might like to report bad mount options here; + * but traditionally debugfs has ignored all mount options + */ + } + } + + return 0; +} + +static int debugfs_apply_options(struct super_block *sb) +{ + struct debugfs_fs_info *fsi = sb->s_fs_info; + struct inode *inode = sb->s_root->d_inode; + struct debugfs_mount_opts *opts = &fsi->mount_opts; + + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= opts->mode; + + inode->i_uid = opts->uid; + inode->i_gid = opts->gid; + + return 0; +} + +static int debugfs_remount(struct super_block *sb, int *flags, char *data) +{ + int err; + struct debugfs_fs_info *fsi = sb->s_fs_info; + + err = debugfs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + debugfs_apply_options(sb); + +fail: + return err; +} + +static int debugfs_show_options(struct seq_file *m, struct dentry *root) +{ + struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; + struct debugfs_mount_opts *opts = &fsi->mount_opts; + + if (opts->uid != 0) + seq_printf(m, ",uid=%u", opts->uid); + if (opts->gid != 0) + seq_printf(m, ",gid=%u", opts->gid); + if (opts->mode != DEBUGFS_DEFAULT_MODE) + seq_printf(m, ",mode=%o", opts->mode); + + return 0; +} + +static const struct super_operations debugfs_super_operations = { + .statfs = simple_statfs, + .remount_fs = debugfs_remount, + .show_options = debugfs_show_options, +}; + static int debug_fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr debug_files[] = {{""}}; + struct debugfs_fs_info *fsi; + int err; + + save_mount_options(sb, data); + + fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL); + sb->s_fs_info = fsi; + if (!fsi) { + err = -ENOMEM; + goto fail; + } + + err = debugfs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; + + err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); + if (err) + goto fail; + + sb->s_op = &debugfs_super_operations; + + debugfs_apply_options(sb); + + return 0; - return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); +fail: + kfree(fsi); + sb->s_fs_info = NULL; + return err; } static struct dentry *debug_mount(struct file_system_type *fs_type, diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c4e2a58a2e82..10f5e0b484db 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -36,7 +36,61 @@ #define DEVPTS_DEFAULT_PTMX_MODE 0000 #define PTMX_MINOR 2 -extern int pty_limit; /* Config limit on Unix98 ptys */ +/* + * sysctl support for setting limits on the number of Unix98 ptys allocated. + * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly. + */ +static int pty_limit = NR_UNIX98_PTY_DEFAULT; +static int pty_reserve = NR_UNIX98_PTY_RESERVE; +static int pty_limit_min; +static int pty_limit_max = INT_MAX; +static int pty_count; + +static struct ctl_table pty_table[] = { + { + .procname = "max", + .maxlen = sizeof(int), + .mode = 0644, + .data = &pty_limit, + .proc_handler = proc_dointvec_minmax, + .extra1 = &pty_limit_min, + .extra2 = &pty_limit_max, + }, { + .procname = "reserve", + .maxlen = sizeof(int), + .mode = 0644, + .data = &pty_reserve, + .proc_handler = proc_dointvec_minmax, + .extra1 = &pty_limit_min, + .extra2 = &pty_limit_max, + }, { + .procname = "nr", + .maxlen = sizeof(int), + .mode = 0444, + .data = &pty_count, + .proc_handler = proc_dointvec, + }, + {} +}; + +static struct ctl_table pty_kern_table[] = { + { + .procname = "pty", + .mode = 0555, + .child = pty_table, + }, + {} +}; + +static struct ctl_table pty_root_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = pty_kern_table, + }, + {} +}; + static DEFINE_MUTEX(allocated_ptys_lock); static struct vfsmount *devpts_mnt; @@ -49,10 +103,11 @@ struct pts_mount_opts { umode_t mode; umode_t ptmxmode; int newinstance; + int max; }; enum { - Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, + Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, Opt_max, Opt_err }; @@ -63,6 +118,7 @@ static const match_table_t tokens = { #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES {Opt_ptmxmode, "ptmxmode=%o"}, {Opt_newinstance, "newinstance"}, + {Opt_max, "max=%d"}, #endif {Opt_err, NULL} }; @@ -109,6 +165,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) opts->gid = 0; opts->mode = DEVPTS_DEFAULT_MODE; opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; + opts->max = NR_UNIX98_PTY_MAX; /* newinstance makes sense only on initial mount */ if (op == PARSE_MOUNT) @@ -152,6 +209,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) if (op == PARSE_MOUNT) opts->newinstance = 1; break; + case Opt_max: + if (match_int(&args[0], &option) || + option < 0 || option > NR_UNIX98_PTY_MAX) + return -EINVAL; + opts->max = option; + break; #endif default: printk(KERN_ERR "devpts: called with bogus options\n"); @@ -258,6 +321,8 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",mode=%03o", opts->mode); #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); + if (opts->max < NR_UNIX98_PTY_MAX) + seq_printf(seq, ",max=%d", opts->max); #endif return 0; @@ -309,12 +374,11 @@ devpts_fill_super(struct super_block *s, void *data, int silent) inode->i_fop = &simple_dir_operations; set_nlink(inode, 2); - s->s_root = d_alloc_root(inode); + s->s_root = d_make_root(inode); if (s->s_root) return 0; printk(KERN_ERR "devpts: get root dentry failed\n"); - iput(inode); fail: return -ENOMEM; @@ -438,6 +502,12 @@ retry: return -ENOMEM; mutex_lock(&allocated_ptys_lock); + if (pty_count >= pty_limit - + (fsi->mount_opts.newinstance ? pty_reserve : 0)) { + mutex_unlock(&allocated_ptys_lock); + return -ENOSPC; + } + ida_ret = ida_get_new(&fsi->allocated_ptys, &index); if (ida_ret < 0) { mutex_unlock(&allocated_ptys_lock); @@ -446,11 +516,12 @@ retry: return -EIO; } - if (index >= pty_limit) { + if (index >= fsi->mount_opts.max) { ida_remove(&fsi->allocated_ptys, index); mutex_unlock(&allocated_ptys_lock); - return -EIO; + return -ENOSPC; } + pty_count++; mutex_unlock(&allocated_ptys_lock); return index; } @@ -462,6 +533,7 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) mutex_lock(&allocated_ptys_lock); ida_remove(&fsi->allocated_ptys, idx); + pty_count--; mutex_unlock(&allocated_ptys_lock); } @@ -558,11 +630,15 @@ void devpts_pty_kill(struct tty_struct *tty) static int __init init_devpts_fs(void) { int err = register_filesystem(&devpts_fs_type); + struct ctl_table_header *table; + if (!err) { + table = register_sysctl_table(pty_root_table); devpts_mnt = kern_mount(&devpts_fs_type); if (IS_ERR(devpts_mnt)) { err = PTR_ERR(devpts_mnt); unregister_filesystem(&devpts_fs_type); + unregister_sysctl_table(table); } } return err; diff --git a/fs/direct-io.c b/fs/direct-io.c index 4a588dbd11bf..f4aadd15b613 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -173,7 +173,7 @@ void inode_dio_wait(struct inode *inode) if (atomic_read(&inode->i_dio_count)) __inode_dio_wait(inode); } -EXPORT_SYMBOL_GPL(inode_dio_wait); +EXPORT_SYMBOL(inode_dio_wait); /* * inode_dio_done - signal finish of a direct I/O requests @@ -187,7 +187,7 @@ void inode_dio_done(struct inode *inode) if (atomic_dec_and_test(&inode->i_dio_count)) wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); } -EXPORT_SYMBOL_GPL(inode_dio_done); +EXPORT_SYMBOL(inode_dio_done); /* * How many pages are in the queue? diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 83641574b016..dc5eb598b81f 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -351,11 +351,28 @@ int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) { struct dlm_rsb *r; + uint32_t hash, bucket; + int rv; + + hash = jhash(name, len, 0); + bucket = hash & (ls->ls_rsbtbl_size - 1); + + spin_lock(&ls->ls_rsbtbl[bucket].lock); + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, 0, &r); + if (rv) + rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, + name, len, 0, &r); + spin_unlock(&ls->ls_rsbtbl[bucket].lock); + + if (!rv) + return r; down_read(&ls->ls_root_sem); list_for_each_entry(r, &ls->ls_root_list, res_root_list) { if (len == r->res_length && !memcmp(name, r->res_name, len)) { up_read(&ls->ls_root_sem); + log_error(ls, "find_rsb_root revert to root_list %s", + r->res_name); return r; } } diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index d47183043c59..fa5c07d51dcc 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -411,8 +411,8 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); } -static int search_rsb_tree(struct rb_root *tree, char *name, int len, - unsigned int flags, struct dlm_rsb **r_ret) +int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, + unsigned int flags, struct dlm_rsb **r_ret) { struct rb_node *node = tree->rb_node; struct dlm_rsb *r; @@ -474,12 +474,12 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, struct dlm_rsb *r; int error; - error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); if (!error) { kref_get(&r->res_ref); goto out; } - error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); + error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); if (error) goto out; diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 265017a7c3e7..1a255307f6ff 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -28,6 +28,9 @@ void dlm_scan_waiters(struct dlm_ls *ls); void dlm_scan_timeout(struct dlm_ls *ls); void dlm_adjust_timeouts(struct dlm_ls *ls); +int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, + unsigned int flags, struct dlm_rsb **r_ret); + int dlm_purge_locks(struct dlm_ls *ls); void dlm_purge_mstcpy_locks(struct dlm_rsb *r); void dlm_grant_after_purge(struct dlm_ls *ls); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 0b3109ee4257..133ef6dc7cb7 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -52,6 +52,7 @@ #include <linux/mutex.h> #include <linux/sctp.h> #include <linux/slab.h> +#include <net/sctp/sctp.h> #include <net/sctp/user.h> #include <net/ipv6.h> @@ -474,9 +475,6 @@ static void process_sctp_notification(struct connection *con, int prim_len, ret; int addr_len; struct connection *new_con; - sctp_peeloff_arg_t parg; - int parglen = sizeof(parg); - int err; /* * We get this before any data for an association. @@ -525,23 +523,19 @@ static void process_sctp_notification(struct connection *con, return; /* Peel off a new sock */ - parg.associd = sn->sn_assoc_change.sac_assoc_id; - ret = kernel_getsockopt(con->sock, IPPROTO_SCTP, - SCTP_SOCKOPT_PEELOFF, - (void *)&parg, &parglen); + sctp_lock_sock(con->sock->sk); + ret = sctp_do_peeloff(con->sock->sk, + sn->sn_assoc_change.sac_assoc_id, + &new_con->sock); + sctp_release_sock(con->sock->sk); if (ret < 0) { log_print("Can't peel off a socket for " "connection %d to node %d: err=%d", - parg.associd, nodeid, ret); - return; - } - new_con->sock = sockfd_lookup(parg.sd, &err); - if (!new_con->sock) { - log_print("sockfd_lookup error %d", err); + (int)sn->sn_assoc_change.sac_assoc_id, + nodeid, ret); return; } add_sock(new_con->sock, new_con); - sockfd_put(new_con->sock); log_print("connecting to %d sctp association %d", nodeid, (int)sn->sn_assoc_change.sac_assoc_id); @@ -1082,7 +1076,7 @@ static void init_local(void) int i; dlm_local_count = 0; - for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { + for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) { if (dlm_our_addr(&sas, i)) break; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 63ab24510649..ea9931281557 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1990,6 +1990,17 @@ out: return; } +static size_t ecryptfs_max_decoded_size(size_t encoded_size) +{ + /* Not exact; conservatively long. Every block of 4 + * encoded characters decodes into a block of 3 + * decoded characters. This segment of code provides + * the caller with the maximum amount of allocated + * space that @dst will need to point to in a + * subsequent call. */ + return ((encoded_size + 1) * 3) / 4; +} + /** * ecryptfs_decode_from_filename * @dst: If NULL, this function only sets @dst_size and returns. If @@ -2008,13 +2019,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, size_t dst_byte_offset = 0; if (dst == NULL) { - /* Not exact; conservatively long. Every block of 4 - * encoded characters decodes into a block of 3 - * decoded characters. This segment of code provides - * the caller with the maximum amount of allocated - * space that @dst will need to point to in a - * subsequent call. */ - (*dst_size) = (((src_size + 1) * 3) / 4); + (*dst_size) = ecryptfs_max_decoded_size(src_size); goto out; } while (src_byte_offset < src_size) { @@ -2239,3 +2244,52 @@ out_free: out: return rc; } + +#define ENC_NAME_MAX_BLOCKLEN_8_OR_16 143 + +int ecryptfs_set_f_namelen(long *namelen, long lower_namelen, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) +{ + struct blkcipher_desc desc; + struct mutex *tfm_mutex; + size_t cipher_blocksize; + int rc; + + if (!(mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) { + (*namelen) = lower_namelen; + return 0; + } + + rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex, + mount_crypt_stat->global_default_fn_cipher_name); + if (unlikely(rc)) { + (*namelen) = 0; + return rc; + } + + mutex_lock(tfm_mutex); + cipher_blocksize = crypto_blkcipher_blocksize(desc.tfm); + mutex_unlock(tfm_mutex); + + /* Return an exact amount for the common cases */ + if (lower_namelen == NAME_MAX + && (cipher_blocksize == 8 || cipher_blocksize == 16)) { + (*namelen) = ENC_NAME_MAX_BLOCKLEN_8_OR_16; + return 0; + } + + /* Return a safe estimate for the uncommon cases */ + (*namelen) = lower_namelen; + (*namelen) -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; + /* Since this is the max decoded size, subtract 1 "decoded block" len */ + (*namelen) = ecryptfs_max_decoded_size(*namelen) - 3; + (*namelen) -= ECRYPTFS_TAG_70_MAX_METADATA_SIZE; + (*namelen) -= ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES; + /* Worst case is that the filename is padded nearly a full block size */ + (*namelen) -= cipher_blocksize - 1; + + if ((*namelen) < 0) + (*namelen) = 0; + + return 0; +} diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index a2362df58ae8..867b64c5d84f 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -162,6 +162,10 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */ #define MD5_DIGEST_SIZE 16 #define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE +#define ECRYPTFS_TAG_70_MIN_METADATA_SIZE (1 + ECRYPTFS_MIN_PKT_LEN_SIZE \ + + ECRYPTFS_SIG_SIZE + 1 + 1) +#define ECRYPTFS_TAG_70_MAX_METADATA_SIZE (1 + ECRYPTFS_MAX_PKT_LEN_SIZE \ + + ECRYPTFS_SIG_SIZE + 1 + 1) #define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED." #define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23 #define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED." @@ -701,6 +705,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, size_t *packet_size, struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *data, size_t max_packet_size); +int ecryptfs_set_f_namelen(long *namelen, long lower_namelen, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat); int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, loff_t offset); diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index d3f95f941c47..2b17f2f9b121 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -48,8 +48,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, unsigned long nr_segs, loff_t pos) { ssize_t rc; - struct dentry *lower_dentry; - struct vfsmount *lower_vfsmount; + struct path lower; struct file *file = iocb->ki_filp; rc = generic_file_aio_read(iocb, iov, nr_segs, pos); @@ -60,9 +59,9 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, if (-EIOCBQUEUED == rc) rc = wait_on_sync_kiocb(iocb); if (rc >= 0) { - lower_dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); - lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); - touch_atime(lower_vfsmount, lower_dentry); + lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); + lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); + touch_atime(&lower); } return rc; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 19892d7d2ed1..ab35b113003b 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1085,6 +1085,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, } rc = vfs_setxattr(lower_dentry, name, value, size, flags); + if (!rc) + fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode); out: return rc; } diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 8e3b943e330f..2333203a120b 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -679,10 +679,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, * Octets N3-N4: Block-aligned encrypted filename * - Consists of a minimum number of random characters, a \0 * separator, and then the filename */ - s->max_packet_size = (1 /* Tag 70 identifier */ - + 3 /* Max Tag 70 packet size */ - + ECRYPTFS_SIG_SIZE /* FNEK sig */ - + 1 /* Cipher identifier */ + s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE + s->block_aligned_filename_size); if (dest == NULL) { (*packet_size) = s->max_packet_size; @@ -934,10 +931,10 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, goto out; } s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) { + if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) { printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be " "at least [%d]\n", __func__, max_packet_size, - (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)); + ECRYPTFS_TAG_70_MIN_METADATA_SIZE); rc = -EINVAL; goto out; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index b4a6befb1216..68954937a071 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -550,9 +550,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags if (IS_ERR(inode)) goto out_free; - s->s_root = d_alloc_root(inode); + s->s_root = d_make_root(inode); if (!s->s_root) { - iput(inode); rc = -ENOMEM; goto out_free; } @@ -795,15 +794,10 @@ static int __init ecryptfs_init(void) "Failed to allocate one or more kmem_cache objects\n"); goto out; } - rc = register_filesystem(&ecryptfs_fs_type); - if (rc) { - printk(KERN_ERR "Failed to register filesystem\n"); - goto out_free_kmem_caches; - } rc = do_sysfs_registration(); if (rc) { printk(KERN_ERR "sysfs registration failed\n"); - goto out_unregister_filesystem; + goto out_free_kmem_caches; } rc = ecryptfs_init_kthread(); if (rc) { @@ -824,19 +818,24 @@ static int __init ecryptfs_init(void) "rc = [%d]\n", rc); goto out_release_messaging; } + rc = register_filesystem(&ecryptfs_fs_type); + if (rc) { + printk(KERN_ERR "Failed to register filesystem\n"); + goto out_destroy_crypto; + } if (ecryptfs_verbosity > 0) printk(KERN_CRIT "eCryptfs verbosity set to %d. Secret values " "will be written to the syslog!\n", ecryptfs_verbosity); goto out; +out_destroy_crypto: + ecryptfs_destroy_crypto(); out_release_messaging: ecryptfs_release_messaging(); out_destroy_kthread: ecryptfs_destroy_kthread(); out_do_sysfs_unregistration: do_sysfs_unregistration(); -out_unregister_filesystem: - unregister_filesystem(&ecryptfs_fs_type); out_free_kmem_caches: ecryptfs_free_kmem_caches(); out: diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 349209dc6a91..3a06f4043df4 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -429,7 +429,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, goto memdup; } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) { printk(KERN_WARNING "%s: Acceptable packet size range is " - "[%d-%lu], but amount of data written is [%zu].", + "[%d-%zu], but amount of data written is [%zu].", __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count); return -EINVAL; } diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 10ec695ccd68..a46b3a8fee1e 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -150,7 +150,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, /* This is a header extent */ char *page_virt; - page_virt = kmap_atomic(page, KM_USER0); + page_virt = kmap_atomic(page); memset(page_virt, 0, PAGE_CACHE_SIZE); /* TODO: Support more than one header extent */ if (view_extent_num == 0) { @@ -163,7 +163,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, crypt_stat, &written); } - kunmap_atomic(page_virt, KM_USER0); + kunmap_atomic(page_virt); flush_dcache_page(page); if (rc) { printk(KERN_ERR "%s: Error reading xattr " diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 5c0106f75775..b2a34a192f4f 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -156,7 +156,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, ecryptfs_page_idx, rc); goto out; } - ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); + ecryptfs_page_virt = kmap_atomic(ecryptfs_page); /* * pos: where we're now writing, offset: where the request was @@ -179,7 +179,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, (data + data_offset), num_bytes); data_offset += num_bytes; } - kunmap_atomic(ecryptfs_page_virt, KM_USER0); + kunmap_atomic(ecryptfs_page_virt); flush_dcache_page(ecryptfs_page); SetPageUptodate(ecryptfs_page); unlock_page(ecryptfs_page); diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 9df7fd6e0c39..2dd946b636d2 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -30,6 +30,8 @@ #include <linux/seq_file.h> #include <linux/file.h> #include <linux/crypto.h> +#include <linux/statfs.h> +#include <linux/magic.h> #include "ecryptfs_kernel.h" struct kmem_cache *ecryptfs_inode_info_cache; @@ -102,10 +104,20 @@ static void ecryptfs_destroy_inode(struct inode *inode) static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + int rc; if (!lower_dentry->d_sb->s_op->statfs) return -ENOSYS; - return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); + + rc = lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); + if (rc) + return rc; + + buf->f_type = ECRYPTFS_SUPER_MAGIC; + rc = ecryptfs_set_f_namelen(&buf->f_namelen, buf->f_namelen, + &ecryptfs_superblock_to_private(dentry->d_sb)->mount_crypt_stat); + + return rc; } /** @@ -172,7 +184,6 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root) const struct super_operations ecryptfs_sops = { .alloc_inode = ecryptfs_alloc_inode, .destroy_inode = ecryptfs_destroy_inode, - .drop_inode = generic_drop_inode, .statfs = ecryptfs_statfs, .remount_fs = NULL, .evict_inode = ecryptfs_evict_inode, diff --git a/fs/efs/super.c b/fs/efs/super.c index 981106429a9f..e755ec746c69 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -317,10 +317,9 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) goto out_no_fs; } - s->s_root = d_alloc_root(root); + s->s_root = d_make_root(root); if (!(s->s_root)) { printk(KERN_ERR "EFS: get root dentry failed\n"); - iput(root); ret = -ENOMEM; goto out_no_fs; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index aabdfc38cf24..4d9d3a45e356 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -320,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p) return !list_empty(p); } +static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p) +{ + return container_of(p, struct eppoll_entry, wait); +} + /* Get the "struct epitem" from a wait queue pointer */ static inline struct epitem *ep_item_from_wait(wait_queue_t *p) { @@ -467,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq) put_cpu(); } +static void ep_remove_wait_queue(struct eppoll_entry *pwq) +{ + wait_queue_head_t *whead; + + rcu_read_lock(); + /* If it is cleared by POLLFREE, it should be rcu-safe */ + whead = rcu_dereference(pwq->whead); + if (whead) + remove_wait_queue(whead, &pwq->wait); + rcu_read_unlock(); +} + /* * This function unregisters poll callbacks from the associated file * descriptor. Must be called with "mtx" held (or "epmutex" if called from @@ -481,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) pwq = list_first_entry(lsthead, struct eppoll_entry, llink); list_del(&pwq->llink); - remove_wait_queue(pwq->whead, &pwq->wait); + ep_remove_wait_queue(pwq); kmem_cache_free(pwq_cache, pwq); } } @@ -842,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; + if ((unsigned long)key & POLLFREE) { + ep_pwq_from_wait(wait)->whead = NULL; + /* + * whead = NULL above can race with ep_remove_wait_queue() + * which can do another remove_wait_queue() after us, so we + * can't use __remove_wait_queue(). whead->lock is held by + * the caller. + */ + list_del_init(&wait->task_list); + } + spin_lock_irqsave(&ep->lock, flags); /* @@ -960,6 +988,10 @@ static int path_count[PATH_ARR_SIZE]; static int path_count_inc(int nests) { + /* Allow an arbitrary number of depth 1 paths */ + if (nests == 0) + return 0; + if (++path_count[nests] > path_limits[nests]) return -1; return 0; diff --git a/fs/exec.c b/fs/exec.c index aeb135c7ff5c..23559c227d9c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -63,6 +63,8 @@ #include <trace/events/task.h> #include "internal.h" +#include <trace/events/sched.h> + int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; unsigned int core_pipe_limit; @@ -79,15 +81,13 @@ static atomic_t call_count = ATOMIC_INIT(1); static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); -int __register_binfmt(struct linux_binfmt * fmt, int insert) +void __register_binfmt(struct linux_binfmt * fmt, int insert) { - if (!fmt) - return -EINVAL; + BUG_ON(!fmt); write_lock(&binfmt_lock); insert ? list_add(&fmt->lh, &formats) : list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); - return 0; } EXPORT_SYMBOL(__register_binfmt); @@ -822,7 +822,7 @@ static int exec_mmap(struct mm_struct *mm) /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; - sync_mm_rss(tsk, old_mm); + sync_mm_rss(old_mm); mm_release(tsk, old_mm); if (old_mm) { @@ -848,6 +848,7 @@ static int exec_mmap(struct mm_struct *mm) if (old_mm) { up_read(&old_mm->mmap_sem); BUG_ON(active_mm != old_mm); + setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); mm_update_next_owner(old_mm); mmput(old_mm); return 0; @@ -975,8 +976,8 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = 0; no_thread_group: - if (current->mm) - setmax_mm_hiwater_rss(&sig->maxrss, current->mm); + /* we have changed execution domain */ + tsk->exit_signal = SIGCHLD; exit_itimers(sig); flush_itimer_signals(); @@ -1071,6 +1072,21 @@ void set_task_comm(struct task_struct *tsk, char *buf) perf_event_comm(tsk); } +static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) +{ + int i, ch; + + /* Copies the binary name from after last slash */ + for (i = 0; (ch = *(fn++)) != '\0';) { + if (ch == '/') + i = 0; /* overwrite what we wrote */ + else + if (i < len - 1) + tcomm[i++] = ch; + } + tcomm[i] = '\0'; +} + int flush_old_exec(struct linux_binprm * bprm) { int retval; @@ -1085,6 +1101,7 @@ int flush_old_exec(struct linux_binprm * bprm) set_mm_exe_file(bprm->mm, bprm->file); + filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); /* * Release all of the old mmap stuff */ @@ -1096,7 +1113,7 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ set_fs(USER_DS); - current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); + current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD); flush_thread(); current->personality &= ~bprm->per_clear; @@ -1116,10 +1133,6 @@ EXPORT_SYMBOL(would_dump); void setup_new_exec(struct linux_binprm * bprm) { - int i, ch; - const char *name; - char tcomm[sizeof(current->comm)]; - arch_pick_mmap_layout(current->mm); /* This is the point of no return */ @@ -1130,18 +1143,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - name = bprm->filename; - - /* Copies the binary name from after last slash */ - for (i=0; (ch = *(name++)) != '\0';) { - if (ch == '/') - i = 0; /* overwrite what we wrote */ - else - if (i < (sizeof(tcomm) - 1)) - tcomm[i++] = ch; - } - tcomm[i] = '\0'; - set_task_comm(current, tcomm); + set_task_comm(current, bprm->tcomm); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on @@ -1338,13 +1340,13 @@ int remove_arg_zero(struct linux_binprm *bprm) ret = -EFAULT; goto out; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); for (; offset < PAGE_SIZE && kaddr[offset]; offset++, bprm->p++) ; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); put_arg_page(page); if (offset == PAGE_SIZE) @@ -1401,9 +1403,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) */ bprm->recursion_depth = depth; if (retval >= 0) { - if (depth == 0) - ptrace_event(PTRACE_EVENT_EXEC, - old_pid); + if (depth == 0) { + trace_sched_process_exec(current, old_pid, bprm); + ptrace_event(PTRACE_EVENT_EXEC, old_pid); + } put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) @@ -1914,7 +1917,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct completion *vfork_done; int core_waiters = -EBUSY; init_completion(&core_state->startup); @@ -1926,22 +1928,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state) core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); - if (unlikely(core_waiters < 0)) - goto fail; - - /* - * Make sure nobody is waiting for us to release the VM, - * otherwise we can deadlock when we wait on each other - */ - vfork_done = tsk->vfork_done; - if (vfork_done) { - tsk->vfork_done = NULL; - complete(vfork_done); - } - - if (core_waiters) + if (core_waiters > 0) wait_for_completion(&core_state->startup); -fail: + return core_waiters; } diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 80405836ba6e..c61e62ac231c 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -597,7 +597,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent) goto fail; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); de = (struct exofs_dir_entry *)kaddr; de->name_len = 1; de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1)); @@ -611,7 +611,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent) de->inode_no = cpu_to_le64(parent->i_ino); memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); exofs_set_de_type(de, inode); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); err = exofs_commit_chunk(page, 0, chunk_size); fail: page_cache_release(page); diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 9dbf0c301030..fc7161d6bf6b 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -143,9 +143,6 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir, { struct inode *inode = old_dentry->d_inode; - if (inode->i_nlink >= EXOFS_LINK_MAX) - return -EMLINK; - inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); ihold(inode); @@ -156,10 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir, static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; - int err = -EMLINK; - - if (dir->i_nlink >= EXOFS_LINK_MAX) - goto out; + int err; inode_inc_link_count(dir); @@ -275,11 +269,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out_dir; } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= EXOFS_LINK_MAX) - goto out_dir; - } err = exofs_add_link(new_dentry, old_inode); if (err) goto out_dir; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index d22cd168c6ee..7f2b590a36b7 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -754,6 +754,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize = EXOFS_BLKSIZE; sb->s_blocksize_bits = EXOFS_BLKSHIFT; sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_max_links = EXOFS_LINK_MAX; atomic_set(&sbi->s_curr_pending, 0); sb->s_bdev = NULL; sb->s_dev = 0; @@ -818,9 +819,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ret = PTR_ERR(root); goto free_sbi; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { - iput(root); EXOFS_ERR("ERROR: get root inode failed\n"); ret = -ENOMEM; goto free_sbi; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index d37df352d324..0f4f5c929257 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -645,7 +645,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) unlock_page(page); goto fail; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, 0, chunk_size); de = (struct ext2_dir_entry_2 *)kaddr; de->name_len = 1; @@ -660,7 +660,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) de->inode = cpu_to_le32(parent->i_ino); memcpy (de->name, "..\0", 4); ext2_set_de_type (de, inode); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); err = ext2_commit_chunk(page, 0, chunk_size); fail: page_cache_release(page); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 080419814bae..dffb86536285 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -195,9 +195,6 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, struct inode *inode = old_dentry->d_inode; int err; - if (inode->i_nlink >= EXT2_LINK_MAX) - return -EMLINK; - dquot_initialize(dir); inode->i_ctime = CURRENT_TIME_SEC; @@ -217,10 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) { struct inode * inode; - int err = -EMLINK; - - if (dir->i_nlink >= EXT2_LINK_MAX) - goto out; + int err; dquot_initialize(dir); @@ -346,11 +340,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= EXT2_LINK_MAX) - goto out_dir; - } err = ext2_add_link(new_dentry, old_inode); if (err) goto out_dir; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 0090595beb28..e1025c7a437a 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -919,6 +919,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits); + sb->s_max_links = EXT2_LINK_MAX; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) { sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE; @@ -1087,9 +1088,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount3; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { - iput(root); ext2_msg(sb, KERN_ERR, "error: get root inode failed"); ret = -ENOMEM; goto failed_mount3; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 726c7ef6cdf1..e0b45b93327b 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2046,10 +2046,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); goto failed_mount3; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); - iput(root); ret = -ENOMEM; goto failed_mount3; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 502c61fd7392..933900909ed0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3735,9 +3735,8 @@ no_journal: iput(root); goto failed_mount4; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { - iput(root); ext4_msg(sb, KERN_ERR, "get root dentry failed"); ret = -ENOMEM; goto failed_mount4; @@ -5056,6 +5055,9 @@ static int __init ext4_init_fs(void) { int i, err; + ext4_li_info = NULL; + mutex_init(&ext4_li_mtx); + ext4_check_flag_values(); for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { @@ -5094,8 +5096,6 @@ static int __init ext4_init_fs(void) if (err) goto out; - ext4_li_info = NULL; - mutex_init(&ext4_li_mtx); return 0; out: unregister_as_ext2(); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 3ab841054d53..21687e31acc0 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1496,11 +1496,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, root_inode->i_ino = MSDOS_ROOT_INO; root_inode->i_version = 1; error = fat_read_root(root_inode); - if (error < 0) + if (error < 0) { + iput(root_inode); goto out_fail; + } error = -ENOMEM; insert_inode_hash(root_inode); - sb->s_root = d_alloc_root(root_inode); + sb->s_root = d_make_root(root_inode); if (!sb->s_root) { fat_msg(sb, KERN_ERR, "get root inode failed"); goto out_fail; @@ -1516,8 +1518,6 @@ out_invalid: out_fail: if (fat_inode) iput(fat_inode); - if (root_inode) - iput(root_inode); unload_nls(sbi->nls_io); unload_nls(sbi->nls_disk); if (sbi->options.iocharset != fat_default_iocharset) diff --git a/fs/file_table.c b/fs/file_table.c index 20002e39754d..70f2a0fd6aec 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -204,7 +204,7 @@ EXPORT_SYMBOL(alloc_file); * to write to @file, along with access to write through * its vfsmount. */ -void drop_file_write_access(struct file *file) +static void drop_file_write_access(struct file *file) { struct vfsmount *mnt = file->f_path.mnt; struct dentry *dentry = file->f_path.dentry; @@ -219,7 +219,6 @@ void drop_file_write_access(struct file *file) mnt_drop_write(mnt); file_release_write(file); } -EXPORT_SYMBOL_GPL(drop_file_write_access); /* the real guts of fput() - releasing the last reference to file */ diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 9d1c99558389..d4fabd26084e 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -224,9 +224,8 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) ret = PTR_ERR(root); goto out; } - sbp->s_root = d_alloc_root(root); + sbp->s_root = d_make_root(root); if (!sbp->s_root) { - iput(root); printk(KERN_WARNING "vxfs: unable to get root dentry.\n"); goto out_free_ilist; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f855916657ba..77b535ac7136 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -53,14 +53,6 @@ struct wb_writeback_work { }; /* - * Include the creation of the trace points after defining the - * wb_writeback_work structure so that the definition remains local to this - * file. - */ -#define CREATE_TRACE_POINTS -#include <trace/events/writeback.h> - -/* * We don't actually have pdflush, but this one is exported though /proc... */ int nr_pdflush_threads; @@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head) return list_entry(head, struct inode, i_wb_list); } +/* + * Include the creation of the trace points after defining the + * wb_writeback_work structure and inline functions so that the definition + * remains local to this file. + */ +#define CREATE_TRACE_POINTS +#include <trace/events/writeback.h> + /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ static void bdi_wakeup_flusher(struct backing_dev_info *bdi) { @@ -1284,7 +1284,7 @@ int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason) EXPORT_SYMBOL(writeback_inodes_sb_if_idle); /** - * writeback_inodes_sb_if_idle - start writeback if none underway + * writeback_inodes_sb_nr_if_idle - start writeback if none underway * @sb: the superblock * @nr: the number of pages to write * @reason: reason why some writeback work was initiated diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 78b519c13536..6324c4274959 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -26,11 +26,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; + path_get_longterm(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get_longterm(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) @@ -45,11 +45,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; + path_get_longterm(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get_longterm(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); @@ -57,6 +57,14 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) path_put_longterm(&old_pwd); } +static inline int replace_path(struct path *p, const struct path *old, const struct path *new) +{ + if (likely(p->dentry != old->dentry || p->mnt != old->mnt)) + return 0; + *p = *new; + return 1; +} + void chroot_fs_refs(struct path *old_root, struct path *new_root) { struct task_struct *g, *p; @@ -68,21 +76,16 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { + int hits = 0; spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); - if (fs->root.dentry == old_root->dentry - && fs->root.mnt == old_root->mnt) { - path_get_longterm(new_root); - fs->root = *new_root; + hits += replace_path(&fs->root, old_root, new_root); + hits += replace_path(&fs->pwd, old_root, new_root); + write_seqcount_end(&fs->seq); + while (hits--) { count++; - } - if (fs->pwd.dentry == old_root->dentry - && fs->pwd.mnt == old_root->mnt) { path_get_longterm(new_root); - fs->pwd = *new_root; - count++; } - write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); } task_unlock(p); @@ -107,10 +110,8 @@ void exit_fs(struct task_struct *tsk) int kill; task_lock(tsk); spin_lock(&fs->lock); - write_seqcount_begin(&fs->seq); tsk->fs = NULL; kill = !--fs->users; - write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); task_unlock(tsk); if (kill) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 5f3368ab0fa9..7df2b5e8fbe1 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -838,10 +838,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, } } if (page) { - void *mapaddr = kmap_atomic(page, KM_USER0); + void *mapaddr = kmap_atomic(page); void *buf = mapaddr + offset; offset += fuse_copy_do(cs, &buf, &count); - kunmap_atomic(mapaddr, KM_USER0); + kunmap_atomic(mapaddr); } else offset += fuse_copy_do(cs, NULL, &count); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4a199fd93fbd..a841868bf9ce 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1887,11 +1887,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) goto out; - vaddr = kmap_atomic(pages[0], KM_USER0); + vaddr = kmap_atomic(pages[0]); err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); - kunmap_atomic(vaddr, KM_USER0); + kunmap_atomic(vaddr); if (err) goto out; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 64cf8d07393e..4aec5995867e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -988,14 +988,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; root = fuse_get_root_inode(sb, d.rootmode); - if (!root) + root_dentry = d_make_root(root); + if (!root_dentry) goto err_put_conn; - - root_dentry = d_alloc_root(root); - if (!root_dentry) { - iput(root); - goto err_put_conn; - } /* only now - we want root dentry with NULL ->d_op */ sb->s_d_op = &fuse_dentry_operations; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 501e5cba09b3..38b7a74a0f91 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -434,12 +434,12 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) if (error) return error; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode))) dsize = (dibh->b_size - sizeof(struct gfs2_dinode)); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); flush_dcache_page(page); brelse(dibh); SetPageUptodate(page); @@ -542,9 +542,9 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, page = read_cache_page(mapping, index, __gfs2_readpage, NULL); if (IS_ERR(page)) return PTR_ERR(page); - p = kmap_atomic(page, KM_USER0); + p = kmap_atomic(page); memcpy(buf + copied, p + offset, amt); - kunmap_atomic(p, KM_USER0); + kunmap_atomic(p); mark_page_accessed(page); page_cache_release(page); copied += amt; @@ -788,11 +788,11 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(buf + pos, kaddr + pos, copied); memset(kaddr + pos + copied, 0, len - copied); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (!PageUptodate(page)) SetPageUptodate(page); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 14a704015970..197c5c47e577 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -60,7 +60,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, int release = 0; if (!page || page->index) { - page = grab_cache_page(inode->i_mapping, 0); + page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS); if (!page) return -ENOMEM; release = 1; @@ -930,7 +930,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) struct page *page; int err; - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); if (!page) return 0; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index c5fb3597f696..76834587a8a4 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -313,6 +313,8 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return gfs2_get_flags(filp, (u32 __user *)arg); case FS_IOC_SETFLAGS: return gfs2_set_flags(filp, (u32 __user *)arg); + case FITRIM: + return gfs2_fitrim(filp, (void __user *)arg); } return -ENOTTY; } @@ -674,6 +676,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *dibh; int error; + loff_t size = len; unsigned int nr_blks; sector_t lblock = offset >> inode->i_blkbits; @@ -707,8 +710,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, goto out; } } - if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) - i_size_write(inode, offset + len); + if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) + i_size_write(inode, offset + size); mark_inode_dirty(inode); @@ -777,12 +780,14 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (unlikely(error)) goto out_uninit; - if (!gfs2_write_alloc_required(ip, offset, len)) - goto out_unlock; - while (len > 0) { if (len < bytes) bytes = len; + if (!gfs2_write_alloc_required(ip, offset, bytes)) { + len -= bytes; + offset += bytes; + continue; + } qa = gfs2_qadata_get(ip); if (!qa) { error = -ENOMEM; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 376816fcd040..dab2526071cc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -29,6 +29,7 @@ #include <linux/rcupdate.h> #include <linux/rculist_bl.h> #include <linux/bit_spinlock.h> +#include <linux/percpu.h> #include "gfs2.h" #include "incore.h" @@ -167,14 +168,19 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl) spin_unlock(&lru_lock); } -static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) +static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl) { - spin_lock(&lru_lock); if (!list_empty(&gl->gl_lru)) { list_del_init(&gl->gl_lru); atomic_dec(&lru_count); clear_bit(GLF_LRU, &gl->gl_flags); } +} + +static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) +{ + spin_lock(&lru_lock); + __gfs2_glock_remove_from_lru(gl); spin_unlock(&lru_lock); } @@ -217,11 +223,12 @@ void gfs2_glock_put(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); - if (atomic_dec_and_test(&gl->gl_ref)) { + if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { + __gfs2_glock_remove_from_lru(gl); + spin_unlock(&lru_lock); spin_lock_bucket(gl->gl_hash); hlist_bl_del_rcu(&gl->gl_list); spin_unlock_bucket(gl->gl_hash); - gfs2_glock_remove_from_lru(gl); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); GLOCK_BUG_ON(gl, mapping && mapping->nrpages); trace_gfs2_glock_put(gl); @@ -537,6 +544,11 @@ __acquires(&gl->gl_spin) do_error(gl, 0); /* Fail queued try locks */ } gl->gl_req = target; + set_bit(GLF_BLOCKING, &gl->gl_flags); + if ((gl->gl_req == LM_ST_UNLOCKED) || + (gl->gl_state == LM_ST_EXCLUSIVE) || + (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) + clear_bit(GLF_BLOCKING, &gl->gl_flags); spin_unlock(&gl->gl_spin); if (glops->go_xmote_th) glops->go_xmote_th(gl); @@ -738,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, return -ENOMEM; atomic_inc(&sdp->sd_glock_disposal); + gl->gl_sbd = sdp; gl->gl_flags = 0; gl->gl_name = name; atomic_set(&gl->gl_ref, 1); @@ -746,12 +759,17 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_demote_state = LM_ST_EXCLUSIVE; gl->gl_hash = hash; gl->gl_ops = glops; - snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number); + gl->gl_dstamp = ktime_set(0, 0); + preempt_disable(); + /* We use the global stats to estimate the initial per-glock stats */ + gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type]; + preempt_enable(); + gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; + gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); gl->gl_lksb.sb_lvbptr = gl->gl_lvb; gl->gl_tchange = jiffies; gl->gl_object = NULL; - gl->gl_sbd = sdp; gl->gl_hold_time = GL_GLOCK_DFT_HOLD; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); INIT_WORK(&gl->gl_delete, delete_work_func); @@ -993,6 +1011,8 @@ fail: } set_bit(GLF_QUEUED, &gl->gl_flags); trace_gfs2_glock_queue(gh, 1); + gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT); + gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT); if (likely(insert_pt == NULL)) { list_add_tail(&gh->gh_list, &gl->gl_holders); if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) @@ -1652,6 +1672,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'L'; if (gl->gl_object) *p++ = 'o'; + if (test_bit(GLF_BLOCKING, gflags)) + *p++ = 'b'; *p = 0; return buf; } @@ -1708,8 +1730,78 @@ out: return error; } +static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) +{ + struct gfs2_glock *gl = iter_ptr; + + seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n", + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number, + (long long)gl->gl_stats.stats[GFS2_LKS_SRTT], + (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR], + (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB], + (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB], + (long long)gl->gl_stats.stats[GFS2_LKS_SIRT], + (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR], + (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT], + (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]); + return 0; +} + +static const char *gfs2_gltype[] = { + "type", + "reserved", + "nondisk", + "inode", + "rgrp", + "meta", + "iopen", + "flock", + "plock", + "quota", + "journal", +}; + +static const char *gfs2_stype[] = { + [GFS2_LKS_SRTT] = "srtt", + [GFS2_LKS_SRTTVAR] = "srttvar", + [GFS2_LKS_SRTTB] = "srttb", + [GFS2_LKS_SRTTVARB] = "srttvarb", + [GFS2_LKS_SIRT] = "sirt", + [GFS2_LKS_SIRTVAR] = "sirtvar", + [GFS2_LKS_DCOUNT] = "dlm", + [GFS2_LKS_QCOUNT] = "queue", +}; + +#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype)) + +static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) +{ + struct gfs2_glock_iter *gi = seq->private; + struct gfs2_sbd *sdp = gi->sdp; + unsigned index = gi->hash >> 3; + unsigned subindex = gi->hash & 0x07; + s64 value; + int i; + + if (index == 0 && subindex != 0) + return 0; + seq_printf(seq, "%-10s %8s:", gfs2_gltype[index], + (index == 0) ? "cpu": gfs2_stype[subindex]); + for_each_possible_cpu(i) { + const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); + if (index == 0) { + value = i; + } else { + value = lkstats->lkstats[index - 1].stats[subindex]; + } + seq_printf(seq, " %15lld", (long long)value); + } + seq_putc(seq, '\n'); + return 0; +} int __init gfs2_glock_init(void) { @@ -1822,6 +1914,35 @@ static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) return dump_glock(seq, iter_ptr); } +static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct gfs2_glock_iter *gi = seq->private; + + gi->hash = *pos; + if (*pos >= GFS2_NR_SBSTATS) + return NULL; + preempt_disable(); + return SEQ_START_TOKEN; +} + +static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr, + loff_t *pos) +{ + struct gfs2_glock_iter *gi = seq->private; + (*pos)++; + gi->hash++; + if (gi->hash >= GFS2_NR_SBSTATS) { + preempt_enable(); + return NULL; + } + return SEQ_START_TOKEN; +} + +static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr) +{ + preempt_enable(); +} + static const struct seq_operations gfs2_glock_seq_ops = { .start = gfs2_glock_seq_start, .next = gfs2_glock_seq_next, @@ -1829,7 +1950,21 @@ static const struct seq_operations gfs2_glock_seq_ops = { .show = gfs2_glock_seq_show, }; -static int gfs2_debugfs_open(struct inode *inode, struct file *file) +static const struct seq_operations gfs2_glstats_seq_ops = { + .start = gfs2_glock_seq_start, + .next = gfs2_glock_seq_next, + .stop = gfs2_glock_seq_stop, + .show = gfs2_glstats_seq_show, +}; + +static const struct seq_operations gfs2_sbstats_seq_ops = { + .start = gfs2_sbstats_seq_start, + .next = gfs2_sbstats_seq_next, + .stop = gfs2_sbstats_seq_stop, + .show = gfs2_sbstats_seq_show, +}; + +static int gfs2_glocks_open(struct inode *inode, struct file *file) { int ret = seq_open_private(file, &gfs2_glock_seq_ops, sizeof(struct gfs2_glock_iter)); @@ -1841,9 +1976,49 @@ static int gfs2_debugfs_open(struct inode *inode, struct file *file) return ret; } -static const struct file_operations gfs2_debug_fops = { +static int gfs2_glstats_open(struct inode *inode, struct file *file) +{ + int ret = seq_open_private(file, &gfs2_glstats_seq_ops, + sizeof(struct gfs2_glock_iter)); + if (ret == 0) { + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + } + return ret; +} + +static int gfs2_sbstats_open(struct inode *inode, struct file *file) +{ + int ret = seq_open_private(file, &gfs2_sbstats_seq_ops, + sizeof(struct gfs2_glock_iter)); + if (ret == 0) { + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + } + return ret; +} + +static const struct file_operations gfs2_glocks_fops = { + .owner = THIS_MODULE, + .open = gfs2_glocks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static const struct file_operations gfs2_glstats_fops = { .owner = THIS_MODULE, - .open = gfs2_debugfs_open, + .open = gfs2_glstats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static const struct file_operations gfs2_sbstats_fops = { + .owner = THIS_MODULE, + .open = gfs2_sbstats_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, @@ -1857,20 +2032,45 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, - &gfs2_debug_fops); + &gfs2_glocks_fops); if (!sdp->debugfs_dentry_glocks) - return -ENOMEM; + goto fail; + + sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_glstats_fops); + if (!sdp->debugfs_dentry_glstats) + goto fail; + + sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_sbstats_fops); + if (!sdp->debugfs_dentry_sbstats) + goto fail; return 0; +fail: + gfs2_delete_debugfs_file(sdp); + return -ENOMEM; } void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) { - if (sdp && sdp->debugfs_dir) { + if (sdp->debugfs_dir) { if (sdp->debugfs_dentry_glocks) { debugfs_remove(sdp->debugfs_dentry_glocks); sdp->debugfs_dentry_glocks = NULL; } + if (sdp->debugfs_dentry_glstats) { + debugfs_remove(sdp->debugfs_dentry_glstats); + sdp->debugfs_dentry_glstats = NULL; + } + if (sdp->debugfs_dentry_sbstats) { + debugfs_remove(sdp->debugfs_dentry_sbstats); + sdp->debugfs_dentry_sbstats = NULL; + } debugfs_remove(sdp->debugfs_dir); sdp->debugfs_dir = NULL; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 97742a7ea9cc..47d0bda5ac2b 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -19,6 +19,8 @@ #include <linux/rculist_bl.h> #include <linux/completion.h> #include <linux/rbtree.h> +#include <linux/ktime.h> +#include <linux/percpu.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -205,6 +207,22 @@ struct gfs2_glock_operations { }; enum { + GFS2_LKS_SRTT = 0, /* Non blocking smoothed round trip time */ + GFS2_LKS_SRTTVAR = 1, /* Non blocking smoothed variance */ + GFS2_LKS_SRTTB = 2, /* Blocking smoothed round trip time */ + GFS2_LKS_SRTTVARB = 3, /* Blocking smoothed variance */ + GFS2_LKS_SIRT = 4, /* Smoothed Inter-request time */ + GFS2_LKS_SIRTVAR = 5, /* Smoothed Inter-request variance */ + GFS2_LKS_DCOUNT = 6, /* Count of dlm requests */ + GFS2_LKS_QCOUNT = 7, /* Count of gfs2_holder queues */ + GFS2_NR_LKSTATS +}; + +struct gfs2_lkstats { + s64 stats[GFS2_NR_LKSTATS]; +}; + +enum { /* States */ HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ HIF_FIRST = 7, @@ -238,10 +256,12 @@ enum { GLF_QUEUED = 12, GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ + GLF_BLOCKING = 15, }; struct gfs2_glock { struct hlist_bl_node gl_list; + struct gfs2_sbd *gl_sbd; unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; atomic_t gl_ref; @@ -261,16 +281,14 @@ struct gfs2_glock { struct list_head gl_holders; const struct gfs2_glock_operations *gl_ops; - char gl_strname[GDLM_STRNAME_BYTES]; + ktime_t gl_dstamp; + struct gfs2_lkstats gl_stats; struct dlm_lksb gl_lksb; char gl_lvb[32]; unsigned long gl_tchange; void *gl_object; struct list_head gl_lru; - - struct gfs2_sbd *gl_sbd; - struct list_head gl_ail_list; atomic_t gl_ail_count; atomic_t gl_revokes; @@ -560,8 +578,14 @@ struct lm_lockstruct { uint32_t *ls_recover_result; /* result of last jid recovery */ }; +struct gfs2_pcpu_lkstats { + /* One struct for each glock type */ + struct gfs2_lkstats lkstats[10]; +}; + struct gfs2_sbd { struct super_block *sd_vfs; + struct gfs2_pcpu_lkstats __percpu *sd_lkstats; struct kobject sd_kobj; unsigned long sd_flags; /* SDF_... */ struct gfs2_sb_host sd_sb; @@ -620,7 +644,6 @@ struct gfs2_sbd { int sd_rindex_uptodate; spinlock_t sd_rindex_spin; - struct mutex sd_rindex_mutex; struct rb_root sd_rindex_tree; unsigned int sd_rgrps; unsigned int sd_max_rg_data; @@ -725,8 +748,23 @@ struct gfs2_sbd { unsigned long sd_last_warning; struct dentry *debugfs_dir; /* debugfs directory */ - struct dentry *debugfs_dentry_glocks; /* for debugfs */ + struct dentry *debugfs_dentry_glocks; + struct dentry *debugfs_dentry_glstats; + struct dentry *debugfs_dentry_sbstats; }; +static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) +{ + gl->gl_stats.stats[which]++; +} + +static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which) +{ + const struct gfs2_sbd *sdp = gl->gl_sbd; + preempt_disable(); + this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++; + preempt_enable(); +} + #endif /* __INCORE_DOT_H__ */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a7d611b93f0f..c98a60ee6dfd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -391,10 +391,6 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) int error; int dblocks = 1; - error = gfs2_rindex_update(sdp); - if (error) - fs_warn(sdp, "rindex update returns %d\n", error); - error = gfs2_inplace_reserve(dip, RES_DINODE); if (error) goto out; @@ -1040,9 +1036,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); if (!rgd) goto out_inodes; + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); @@ -1258,7 +1255,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, * this is the case of the target file already existing * so we unlink before doing the rename */ - nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr); + nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1); if (nrgd) gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 8944d1e32ab5..f8411bd1b805 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -18,14 +18,106 @@ #include "glock.h" #include "util.h" #include "sys.h" +#include "trace_gfs2.h" extern struct workqueue_struct *gfs2_control_wq; +/** + * gfs2_update_stats - Update time based stats + * @mv: Pointer to mean/variance structure to update + * @sample: New data to include + * + * @delta is the difference between the current rtt sample and the + * running average srtt. We add 1/8 of that to the srtt in order to + * update the current srtt estimate. The varience estimate is a bit + * more complicated. We subtract the abs value of the @delta from + * the current variance estimate and add 1/4 of that to the running + * total. + * + * Note that the index points at the array entry containing the smoothed + * mean value, and the variance is always in the following entry + * + * Reference: TCP/IP Illustrated, vol 2, p. 831,832 + * All times are in units of integer nanoseconds. Unlike the TCP/IP case, + * they are not scaled fixed point. + */ + +static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, + s64 sample) +{ + s64 delta = sample - s->stats[index]; + s->stats[index] += (delta >> 3); + index++; + s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2); +} + +/** + * gfs2_update_reply_times - Update locking statistics + * @gl: The glock to update + * + * This assumes that gl->gl_dstamp has been set earlier. + * + * The rtt (lock round trip time) is an estimate of the time + * taken to perform a dlm lock request. We update it on each + * reply from the dlm. + * + * The blocking flag is set on the glock for all dlm requests + * which may potentially block due to lock requests from other nodes. + * DLM requests where the current lock state is exclusive, the + * requested state is null (or unlocked) or where the TRY or + * TRY_1CB flags are set are classified as non-blocking. All + * other DLM requests are counted as (potentially) blocking. + */ +static inline void gfs2_update_reply_times(struct gfs2_glock *gl) +{ + struct gfs2_pcpu_lkstats *lks; + const unsigned gltype = gl->gl_name.ln_type; + unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ? + GFS2_LKS_SRTTB : GFS2_LKS_SRTT; + s64 rtt; + + preempt_disable(); + rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp)); + lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats); + gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */ + gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */ + preempt_enable(); + + trace_gfs2_glock_lock_time(gl, rtt); +} + +/** + * gfs2_update_request_times - Update locking statistics + * @gl: The glock to update + * + * The irt (lock inter-request times) measures the average time + * between requests to the dlm. It is updated immediately before + * each dlm call. + */ + +static inline void gfs2_update_request_times(struct gfs2_glock *gl) +{ + struct gfs2_pcpu_lkstats *lks; + const unsigned gltype = gl->gl_name.ln_type; + ktime_t dstamp; + s64 irt; + + preempt_disable(); + dstamp = gl->gl_dstamp; + gl->gl_dstamp = ktime_get_real(); + irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp)); + lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats); + gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */ + gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */ + preempt_enable(); +} + static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state; + gfs2_update_reply_times(gl); BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) @@ -111,7 +203,7 @@ static int make_mode(const unsigned int lmstate) static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, const int req) { - u32 lkf = 0; + u32 lkf = DLM_LKF_VALBLK; if (gfs_flags & LM_FLAG_TRY) lkf |= DLM_LKF_NOQUEUE; @@ -138,26 +230,43 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, if (lkid != 0) lkf |= DLM_LKF_CONVERT; - lkf |= DLM_LKF_VALBLK; - return lkf; } +static void gfs2_reverse_hex(char *c, u64 value) +{ + while (value) { + *c-- = hex_asc[value & 0x0f]; + value >>= 4; + } +} + static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, unsigned int flags) { struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; int req; u32 lkf; + char strname[GDLM_STRNAME_BYTES] = ""; req = make_mode(req_state); lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); - + gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); + gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); + if (gl->gl_lksb.sb_lkid) { + gfs2_update_request_times(gl); + } else { + memset(strname, ' ', GDLM_STRNAME_BYTES - 1); + strname[GDLM_STRNAME_BYTES - 1] = '\0'; + gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type); + gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number); + gl->gl_dstamp = ktime_get_real(); + } /* * Submit the actual lock request. */ - return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, + return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); } @@ -172,6 +281,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl) return; } + clear_bit(GLF_BLOCKING, &gl->gl_flags); + gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); + gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); + gfs2_update_request_times(gl); error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, NULL, gl); if (error) { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 756fae9eaf8f..4752eadc7f6e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -19,6 +19,7 @@ #include <linux/freezer.h> #include <linux/bio.h> #include <linux/writeback.h> +#include <linux/list_sort.h> #include "gfs2.h" #include "incore.h" @@ -358,7 +359,7 @@ retry: return 0; } -static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) +u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) { struct gfs2_journal_extent *je; @@ -467,8 +468,8 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) void gfs2_log_incr_head(struct gfs2_sbd *sdp) { - if (sdp->sd_log_flush_head == sdp->sd_log_tail) - BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head); + BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) && + (sdp->sd_log_flush_head != sdp->sd_log_head)); if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { sdp->sd_log_flush_head = 0; @@ -476,99 +477,6 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp) } } -/** - * gfs2_log_write_endio - End of I/O for a log buffer - * @bh: The buffer head - * @uptodate: I/O Status - * - */ - -static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate) -{ - struct gfs2_sbd *sdp = bh->b_private; - bh->b_private = NULL; - - end_buffer_write_sync(bh, uptodate); - if (atomic_dec_and_test(&sdp->sd_log_in_flight)) - wake_up(&sdp->sd_log_flush_wait); -} - -/** - * gfs2_log_get_buf - Get and initialize a buffer to use for log control data - * @sdp: The GFS2 superblock - * - * Returns: the buffer_head - */ - -struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) -{ - u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); - struct buffer_head *bh; - - bh = sb_getblk(sdp->sd_vfs, blkno); - lock_buffer(bh); - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - gfs2_log_incr_head(sdp); - atomic_inc(&sdp->sd_log_in_flight); - bh->b_private = sdp; - bh->b_end_io = gfs2_log_write_endio; - - return bh; -} - -/** - * gfs2_fake_write_endio - - * @bh: The buffer head - * @uptodate: The I/O Status - * - */ - -static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate) -{ - struct buffer_head *real_bh = bh->b_private; - struct gfs2_bufdata *bd = real_bh->b_private; - struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd; - - end_buffer_write_sync(bh, uptodate); - free_buffer_head(bh); - unlock_buffer(real_bh); - brelse(real_bh); - if (atomic_dec_and_test(&sdp->sd_log_in_flight)) - wake_up(&sdp->sd_log_flush_wait); -} - -/** - * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log - * @sdp: the filesystem - * @data: the data the buffer_head should point to - * - * Returns: the log buffer descriptor - */ - -struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, - struct buffer_head *real) -{ - u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); - struct buffer_head *bh; - - bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); - atomic_set(&bh->b_count, 1); - bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock); - set_bh_page(bh, real->b_page, bh_offset(real)); - bh->b_blocknr = blkno; - bh->b_size = sdp->sd_sb.sb_bsize; - bh->b_bdev = sdp->sd_vfs->s_bdev; - bh->b_private = real; - bh->b_end_io = gfs2_fake_write_endio; - - gfs2_log_incr_head(sdp); - atomic_inc(&sdp->sd_log_in_flight); - - return bh; -} - static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) { unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); @@ -583,66 +491,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) sdp->sd_log_tail = new_tail; } -/** - * log_write_header - Get and initialize a journal header buffer - * @sdp: The GFS2 superblock - * - * Returns: the initialized log buffer descriptor - */ -static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) -{ - u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); - struct buffer_head *bh; - struct gfs2_log_header *lh; - unsigned int tail; - u32 hash; - - bh = sb_getblk(sdp->sd_vfs, blkno); - lock_buffer(bh); - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - - gfs2_ail1_empty(sdp); - tail = current_tail(sdp); - - lh = (struct gfs2_log_header *)bh->b_data; - memset(lh, 0, sizeof(struct gfs2_log_header)); - lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); - lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); - lh->lh_header.__pad0 = cpu_to_be64(0); - lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); - lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); - lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); - lh->lh_flags = cpu_to_be32(flags); - lh->lh_tail = cpu_to_be32(tail); - lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head); - hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); - lh->lh_hash = cpu_to_be32(hash); - - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) - submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); - else - submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); - wait_on_buffer(bh); - - if (!buffer_uptodate(bh)) - gfs2_io_error_bh(sdp, bh); - brelse(bh); - - if (sdp->sd_log_tail != tail) - log_pull_tail(sdp, tail); - else - gfs2_assert_withdraw(sdp, !pull); - - sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); - gfs2_log_incr_head(sdp); -} - -static void log_flush_commit(struct gfs2_sbd *sdp) +static void log_flush_wait(struct gfs2_sbd *sdp) { DEFINE_WAIT(wait); @@ -655,8 +505,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp) } while(atomic_read(&sdp->sd_log_in_flight)); finish_wait(&sdp->sd_log_flush_wait, &wait); } +} + +static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct gfs2_bufdata *bda, *bdb; - log_write_header(sdp, 0, 0); + bda = list_entry(a, struct gfs2_bufdata, bd_le.le_list); + bdb = list_entry(b, struct gfs2_bufdata, bd_le.le_list); + + if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) + return -1; + if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) + return 1; + return 0; } static void gfs2_ordered_write(struct gfs2_sbd *sdp) @@ -666,6 +528,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) LIST_HEAD(written); gfs2_log_lock(sdp); + list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); while (!list_empty(&sdp->sd_log_le_ordered)) { bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); list_move(&bd->bd_le.le_list, &written); @@ -711,6 +574,68 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) } /** + * log_write_header - Get and initialize a journal header buffer + * @sdp: The GFS2 superblock + * + * Returns: the initialized log buffer descriptor + */ + +static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) +{ + u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head); + struct buffer_head *bh; + struct gfs2_log_header *lh; + unsigned int tail; + u32 hash; + + bh = sb_getblk(sdp->sd_vfs, blkno); + lock_buffer(bh); + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + + gfs2_ail1_empty(sdp); + tail = current_tail(sdp); + + lh = (struct gfs2_log_header *)bh->b_data; + memset(lh, 0, sizeof(struct gfs2_log_header)); + lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); + lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); + lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); + lh->lh_flags = cpu_to_be32(flags); + lh->lh_tail = cpu_to_be32(tail); + lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head); + hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); + lh->lh_hash = cpu_to_be32(hash); + + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { + gfs2_ordered_wait(sdp); + log_flush_wait(sdp); + submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); + } else { + submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); + } + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + brelse(bh); + + if (sdp->sd_log_tail != tail) + log_pull_tail(sdp, tail); + else + gfs2_assert_withdraw(sdp, !pull); + + sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); + gfs2_log_incr_head(sdp); +} + +/** * gfs2_log_flush - flush incore transaction(s) * @sdp: the filesystem * @gl: The glock structure to flush. If NULL, flush the whole incore log @@ -753,11 +678,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) gfs2_ordered_write(sdp); lops_before_commit(sdp); - gfs2_ordered_wait(sdp); - if (sdp->sd_log_head != sdp->sd_log_flush_head) - log_flush_commit(sdp); - else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ + if (sdp->sd_log_head != sdp->sd_log_flush_head) { + log_write_header(sdp, 0, 0); + } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ gfs2_log_lock(sdp); atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index ab0621698b73..ff07454b582c 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -53,10 +53,7 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); - -extern struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); -extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, - struct buffer_head *real); +extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn); extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 0301be655b12..6b1efb594d90 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -12,6 +12,7 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> +#include <linux/mempool.h> #include <linux/gfs2_ondisk.h> #include <linux/bio.h> #include <linux/fs.h> @@ -76,7 +77,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) if (bi->bi_clone == 0) return; if (sdp->sd_args.ar_discard) - gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi); + gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL); memcpy(bi->bi_clone + bi->bi_offset, bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); clear_bit(GBF_FULL, &bi->bi_flags); @@ -143,6 +144,98 @@ static inline __be64 *bh_ptr_end(struct buffer_head *bh) return (__force __be64 *)(bh->b_data + bh->b_size); } +/** + * gfs2_log_write_endio - End of I/O for a log buffer + * @bh: The buffer head + * @uptodate: I/O Status + * + */ + +static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate) +{ + struct gfs2_sbd *sdp = bh->b_private; + bh->b_private = NULL; + + end_buffer_write_sync(bh, uptodate); + if (atomic_dec_and_test(&sdp->sd_log_in_flight)) + wake_up(&sdp->sd_log_flush_wait); +} + +/** + * gfs2_log_get_buf - Get and initialize a buffer to use for log control data + * @sdp: The GFS2 superblock + * + * tReturns: the buffer_head + */ + +static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) +{ + u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head); + struct buffer_head *bh; + + bh = sb_getblk(sdp->sd_vfs, blkno); + lock_buffer(bh); + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + gfs2_log_incr_head(sdp); + atomic_inc(&sdp->sd_log_in_flight); + bh->b_private = sdp; + bh->b_end_io = gfs2_log_write_endio; + + return bh; +} + +/** + * gfs2_fake_write_endio - + * @bh: The buffer head + * @uptodate: The I/O Status + * + */ + +static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate) +{ + struct buffer_head *real_bh = bh->b_private; + struct gfs2_bufdata *bd = real_bh->b_private; + struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd; + + end_buffer_write_sync(bh, uptodate); + mempool_free(bh, gfs2_bh_pool); + unlock_buffer(real_bh); + brelse(real_bh); + if (atomic_dec_and_test(&sdp->sd_log_in_flight)) + wake_up(&sdp->sd_log_flush_wait); +} + +/** + * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log + * @sdp: the filesystem + * @data: the data the buffer_head should point to + * + * Returns: the log buffer descriptor + */ + +static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, + struct buffer_head *real) +{ + u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head); + struct buffer_head *bh; + + bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS); + atomic_set(&bh->b_count, 1); + bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock); + set_bh_page(bh, real->b_page, bh_offset(real)); + bh->b_blocknr = blkno; + bh->b_size = sdp->sd_sb.sb_bsize; + bh->b_bdev = sdp->sd_vfs->s_bdev; + bh->b_private = real; + bh->b_end_io = gfs2_fake_write_endio; + + gfs2_log_incr_head(sdp); + atomic_inc(&sdp->sd_log_in_flight); + + return bh; +} static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) { @@ -553,11 +646,11 @@ static void gfs2_check_magic(struct buffer_head *bh) __be32 *ptr; clear_buffer_escaped(bh); - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); ptr = kaddr + bh_offset(bh); if (*ptr == cpu_to_be32(GFS2_MAGIC)) set_buffer_escaped(bh); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -594,10 +687,10 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, if (buffer_escaped(bd->bd_bh)) { void *kaddr; bh1 = gfs2_log_get_buf(sdp); - kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0); + kaddr = kmap_atomic(bd->bd_bh->b_page); memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh), bh1->b_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); *(__be32 *)bh1->b_data = 0; clear_buffer_escaped(bd->bd_bh); unlock_buffer(bd->bd_bh); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index a8d9bcd0e19c..754426b1e52c 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -17,6 +17,7 @@ #include <linux/rcupdate.h> #include <linux/rculist_bl.h> #include <linux/atomic.h> +#include <linux/mempool.h> #include "gfs2.h" #include "incore.h" @@ -69,6 +70,16 @@ static void gfs2_init_gl_aspace_once(void *foo) address_space_init_once(mapping); } +static void *gfs2_bh_alloc(gfp_t mask, void *data) +{ + return alloc_buffer_head(mask); +} + +static void gfs2_bh_free(void *ptr, void *data) +{ + return free_buffer_head(ptr); +} + /** * init_gfs2_fs - Register GFS2 as a filesystem * @@ -151,6 +162,10 @@ static int __init init_gfs2_fs(void) gfs2_control_wq = alloc_workqueue("gfs2_control", WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); if (!gfs2_control_wq) + goto fail_recovery; + + gfs2_bh_pool = mempool_create(1024, gfs2_bh_alloc, gfs2_bh_free, NULL); + if (!gfs2_bh_pool) goto fail_control; gfs2_register_debugfs(); @@ -160,6 +175,8 @@ static int __init init_gfs2_fs(void) return 0; fail_control: + destroy_workqueue(gfs2_control_wq); +fail_recovery: destroy_workqueue(gfs_recovery_wq); fail_wq: unregister_filesystem(&gfs2meta_fs_type); @@ -208,6 +225,7 @@ static void __exit exit_gfs2_fs(void) rcu_barrier(); + mempool_destroy(gfs2_bh_pool); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); kmem_cache_destroy(gfs2_bufdata_cachep); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 6aacf3f230a2..6f3a18f9e176 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -68,6 +68,12 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) sb->s_fs_info = sdp; sdp->sd_vfs = sb; + sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats); + if (!sdp->sd_lkstats) { + kfree(sdp); + return NULL; + } + set_bit(SDF_NOJOURNALID, &sdp->sd_flags); gfs2_tune_init(&sdp->sd_tune); @@ -77,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); - mutex_init(&sdp->sd_rindex_mutex); sdp->sd_rindex_tree.rb_node = NULL; INIT_LIST_HEAD(&sdp->sd_jindex_list); @@ -431,10 +436,9 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); return PTR_ERR(inode); } - dentry = d_alloc_root(inode); + dentry = d_make_root(inode); if (!dentry) { fs_err(sdp, "can't alloc %s dentry\n", name); - iput(inode); return -ENOMEM; } *dptr = dentry; @@ -800,6 +804,11 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) fs_err(sdp, "can't get quota file inode: %d\n", error); goto fail_rindex; } + + error = gfs2_rindex_update(sdp); + if (error) + goto fail_qinode; + return 0; fail_qinode: @@ -1216,6 +1225,7 @@ fail_sys: gfs2_sys_fs_del(sdp); fail: gfs2_delete_debugfs_file(sdp); + free_percpu(sdp->sd_lkstats); kfree(sdp); sb->s_fs_info = NULL; return error; @@ -1388,6 +1398,7 @@ static void gfs2_kill_sb(struct super_block *sb) shrink_dcache_sb(sb); kill_block_super(sb); gfs2_delete_debugfs_file(sdp); + free_percpu(sdp->sd_lkstats); kfree(sdp); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a45b21b03915..6019da3dcaed 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -681,7 +681,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, ptr = qp; nbytes = sizeof(struct gfs2_quota); get_a_page: - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); if (!page) return -ENOMEM; @@ -720,12 +720,12 @@ get_a_page: gfs2_trans_add_bh(ip->i_gl, bh, 0); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) nbytes = PAGE_CACHE_SIZE - offset; memcpy(kaddr + offset, ptr, nbytes); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); unlock_page(page); page_cache_release(page); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 981bfa32121a..19bde40b4864 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -327,23 +327,34 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) * Returns: The resource group, or NULL if not found */ -struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) +struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) { - struct rb_node **newn; + struct rb_node *n, *next; struct gfs2_rgrpd *cur; + if (gfs2_rindex_update(sdp)) + return NULL; + spin_lock(&sdp->sd_rindex_spin); - newn = &sdp->sd_rindex_tree.rb_node; - while (*newn) { - cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node); + n = sdp->sd_rindex_tree.rb_node; + while (n) { + cur = rb_entry(n, struct gfs2_rgrpd, rd_node); + next = NULL; if (blk < cur->rd_addr) - newn = &((*newn)->rb_left); + next = n->rb_left; else if (blk >= cur->rd_data0 + cur->rd_data) - newn = &((*newn)->rb_right); - else { + next = n->rb_right; + if (next == NULL) { spin_unlock(&sdp->sd_rindex_spin); + if (exact) { + if (blk < cur->rd_addr) + return NULL; + if (blk >= cur->rd_data0 + cur->rd_data) + return NULL; + } return cur; } + n = next; } spin_unlock(&sdp->sd_rindex_spin); @@ -532,7 +543,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) struct file_ra_state ra_state; int error, rgrps; - mutex_lock(&sdp->sd_rindex_mutex); file_ra_state_init(&ra_state, inode->i_mapping); for (rgrps = 0;; rgrps++) { loff_t pos = rgrps * sizeof(struct gfs2_rindex); @@ -545,11 +555,10 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) break; total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); } - mutex_unlock(&sdp->sd_rindex_mutex); return total_data; } -static void rgd_insert(struct gfs2_rgrpd *rgd) +static int rgd_insert(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; @@ -565,11 +574,13 @@ static void rgd_insert(struct gfs2_rgrpd *rgd) else if (rgd->rd_addr > cur->rd_addr) newn = &((*newn)->rb_right); else - return; + return -EEXIST; } rb_link_node(&rgd->rd_node, parent, newn); rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); + sdp->sd_rgrps++; + return 0; } /** @@ -623,10 +634,12 @@ static int read_rindex_entry(struct gfs2_inode *ip, if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; spin_lock(&sdp->sd_rindex_spin); - rgd_insert(rgd); - sdp->sd_rgrps++; + error = rgd_insert(rgd); spin_unlock(&sdp->sd_rindex_spin); - return error; + if (!error) + return 0; + + error = 0; /* someone else read in the rgrp; free it and ignore it */ fail: kfree(rgd->rd_bits); @@ -683,20 +696,22 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp) struct gfs2_glock *gl = ip->i_gl; struct gfs2_holder ri_gh; int error = 0; + int unlock_required = 0; /* Read new copy from disk if we don't have the latest */ if (!sdp->sd_rindex_uptodate) { - mutex_lock(&sdp->sd_rindex_mutex); - error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); - if (error) - return error; + if (!gfs2_glock_is_locked_by_me(gl)) { + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); + if (error) + return error; + unlock_required = 1; + } if (!sdp->sd_rindex_uptodate) error = gfs2_ri_update(ip); - gfs2_glock_dq_uninit(&ri_gh); - mutex_unlock(&sdp->sd_rindex_mutex); + if (unlock_required) + gfs2_glock_dq_uninit(&ri_gh); } - return error; } @@ -805,9 +820,9 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) } -void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, +int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct buffer_head *bh, - const struct gfs2_bitmap *bi) + const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) { struct super_block *sb = sdp->sd_vfs; struct block_device *bdev = sb->s_bdev; @@ -818,11 +833,19 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, sector_t nr_sects = 0; int rv; unsigned int x; + u32 trimmed = 0; + u8 diff; for (x = 0; x < bi->bi_len; x++) { - const u8 *orig = bh->b_data + bi->bi_offset + x; - const u8 *clone = bi->bi_clone + bi->bi_offset + x; - u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); + const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data; + clone += bi->bi_offset; + clone += x; + if (bh) { + const u8 *orig = bh->b_data + bi->bi_offset + x; + diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); + } else { + diff = ~(*clone | (*clone >> 1)); + } diff &= 0x55; if (diff == 0) continue; @@ -833,11 +856,14 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, if (nr_sects == 0) goto start_new_extent; if ((start + nr_sects) != blk) { - rv = blkdev_issue_discard(bdev, start, - nr_sects, GFP_NOFS, - 0); - if (rv) - goto fail; + if (nr_sects >= minlen) { + rv = blkdev_issue_discard(bdev, + start, nr_sects, + GFP_NOFS, 0); + if (rv) + goto fail; + trimmed += nr_sects; + } nr_sects = 0; start_new_extent: start = blk; @@ -848,15 +874,104 @@ start_new_extent: blk += sects_per_blk; } } - if (nr_sects) { + if (nr_sects >= minlen) { rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); if (rv) goto fail; + trimmed += nr_sects; } - return; + if (ptrimmed) + *ptrimmed = trimmed; + return 0; + fail: - fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); + if (sdp->sd_args.ar_discard) + fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); sdp->sd_args.ar_discard = 0; + return -EIO; +} + +/** + * gfs2_fitrim - Generate discard requests for unused bits of the filesystem + * @filp: Any file on the filesystem + * @argp: Pointer to the arguments (also used to pass result) + * + * Returns: 0 on success, otherwise error code + */ + +int gfs2_fitrim(struct file *filp, void __user *argp) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); + struct buffer_head *bh; + struct gfs2_rgrpd *rgd; + struct gfs2_rgrpd *rgd_end; + struct gfs2_holder gh; + struct fstrim_range r; + int ret = 0; + u64 amt; + u64 trimmed = 0; + unsigned int x; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + + if (argp == NULL) { + r.start = 0; + r.len = ULLONG_MAX; + r.minlen = 0; + } else if (copy_from_user(&r, argp, sizeof(r))) + return -EFAULT; + + rgd = gfs2_blk2rgrpd(sdp, r.start, 0); + rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); + + while (1) { + + ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (ret) + goto out; + + if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) { + /* Trim each bitmap in the rgrp */ + for (x = 0; x < rgd->rd_length; x++) { + struct gfs2_bitmap *bi = rgd->rd_bits + x; + ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt); + if (ret) { + gfs2_glock_dq_uninit(&gh); + goto out; + } + trimmed += amt; + } + + /* Mark rgrp as having been trimmed */ + ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); + if (ret == 0) { + bh = rgd->rd_bits[0].bi_bh; + rgd->rd_flags |= GFS2_RGF_TRIMMED; + gfs2_trans_add_bh(rgd->rd_gl, bh, 1); + gfs2_rgrp_out(rgd, bh->b_data); + gfs2_trans_end(sdp); + } + } + gfs2_glock_dq_uninit(&gh); + + if (rgd == rgd_end) + break; + + rgd = gfs2_rgrpd_get_next(rgd); + } + +out: + r.len = trimmed << 9; + if (argp && copy_to_user(argp, &r, sizeof(r))) + return -EFAULT; + + return ret; } /** @@ -1003,7 +1118,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) rgd = begin = ip->i_rgd; else - rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal); + rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); if (rgd == NULL) return -EBADSLT; @@ -1288,7 +1403,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 length, rgrp_blk, buf_blk; unsigned int buf; - rgd = gfs2_blk2rgrpd(sdp, bstart); + rgd = gfs2_blk2rgrpd(sdp, bstart, 1); if (!rgd) { if (gfs2_consist(sdp)) fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); @@ -1469,7 +1584,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) return; trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; - + rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); @@ -1555,14 +1670,9 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { struct gfs2_rgrpd *rgd; struct gfs2_holder rgd_gh; - int error; - - error = gfs2_rindex_update(sdp); - if (error) - return error; + int error = -EINVAL; - error = -EINVAL; - rgd = gfs2_blk2rgrpd(sdp, no_addr); + rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); if (!rgd) goto fail; @@ -1605,7 +1715,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) rgd = ip->i_rgd; else - rgd = gfs2_blk2rgrpd(sdp, block); + rgd = gfs2_blk2rgrpd(sdp, block, 1); if (!rgd) { fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); return; diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index ceec9106cdf4..b4b10f4de25f 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -11,6 +11,7 @@ #define __RGRP_DOT_H__ #include <linux/slab.h> +#include <linux/uaccess.h> struct gfs2_rgrpd; struct gfs2_sbd; @@ -18,7 +19,7 @@ struct gfs2_holder; extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); -extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); +extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); @@ -62,8 +63,9 @@ extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); -extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, - struct buffer_head *bh, - const struct gfs2_bitmap *bi); +extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, + struct buffer_head *bh, + const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); +extern int gfs2_fitrim(struct file *filp, void __user *argp); #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 4553ce515f62..6172fa77ad59 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1417,7 +1417,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) if (error) goto out; - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); if (!rgd) { gfs2_consist_inode(ip); error = -EIO; @@ -1557,6 +1557,7 @@ out: end_writeback(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; + flush_delayed_work_sync(&ip->i_gl->gl_work); gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); ip->i_gl = NULL; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 5d07609ec57d..dfa89cd75534 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -11,6 +11,7 @@ #include <linux/dlmconstants.h> #include <linux/gfs2_ondisk.h> #include <linux/writeback.h> +#include <linux/ktime.h> #include "incore.h" #include "glock.h" @@ -43,7 +44,8 @@ {(1UL << GLF_FROZEN), "F" }, \ {(1UL << GLF_QUEUED), "q" }, \ {(1UL << GLF_LRU), "L" }, \ - {(1UL << GLF_OBJECT), "o" }) + {(1UL << GLF_OBJECT), "o" }, \ + {(1UL << GLF_BLOCKING), "b" }) #ifndef NUMPTY #define NUMPTY @@ -236,6 +238,62 @@ TRACE_EVENT(gfs2_glock_queue, glock_trace_name(__entry->state)) ); +/* DLM sends a reply to GFS2 */ +TRACE_EVENT(gfs2_glock_lock_time, + + TP_PROTO(const struct gfs2_glock *gl, s64 tdiff), + + TP_ARGS(gl, tdiff), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( int, status ) + __field( char, flags ) + __field( s64, tdiff ) + __field( s64, srtt ) + __field( s64, srttvar ) + __field( s64, srttb ) + __field( s64, srttvarb ) + __field( s64, sirt ) + __field( s64, sirtvar ) + __field( s64, dcount ) + __field( s64, qcount ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gl->gl_name.ln_number; + __entry->gltype = gl->gl_name.ln_type; + __entry->status = gl->gl_lksb.sb_status; + __entry->flags = gl->gl_lksb.sb_flags; + __entry->tdiff = tdiff; + __entry->srtt = gl->gl_stats.stats[GFS2_LKS_SRTT]; + __entry->srttvar = gl->gl_stats.stats[GFS2_LKS_SRTTVAR]; + __entry->srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; + __entry->srttvarb = gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; + __entry->sirt = gl->gl_stats.stats[GFS2_LKS_SIRT]; + __entry->sirtvar = gl->gl_stats.stats[GFS2_LKS_SIRTVAR]; + __entry->dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; + __entry->qcount = gl->gl_stats.stats[GFS2_LKS_QCOUNT]; + ), + + TP_printk("%u,%u glock %d:%lld status:%d flags:%02x tdiff:%lld srtt:%lld/%lld srttb:%lld/%lld sirt:%lld/%lld dcnt:%lld qcnt:%lld", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + __entry->status, __entry->flags, + (long long)__entry->tdiff, + (long long)__entry->srtt, + (long long)__entry->srttvar, + (long long)__entry->srttb, + (long long)__entry->srttvarb, + (long long)__entry->sirt, + (long long)__entry->sirtvar, + (long long)__entry->dcount, + (long long)__entry->qcount) +); + /* Section 2 - Log/journal * * Objectives: diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 53511291fe36..9e7765e8e7b0 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -25,6 +25,7 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly; struct kmem_cache *gfs2_bufdata_cachep __read_mostly; struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; struct kmem_cache *gfs2_quotad_cachep __read_mostly; +mempool_t *gfs2_bh_pool __read_mostly; void gfs2_assert_i(struct gfs2_sbd *sdp) { diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index b432e04600de..a4ce76c67dbb 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -10,6 +10,8 @@ #ifndef __UTIL_DOT_H__ #define __UTIL_DOT_H__ +#include <linux/mempool.h> + #include "incore.h" #define fs_printk(level, fs, fmt, arg...) \ @@ -150,6 +152,7 @@ extern struct kmem_cache *gfs2_inode_cachep; extern struct kmem_cache *gfs2_bufdata_cachep; extern struct kmem_cache *gfs2_rgrpd_cachep; extern struct kmem_cache *gfs2_quotad_cachep; +extern mempool_t *gfs2_bh_pool; static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, unsigned int *p) diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index e9636591b5d5..2e5ba425cae7 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -251,7 +251,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, if (!blks) return 0; - rgd = gfs2_blk2rgrpd(sdp, bn); + rgd = gfs2_blk2rgrpd(sdp, bn, 1); if (!rgd) { gfs2_consist_inode(ip); return -EIO; @@ -1439,7 +1439,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) struct gfs2_holder gh; int error; - rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr); + rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1); if (!rgd) { gfs2_consist_inode(ip); return -EIO; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 8137fb3e6780..7b4c537d6e13 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -430,15 +430,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_d_op = &hfs_dentry_operations; res = -ENOMEM; - sb->s_root = d_alloc_root(root_inode); + sb->s_root = d_make_root(root_inode); if (!sb->s_root) - goto bail_iput; + goto bail_no_root; /* everything's okay */ return 0; -bail_iput: - iput(root_inode); bail_no_root: printk(KERN_ERR "hfs: get root inode failed.\n"); bail: diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 21a5b7fc6db4..4e75ac646fea 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -317,6 +317,11 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb) /* + * hfs+-specific ioctl for making the filesystem bootable + */ +#define HFSPLUS_IOC_BLESS _IO('h', 0x80) + +/* * Functions in any *.c used in other files */ diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 927cdd6d5bf5..921967e5abb1 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -117,7 +117,7 @@ struct hfsplus_vh { __be32 write_count; __be64 encodings_bmp; - u8 finder_info[32]; + u32 finder_info[8]; struct hfsplus_fork_raw alloc_file; struct hfsplus_fork_raw ext_file; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 6643b242bdd7..82b69ee4dacc 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -193,6 +193,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, mutex_init(&hip->extents_lock); hip->extent_state = 0; hip->flags = 0; + hip->userflags = 0; set_bit(HFSPLUS_I_RSRC, &hip->flags); err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); @@ -400,6 +401,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) atomic_set(&hip->opencnt, 0); hip->extent_state = 0; hip->flags = 0; + hip->userflags = 0; memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->alloc_blocks = 0; diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index f66c7655b3f7..c640ba57074b 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -20,6 +20,38 @@ #include <asm/uaccess.h> #include "hfsplus_fs.h" +/* + * "Blessing" an HFS+ filesystem writes metadata to the superblock informing + * the platform firmware which file to boot from + */ +static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); + struct hfsplus_vh *vh = sbi->s_vhdr; + struct hfsplus_vh *bvh = sbi->s_backup_vhdr; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&sbi->vh_mutex); + + /* Directory containing the bootable system */ + vh->finder_info[0] = bvh->finder_info[0] = + cpu_to_be32(parent_ino(dentry)); + + /* Bootloader */ + vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(inode->i_ino); + + /* Per spec, the OS X system folder - same as finder_info[0] here */ + vh->finder_info[5] = bvh->finder_info[5] = + cpu_to_be32(parent_ino(dentry)); + + mutex_unlock(&sbi->vh_mutex); + return 0; +} + static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) { struct inode *inode = file->f_path.dentry->d_inode; @@ -108,6 +140,8 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return hfsplus_ioctl_getflags(file, argp); case HFSPLUS_IOC_EXT2_SETFLAGS: return hfsplus_ioctl_setflags(file, argp); + case HFSPLUS_IOC_BLESS: + return hfsplus_ioctl_bless(file, argp); default: return -ENOTTY; } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 427682ca9e48..ceb1c281eefb 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -465,6 +465,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) goto out_put_alloc_file; } + sb->s_d_op = &hfsplus_dentry_operations; + sb->s_root = d_make_root(root); + if (!sb->s_root) { + err = -ENOMEM; + goto out_put_alloc_file; + } + str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; err = hfs_find_init(sbi->cat_tree, &fd); @@ -515,13 +522,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) } } - sb->s_d_op = &hfsplus_dentry_operations; - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - err = -ENOMEM; - goto out_put_hidden_dir; - } - unload_nls(sbi->nls); sbi->nls = nls; return 0; @@ -529,7 +529,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) out_put_hidden_dir: iput(sbi->hidden_dir); out_put_root: - iput(root); + dput(sb->s_root); + sb->s_root = NULL; out_put_alloc_file: iput(sbi->alloc_file); out_close_cat_tree: diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index e130bd46d671..588d45885a6f 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -966,9 +966,9 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) } err = -ENOMEM; - sb->s_root = d_alloc_root(root_inode); + sb->s_root = d_make_root(root_inode); if (sb->s_root == NULL) - goto out_put; + goto out; return 0; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 3690467c944e..54f6eccb79d9 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -625,11 +625,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) hpfs_init_inode(root); hpfs_read_inode(root); unlock_new_inode(root); - s->s_root = d_alloc_root(root); - if (!s->s_root) { - iput(root); + s->s_root = d_make_root(root); + if (!s->s_root) goto bail0; - } /* * find the root directory's . pointer & finish filling in the inode diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index d92f4ce80925..a80e45a690ac 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -726,17 +726,12 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) err = -ENOMEM; root_inode = get_inode(sb, dget(proc_mnt->mnt_root)); - if (!root_inode) - goto out_mntput; - - sb->s_root = d_alloc_root(root_inode); + sb->s_root = d_make_root(root_inode); if (!sb->s_root) - goto out_iput; + goto out_mntput; return 0; - out_iput: - iput(root_inode); out_mntput: mntput(proc_mnt); out: diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1e85a7ac0217..ea251749d9d5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -41,6 +41,25 @@ const struct file_operations hugetlbfs_file_operations; static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_inode_operations; +struct hugetlbfs_config { + uid_t uid; + gid_t gid; + umode_t mode; + long nr_blocks; + long nr_inodes; + struct hstate *hstate; +}; + +struct hugetlbfs_inode_info { + struct shared_policy policy; + struct inode vfs_inode; +}; + +static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) +{ + return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); +} + static struct backing_dev_info hugetlbfs_backing_dev_info = { .name = "hugetlbfs", .ra_pages = 0, /* No readahead */ @@ -154,10 +173,12 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return addr; } - start_addr = mm->free_area_cache; - - if (len <= mm->cached_hole_size) + if (len > mm->cached_hole_size) + start_addr = mm->free_area_cache; + else { start_addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } full_search: addr = ALIGN(start_addr, huge_page_size(h)); @@ -171,13 +192,18 @@ full_search: */ if (start_addr != TASK_UNMAPPED_BASE) { start_addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) + if (!vma || addr + len <= vma->vm_start) { + mm->free_area_cache = addr + len; return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; addr = ALIGN(vma->vm_end, huge_page_size(h)); } } @@ -238,17 +264,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, loff_t isize; ssize_t retval = 0; - mutex_lock(&inode->i_mutex); - /* validate length */ if (len == 0) goto out; - isize = i_size_read(inode); - if (!isize) - goto out; - - end_index = (isize - 1) >> huge_page_shift(h); for (;;) { struct page *page; unsigned long nr, ret; @@ -256,18 +275,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, /* nr is the maximum number of bytes to copy from this page */ nr = huge_page_size(h); + isize = i_size_read(inode); + if (!isize) + goto out; + end_index = (isize - 1) >> huge_page_shift(h); if (index >= end_index) { if (index > end_index) goto out; nr = ((isize - 1) & ~huge_page_mask(h)) + 1; - if (nr <= offset) { + if (nr <= offset) goto out; - } } nr = nr - offset; /* Find the page */ - page = find_get_page(mapping, index); + page = find_lock_page(mapping, index); if (unlikely(page == NULL)) { /* * We have a HOLE, zero out the user-buffer for the @@ -279,17 +301,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, else ra = 0; } else { + unlock_page(page); + /* * We have the page, copy it to user space buffer. */ ra = hugetlbfs_read_actor(page, offset, buf, len, nr); ret = ra; + page_cache_release(page); } if (ra < 0) { if (retval == 0) retval = ra; - if (page) - page_cache_release(page); goto out; } @@ -299,16 +322,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, index += offset >> huge_page_shift(h); offset &= ~huge_page_mask(h); - if (page) - page_cache_release(page); - /* short read or no more work */ if ((ret != nr) || (len == 0)) break; } out: *ppos = ((loff_t)index << huge_page_shift(h)) + offset; - mutex_unlock(&inode->i_mutex); return retval; } @@ -607,9 +626,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) spin_lock(&sbinfo->stat_lock); /* If no limits set, just report 0 for max/free/used * blocks, like simple_statfs() */ - if (sbinfo->max_blocks >= 0) { - buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; + if (sbinfo->spool) { + long free_pages; + + spin_lock(&sbinfo->spool->lock); + buf->f_blocks = sbinfo->spool->max_hpages; + free_pages = sbinfo->spool->max_hpages + - sbinfo->spool->used_hpages; + buf->f_bavail = buf->f_bfree = free_pages; + spin_unlock(&sbinfo->spool->lock); buf->f_files = sbinfo->max_inodes; buf->f_ffree = sbinfo->free_inodes; } @@ -625,6 +650,10 @@ static void hugetlbfs_put_super(struct super_block *sb) if (sbi) { sb->s_fs_info = NULL; + + if (sbi->spool) + hugepage_put_subpool(sbi->spool); + kfree(sbi); } } @@ -831,8 +860,6 @@ bad_val: static int hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) { - struct inode * inode; - struct dentry * root; int ret; struct hugetlbfs_config config; struct hugetlbfs_sb_info *sbinfo; @@ -855,60 +882,31 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbinfo; sbinfo->hstate = config.hstate; spin_lock_init(&sbinfo->stat_lock); - sbinfo->max_blocks = config.nr_blocks; - sbinfo->free_blocks = config.nr_blocks; sbinfo->max_inodes = config.nr_inodes; sbinfo->free_inodes = config.nr_inodes; + sbinfo->spool = NULL; + if (config.nr_blocks != -1) { + sbinfo->spool = hugepage_new_subpool(config.nr_blocks); + if (!sbinfo->spool) + goto out_free; + } sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = huge_page_size(config.hstate); sb->s_blocksize_bits = huge_page_shift(config.hstate); sb->s_magic = HUGETLBFS_MAGIC; sb->s_op = &hugetlbfs_ops; sb->s_time_gran = 1; - inode = hugetlbfs_get_root(sb, &config); - if (!inode) - goto out_free; - - root = d_alloc_root(inode); - if (!root) { - iput(inode); + sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config)); + if (!sb->s_root) goto out_free; - } - sb->s_root = root; return 0; out_free: + if (sbinfo->spool) + kfree(sbinfo->spool); kfree(sbinfo); return -ENOMEM; } -int hugetlb_get_quota(struct address_space *mapping, long delta) -{ - int ret = 0; - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); - - if (sbinfo->free_blocks > -1) { - spin_lock(&sbinfo->stat_lock); - if (sbinfo->free_blocks - delta >= 0) - sbinfo->free_blocks -= delta; - else - ret = -ENOMEM; - spin_unlock(&sbinfo->stat_lock); - } - - return ret; -} - -void hugetlb_put_quota(struct address_space *mapping, long delta) -{ - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); - - if (sbinfo->free_blocks > -1) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_blocks += delta; - spin_unlock(&sbinfo->stat_lock); - } -} - static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -928,8 +926,8 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } -struct file *hugetlb_file_setup(const char *name, size_t size, - vm_flags_t acctflag, +struct file *hugetlb_file_setup(const char *name, unsigned long addr, + size_t size, vm_flags_t acctflag, struct user_struct **user, int creat_flags) { int error = -ENOMEM; @@ -938,6 +936,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, struct path path; struct dentry *root; struct qstr quick_string; + struct hstate *hstate; + unsigned long num_pages; *user = NULL; if (!hugetlbfs_vfsmount) @@ -946,7 +946,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { *user = current_user(); if (user_shm_lock(size, *user)) { - printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n"); + task_lock(current); + printk_once(KERN_WARNING + "%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", + current->comm, current->pid); + task_unlock(current); } else { *user = NULL; return ERR_PTR(-EPERM); @@ -967,10 +971,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size, if (!inode) goto out_dentry; + hstate = hstate_inode(inode); + size += addr & ~huge_page_mask(hstate); + num_pages = ALIGN(size, huge_page_size(hstate)) >> + huge_page_shift(hstate); error = -ENOMEM; - if (hugetlb_reserve_pages(inode, 0, - size >> huge_page_shift(hstate_inode(inode)), NULL, - acctflag)) + if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag)) goto out_inode; d_instantiate(path.dentry, inode); @@ -1006,6 +1012,7 @@ static int __init init_hugetlbfs_fs(void) if (error) return error; + error = -ENOMEM; hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", sizeof(struct hugetlbfs_inode_info), 0, 0, init_once); @@ -1024,10 +1031,10 @@ static int __init init_hugetlbfs_fs(void) } error = PTR_ERR(vfsmount); + unregister_filesystem(&hugetlbfs_fs_type); out: - if (error) - kmem_cache_destroy(hugetlbfs_inode_cachep); + kmem_cache_destroy(hugetlbfs_inode_cachep); out2: bdi_destroy(&hugetlbfs_backing_dev_info); return error; diff --git a/fs/inode.c b/fs/inode.c index fb10d86ffad7..9f4f5fecc096 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2,29 +2,19 @@ * (C) 1997 Linus Torvalds * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) */ +#include <linux/export.h> #include <linux/fs.h> #include <linux/mm.h> -#include <linux/dcache.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/writeback.h> -#include <linux/module.h> #include <linux/backing-dev.h> -#include <linux/wait.h> -#include <linux/rwsem.h> #include <linux/hash.h> #include <linux/swap.h> #include <linux/security.h> -#include <linux/pagemap.h> #include <linux/cdev.h> #include <linux/bootmem.h> #include <linux/fsnotify.h> #include <linux/mount.h> -#include <linux/async.h> #include <linux/posix_acl.h> #include <linux/prefetch.h> -#include <linux/ima.h> -#include <linux/cred.h> #include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/ratelimit.h> #include "internal.h" @@ -938,8 +928,7 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode) struct file_system_type *type = inode->i_sb->s_type; /* Set new key only if filesystem hasn't already changed it */ - if (!lockdep_match_class(&inode->i_mutex, - &type->i_mutex_key)) { + if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) { /* * ensure nobody is actually holding i_mutex */ @@ -966,6 +955,7 @@ void unlock_new_inode(struct inode *inode) spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW; + smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } @@ -1369,17 +1359,6 @@ int generic_delete_inode(struct inode *inode) EXPORT_SYMBOL(generic_delete_inode); /* - * Normal UNIX filesystem behaviour: delete the - * inode when the usage count drops to zero, and - * i_nlink is zero. - */ -int generic_drop_inode(struct inode *inode) -{ - return !inode->i_nlink || inode_unhashed(inode); -} -EXPORT_SYMBOL_GPL(generic_drop_inode); - -/* * Called when we're dropping the last reference * to an inode. * @@ -1510,9 +1489,10 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ -void touch_atime(struct vfsmount *mnt, struct dentry *dentry) +void touch_atime(struct path *path) { - struct inode *inode = dentry->d_inode; + struct vfsmount *mnt = path->mnt; + struct inode *inode = path->dentry->d_inode; struct timespec now; if (inode->i_flags & S_NOATIME) @@ -1651,7 +1631,7 @@ __setup("ihash_entries=", set_ihash_entries); */ void __init inode_init_early(void) { - int loop; + unsigned int loop; /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. @@ -1669,13 +1649,13 @@ void __init inode_init_early(void) &i_hash_mask, 0); - for (loop = 0; loop < (1 << i_hash_shift); loop++) + for (loop = 0; loop < (1U << i_hash_shift); loop++) INIT_HLIST_HEAD(&inode_hashtable[loop]); } void __init inode_init(void) { - int loop; + unsigned int loop; /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", @@ -1699,7 +1679,7 @@ void __init inode_init(void) &i_hash_mask, 0); - for (loop = 0; loop < (1 << i_hash_shift); loop++) + for (loop = 0; loop < (1U << i_hash_shift); loop++) INIT_HLIST_HEAD(&inode_hashtable[loop]); } diff --git a/fs/ioprio.c b/fs/ioprio.c index f84b380d65e5..0f1b9515213b 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { ioc_ioprio_changed(ioc, ioprio); - put_io_context(ioc, NULL); + put_io_context(ioc); } return err; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index bd62c76fb5df..29037c365ba4 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -947,9 +947,8 @@ root_found: s->s_d_op = &isofs_dentry_ops[table]; /* get the root dentry */ - s->s_root = d_alloc_root(inode); + s->s_root = d_make_root(inode); if (!(s->s_root)) { - iput(inode); error = -ENOMEM; goto out_no_inode; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 59c09f9541b5..0971e9217808 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -129,6 +129,8 @@ static int kjournald(void *arg) setup_timer(&journal->j_commit_timer, commit_timeout, (unsigned long)current); + set_freezable(); + /* Record that the journal thread is running */ journal->j_task = current; wake_up(&journal->j_wait_done_commit); @@ -328,7 +330,7 @@ repeat: new_offset = offset_in_page(jh2bh(jh_in)->b_data); } - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); /* * Check for escaping */ @@ -337,7 +339,7 @@ repeat: need_copy_out = 1; do_escape = 1; } - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); /* * Do we need to do a data copy? @@ -354,9 +356,9 @@ repeat: } jh_in->b_frozen_data = tmp; - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); new_page = virt_to_page(tmp); new_offset = offset_in_page(tmp); @@ -368,9 +370,9 @@ repeat: * copying, we can finally do so. */ if (do_escape) { - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); *((unsigned int *)(mapped_data + new_offset)) = 0; - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); } set_bh_page(new_bh, new_page, new_offset); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 7fce94b04bc3..b2a7e5244e39 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -718,9 +718,9 @@ done: "Possible IO failure.\n"); page = jh2bh(jh)->b_page; offset = offset_in_page(jh2bh(jh)->b_data); - source = kmap_atomic(page, KM_USER0); + source = kmap_atomic(page); memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); - kunmap_atomic(source, KM_USER0); + kunmap_atomic(source); } jbd_unlock_bh_state(bh); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 5069b8475150..c067a8cae63b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -286,10 +286,10 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) char *addr; __u32 checksum; - addr = kmap_atomic(page, KM_USER0); + addr = kmap_atomic(page); checksum = crc32_be(crc32_sum, (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); - kunmap_atomic(addr, KM_USER0); + kunmap_atomic(addr); return checksum; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c0a5f9f1b127..839377e3d624 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -139,6 +139,8 @@ static int kjournald2(void *arg) setup_timer(&journal->j_commit_timer, commit_timeout, (unsigned long)current); + set_freezable(); + /* Record that the journal thread is running */ journal->j_task = current; wake_up(&journal->j_wait_done_commit); @@ -345,7 +347,7 @@ repeat: new_offset = offset_in_page(jh2bh(jh_in)->b_data); } - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); /* * Fire data frozen trigger if data already wasn't frozen. Do this * before checking for escaping, as the trigger may modify the magic @@ -364,7 +366,7 @@ repeat: need_copy_out = 1; do_escape = 1; } - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); /* * Do we need to do a data copy? @@ -385,9 +387,9 @@ repeat: } jh_in->b_frozen_data = tmp; - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); new_page = virt_to_page(tmp); new_offset = offset_in_page(tmp); @@ -406,9 +408,9 @@ repeat: * copying, we can finally do so. */ if (do_escape) { - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); *((unsigned int *)(mapped_data + new_offset)) = 0; - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); } set_bh_page(new_bh, new_page, new_offset); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 35ae096bed5d..e5aba56e1fd5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -783,12 +783,12 @@ done: "Possible IO failure.\n"); page = jh2bh(jh)->b_page; offset = offset_in_page(jh2bh(jh)->b_data); - source = kmap_atomic(page, KM_USER0); + source = kmap_atomic(page); /* Fire data frozen trigger just before we copy the data */ jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers); memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); - kunmap_atomic(source, KM_USER0); + kunmap_atomic(source); /* * Now that the frozen data is saved off, we need to store diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c index 5b6c9d1a2fb9..96ed3c9ec3fc 100644 --- a/fs/jffs2/compr.c +++ b/fs/jffs2/compr.c @@ -340,7 +340,7 @@ int jffs2_unregister_compressor(struct jffs2_compressor *comp) if (comp->usecount) { spin_unlock(&jffs2_compressor_list_lock); - printk(KERN_WARNING "JFFS2: Compressor modul is in use. Unregister failed.\n"); + printk(KERN_WARNING "JFFS2: Compressor module is in use. Unregister failed.\n"); return -1; } list_del(&comp->list); diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index a01cdad6aad1..eafb8d37a6fb 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -335,7 +335,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl void *ebuf; uint32_t ofs; size_t retlen; - int ret = -EIO; + int ret; unsigned long *wordebuf; ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 2e0123867cb1..c0d5c9d770da 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -561,9 +561,9 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n")); - sb->s_root = d_alloc_root(root_i); + sb->s_root = d_make_root(root_i); if (!sb->s_root) - goto out_root_i; + goto out_root; sb->s_maxbytes = 0xFFFFFFFF; sb->s_blocksize = PAGE_CACHE_SIZE; @@ -573,8 +573,6 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) jffs2_start_garbage_collect_thread(c); return 0; - out_root_i: - iput(root_i); out_root: jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 5f7c160ea64f..07c91ca6017d 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -220,12 +220,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) dquot_initialize(dip); - /* link count overflow on parent directory ? */ - if (dip->i_nlink == JFS_LINK_MAX) { - rc = -EMLINK; - goto out1; - } - /* * search parent directory for entry/freespace * (dtSearch() returns parent directory page pinned) @@ -806,9 +800,6 @@ static int jfs_link(struct dentry *old_dentry, jfs_info("jfs_link: %s %s", old_dentry->d_name.name, dentry->d_name.name); - if (ip->i_nlink == JFS_LINK_MAX) - return -EMLINK; - dquot_initialize(dir); tid = txBegin(ip->i_sb, 0); @@ -1138,10 +1129,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = -ENOTEMPTY; goto out3; } - } else if ((new_dir != old_dir) && - (new_dir->i_nlink == JFS_LINK_MAX)) { - rc = -EMLINK; - goto out3; } } else if (new_ip) { IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 682bca642f38..4a82950f412f 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -441,6 +441,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; sb->s_fs_info = sbi; + sb->s_max_links = JFS_LINK_MAX; sbi->sb = sb; sbi->uid = sbi->gid = sbi->umask = -1; @@ -521,7 +522,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) ret = PTR_ERR(inode); goto out_no_rw; } - sb->s_root = d_alloc_root(inode); + sb->s_root = d_make_root(inode); if (!sb->s_root) goto out_no_root; @@ -539,7 +540,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) out_no_root: jfs_err("jfs_read_super: get root dentry failed"); - iput(inode); out_no_rw: rc = jfs_umount(sb); @@ -860,8 +860,14 @@ static int __init init_jfs_fs(void) jfs_proc_init(); #endif - return register_filesystem(&jfs_fs_type); + rc = register_filesystem(&jfs_fs_type); + if (!rc) + return 0; +#ifdef PROC_FS_JFS + jfs_proc_clean(); +#endif + kthread_stop(jfsSyncThread); kill_committask: for (i = 0; i < commit_threads; i++) kthread_stop(jfsCommitThread[i]); diff --git a/fs/libfs.c b/fs/libfs.c index 5b2dbb3ba4fc..722e0d5ba182 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -491,11 +491,9 @@ int simple_fill_super(struct super_block *s, unsigned long magic, inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; set_nlink(inode, 2); - root = d_alloc_root(inode); - if (!root) { - iput(inode); + root = d_make_root(inode); + if (!root) return -ENOMEM; - } for (i = 0; !files->name || files->name[0]; i++, files++) { if (!files->name) continue; @@ -536,7 +534,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); - mnt = vfs_kern_mount(type, 0, type->name, NULL); + mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL); if (IS_ERR(mnt)) return PTR_ERR(mnt); spin_lock(&pin_fs_lock); diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 3de7a32cadbe..bea5d1b9954b 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -177,17 +177,17 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry) (filler_t *)logfs_readpage, NULL); if (IS_ERR(page)) return page; - dd = kmap_atomic(page, KM_USER0); + dd = kmap_atomic(page); BUG_ON(dd->namelen == 0); if (name->len != be16_to_cpu(dd->namelen) || memcmp(name->name, dd->name, name->len)) { - kunmap_atomic(dd, KM_USER0); + kunmap_atomic(dd); page_cache_release(page); continue; } - kunmap_atomic(dd, KM_USER0); + kunmap_atomic(dd); return page; } return NULL; @@ -365,9 +365,9 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry, return NULL; } index = page->index; - dd = kmap_atomic(page, KM_USER0); + dd = kmap_atomic(page); ino = be64_to_cpu(dd->ino); - kunmap_atomic(dd, KM_USER0); + kunmap_atomic(dd); page_cache_release(page); inode = logfs_iget(dir->i_sb, ino); @@ -402,12 +402,12 @@ static int logfs_write_dir(struct inode *dir, struct dentry *dentry, if (!page) return -ENOMEM; - dd = kmap_atomic(page, KM_USER0); + dd = kmap_atomic(page); memset(dd, 0, sizeof(*dd)); dd->ino = cpu_to_be64(inode->i_ino); dd->type = logfs_type(inode); logfs_set_name(dd, &dentry->d_name); - kunmap_atomic(dd, KM_USER0); + kunmap_atomic(dd); err = logfs_write_buf(dir, page, WF_LOCK); unlock_page(page); @@ -558,9 +558,6 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir, { struct inode *inode = old_dentry->d_inode; - if (inode->i_nlink >= LOGFS_LINK_MAX) - return -EMLINK; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; ihold(inode); inc_nlink(inode); @@ -579,9 +576,9 @@ static int logfs_get_dd(struct inode *dir, struct dentry *dentry, if (IS_ERR(page)) return PTR_ERR(page); *pos = page->index; - map = kmap_atomic(page, KM_USER0); + map = kmap_atomic(page); memcpy(dd, map, sizeof(*dd)); - kunmap_atomic(map, KM_USER0); + kunmap_atomic(map); page_cache_release(page); return 0; } diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 4153e65b0148..e3ab5e5a904c 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -517,9 +517,9 @@ static int indirect_write_alias(struct super_block *sb, ino = page->mapping->host->i_ino; logfs_unpack_index(page->index, &bix, &level); - child = kmap_atomic(page, KM_USER0); + child = kmap_atomic(page); val = child[pos]; - kunmap_atomic(child, KM_USER0); + kunmap_atomic(child); err = write_one_alias(sb, ino, bix, level, pos, val); if (err) return err; @@ -673,9 +673,9 @@ static void alloc_indirect_block(struct inode *inode, struct page *page, alloc_data_block(inode, page); block = logfs_block(page); - array = kmap_atomic(page, KM_USER0); + array = kmap_atomic(page); initialize_block_counters(page, block, array, page_is_empty); - kunmap_atomic(array, KM_USER0); + kunmap_atomic(array); } static void block_set_pointer(struct page *page, int index, u64 ptr) @@ -685,10 +685,10 @@ static void block_set_pointer(struct page *page, int index, u64 ptr) u64 oldptr; BUG_ON(!block); - array = kmap_atomic(page, KM_USER0); + array = kmap_atomic(page); oldptr = be64_to_cpu(array[index]); array[index] = cpu_to_be64(ptr); - kunmap_atomic(array, KM_USER0); + kunmap_atomic(array); SetPageUptodate(page); block->full += !!(ptr & LOGFS_FULLY_POPULATED) @@ -701,9 +701,9 @@ static u64 block_get_pointer(struct page *page, int index) __be64 *block; u64 ptr; - block = kmap_atomic(page, KM_USER0); + block = kmap_atomic(page); ptr = be64_to_cpu(block[index]); - kunmap_atomic(block, KM_USER0); + kunmap_atomic(block); return ptr; } @@ -850,7 +850,7 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data) } slot = get_bits(bix, SUBLEVEL(level)); - rblock = kmap_atomic(page, KM_USER0); + rblock = kmap_atomic(page); while (slot < LOGFS_BLOCK_FACTOR) { if (data && (rblock[slot] != 0)) break; @@ -861,12 +861,12 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data) bix &= ~(increment - 1); } if (slot >= LOGFS_BLOCK_FACTOR) { - kunmap_atomic(rblock, KM_USER0); + kunmap_atomic(rblock); logfs_put_read_page(page); return bix; } bofs = be64_to_cpu(rblock[slot]); - kunmap_atomic(rblock, KM_USER0); + kunmap_atomic(rblock); logfs_put_read_page(page); if (!bofs) { BUG_ON(data); @@ -1961,9 +1961,9 @@ int logfs_read_inode(struct inode *inode) if (IS_ERR(page)) return PTR_ERR(page); - di = kmap_atomic(page, KM_USER0); + di = kmap_atomic(page); logfs_disk_to_inode(di, inode); - kunmap_atomic(di, KM_USER0); + kunmap_atomic(di); move_page_to_inode(inode, page); page_cache_release(page); return 0; @@ -1982,9 +1982,9 @@ static struct page *inode_to_page(struct inode *inode) if (!page) return NULL; - di = kmap_atomic(page, KM_USER0); + di = kmap_atomic(page); logfs_inode_to_disk(inode, di); - kunmap_atomic(di, KM_USER0); + kunmap_atomic(di); move_inode_to_page(page, inode); return page; } @@ -2041,13 +2041,13 @@ static void logfs_mod_segment_entry(struct super_block *sb, u32 segno, if (write) alloc_indirect_block(inode, page, 0); - se = kmap_atomic(page, KM_USER0); + se = kmap_atomic(page); change_se(se + child_no, arg); if (write) { logfs_set_alias(sb, logfs_block(page), child_no); BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize); } - kunmap_atomic(se, KM_USER0); + kunmap_atomic(se); logfs_put_write_page(page); } @@ -2245,10 +2245,10 @@ int logfs_inode_write(struct inode *inode, const void *buf, size_t count, if (!page) return -ENOMEM; - pagebuf = kmap_atomic(page, KM_USER0); + pagebuf = kmap_atomic(page); memcpy(pagebuf, buf, count); flush_dcache_page(page); - kunmap_atomic(pagebuf, KM_USER0); + kunmap_atomic(pagebuf); if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE) i_size_write(inode, pos + LOGFS_BLOCKSIZE); diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index ab798ed1cc88..e28d090c98d6 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -543,9 +543,9 @@ void move_page_to_btree(struct page *page) BUG_ON(!item); /* mempool empty */ memset(item, 0, sizeof(*item)); - child = kmap_atomic(page, KM_USER0); + child = kmap_atomic(page); item->val = child[pos]; - kunmap_atomic(child, KM_USER0); + kunmap_atomic(child); item->child_no = pos; list_add(&item->list, &block->item_list); } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index c9ee7f5d1caf..97bca623d893 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -315,11 +315,9 @@ static int logfs_get_sb_final(struct super_block *sb) if (IS_ERR(rootdir)) goto fail; - sb->s_root = d_alloc_root(rootdir); - if (!sb->s_root) { - iput(rootdir); + sb->s_root = d_make_root(rootdir); + if (!sb->s_root) goto fail; - } /* at that point we know that ->put_super() will be called */ super->s_erase_page = alloc_pages(GFP_KERNEL, 0); @@ -542,6 +540,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super, * the filesystem incompatible with 32bit systems. */ sb->s_maxbytes = (1ull << 43) - 1; + sb->s_max_links = LOGFS_LINK_MAX; sb->s_op = &logfs_super_operations; sb->s_flags = flags | MS_NOATIME; @@ -627,7 +626,10 @@ static int __init logfs_init(void) if (ret) goto out2; - return register_filesystem(&logfs_fs_type); + ret = register_filesystem(&logfs_fs_type); + if (!ret) + return 0; + logfs_destroy_inode_cache(); out2: logfs_compr_exit(); out1: diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 085a9262c692..685b2d981b87 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -335,7 +335,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir) goto fail; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, 0, PAGE_CACHE_SIZE); if (sbi->s_version == MINIX_V3) { @@ -355,7 +355,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir) de->inode = dir->i_ino; strcpy(de->name, ".."); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize); fail: diff --git a/fs/minix/inode.c b/fs/minix/inode.c index fa8b612b8ce2..fcb05d2c6b5f 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -190,24 +190,24 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) sbi->s_version = MINIX_V1; sbi->s_dirsize = 16; sbi->s_namelen = 14; - sbi->s_link_max = MINIX_LINK_MAX; + s->s_max_links = MINIX_LINK_MAX; } else if (s->s_magic == MINIX_SUPER_MAGIC2) { sbi->s_version = MINIX_V1; sbi->s_dirsize = 32; sbi->s_namelen = 30; - sbi->s_link_max = MINIX_LINK_MAX; + s->s_max_links = MINIX_LINK_MAX; } else if (s->s_magic == MINIX2_SUPER_MAGIC) { sbi->s_version = MINIX_V2; sbi->s_nzones = ms->s_zones; sbi->s_dirsize = 16; sbi->s_namelen = 14; - sbi->s_link_max = MINIX2_LINK_MAX; + s->s_max_links = MINIX2_LINK_MAX; } else if (s->s_magic == MINIX2_SUPER_MAGIC2) { sbi->s_version = MINIX_V2; sbi->s_nzones = ms->s_zones; sbi->s_dirsize = 32; sbi->s_namelen = 30; - sbi->s_link_max = MINIX2_LINK_MAX; + s->s_max_links = MINIX2_LINK_MAX; } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) { m3s = (struct minix3_super_block *) bh->b_data; s->s_magic = m3s->s_magic; @@ -221,9 +221,9 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) sbi->s_dirsize = 64; sbi->s_namelen = 60; sbi->s_version = MINIX_V3; - sbi->s_link_max = MINIX2_LINK_MAX; sbi->s_mount_state = MINIX_VALID_FS; sb_set_blocksize(s, m3s->s_blocksize); + s->s_max_links = MINIX2_LINK_MAX; } else goto out_no_fs; @@ -254,14 +254,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) minix_set_bit(0,sbi->s_imap[0]->b_data); minix_set_bit(0,sbi->s_zmap[0]->b_data); - /* set up enough so that it can read an inode */ - s->s_op = &minix_sops; - root_inode = minix_iget(s, MINIX_ROOT_INO); - if (IS_ERR(root_inode)) { - ret = PTR_ERR(root_inode); - goto out_no_root; - } - /* Apparently minix can create filesystems that allocate more blocks for * the bitmaps than needed. We simply ignore that, but verify it didn't * create one with not enough blocks and bail out if so. @@ -270,7 +262,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) if (sbi->s_imap_blocks < block) { printk("MINIX-fs: file system does not have enough " "imap blocks allocated. Refusing to mount\n"); - goto out_iput; + goto out_no_bitmap; } block = minix_blocks_needed( @@ -279,13 +271,21 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) if (sbi->s_zmap_blocks < block) { printk("MINIX-fs: file system does not have enough " "zmap blocks allocated. Refusing to mount.\n"); - goto out_iput; + goto out_no_bitmap; + } + + /* set up enough so that it can read an inode */ + s->s_op = &minix_sops; + root_inode = minix_iget(s, MINIX_ROOT_INO); + if (IS_ERR(root_inode)) { + ret = PTR_ERR(root_inode); + goto out_no_root; } ret = -ENOMEM; - s->s_root = d_alloc_root(root_inode); + s->s_root = d_make_root(root_inode); if (!s->s_root) - goto out_iput; + goto out_no_root; if (!(s->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ @@ -301,10 +301,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) return 0; -out_iput: - iput(root_inode); - goto out_freemap; - out_no_root: if (!silent) printk("MINIX-fs: get root inode failed\n"); diff --git a/fs/minix/minix.h b/fs/minix/minix.h index c889ef0aa571..1ebd11854622 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -34,7 +34,6 @@ struct minix_sb_info { unsigned long s_max_size; int s_dirsize; int s_namelen; - int s_link_max; struct buffer_head ** s_imap; struct buffer_head ** s_zmap; struct buffer_head * s_sbh; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 2f76e38c2065..2d0ee1786305 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -94,9 +94,6 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, { struct inode *inode = old_dentry->d_inode; - if (inode->i_nlink >= minix_sb(inode->i_sb)->s_link_max) - return -EMLINK; - inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); ihold(inode); @@ -106,10 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode) { struct inode * inode; - int err = -EMLINK; - - if (dir->i_nlink >= minix_sb(dir->i_sb)->s_link_max) - goto out; + int err; inode_inc_link_count(dir); @@ -181,7 +175,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir, struct dentry *new_dentry) { - struct minix_sb_info * info = minix_sb(old_dir->i_sb); struct inode * old_inode = old_dentry->d_inode; struct inode * new_inode = new_dentry->d_inode; struct page * dir_page = NULL; @@ -219,11 +212,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= info->s_link_max) - goto out_dir; - } err = minix_add_link(new_dentry, old_inode); if (err) goto out_dir; diff --git a/fs/namei.c b/fs/namei.c index 208c6aa4a989..73ec863a9896 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -161,7 +161,7 @@ static char *getname_flags(const char __user *filename, int flags, int *empty) char *getname(const char __user * filename) { - return getname_flags(filename, 0, 0); + return getname_flags(filename, 0, NULL); } #ifdef CONFIG_AUDITSYSCALL @@ -642,7 +642,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p) cond_resched(); current->total_link_count++; - touch_atime(link->mnt, dentry); + touch_atime(link); nd_set_link(nd, NULL); error = security_inode_follow_link(link->dentry, nd); @@ -1095,8 +1095,10 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr struct dentry *old; /* Don't create child dentry for a dead directory. */ - if (unlikely(IS_DEADDIR(inode))) + if (unlikely(IS_DEADDIR(inode))) { + dput(dentry); return ERR_PTR(-ENOENT); + } old = inode->i_op->lookup(inode, dentry, nd); if (unlikely(old)) { @@ -1373,6 +1375,157 @@ static inline int can_lookup(struct inode *inode) } /* + * We can do the critical dentry name comparison and hashing + * operations one word at a time, but we are limited to: + * + * - Architectures with fast unaligned word accesses. We could + * do a "get_unaligned()" if this helps and is sufficiently + * fast. + * + * - Little-endian machines (so that we can generate the mask + * of low bytes efficiently). Again, we *could* do a byte + * swapping load on big-endian architectures if that is not + * expensive enough to make the optimization worthless. + * + * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we + * do not trap on the (extremely unlikely) case of a page + * crossing operation. + * + * - Furthermore, we need an efficient 64-bit compile for the + * 64-bit case in order to generate the "number of bytes in + * the final mask". Again, that could be replaced with a + * efficient population count instruction or similar. + */ +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. + */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608ul >> 56; +} + +static inline unsigned int fold_hash(unsigned long hash) +{ + hash += hash >> (8*sizeof(int)); + return hash; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#define fold_hash(x) (x) + +#endif + +unsigned int full_name_hash(const unsigned char *name, unsigned int len) +{ + unsigned long a, mask; + unsigned long hash = 0; + + for (;;) { + a = *(unsigned long *)name; + if (len < sizeof(unsigned long)) + break; + hash += a; + hash *= 9; + name += sizeof(unsigned long); + len -= sizeof(unsigned long); + if (!len) + goto done; + } + mask = ~(~0ul << len*8); + hash += mask & a; +done: + return fold_hash(hash); +} +EXPORT_SYMBOL(full_name_hash); + +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) +#define ONEBYTES REPEAT_BYTE(0x01) +#define SLASHBYTES REPEAT_BYTE('/') +#define HIGHBITS REPEAT_BYTE(0x80) + +/* Return the high bit set in the first byte that is a zero */ +static inline unsigned long has_zero(unsigned long a) +{ + return ((a - ONEBYTES) & ~a) & HIGHBITS; +} + +/* + * Calculate the length and hash of the path component, and + * return the length of the component; + */ +static inline unsigned long hash_name(const char *name, unsigned int *hashp) +{ + unsigned long a, mask, hash, len; + + hash = a = 0; + len = -sizeof(unsigned long); + do { + hash = (hash + a) * 9; + len += sizeof(unsigned long); + a = *(unsigned long *)(name+len); + /* Do we have any NUL or '/' bytes in this word? */ + mask = has_zero(a) | has_zero(a ^ SLASHBYTES); + } while (!mask); + + /* The mask *below* the first high bit set */ + mask = (mask - 1) & ~mask; + mask >>= 7; + hash += a & mask; + *hashp = fold_hash(hash); + + return len + count_masked_bytes(mask); +} + +#else + +unsigned int full_name_hash(const unsigned char *name, unsigned int len) +{ + unsigned long hash = init_name_hash(); + while (len--) + hash = partial_name_hash(*name++, hash); + return end_name_hash(hash); +} +EXPORT_SYMBOL(full_name_hash); + +/* + * We know there's a real path component here of at least + * one character. + */ +static inline unsigned long hash_name(const char *name, unsigned int *hashp) +{ + unsigned long hash = init_name_hash(); + unsigned long len = 0, c; + + c = (unsigned char)*name; + do { + len++; + hash = partial_name_hash(c, hash); + c = (unsigned char)name[len]; + } while (c && c != '/'); + *hashp = end_name_hash(hash); + return len; +} + +#endif + +/* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. @@ -1392,31 +1545,22 @@ static int link_path_walk(const char *name, struct nameidata *nd) /* At this point we know we have a real path component. */ for(;;) { - unsigned long hash; struct qstr this; - unsigned int c; + long len; int type; err = may_lookup(nd); if (err) break; + len = hash_name(name, &this.hash); this.name = name; - c = *(const unsigned char *)name; - - hash = init_name_hash(); - do { - name++; - hash = partial_name_hash(c, hash); - c = *(const unsigned char *)name; - } while (c && (c != '/')); - this.len = name - (const char *) this.name; - this.hash = end_name_hash(hash); + this.len = len; type = LAST_NORM; - if (this.name[0] == '.') switch (this.len) { + if (name[0] == '.') switch (len) { case 2: - if (this.name[1] == '.') { + if (name[1] == '.') { type = LAST_DOTDOT; nd->flags |= LOOKUP_JUMPED; } @@ -1435,12 +1579,18 @@ static int link_path_walk(const char *name, struct nameidata *nd) } } - /* remove trailing slashes? */ - if (!c) + if (!name[len]) goto last_component; - while (*++name == '/'); - if (!*name) + /* + * If it wasn't NUL, we know it was '/'. Skip that + * slash, and continue until no more slashes. + */ + do { + len++; + } while (unlikely(name[len] == '/')); + if (!name[len]) goto last_component; + name += len; err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); if (err < 0) @@ -1773,24 +1923,21 @@ static struct dentry *lookup_hash(struct nameidata *nd) struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { struct qstr this; - unsigned long hash; unsigned int c; WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); this.name = name; this.len = len; + this.hash = full_name_hash(name, len); if (!len) return ERR_PTR(-EACCES); - hash = init_name_hash(); while (len--) { c = *(const unsigned char *)name++; if (c == '/' || c == '\0') return ERR_PTR(-EACCES); - hash = partial_name_hash(c, hash); } - this.hash = end_name_hash(hash); /* * See if the low-level filesystem might want * to use its own hash.. @@ -1825,7 +1972,7 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags, int user_path_at(int dfd, const char __user *name, unsigned flags, struct path *path) { - return user_path_at_empty(dfd, name, flags, path, 0); + return user_path_at_empty(dfd, name, flags, path, NULL); } static int user_path_parent(int dfd, const char __user *path, @@ -2138,7 +2285,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, /* sayonara */ error = complete_walk(nd); if (error) - return ERR_PTR(-ECHILD); + return ERR_PTR(error); error = -ENOTDIR; if (nd->flags & LOOKUP_DIRECTORY) { @@ -2237,7 +2384,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ error = complete_walk(nd); if (error) - goto exit; + return ERR_PTR(error); error = -EISDIR; if (S_ISDIR(nd->inode->i_mode)) goto exit; @@ -2545,6 +2692,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { int error = may_create(dir, dentry); + unsigned max_links = dir->i_sb->s_max_links; if (error) return error; @@ -2557,6 +2705,9 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (error) return error; + if (max_links && dir->i_nlink >= max_links) + return -EMLINK; + error = dir->i_op->mkdir(dir, dentry, mode); if (!error) fsnotify_mkdir(dir, dentry); @@ -2887,6 +3038,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { struct inode *inode = old_dentry->d_inode; + unsigned max_links = dir->i_sb->s_max_links; int error; if (!inode) @@ -2917,6 +3069,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de /* Make sure we don't allow creating hardlink to an unlinked file */ if (inode->i_nlink == 0) error = -ENOENT; + else if (max_links && inode->i_nlink >= max_links) + error = -EMLINK; else error = dir->i_op->link(old_dentry, dir, new_dentry); mutex_unlock(&inode->i_mutex); @@ -3026,6 +3180,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, { int error = 0; struct inode *target = new_dentry->d_inode; + unsigned max_links = new_dir->i_sb->s_max_links; /* * If we are going to change the parent - check write permissions, @@ -3049,6 +3204,11 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; + error = -EMLINK; + if (max_links && !target && new_dir != old_dir && + new_dir->i_nlink >= max_links) + goto out; + if (target) shrink_dcache_parent(new_dentry); error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); @@ -3347,9 +3507,9 @@ retry: if (err) goto fail; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr, symname, len-1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, page, fsdata); diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 3d1e34f8a68e..49df0e7f8379 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -716,13 +716,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) if (!root_inode) goto out_disconnect; DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber); - sb->s_root = d_alloc_root(root_inode); + sb->s_root = d_make_root(root_inode); if (!sb->s_root) - goto out_no_root; + goto out_disconnect; return 0; -out_no_root: - iput(root_inode); out_disconnect: ncp_lock_server(server); ncp_disconnect(server); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2f378487ccde..4a108a0a2a60 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -36,6 +36,7 @@ #include <linux/inet.h> #include <linux/in6.h> #include <linux/slab.h> +#include <linux/idr.h> #include <net/ipv6.h> #include <linux/nfs_xdr.h> #include <linux/sunrpc/bc_xprt.h> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9952170271b2..4aaf0316d76a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -260,10 +260,10 @@ void nfs_readdir_clear_array(struct page *page) struct nfs_cache_array *array; int i; - array = kmap_atomic(page, KM_USER0); + array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); - kunmap_atomic(array, KM_USER0); + kunmap_atomic(array); } /* @@ -1883,11 +1883,11 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym if (!page) return -ENOMEM; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr, symname, pathlen); if (pathlen < PAGE_SIZE) memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); if (error != 0) { diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index dcb61548887f..801d6d830787 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -49,11 +49,9 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i { /* The mntroot acts as the dummy root dentry for this superblock */ if (sb->s_root == NULL) { - sb->s_root = d_alloc_root(inode); - if (sb->s_root == NULL) { - iput(inode); + sb->s_root = d_make_root(inode); + if (sb->s_root == NULL) return -ENOMEM; - } ihold(inode); /* * Ensure that this dentry is invisible to d_find_alias(). diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index a701a83047d3..b7f348bb618b 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -200,6 +200,7 @@ static int nfs_idmap_init_keyring(void) if (ret < 0) goto failed_put_key; + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; id_resolver_cache = cred; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b76dd0efae75..e809d2305ebf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -196,7 +196,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent * when talking to the server, we always send cookie 0 * instead of 1 or 2. */ - start = p = kmap_atomic(*readdir->pages, KM_USER0); + start = p = kmap_atomic(*readdir->pages); if (cookie == 0) { *p++ = xdr_one; /* next */ @@ -224,7 +224,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent readdir->pgbase = (char *)p - (char *)start; readdir->count -= readdir->pgbase; - kunmap_atomic(start, KM_USER0); + kunmap_atomic(start); } static int nfs4_wait_clnt_recover(struct nfs_client *clp) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index ce7f0758d84c..9559ce468732 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -72,7 +72,7 @@ int nfsd_fault_inject_init(void) { unsigned int i; struct nfsd_fault_inject_op *op; - mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; debug_dir = debugfs_create_dir("nfsd", NULL); if (!debug_dir) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index edf6d3ed8777..e59f71d0cf73 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1541,30 +1541,31 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) { - struct dentry *dentry; struct inode *inode; mm_segment_t oldfs; __be32 err; int host_err; + struct path path; err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); if (err) goto out; - dentry = fhp->fh_dentry; - inode = dentry->d_inode; + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = fhp->fh_dentry; + inode = path.dentry->d_inode; err = nfserr_inval; if (!inode->i_op->readlink) goto out; - touch_atime(fhp->fh_export->ex_path.mnt, dentry); + touch_atime(&path); /* N.B. Why does this call need a get_fs()?? * Remove the set_fs and watch the fireworks:-) --okir */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = inode->i_op->readlink(dentry, buf, *lenp); + host_err = inode->i_op->readlink(path.dentry, buf, *lenp); set_fs(oldfs); if (host_err < 0) diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index c9b342c8b503..dab5c4c6dfaf 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -218,11 +218,11 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, kaddr, 1); mark_buffer_dirty(cp_bh); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, 1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); } @@ -313,7 +313,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; } - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint( cpfile, cno, cp_bh, kaddr); nicps = 0; @@ -334,7 +334,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cp_bh, kaddr, nicps); if (count == 0) { /* make hole */ - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(cp_bh); ret = nilfs_cpfile_delete_checkpoint_block( @@ -349,18 +349,18 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, } } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(cp_bh); } if (tnicps > 0) { - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } brelse(header_bh); @@ -408,7 +408,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, continue; /* skip hole */ } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { if (!nilfs_checkpoint_invalid(cp)) { @@ -418,7 +418,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, n++; } } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); } @@ -451,10 +451,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); if (curr == 0) { ret = 0; @@ -472,7 +472,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = 0; /* No snapshots (started from a hole block) */ goto out; } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); while (n < nci) { cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); curr = ~(__u64)0; /* Terminator */ @@ -488,7 +488,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); if (curr_blkoff != next_blkoff) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &bh); @@ -496,12 +496,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, WARN_ON(ret == -ENOENT); goto out; } - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); } curr = next; curr_blkoff = next_blkoff; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); *cnop = curr; ret = n; @@ -592,24 +592,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } if (nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) goto out_cp; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); list = &header->ch_snapshot_list; curr_bh = header_bh; @@ -621,13 +621,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); curr = prev; if (curr_blkoff != prev_blkoff) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(curr_bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &curr_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); + kaddr = kmap_atomic(curr_bh->b_page); } curr_blkoff = prev_blkoff; cp = nilfs_cpfile_block_get_checkpoint( @@ -635,7 +635,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) list = &cp->cp_snapshot_list; prev = le64_to_cpu(list->ssl_prev); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, @@ -647,29 +647,29 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) get_bh(prev_bh); } - kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); + kaddr = kmap_atomic(curr_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, curr, curr_bh, kaddr); list->ssl_prev = cpu_to_le64(cno); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); nilfs_checkpoint_set_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); + kaddr = kmap_atomic(prev_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, prev, prev_bh, kaddr); list->ssl_next = cpu_to_le64(cno); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_nsnapshots, 1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(prev_bh); mark_buffer_dirty(curr_bh); @@ -710,23 +710,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } if (!nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); goto out_cp; } list = &cp->cp_snapshot_list; next = le64_to_cpu(list->ssl_next); prev = le64_to_cpu(list->ssl_prev); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) @@ -750,29 +750,29 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) get_bh(prev_bh); } - kaddr = kmap_atomic(next_bh->b_page, KM_USER0); + kaddr = kmap_atomic(next_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, next, next_bh, kaddr); list->ssl_prev = cpu_to_le64(prev); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); + kaddr = kmap_atomic(prev_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, prev, prev_bh, kaddr); list->ssl_next = cpu_to_le64(next); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + kaddr = kmap_atomic(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); nilfs_checkpoint_clear_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_nsnapshots, -1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(next_bh); mark_buffer_dirty(prev_bh); @@ -829,13 +829,13 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); if (nilfs_checkpoint_invalid(cp)) ret = -ENOENT; else ret = nilfs_checkpoint_snapshot(cp); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); out: @@ -912,12 +912,12 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); cpstat->cs_cno = nilfs_mdt_cno(cpfile); cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh); out_sem: diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index fcc2f869af16..b5c13f3576b9 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -85,13 +85,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MAX); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_palloc_commit_alloc_entry(dat, req); nilfs_dat_commit_entry(dat, req); @@ -109,13 +109,13 @@ static void nilfs_dat_commit_free(struct inode *dat, struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MIN); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); nilfs_palloc_commit_free_entry(dat, req); @@ -136,12 +136,12 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); } @@ -160,12 +160,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) return ret; } - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (blocknr == 0) { ret = nilfs_palloc_prepare_free_entry(dat, req); @@ -186,7 +186,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, sector_t blocknr; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); end = start = le64_to_cpu(entry->de_start); @@ -196,7 +196,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (blocknr == 0) nilfs_dat_commit_free(dat, req); @@ -211,12 +211,12 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) sector_t blocknr; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (start == nilfs_mdt_cno(dat) && blocknr == 0) nilfs_palloc_abort_free_entry(dat, req); @@ -346,20 +346,20 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) } } - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, (unsigned long long)vblocknr, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end)); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); return -EINVAL; } WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(entry_bh); nilfs_mdt_mark_dirty(dat); @@ -409,7 +409,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) } } - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); blocknr = le64_to_cpu(entry->de_blocknr); if (blocknr == 0) { @@ -419,7 +419,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) *blocknrp = blocknr; out: - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); return ret; } @@ -440,7 +440,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, 0, &entry_bh); if (ret < 0) return ret; - kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(entry_bh->b_page); /* last virtual block number in this block */ first = vinfo->vi_vblocknr; do_div(first, entries_per_block); @@ -456,7 +456,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, vinfo->vi_end = le64_to_cpu(entry->de_end); vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(entry_bh); } diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index ca35b3a46d17..df1a7fb238d1 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -602,7 +602,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) unlock_page(page); goto fail; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, 0, chunk_size); de = (struct nilfs_dir_entry *)kaddr; de->name_len = 1; @@ -617,7 +617,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); nilfs_set_de_type(de, inode); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_commit_chunk(page, mapping, 0, chunk_size); fail: page_cache_release(page); diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 684d76300a80..5a48df79d674 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -122,11 +122,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) return ret; } - kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0); + kaddr = kmap_atomic(req.pr_entry_bh->b_page); raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, req.pr_entry_bh, kaddr); raw_inode->i_flags = 0; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 886649627c3d..2a70fce70c65 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -603,6 +603,8 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; + if (nsegs > UINT_MAX / sizeof(__u64)) + goto out; /* * argv[4] points to segment numbers this ioctl cleans. We diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 800e8d78a83b..f9897d09c693 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -58,12 +58,12 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_buffer_uptodate(bh); mark_buffer_dirty(bh); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 1cd3f624dffc..fce2bbee66d4 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -193,9 +193,6 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, struct nilfs_transaction_info ti; int err; - if (inode->i_nlink >= NILFS_LINK_MAX) - return -EMLINK; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; @@ -219,9 +216,6 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct nilfs_transaction_info ti; int err; - if (dir->i_nlink >= NILFS_LINK_MAX) - return -EMLINK; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; @@ -400,11 +394,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, drop_nlink(new_inode); nilfs_mark_inode_dirty(new_inode); } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= NILFS_LINK_MAX) - goto out_dir; - } err = nilfs_add_link(new_dentry, old_inode); if (err) goto out_dir; diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 65221a04c6f0..3e7b2a0dc0c8 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -119,11 +119,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) struct page *spage = sbh->b_page, *dpage = dbh->b_page; struct buffer_head *bh; - kaddr0 = kmap_atomic(spage, KM_USER0); - kaddr1 = kmap_atomic(dpage, KM_USER1); + kaddr0 = kmap_atomic(spage); + kaddr1 = kmap_atomic(dpage); memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); - kunmap_atomic(kaddr1, KM_USER1); - kunmap_atomic(kaddr0, KM_USER0); + kunmap_atomic(kaddr1); + kunmap_atomic(kaddr0); dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; dbh->b_blocknr = sbh->b_blocknr; diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index a604ac0331b2..f1626f5011c5 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -493,9 +493,9 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, if (unlikely(!bh_org)) return -EIO; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(bh_org); return 0; } diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 850a7c0228fb..dc9a913784ab 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -227,9 +227,9 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, crc = crc32_le(crc, bh->b_data, bh->b_size); } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } raw_sum->ss_datasum = cpu_to_le32(crc); } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 0a0aba617d8a..c5b7653a4391 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -111,11 +111,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, struct nilfs_sufile_header *header; void *kaddr; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(header_bh); } @@ -319,11 +319,11 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); ncleansegs = le64_to_cpu(header->sh_ncleansegs); last_alloc = le64_to_cpu(header->sh_last_alloc); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nsegments = nilfs_sufile_get_nsegments(sufile); maxsegnum = sui->allocmax; @@ -356,7 +356,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) &su_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); @@ -367,14 +367,14 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) continue; /* found a clean segment */ nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); sui->ncleansegs--; mark_buffer_dirty(header_bh); @@ -385,7 +385,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) goto out_header; } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(su_bh); } @@ -407,16 +407,16 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct nilfs_segment_usage *su; void *kaddr; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { printk(KERN_WARNING "%s: segment %llu must be clean\n", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_sufile_mod_counter(header_bh, -1, 1); NILFS_SUI(sufile)->ncleansegs--; @@ -433,11 +433,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, void *kaddr; int clean, dirty; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && su->su_nblocks == cpu_to_le32(0)) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } clean = nilfs_segment_usage_clean(su); @@ -447,7 +447,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; @@ -464,12 +464,12 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, void *kaddr; int sudirty; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_clean(su)) { printk(KERN_WARNING "%s: segment %llu is already clean\n", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } WARN_ON(nilfs_segment_usage_error(su)); @@ -477,7 +477,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, sudirty = nilfs_segment_usage_dirty(su); nilfs_segment_usage_set_clean(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); @@ -525,13 +525,13 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); WARN_ON(nilfs_segment_usage_error(su)); if (modtime) su->su_lastmod = cpu_to_le64(modtime); su->su_nblocks = cpu_to_le32(nblocks); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); @@ -572,7 +572,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); @@ -582,7 +582,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(header_bh); out_sem: @@ -598,15 +598,15 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, void *kaddr; int suclean; - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_error(su)) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); @@ -675,7 +675,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, /* hole */ continue; } - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); su2 = su; @@ -684,7 +684,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) || nilfs_segment_is_active(nilfs, segnum + j)) { ret = -EBUSY; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(su_bh); goto out_header; } @@ -696,7 +696,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, nc++; } } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (nc > 0) { mark_buffer_dirty(su_bh); ncleaned += nc; @@ -772,10 +772,10 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) sui->ncleansegs -= nsegs - newnsegs; } - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); @@ -840,7 +840,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, continue; } - kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); for (j = 0; j < n; @@ -853,7 +853,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, si->sui_flags |= (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(su_bh); } ret = nsegs; @@ -902,10 +902,10 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, goto failed; sui = NILFS_SUI(sufile); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); brelse(header_bh); sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 08e3d4f9df18..1099a76cee59 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -917,9 +917,8 @@ static int nilfs_get_root_dentry(struct super_block *sb, if (root->cno == NILFS_CPTREE_CURRENT_CNO) { dentry = d_find_alias(inode); if (!dentry) { - dentry = d_alloc_root(inode); + dentry = d_make_root(inode); if (!dentry) { - iput(inode); ret = -ENOMEM; goto failed_dentry; } @@ -1059,6 +1058,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; + sb->s_max_links = NILFS_LINK_MAX; bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; sb->s_bdi = bdi ? : &default_backing_dev_info; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index d32714094375..501b7f8b739f 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -409,6 +409,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); nilfs->ns_r_segments_percentage = le32_to_cpu(sbp->s_r_segments_percentage); + if (nilfs->ns_r_segments_percentage < 1 || + nilfs->ns_r_segments_percentage > 99) { + printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n"); + return -EINVAL; + } + nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); return 0; @@ -515,6 +521,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, brelse(sbh[1]); sbh[1] = NULL; sbp[1] = NULL; + valid[1] = 0; swp = 0; } if (!valid[swp]) { diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 0b1e885b8cf8..fa9c05f97af4 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -94,11 +94,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) if (file_ofs < init_size) ofs = init_size - file_ofs; local_irq_save(flags); - kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + kaddr = kmap_atomic(page); memset(kaddr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); + kunmap_atomic(kaddr); local_irq_restore(flags); } } else { @@ -147,11 +147,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) /* Should have been verified before we got here... */ BUG_ON(!recs); local_irq_save(flags); - kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + kaddr = kmap_atomic(page); for (i = 0; i < recs; i++) post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); - kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); + kunmap_atomic(kaddr); local_irq_restore(flags); flush_dcache_page(page); if (likely(page_uptodate && !PageError(page))) @@ -504,7 +504,7 @@ retry_readpage: /* Race with shrinking truncate. */ attr_len = i_size; } - addr = kmap_atomic(page, KM_USER0); + addr = kmap_atomic(page); /* Copy the data to the page. */ memcpy(addr, (u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), @@ -512,7 +512,7 @@ retry_readpage: /* Zero the remainder of the page. */ memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); flush_dcache_page(page); - kunmap_atomic(addr, KM_USER0); + kunmap_atomic(addr); put_unm_err_out: ntfs_attr_put_search_ctx(ctx); unm_err_out: @@ -746,14 +746,14 @@ lock_retry_remap: unsigned long *bpos, *bend; /* Check if the buffer is zero. */ - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); bpos = (unsigned long *)(kaddr + bh_offset(bh)); bend = (unsigned long *)((u8*)bpos + blocksize); do { if (unlikely(*bpos)) break; } while (likely(++bpos < bend)); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (bpos == bend) { /* * Buffer is zero and sparse, no need to write @@ -1495,14 +1495,14 @@ retry_writepage: /* Shrinking cannot fail. */ BUG_ON(err); } - addr = kmap_atomic(page, KM_USER0); + addr = kmap_atomic(page); /* Copy the data from the page to the mft record. */ memcpy((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), addr, attr_len); /* Zero out of bounds area in the page cache page. */ memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); - kunmap_atomic(addr, KM_USER0); + kunmap_atomic(addr); flush_dcache_page(page); flush_dcache_mft_record_page(ctx->ntfs_ino); /* We are done with the page. */ diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index f14fde2b03d6..a27e3fecefaf 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1,7 +1,7 @@ /** * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2007 Anton Altaparmakov + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -345,10 +345,10 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, unsigned long flags; bool is_retry = false; + BUG_ON(!ni); ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", ni->mft_no, (unsigned long long)vcn, write_locked ? "write" : "read"); - BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); if (!ni->runlist.rl) { @@ -469,9 +469,9 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, int err = 0; bool is_retry = false; + BUG_ON(!ni); ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.", ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out"); - BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); if (!ni->runlist.rl) { @@ -1656,12 +1656,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) attr_size = le32_to_cpu(a->data.resident.value_length); BUG_ON(attr_size != data_size); if (page && !PageUptodate(page)) { - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr, (u8*)a + le16_to_cpu(a->data.resident.value_offset), attr_size); memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); flush_dcache_page(page); SetPageUptodate(page); } @@ -1806,9 +1806,9 @@ undo_err_out: sizeof(a->data.resident.reserved)); /* Copy the data from the page back to the attribute value. */ if (page) { - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy((u8*)a + mp_ofs, kaddr, attr_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } /* Setup the allocated size in the ntfs inode in case it changed. */ write_lock_irqsave(&ni->size_lock, flags); @@ -2540,10 +2540,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) size = PAGE_CACHE_SIZE; if (idx == end) size = end_ofs; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr + start_ofs, val, size - start_ofs); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_page_dirty(page); page_cache_release(page); balance_dirty_pages_ratelimited(mapping); @@ -2561,10 +2561,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) "page (index 0x%lx).", idx); return -ENOMEM; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, val, PAGE_CACHE_SIZE); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); /* * If the page has buffers, mark them uptodate since buffer * state and not page state is definitive in 2.6 kernels. @@ -2598,10 +2598,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) "(error, index 0x%lx).", idx); return PTR_ERR(page); } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, val, end_ofs); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_page_dirty(page); page_cache_release(page); balance_dirty_pages_ratelimited(mapping); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index c587e2d27183..8639169221c7 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -704,7 +704,7 @@ map_buffer_cached: u8 *kaddr; unsigned pofs; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); if (bh_pos < pos) { pofs = bh_pos & ~PAGE_CACHE_MASK; memset(kaddr + pofs, 0, pos - bh_pos); @@ -713,7 +713,7 @@ map_buffer_cached: pofs = end & ~PAGE_CACHE_MASK; memset(kaddr + pofs, 0, bh_end - end); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); flush_dcache_page(page); } continue; @@ -1287,9 +1287,9 @@ static inline size_t ntfs_copy_from_user(struct page **pages, len = PAGE_CACHE_SIZE - ofs; if (len > bytes) len = bytes; - addr = kmap_atomic(*pages, KM_USER0); + addr = kmap_atomic(*pages); left = __copy_from_user_inatomic(addr + ofs, buf, len); - kunmap_atomic(addr, KM_USER0); + kunmap_atomic(addr); if (unlikely(left)) { /* Do it the slow way. */ addr = kmap(*pages); @@ -1401,10 +1401,10 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages, len = PAGE_CACHE_SIZE - ofs; if (len > bytes) len = bytes; - addr = kmap_atomic(*pages, KM_USER0); + addr = kmap_atomic(*pages); copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, *iov, *iov_ofs, len); - kunmap_atomic(addr, KM_USER0); + kunmap_atomic(addr); if (unlikely(copied != len)) { /* Do it the slow way. */ addr = kmap(*pages); @@ -1691,7 +1691,7 @@ static int ntfs_commit_pages_after_write(struct page **pages, BUG_ON(end > le32_to_cpu(a->length) - le16_to_cpu(a->data.resident.value_offset)); kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); /* Copy the received data from the page to the mft record. */ memcpy(kattr + pos, kaddr + pos, bytes); /* Update the attribute length if necessary. */ @@ -1713,7 +1713,7 @@ static int ntfs_commit_pages_after_write(struct page **pages, flush_dcache_page(page); SetPageUptodate(page); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); /* Update initialized_size/i_size if necessary. */ read_lock_irqsave(&ni->size_lock, flags); initialized_size = ni->initialized_size; diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index faece7190866..809c0e6d8e09 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -2008,14 +2008,14 @@ typedef struct { * * When a directory is small enough to fit inside the index root then this * is the only attribute describing the directory. When the directory is too - * large to fit in the index root, on the other hand, two aditional attributes + * large to fit in the index root, on the other hand, two additional attributes * are present: an index allocation attribute, containing sub-nodes of the B+ * directory tree (see below), and a bitmap attribute, describing which virtual * cluster numbers (vcns) in the index allocation attribute are in use by an * index block. * * NOTE: The root directory (FILE_root) contains an entry for itself. Other - * dircetories do not contain entries for themselves, though. + * directories do not contain entries for themselves, though. */ typedef struct { ATTR_TYPE type; /* Type of the indexed attribute. Is diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 382857f9c7db..3014a36a255b 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1,7 +1,7 @@ /** * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -1367,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) ntfs_error(vol->sb, "Failed to merge runlists for mft " "bitmap."); if (ntfs_cluster_free_from_rl(vol, rl2)) { - ntfs_error(vol->sb, "Failed to dealocate " + ntfs_error(vol->sb, "Failed to deallocate " "allocated cluster.%s", es); NVolSetErrors(vol); } @@ -1805,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) ntfs_error(vol->sb, "Failed to merge runlists for mft data " "attribute."); if (ntfs_cluster_free_from_rl(vol, rl2)) { - ntfs_error(vol->sb, "Failed to dealocate clusters " + ntfs_error(vol->sb, "Failed to deallocate clusters " "from the mft data attribute.%s", es); NVolSetErrors(vol); } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 5a4a8af5c406..b341492542ca 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1,7 +1,7 @@ /* * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2001,2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -1239,7 +1239,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol) { MFT_REF mref; struct inode *vi; - ntfs_inode *ni; struct page *page; u32 *kaddr, *kend; ntfs_name *name = NULL; @@ -1290,7 +1289,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol) "is not the system volume.", i_size_read(vi)); goto iput_out; } - ni = NTFS_I(vi); page = ntfs_map_page(vi->i_mapping, 0); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read from hiberfil.sys."); @@ -2475,7 +2473,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) nr_free -= PAGE_CACHE_SIZE * 8; continue; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); /* * Subtract the number of set bits. If this * is the last page and it is partial we don't really care as @@ -2485,7 +2483,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) */ nr_free -= bitmap_weight(kaddr, PAGE_CACHE_SIZE * BITS_PER_BYTE); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); page_cache_release(page); } ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); @@ -2546,7 +2544,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, nr_free -= PAGE_CACHE_SIZE * 8; continue; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); /* * Subtract the number of set bits. If this * is the last page and it is partial we don't really care as @@ -2556,7 +2554,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, */ nr_free -= bitmap_weight(kaddr, PAGE_CACHE_SIZE * BITS_PER_BYTE); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); page_cache_release(page); } ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", @@ -2910,9 +2908,10 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_error(sb, "Failed to load system files."); goto unl_upcase_iput_tmp_ino_err_out_now; } - if ((sb->s_root = d_alloc_root(vol->root_ino))) { - /* We grab a reference, simulating an ntfs_iget(). */ - ihold(vol->root_ino); + + /* We grab a reference, simulating an ntfs_iget(). */ + ihold(vol->root_ino); + if ((sb->s_root = d_make_root(vol->root_ino))) { ntfs_debug("Exiting, status successful."); /* Release the default upcase if it has no users. */ mutex_lock(&ntfs_lock); @@ -3160,6 +3159,8 @@ static int __init init_ntfs_fs(void) } printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); + /* Unregister the ntfs sysctls. */ + ntfs_sysctl(0); sysctl_err_out: kmem_cache_destroy(ntfs_big_inode_cache); big_inode_err_out: diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 78b68af3b0e3..657743254eb9 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -102,7 +102,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, * copy, the data is still good. */ if (buffer_jbd(buffer_cache_bh) && ocfs2_inode_is_new(inode)) { - kaddr = kmap_atomic(bh_result->b_page, KM_USER0); + kaddr = kmap_atomic(bh_result->b_page); if (!kaddr) { mlog(ML_ERROR, "couldn't kmap!\n"); goto bail; @@ -110,7 +110,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, memcpy(kaddr + (bh_result->b_size * iblock), buffer_cache_bh->b_data, bh_result->b_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_buffer_uptodate(bh_result); } brelse(buffer_cache_bh); @@ -236,13 +236,13 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, return -EROFS; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); if (size) memcpy(kaddr, di->id2.i_data.id_data, size); /* Clear the remaining part of the page */ memset(kaddr + size, 0, PAGE_CACHE_SIZE - size); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); SetPageUptodate(page); @@ -689,7 +689,7 @@ static void ocfs2_clear_page_regions(struct page *page, ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); if (from || to) { if (from > cluster_start) @@ -700,7 +700,7 @@ static void ocfs2_clear_page_regions(struct page *page, memset(kaddr + cluster_start, 0, cluster_end - cluster_start); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } /* @@ -1981,9 +1981,9 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos, } } - kaddr = kmap_atomic(wc->w_target_page, KM_USER0); + kaddr = kmap_atomic(wc->w_target_page); memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); trace_ocfs2_write_end_inline( (unsigned long long)OCFS2_I(inode)->ip_blkno, diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index abfac0d7ae9c..3b5825ef3193 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -582,24 +582,14 @@ static int dlmfs_fill_super(struct super_block * sb, void * data, int silent) { - struct inode * inode; - struct dentry * root; - sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = DLMFS_MAGIC; sb->s_op = &dlmfs_ops; - inode = dlmfs_get_root_inode(sb); - if (!inode) - return -ENOMEM; - - root = d_alloc_root(inode); - if (!root) { - iput(inode); + sb->s_root = d_make_root(dlmfs_get_root_inode(sb)); + if (!sb->s_root) return -ENOMEM; - } - sb->s_root = root; return 0; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index be244692550d..a9856e3eaaf0 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1053,7 +1053,7 @@ static int ocfs2_rename(struct inode *old_dir, handle_t *handle = NULL; struct buffer_head *old_dir_bh = NULL; struct buffer_head *new_dir_bh = NULL; - nlink_t old_dir_nlink = old_dir->i_nlink; + u32 old_dir_nlink = old_dir->i_nlink; struct ocfs2_dinode *old_di; struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, }; struct ocfs2_dir_lookup_result target_lookup_res = { NULL, }; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 604e12c4e979..68f4541c2db9 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1154,19 +1154,19 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } status = ocfs2_mount_volume(sb); - if (osb->root_inode) - inode = igrab(osb->root_inode); - if (status < 0) goto read_super_error; + if (osb->root_inode) + inode = igrab(osb->root_inode); + if (!inode) { status = -EIO; mlog_errno(status); goto read_super_error; } - root = d_alloc_root(inode); + root = d_make_root(inode); if (!root) { status = -ENOMEM; mlog_errno(status); @@ -1220,9 +1220,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) read_super_error: brelse(bh); - if (inode) - iput(inode); - if (osb) { atomic_set(&osb->vol_state, VOLUME_DISABLED); wake_up(&osb->osb_mount_event); @@ -1627,21 +1624,17 @@ static int __init ocfs2_init(void) init_waitqueue_head(&ocfs2__ioend_wq[i]); status = init_ocfs2_uptodate_cache(); - if (status < 0) { - mlog_errno(status); - goto leave; - } + if (status < 0) + goto out1; status = ocfs2_initialize_mem_caches(); - if (status < 0) { - mlog_errno(status); - goto leave; - } + if (status < 0) + goto out2; ocfs2_wq = create_singlethread_workqueue("ocfs2_wq"); if (!ocfs2_wq) { status = -ENOMEM; - goto leave; + goto out3; } ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); @@ -1653,17 +1646,23 @@ static int __init ocfs2_init(void) ocfs2_set_locking_protocol(); status = register_quota_format(&ocfs2_quota_format); -leave: - if (status < 0) { - ocfs2_free_mem_caches(); - exit_ocfs2_uptodate_cache(); - mlog_errno(status); - } + if (status < 0) + goto out4; + status = register_filesystem(&ocfs2_fs_type); + if (!status) + return 0; - if (status >= 0) { - return register_filesystem(&ocfs2_fs_type); - } else - return -1; + unregister_quota_format(&ocfs2_quota_format); +out4: + destroy_workqueue(ocfs2_wq); + debugfs_remove(ocfs2_debugfs_root); +out3: + ocfs2_free_mem_caches(); +out2: + exit_ocfs2_uptodate_cache(); +out1: + mlog_errno(status); + return status; } static void __exit ocfs2_exit(void) diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 6065bb0ba207..dbc842222589 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -539,11 +539,9 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) goto out_brelse_bh2; } - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); + sb->s_root = d_make_root(root); + if (!sb->s_root) goto out_brelse_bh2; - } printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); ret = 0; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index a88c03bc749d..bc49c975d501 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -408,13 +408,12 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) oi->type = op_inode_node; oi->u.node = of_find_node_by_path("/"); - s->s_root = d_alloc_root(root_inode); + s->s_root = d_make_root(root_inode); if (!s->s_root) goto out_no_root_dentry; return 0; out_no_root_dentry: - iput(root_inode); ret = -ENOMEM; out_no_root: printk("openprom_fill_super: get root inode failed\n"); diff --git a/fs/pipe.c b/fs/pipe.c index a932ced92a16..fe0502f9beb2 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -230,7 +230,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe, { if (atomic) { buf->flags |= PIPE_BUF_FLAG_ATOMIC; - return kmap_atomic(buf->page, KM_USER0); + return kmap_atomic(buf->page); } return kmap(buf->page); @@ -251,7 +251,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, { if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; - kunmap_atomic(map_data, KM_USER0); + kunmap_atomic(map_data); } else kunmap(buf->page); } @@ -565,14 +565,14 @@ redo1: iov_fault_in_pages_read(iov, chars); redo2: if (atomic) - src = kmap_atomic(page, KM_USER0); + src = kmap_atomic(page); else src = kmap(page); error = pipe_iov_copy_from_user(src, iov, chars, atomic); if (atomic) - kunmap_atomic(src, KM_USER0); + kunmap_atomic(src); else kunmap(page); diff --git a/fs/proc/base.c b/fs/proc/base.c index d4548dd49b02..3b42c1418f31 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf, if (!p) return -ESRCH; - err = nice; - err = proc_sched_autogroup_set_nice(p, &err); + err = proc_sched_autogroup_set_nice(p, nice); if (err) count = err; @@ -2990,9 +2989,9 @@ static const struct pid_entry tgid_base_stuff[] = { INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), - REG("maps", S_IRUGO, proc_maps_operations), + REG("maps", S_IRUGO, proc_pid_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, proc_numa_maps_operations), + REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), @@ -3003,7 +3002,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("mountstats", S_IRUSR, proc_mountstats_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), - REG("smaps", S_IRUGO, proc_smaps_operations), + REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY @@ -3349,9 +3348,9 @@ static const struct pid_entry tid_base_stuff[] = { INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), - REG("maps", S_IRUGO, proc_maps_operations), + REG("maps", S_IRUGO, proc_tid_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, proc_numa_maps_operations), + REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), @@ -3361,7 +3360,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("mountinfo", S_IRUGO, proc_mountinfo_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), - REG("smaps", S_IRUGO, proc_smaps_operations), + REG("smaps", S_IRUGO, proc_tid_smaps_operations), REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 84fd3235a590..8461a7b82fdb 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -486,8 +486,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) int proc_fill_super(struct super_block *s) { - struct inode * root_inode; - s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; @@ -496,19 +494,11 @@ int proc_fill_super(struct super_block *s) s->s_time_gran = 1; pde_get(&proc_root); - root_inode = proc_get_inode(s, &proc_root); - if (!root_inode) - goto out_no_root; - root_inode->i_uid = 0; - root_inode->i_gid = 0; - s->s_root = d_alloc_root(root_inode); - if (!s->s_root) - goto out_no_root; - return 0; + s->s_root = d_make_root(proc_get_inode(s, &proc_root)); + if (s->s_root) + return 0; -out_no_root: printk("proc_read_super: get root inode failed\n"); - iput(root_inode); pde_put(&proc_root); return -ENOMEM; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 292577531ad1..c44efe19798f 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -53,9 +53,12 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); -extern const struct file_operations proc_maps_operations; -extern const struct file_operations proc_numa_maps_operations; -extern const struct file_operations proc_smaps_operations; +extern const struct file_operations proc_pid_maps_operations; +extern const struct file_operations proc_tid_maps_operations; +extern const struct file_operations proc_pid_numa_maps_operations; +extern const struct file_operations proc_tid_numa_maps_operations; +extern const struct file_operations proc_pid_smaps_operations; +extern const struct file_operations proc_tid_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d245cb23dd72..e5e69aff6c69 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -513,7 +513,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) n = copy_to_user(buffer, (char *)start, tsz); /* - * We cannot distingush between fault on source + * We cannot distinguish between fault on source * and fault on destination. When this happens * we clear too and hope it will trigger the * EFAULT again. diff --git a/fs/proc/page.c b/fs/proc/page.c index 6d8e6a9e93ab..7fcd0d60a968 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -115,6 +115,8 @@ u64 stable_page_flags(struct page *page) u |= 1 << KPF_COMPOUND_TAIL; if (PageHuge(page)) u |= 1 << KPF_HUGE; + else if (PageTransCompound(page)) + u |= 1 << KPF_THP; /* * Caveats on high order pages: page->_count will only be set diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a6b62173d4c3..67bbf6e4e197 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -6,7 +6,9 @@ #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/security.h> +#include <linux/sched.h> #include <linux/namei.h> +#include <linux/mm.h> #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7dcd2a250495..9694cc283511 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -209,16 +209,20 @@ static int do_maps_open(struct inode *inode, struct file *file, return ret; } -static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) +static void +show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; + struct proc_maps_private *priv = m->private; + struct task_struct *task = priv->task; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; int len; + const char *name = NULL; if (file) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; @@ -252,36 +256,57 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) if (file) { pad_len_spaces(m, len); seq_path(m, &file->f_path, "\n"); - } else { - const char *name = arch_vma_name(vma); - if (!name) { - if (mm) { - if (vma->vm_start <= mm->brk && - vma->vm_end >= mm->start_brk) { - name = "[heap]"; - } else if (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack) { - name = "[stack]"; - } + goto done; + } + + name = arch_vma_name(vma); + if (!name) { + pid_t tid; + + if (!mm) { + name = "[vdso]"; + goto done; + } + + if (vma->vm_start <= mm->brk && + vma->vm_end >= mm->start_brk) { + name = "[heap]"; + goto done; + } + + tid = vm_is_stack(task, vma, is_pid); + + if (tid != 0) { + /* + * Thread stack in /proc/PID/task/TID/maps or + * the main process stack. + */ + if (!is_pid || (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack)) { + name = "[stack]"; } else { - name = "[vdso]"; + /* Thread stack in /proc/PID/maps */ + pad_len_spaces(m, len); + seq_printf(m, "[stack:%d]", tid); } } - if (name) { - pad_len_spaces(m, len); - seq_puts(m, name); - } + } + +done: + if (name) { + pad_len_spaces(m, len); + seq_puts(m, name); } seq_putc(m, '\n'); } -static int show_map(struct seq_file *m, void *v) +static int show_map(struct seq_file *m, void *v, int is_pid) { struct vm_area_struct *vma = v; struct proc_maps_private *priv = m->private; struct task_struct *task = priv->task; - show_map_vma(m, vma); + show_map_vma(m, vma, is_pid); if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task->mm)) @@ -289,20 +314,49 @@ static int show_map(struct seq_file *m, void *v) return 0; } +static int show_pid_map(struct seq_file *m, void *v) +{ + return show_map(m, v, 1); +} + +static int show_tid_map(struct seq_file *m, void *v) +{ + return show_map(m, v, 0); +} + static const struct seq_operations proc_pid_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, - .show = show_map + .show = show_pid_map }; -static int maps_open(struct inode *inode, struct file *file) +static const struct seq_operations proc_tid_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_tid_map +}; + +static int pid_maps_open(struct inode *inode, struct file *file) { return do_maps_open(inode, file, &proc_pid_maps_op); } -const struct file_operations proc_maps_operations = { - .open = maps_open, +static int tid_maps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_tid_maps_op); +} + +const struct file_operations proc_pid_maps_operations = { + .open = pid_maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +const struct file_operations proc_tid_maps_operations = { + .open = tid_maps_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, @@ -394,21 +448,15 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; spinlock_t *ptl; - spin_lock(&walk->mm->page_table_lock); - if (pmd_trans_huge(*pmd)) { - if (pmd_trans_splitting(*pmd)) { - spin_unlock(&walk->mm->page_table_lock); - wait_split_huge_page(vma->anon_vma, pmd); - } else { - smaps_pte_entry(*(pte_t *)pmd, addr, - HPAGE_PMD_SIZE, walk); - spin_unlock(&walk->mm->page_table_lock); - mss->anonymous_thp += HPAGE_PMD_SIZE; - return 0; - } - } else { + if (pmd_trans_huge_lock(pmd, vma) == 1) { + smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); spin_unlock(&walk->mm->page_table_lock); + mss->anonymous_thp += HPAGE_PMD_SIZE; + return 0; } + + if (pmd_trans_unstable(pmd)) + return 0; /* * The mmap_sem held all the way back in m_start() is what * keeps khugepaged out of here and from collapsing things @@ -422,7 +470,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return 0; } -static int show_smap(struct seq_file *m, void *v) +static int show_smap(struct seq_file *m, void *v, int is_pid) { struct proc_maps_private *priv = m->private; struct task_struct *task = priv->task; @@ -440,7 +488,7 @@ static int show_smap(struct seq_file *m, void *v) if (vma->vm_mm && !is_vm_hugetlb_page(vma)) walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); - show_map_vma(m, vma); + show_map_vma(m, vma, is_pid); seq_printf(m, "Size: %8lu kB\n" @@ -479,20 +527,49 @@ static int show_smap(struct seq_file *m, void *v) return 0; } +static int show_pid_smap(struct seq_file *m, void *v) +{ + return show_smap(m, v, 1); +} + +static int show_tid_smap(struct seq_file *m, void *v) +{ + return show_smap(m, v, 0); +} + static const struct seq_operations proc_pid_smaps_op = { .start = m_start, .next = m_next, .stop = m_stop, - .show = show_smap + .show = show_pid_smap +}; + +static const struct seq_operations proc_tid_smaps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_tid_smap }; -static int smaps_open(struct inode *inode, struct file *file) +static int pid_smaps_open(struct inode *inode, struct file *file) { return do_maps_open(inode, file, &proc_pid_smaps_op); } -const struct file_operations proc_smaps_operations = { - .open = smaps_open, +static int tid_smaps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_tid_smaps_op); +} + +const struct file_operations proc_pid_smaps_operations = { + .open = pid_smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +const struct file_operations proc_tid_smaps_operations = { + .open = tid_smaps_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, @@ -507,6 +584,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, struct page *page; split_huge_page_pmd(walk->mm, pmd); + if (pmd_trans_unstable(pmd)) + return 0; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { @@ -598,11 +677,18 @@ const struct file_operations proc_clear_refs_operations = { .llseek = noop_llseek, }; +typedef struct { + u64 pme; +} pagemap_entry_t; + struct pagemapread { int pos, len; - u64 *buffer; + pagemap_entry_t *buffer; }; +#define PAGEMAP_WALK_SIZE (PMD_SIZE) +#define PAGEMAP_WALK_MASK (PMD_MASK) + #define PM_ENTRY_BYTES sizeof(u64) #define PM_STATUS_BITS 3 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) @@ -620,10 +706,15 @@ struct pagemapread { #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) #define PM_END_OF_BUFFER 1 -static int add_to_pagemap(unsigned long addr, u64 pfn, +static inline pagemap_entry_t make_pme(u64 val) +{ + return (pagemap_entry_t) { .pme = val }; +} + +static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme, struct pagemapread *pm) { - pm->buffer[pm->pos++] = pfn; + pm->buffer[pm->pos++] = *pme; if (pm->pos >= pm->len) return PM_END_OF_BUFFER; return 0; @@ -635,8 +726,10 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, struct pagemapread *pm = walk->private; unsigned long addr; int err = 0; + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + for (addr = start; addr < end; addr += PAGE_SIZE) { - err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); + err = add_to_pagemap(addr, &pme, pm); if (err) break; } @@ -649,17 +742,35 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } -static u64 pte_to_pagemap_entry(pte_t pte) +static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte) { - u64 pme = 0; if (is_swap_pte(pte)) - pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; + *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte)) + | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP); else if (pte_present(pte)) - pme = PM_PFRAME(pte_pfn(pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; - return pme; + *pme = make_pme(PM_PFRAME(pte_pfn(pte)) + | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, + pmd_t pmd, int offset) +{ + /* + * Currently pmd for thp is always present because thp can not be + * swapped-out, migrated, or HWPOISONed (split in such cases instead.) + * This if-check is just to prepare for future implementation. + */ + if (pmd_present(pmd)) + *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) + | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); } +#else +static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, + pmd_t pmd, int offset) +{ +} +#endif static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) @@ -668,13 +779,30 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); - split_huge_page_pmd(walk->mm, pmd); + if (pmd_trans_unstable(pmd)) + return 0; /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); + spin_lock(&walk->mm->page_table_lock); + if (pmd_trans_huge_lock(pmd, vma) == 1) { + for (; addr != end; addr += PAGE_SIZE) { + unsigned long offset; + + offset = (addr & ~PAGEMAP_WALK_MASK) >> + PAGE_SHIFT; + thp_pmd_to_pagemap_entry(&pme, *pmd, offset); + err = add_to_pagemap(addr, &pme, pm); + if (err) + break; + } + spin_unlock(&walk->mm->page_table_lock); + return err; + } + for (; addr != end; addr += PAGE_SIZE) { - u64 pfn = PM_NOT_PRESENT; /* check to see if we've left 'vma' behind * and need a new, higher one */ @@ -686,11 +814,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (vma && (vma->vm_start <= addr) && !is_vm_hugetlb_page(vma)) { pte = pte_offset_map(pmd, addr); - pfn = pte_to_pagemap_entry(*pte); + pte_to_pagemap_entry(&pme, *pte); /* unmap before userspace copy */ pte_unmap(pte); } - err = add_to_pagemap(addr, pfn, pm); + err = add_to_pagemap(addr, &pme, pm); if (err) return err; } @@ -701,13 +829,12 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, } #ifdef CONFIG_HUGETLB_PAGE -static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) +static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, + pte_t pte, int offset) { - u64 pme = 0; if (pte_present(pte)) - pme = PM_PFRAME(pte_pfn(pte) + offset) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; - return pme; + *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) + | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); } /* This function walks within one hugetlb entry in the single call */ @@ -717,12 +844,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, { struct pagemapread *pm = walk->private; int err = 0; - u64 pfn; + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; - pfn = huge_pte_to_pagemap_entry(*pte, offset); - err = add_to_pagemap(addr, pfn, pm); + huge_pte_to_pagemap_entry(&pme, *pte, offset); + err = add_to_pagemap(addr, &pme, pm); if (err) return err; } @@ -757,8 +884,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, * determine which areas of memory are actually mapped and llseek to * skip over unmapped regions. */ -#define PAGEMAP_WALK_SIZE (PMD_SIZE) -#define PAGEMAP_WALK_MASK (PMD_MASK) static ssize_t pagemap_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -941,26 +1066,21 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, pte_t *pte; md = walk->private; - spin_lock(&walk->mm->page_table_lock); - if (pmd_trans_huge(*pmd)) { - if (pmd_trans_splitting(*pmd)) { - spin_unlock(&walk->mm->page_table_lock); - wait_split_huge_page(md->vma->anon_vma, pmd); - } else { - pte_t huge_pte = *(pte_t *)pmd; - struct page *page; - - page = can_gather_numa_stats(huge_pte, md->vma, addr); - if (page) - gather_stats(page, md, pte_dirty(huge_pte), - HPAGE_PMD_SIZE/PAGE_SIZE); - spin_unlock(&walk->mm->page_table_lock); - return 0; - } - } else { + + if (pmd_trans_huge_lock(pmd, md->vma) == 1) { + pte_t huge_pte = *(pte_t *)pmd; + struct page *page; + + page = can_gather_numa_stats(huge_pte, md->vma, addr); + if (page) + gather_stats(page, md, pte_dirty(huge_pte), + HPAGE_PMD_SIZE/PAGE_SIZE); spin_unlock(&walk->mm->page_table_lock); + return 0; } + if (pmd_trans_unstable(pmd)) + return 0; orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); do { struct page *page = can_gather_numa_stats(*pte, md->vma, addr); @@ -1002,7 +1122,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, /* * Display pages allocated per node and memory policy via /proc. */ -static int show_numa_map(struct seq_file *m, void *v) +static int show_numa_map(struct seq_file *m, void *v, int is_pid) { struct numa_maps_private *numa_priv = m->private; struct proc_maps_private *proc_priv = &numa_priv->proc_maps; @@ -1039,9 +1159,19 @@ static int show_numa_map(struct seq_file *m, void *v) seq_path(m, &file->f_path, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_printf(m, " heap"); - } else if (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack) { - seq_printf(m, " stack"); + } else { + pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid); + if (tid != 0) { + /* + * Thread stack in /proc/PID/task/TID/maps or + * the main process stack. + */ + if (!is_pid || (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack)) + seq_printf(m, " stack"); + else + seq_printf(m, " stack:%d", tid); + } } if (is_vm_hugetlb_page(vma)) @@ -1084,21 +1214,39 @@ out: return 0; } +static int show_pid_numa_map(struct seq_file *m, void *v) +{ + return show_numa_map(m, v, 1); +} + +static int show_tid_numa_map(struct seq_file *m, void *v) +{ + return show_numa_map(m, v, 0); +} + static const struct seq_operations proc_pid_numa_maps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_numa_map, + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_pid_numa_map, +}; + +static const struct seq_operations proc_tid_numa_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_tid_numa_map, }; -static int numa_maps_open(struct inode *inode, struct file *file) +static int numa_maps_open(struct inode *inode, struct file *file, + const struct seq_operations *ops) { struct numa_maps_private *priv; int ret = -ENOMEM; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv) { priv->proc_maps.pid = proc_pid(inode); - ret = seq_open(file, &proc_pid_numa_maps_op); + ret = seq_open(file, ops); if (!ret) { struct seq_file *m = file->private_data; m->private = priv; @@ -1109,8 +1257,25 @@ static int numa_maps_open(struct inode *inode, struct file *file) return ret; } -const struct file_operations proc_numa_maps_operations = { - .open = numa_maps_open, +static int pid_numa_maps_open(struct inode *inode, struct file *file) +{ + return numa_maps_open(inode, file, &proc_pid_numa_maps_op); +} + +static int tid_numa_maps_open(struct inode *inode, struct file *file) +{ + return numa_maps_open(inode, file, &proc_tid_numa_maps_op); +} + +const struct file_operations proc_pid_numa_maps_operations = { + .open = pid_numa_maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +const struct file_operations proc_tid_numa_maps_operations = { + .open = tid_numa_maps_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 980de547c070..74fe164d1b23 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -134,9 +134,11 @@ static void pad_len_spaces(struct seq_file *m, int len) /* * display a single VMA to a sequenced file */ -static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) +static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, + int is_pid) { struct mm_struct *mm = vma->vm_mm; + struct proc_maps_private *priv = m->private; unsigned long ino = 0; struct file *file; dev_t dev = 0; @@ -168,10 +170,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) pad_len_spaces(m, len); seq_path(m, &file->f_path, ""); } else if (mm) { - if (vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack) { + pid_t tid = vm_is_stack(priv->task, vma, is_pid); + + if (tid != 0) { pad_len_spaces(m, len); - seq_puts(m, "[stack]"); + /* + * Thread stack in /proc/PID/task/TID/maps or + * the main process stack. + */ + if (!is_pid || (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack)) + seq_printf(m, "[stack]"); + else + seq_printf(m, "[stack:%d]", tid); } } @@ -182,11 +193,22 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) /* * display mapping lines for a particular process's /proc/pid/maps */ -static int show_map(struct seq_file *m, void *_p) +static int show_map(struct seq_file *m, void *_p, int is_pid) { struct rb_node *p = _p; - return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); + return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb), + is_pid); +} + +static int show_pid_map(struct seq_file *m, void *_p) +{ + return show_map(m, _p, 1); +} + +static int show_tid_map(struct seq_file *m, void *_p) +{ + return show_map(m, _p, 0); } static void *m_start(struct seq_file *m, loff_t *pos) @@ -240,10 +262,18 @@ static const struct seq_operations proc_pid_maps_ops = { .start = m_start, .next = m_next, .stop = m_stop, - .show = show_map + .show = show_pid_map +}; + +static const struct seq_operations proc_tid_maps_ops = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_tid_map }; -static int maps_open(struct inode *inode, struct file *file) +static int maps_open(struct inode *inode, struct file *file, + const struct seq_operations *ops) { struct proc_maps_private *priv; int ret = -ENOMEM; @@ -251,7 +281,7 @@ static int maps_open(struct inode *inode, struct file *file) priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv) { priv->pid = proc_pid(inode); - ret = seq_open(file, &proc_pid_maps_ops); + ret = seq_open(file, ops); if (!ret) { struct seq_file *m = file->private_data; m->private = priv; @@ -262,8 +292,25 @@ static int maps_open(struct inode *inode, struct file *file) return ret; } -const struct file_operations proc_maps_operations = { - .open = maps_open, +static int pid_maps_open(struct inode *inode, struct file *file) +{ + return maps_open(inode, file, &proc_pid_maps_ops); +} + +static int tid_maps_open(struct inode *inode, struct file *file) +{ + return maps_open(inode, file, &proc_tid_maps_ops); +} + +const struct file_operations proc_pid_maps_operations = { + .open = pid_maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +const struct file_operations proc_tid_maps_operations = { + .open = tid_maps_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index b0f450a2bb7c..0d5071d29985 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -700,3 +700,26 @@ static int __init vmcore_init(void) return 0; } module_init(vmcore_init) + +/* Cleanup function for vmcore module. */ +void vmcore_cleanup(void) +{ + struct list_head *pos, *next; + + if (proc_vmcore) { + remove_proc_entry(proc_vmcore->name, proc_vmcore->parent); + proc_vmcore = NULL; + } + + /* clear the vmcore list. */ + list_for_each_safe(pos, next, &vmcore_list) { + struct vmcore *m; + + m = list_entry(pos, struct vmcore, list); + list_del(&m->list); + kfree(m); + } + kfree(elfcorebuf); + elfcorebuf = NULL; +} +EXPORT_SYMBOL_GPL(vmcore_cleanup); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index b3b426edb2fd..f37c32b94525 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -278,9 +278,7 @@ fail: int pstore_fill_super(struct super_block *sb, void *data, int silent) { - struct inode *inode = NULL; - struct dentry *root; - int err; + struct inode *inode; save_mount_options(sb, data); @@ -296,26 +294,17 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent) parse_options(data); inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0); - if (!inode) { - err = -ENOMEM; - goto fail; - } - /* override ramfs "dir" options so we catch unlink(2) */ - inode->i_op = &pstore_dir_inode_operations; - - root = d_alloc_root(inode); - sb->s_root = root; - if (!root) { - err = -ENOMEM; - goto fail; + if (inode) { + /* override ramfs "dir" options so we catch unlink(2) */ + inode->i_op = &pstore_dir_inode_operations; } + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; pstore_get_records(0); return 0; -fail: - iput(inode); - return err; } static struct dentry *pstore_mount(struct file_system_type *fs_type, diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 6b009548d2e0..552e994e3aa1 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -52,38 +52,6 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) return 0; } -static struct buffer_head *qnx4_getblk(struct inode *inode, int nr, - int create) -{ - struct buffer_head *result = NULL; - - if ( nr >= 0 ) - nr = qnx4_block_map( inode, nr ); - if (nr) { - result = sb_getblk(inode->i_sb, nr); - return result; - } - return NULL; -} - -struct buffer_head *qnx4_bread(struct inode *inode, int block, int create) -{ - struct buffer_head *bh; - - bh = qnx4_getblk(inode, block, create); - if (!bh || buffer_uptodate(bh)) { - return bh; - } - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (buffer_uptodate(bh)) { - return bh; - } - brelse(bh); - - return NULL; -} - static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create ) { unsigned long phys; @@ -98,23 +66,31 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h return 0; } +static inline u32 try_extent(qnx4_xtnt_t *extent, u32 *offset) +{ + u32 size = le32_to_cpu(extent->xtnt_size); + if (*offset < size) + return le32_to_cpu(extent->xtnt_blk) + *offset - 1; + *offset -= size; + return 0; +} + unsigned long qnx4_block_map( struct inode *inode, long iblock ) { int ix; - long offset, i_xblk; - unsigned long block = 0; + long i_xblk; struct buffer_head *bh = NULL; struct qnx4_xblk *xblk = NULL; struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); + u32 offset = iblock; + u32 block = try_extent(&qnx4_inode->di_first_xtnt, &offset); - if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) { + if (block) { // iblock is in the first extent. This is easy. - block = le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_blk) + iblock - 1; } else { // iblock is beyond first extent. We have to follow the extent chain. i_xblk = le32_to_cpu(qnx4_inode->di_xblk); - offset = iblock - le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size); ix = 0; while ( --nxtnt > 0 ) { if ( ix == 0 ) { @@ -130,12 +106,11 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock ) return -EIO; } } - if ( offset < le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size) ) { + block = try_extent(&xblk->xblk_xtnts[ix], &offset); + if (block) { // got it! - block = le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_blk) + offset - 1; break; } - offset -= le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size); if ( ++ix >= xblk->xblk_num_xtnts ) { i_xblk = le32_to_cpu(xblk->xblk_next_xblk); ix = 0; @@ -260,15 +235,13 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) } ret = -ENOMEM; - s->s_root = d_alloc_root(root); + s->s_root = d_make_root(root); if (s->s_root == NULL) - goto outi; + goto outb; brelse(bh); return 0; - outi: - iput(root); outb: kfree(qs->BitMap); out: @@ -288,44 +261,17 @@ static void qnx4_put_super(struct super_block *sb) return; } -static int qnx4_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page,qnx4_get_block, wbc); -} - static int qnx4_readpage(struct file *file, struct page *page) { return block_read_full_page(page,qnx4_get_block); } -static int qnx4_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); - int ret; - - *pagep = NULL; - ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - qnx4_get_block, - &qnx4_inode->mmu_private); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } - - return ret; -} static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,qnx4_get_block); } static const struct address_space_operations qnx4_aops = { .readpage = qnx4_readpage, - .writepage = qnx4_writepage, - .write_begin = qnx4_write_begin, - .write_end = generic_write_end, .bmap = qnx4_bmap }; diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 275327b5615e..a512c0b30e8e 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -39,10 +39,6 @@ static int qnx4_match(int len, const char *name, } else { namelen = QNX4_SHORT_NAME_MAX; } - /* "" means "." ---> so paths like "/usr/lib//libc.a" work */ - if (!len && (de->di_fname[0] == '.') && (de->di_fname[1] == '\0')) { - return 1; - } thislen = strlen( de->di_fname ); if ( thislen > namelen ) thislen = namelen; @@ -72,7 +68,9 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir, block = offset = blkofs = 0; while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) { if (!bh) { - bh = qnx4_bread(dir, blkofs, 0); + block = qnx4_block_map(dir, blkofs); + if (block) + bh = sb_bread(dir->i_sb, block); if (!bh) { blkofs++; continue; @@ -80,7 +78,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir, } *res_dir = (struct qnx4_inode_entry *) (bh->b_data + offset); if (qnx4_match(len, name, bh, &offset)) { - block = qnx4_block_map( dir, blkofs ); *ino = block * QNX4_INODES_PER_BLOCK + (offset / QNX4_DIR_ENTRY_SIZE) - 1; return bh; diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h index 33a60858203b..244d4620189b 100644 --- a/fs/qnx4/qnx4.h +++ b/fs/qnx4/qnx4.h @@ -27,8 +27,6 @@ extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, stru extern unsigned long qnx4_count_free_blocks(struct super_block *sb); extern unsigned long qnx4_block_map(struct inode *inode, long iblock); -extern struct buffer_head *qnx4_bread(struct inode *, int, int); - extern const struct inode_operations qnx4_dir_inode_operations; extern const struct file_operations qnx4_dir_operations; extern int qnx4_is_free(struct super_block *sb, long block); diff --git a/fs/qnx6/Kconfig b/fs/qnx6/Kconfig new file mode 100644 index 000000000000..edbba5c17cc8 --- /dev/null +++ b/fs/qnx6/Kconfig @@ -0,0 +1,26 @@ +config QNX6FS_FS + tristate "QNX6 file system support (read only)" + depends on BLOCK && CRC32 + help + This is the file system used by the real-time operating systems + QNX 6 (also called QNX RTP). + Further information is available at <http://www.qnx.com/>. + Say Y if you intend to mount QNX hard disks or floppies formatted + with a mkqnx6fs. + However, keep in mind that this currently is a readonly driver! + + To compile this file system support as a module, choose M here: the + module will be called qnx6. + + If you don't know whether you need it, then you don't need it: + answer N. + +config QNX6FS_DEBUG + bool "QNX6 debugging information" + depends on QNX6FS_FS + help + Turns on extended debugging output. + + If you are not a developer working on the QNX6FS, you probably don't + want this: + answer N. diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile new file mode 100644 index 000000000000..9dd06199afc9 --- /dev/null +++ b/fs/qnx6/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the linux qnx4-filesystem routines. +# + +obj-$(CONFIG_QNX6FS_FS) += qnx6.o + +qnx6-objs := inode.o dir.o namei.o super_mmi.o diff --git a/fs/qnx6/README b/fs/qnx6/README new file mode 100644 index 000000000000..116d622026cc --- /dev/null +++ b/fs/qnx6/README @@ -0,0 +1,8 @@ + + This is a snapshot of the QNX6 filesystem for Linux. + Please send diffs and remarks to <chaosman@ontika.net> . + +Credits : + +Al Viro <viro@ZenIV.linux.org.uk> (endless patience with me & support ;)) +Kai Bankett <chaosman@ontika.net> (Maintainer) diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c new file mode 100644 index 000000000000..dc597353db3b --- /dev/null +++ b/fs/qnx6/dir.c @@ -0,0 +1,291 @@ +/* + * QNX6 file system, Linux implementation. + * + * Version : 1.0.0 + * + * History : + * + * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release. + * 16-02-2012 pagemap extension by Al Viro + * + */ + +#include "qnx6.h" + +static unsigned qnx6_lfile_checksum(char *name, unsigned size) +{ + unsigned crc = 0; + char *end = name + size; + while (name < end) { + crc = ((crc >> 1) + *(name++)) ^ + ((crc & 0x00000001) ? 0x80000000 : 0); + } + return crc; +} + +static struct page *qnx6_get_page(struct inode *dir, unsigned long n) +{ + struct address_space *mapping = dir->i_mapping; + struct page *page = read_mapping_page(mapping, n, NULL); + if (!IS_ERR(page)) + kmap(page); + return page; +} + +static inline unsigned long dir_pages(struct inode *inode) +{ + return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; +} + +static unsigned last_entry(struct inode *inode, unsigned long page_nr) +{ + unsigned long last_byte = inode->i_size; + last_byte -= page_nr << PAGE_CACHE_SHIFT; + if (last_byte > PAGE_CACHE_SIZE) + last_byte = PAGE_CACHE_SIZE; + return last_byte / QNX6_DIR_ENTRY_SIZE; +} + +static struct qnx6_long_filename *qnx6_longname(struct super_block *sb, + struct qnx6_long_dir_entry *de, + struct page **p) +{ + struct qnx6_sb_info *sbi = QNX6_SB(sb); + u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */ + u32 n = s >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* in pages */ + /* within page */ + u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_CACHE_MASK; + struct address_space *mapping = sbi->longfile->i_mapping; + struct page *page = read_mapping_page(mapping, n, NULL); + if (IS_ERR(page)) + return ERR_CAST(page); + kmap(*p = page); + return (struct qnx6_long_filename *)(page_address(page) + offs); +} + +static int qnx6_dir_longfilename(struct inode *inode, + struct qnx6_long_dir_entry *de, + void *dirent, loff_t pos, + unsigned de_inode, filldir_t filldir) +{ + struct qnx6_long_filename *lf; + struct super_block *s = inode->i_sb; + struct qnx6_sb_info *sbi = QNX6_SB(s); + struct page *page; + int lf_size; + + if (de->de_size != 0xff) { + /* error - long filename entries always have size 0xff + in direntry */ + printk(KERN_ERR "qnx6: invalid direntry size (%i).\n", + de->de_size); + return 0; + } + lf = qnx6_longname(s, de, &page); + if (IS_ERR(lf)) { + printk(KERN_ERR "qnx6:Error reading longname\n"); + return 0; + } + + lf_size = fs16_to_cpu(sbi, lf->lf_size); + + if (lf_size > QNX6_LONG_NAME_MAX) { + QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname)); + printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size); + qnx6_put_page(page); + return 0; + } + + /* calc & validate longfilename checksum + mmi 3g filesystem does not have that checksum */ + if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) != + qnx6_lfile_checksum(lf->lf_fname, lf_size)) + printk(KERN_INFO "qnx6: long filename checksum error.\n"); + + QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n", + lf_size, lf->lf_fname, de_inode)); + if (filldir(dirent, lf->lf_fname, lf_size, pos, de_inode, + DT_UNKNOWN) < 0) { + qnx6_put_page(page); + return 0; + } + + qnx6_put_page(page); + /* success */ + return 1; +} + +static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *s = inode->i_sb; + struct qnx6_sb_info *sbi = QNX6_SB(s); + loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1); + unsigned long npages = dir_pages(inode); + unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE; + bool done = false; + + if (filp->f_pos >= inode->i_size) + return 0; + + for ( ; !done && n < npages; n++, start = 0) { + struct page *page = qnx6_get_page(inode, n); + int limit = last_entry(inode, n); + struct qnx6_dir_entry *de; + int i = start; + + if (IS_ERR(page)) { + printk(KERN_ERR "qnx6_readdir: read failed\n"); + filp->f_pos = (n + 1) << PAGE_CACHE_SHIFT; + return PTR_ERR(page); + } + de = ((struct qnx6_dir_entry *)page_address(page)) + start; + for (; i < limit; i++, de++, pos += QNX6_DIR_ENTRY_SIZE) { + int size = de->de_size; + u32 no_inode = fs32_to_cpu(sbi, de->de_inode); + + if (!no_inode || !size) + continue; + + if (size > QNX6_SHORT_NAME_MAX) { + /* long filename detected + get the filename from long filename + structure / block */ + if (!qnx6_dir_longfilename(inode, + (struct qnx6_long_dir_entry *)de, + dirent, pos, no_inode, + filldir)) { + done = true; + break; + } + } else { + QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s" + " inode:%u\n", size, de->de_fname, + no_inode)); + if (filldir(dirent, de->de_fname, size, + pos, no_inode, DT_UNKNOWN) + < 0) { + done = true; + break; + } + } + } + qnx6_put_page(page); + } + filp->f_pos = pos; + return 0; +} + +/* + * check if the long filename is correct. + */ +static unsigned qnx6_long_match(int len, const char *name, + struct qnx6_long_dir_entry *de, struct inode *dir) +{ + struct super_block *s = dir->i_sb; + struct qnx6_sb_info *sbi = QNX6_SB(s); + struct page *page; + int thislen; + struct qnx6_long_filename *lf = qnx6_longname(s, de, &page); + + if (IS_ERR(lf)) + return 0; + + thislen = fs16_to_cpu(sbi, lf->lf_size); + if (len != thislen) { + qnx6_put_page(page); + return 0; + } + if (memcmp(name, lf->lf_fname, len) == 0) { + qnx6_put_page(page); + return fs32_to_cpu(sbi, de->de_inode); + } + qnx6_put_page(page); + return 0; +} + +/* + * check if the filename is correct. + */ +static unsigned qnx6_match(struct super_block *s, int len, const char *name, + struct qnx6_dir_entry *de) +{ + struct qnx6_sb_info *sbi = QNX6_SB(s); + if (memcmp(name, de->de_fname, len) == 0) + return fs32_to_cpu(sbi, de->de_inode); + return 0; +} + + +unsigned qnx6_find_entry(int len, struct inode *dir, const char *name, + struct page **res_page) +{ + struct super_block *s = dir->i_sb; + struct qnx6_inode_info *ei = QNX6_I(dir); + struct page *page = NULL; + unsigned long start, n; + unsigned long npages = dir_pages(dir); + unsigned ino; + struct qnx6_dir_entry *de; + struct qnx6_long_dir_entry *lde; + + *res_page = NULL; + + if (npages == 0) + return 0; + start = ei->i_dir_start_lookup; + if (start >= npages) + start = 0; + n = start; + + do { + page = qnx6_get_page(dir, n); + if (!IS_ERR(page)) { + int limit = last_entry(dir, n); + int i; + + de = (struct qnx6_dir_entry *)page_address(page); + for (i = 0; i < limit; i++, de++) { + if (len <= QNX6_SHORT_NAME_MAX) { + /* short filename */ + if (len != de->de_size) + continue; + ino = qnx6_match(s, len, name, de); + if (ino) + goto found; + } else if (de->de_size == 0xff) { + /* deal with long filename */ + lde = (struct qnx6_long_dir_entry *)de; + ino = qnx6_long_match(len, + name, lde, dir); + if (ino) + goto found; + } else + printk(KERN_ERR "qnx6: undefined " + "filename size in inode.\n"); + } + qnx6_put_page(page); + } + + if (++n >= npages) + n = 0; + } while (n != start); + return 0; + +found: + *res_page = page; + ei->i_dir_start_lookup = n; + return ino; +} + +const struct file_operations qnx6_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = qnx6_readdir, + .fsync = generic_file_fsync, +}; + +const struct inode_operations qnx6_dir_inode_operations = { + .lookup = qnx6_lookup, +}; diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c new file mode 100644 index 000000000000..e44012dc5645 --- /dev/null +++ b/fs/qnx6/inode.c @@ -0,0 +1,698 @@ +/* + * QNX6 file system, Linux implementation. + * + * Version : 1.0.0 + * + * History : + * + * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release. + * 16-02-2012 pagemap extension by Al Viro + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/highuid.h> +#include <linux/pagemap.h> +#include <linux/buffer_head.h> +#include <linux/writeback.h> +#include <linux/statfs.h> +#include <linux/parser.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/crc32.h> +#include <linux/mpage.h> +#include "qnx6.h" + +static const struct super_operations qnx6_sops; + +static void qnx6_put_super(struct super_block *sb); +static struct inode *qnx6_alloc_inode(struct super_block *sb); +static void qnx6_destroy_inode(struct inode *inode); +static int qnx6_remount(struct super_block *sb, int *flags, char *data); +static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf); +static int qnx6_show_options(struct seq_file *seq, struct dentry *root); + +static const struct super_operations qnx6_sops = { + .alloc_inode = qnx6_alloc_inode, + .destroy_inode = qnx6_destroy_inode, + .put_super = qnx6_put_super, + .statfs = qnx6_statfs, + .remount_fs = qnx6_remount, + .show_options = qnx6_show_options, +}; + +static int qnx6_show_options(struct seq_file *seq, struct dentry *root) +{ + struct super_block *sb = root->d_sb; + struct qnx6_sb_info *sbi = QNX6_SB(sb); + + if (sbi->s_mount_opt & QNX6_MOUNT_MMI_FS) + seq_puts(seq, ",mmi_fs"); + return 0; +} + +static int qnx6_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_RDONLY; + return 0; +} + +static unsigned qnx6_get_devblock(struct super_block *sb, __fs32 block) +{ + struct qnx6_sb_info *sbi = QNX6_SB(sb); + return fs32_to_cpu(sbi, block) + sbi->s_blks_off; +} + +static unsigned qnx6_block_map(struct inode *inode, unsigned iblock); + +static int qnx6_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + unsigned phys; + + QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n", + inode->i_ino, (unsigned long)iblock)); + + phys = qnx6_block_map(inode, iblock); + if (phys) { + /* logical block is before EOF */ + map_bh(bh, inode->i_sb, phys); + } + return 0; +} + +static int qnx6_check_blockptr(__fs32 ptr) +{ + if (ptr == ~(__fs32)0) { + printk(KERN_ERR "qnx6: hit unused blockpointer.\n"); + return 0; + } + return 1; +} + +static int qnx6_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, qnx6_get_block); +} + +static int qnx6_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, qnx6_get_block); +} + +/* + * returns the block number for the no-th element in the tree + * inodebits requred as there are multiple inodes in one inode block + */ +static unsigned qnx6_block_map(struct inode *inode, unsigned no) +{ + struct super_block *s = inode->i_sb; + struct qnx6_sb_info *sbi = QNX6_SB(s); + struct qnx6_inode_info *ei = QNX6_I(inode); + unsigned block = 0; + struct buffer_head *bh; + __fs32 ptr; + int levelptr; + int ptrbits = sbi->s_ptrbits; + int bitdelta; + u32 mask = (1 << ptrbits) - 1; + int depth = ei->di_filelevels; + int i; + + bitdelta = ptrbits * depth; + levelptr = no >> bitdelta; + + if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) { + printk(KERN_ERR "qnx6:Requested file block number (%u) too big.", + no); + return 0; + } + + block = qnx6_get_devblock(s, ei->di_block_ptr[levelptr]); + + for (i = 0; i < depth; i++) { + bh = sb_bread(s, block); + if (!bh) { + printk(KERN_ERR "qnx6:Error reading block (%u)\n", + block); + return 0; + } + bitdelta -= ptrbits; + levelptr = (no >> bitdelta) & mask; + ptr = ((__fs32 *)bh->b_data)[levelptr]; + + if (!qnx6_check_blockptr(ptr)) + return 0; + + block = qnx6_get_devblock(s, ptr); + brelse(bh); + } + return block; +} + +static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct qnx6_sb_info *sbi = QNX6_SB(sb); + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + + buf->f_type = sb->s_magic; + buf->f_bsize = sb->s_blocksize; + buf->f_blocks = fs32_to_cpu(sbi, sbi->sb->sb_num_blocks); + buf->f_bfree = fs32_to_cpu(sbi, sbi->sb->sb_free_blocks); + buf->f_files = fs32_to_cpu(sbi, sbi->sb->sb_num_inodes); + buf->f_ffree = fs32_to_cpu(sbi, sbi->sb->sb_free_inodes); + buf->f_bavail = buf->f_bfree; + buf->f_namelen = QNX6_LONG_NAME_MAX; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + + return 0; +} + +/* + * Check the root directory of the filesystem to make sure + * it really _is_ a qnx6 filesystem, and to check the size + * of the directory entry. + */ +static const char *qnx6_checkroot(struct super_block *s) +{ + static char match_root[2][3] = {".\0\0", "..\0"}; + int i, error = 0; + struct qnx6_dir_entry *dir_entry; + struct inode *root = s->s_root->d_inode; + struct address_space *mapping = root->i_mapping; + struct page *page = read_mapping_page(mapping, 0, NULL); + if (IS_ERR(page)) + return "error reading root directory"; + kmap(page); + dir_entry = page_address(page); + for (i = 0; i < 2; i++) { + /* maximum 3 bytes - due to match_root limitation */ + if (strncmp(dir_entry[i].de_fname, match_root[i], 3)) + error = 1; + } + qnx6_put_page(page); + if (error) + return "error reading root directory."; + return NULL; +} + +#ifdef CONFIG_QNX6FS_DEBUG +void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s) +{ + struct qnx6_sb_info *sbi = QNX6_SB(s); + + QNX6DEBUG((KERN_INFO "magic: %08x\n", + fs32_to_cpu(sbi, sb->sb_magic))); + QNX6DEBUG((KERN_INFO "checksum: %08x\n", + fs32_to_cpu(sbi, sb->sb_checksum))); + QNX6DEBUG((KERN_INFO "serial: %llx\n", + fs64_to_cpu(sbi, sb->sb_serial))); + QNX6DEBUG((KERN_INFO "flags: %08x\n", + fs32_to_cpu(sbi, sb->sb_flags))); + QNX6DEBUG((KERN_INFO "blocksize: %08x\n", + fs32_to_cpu(sbi, sb->sb_blocksize))); + QNX6DEBUG((KERN_INFO "num_inodes: %08x\n", + fs32_to_cpu(sbi, sb->sb_num_inodes))); + QNX6DEBUG((KERN_INFO "free_inodes: %08x\n", + fs32_to_cpu(sbi, sb->sb_free_inodes))); + QNX6DEBUG((KERN_INFO "num_blocks: %08x\n", + fs32_to_cpu(sbi, sb->sb_num_blocks))); + QNX6DEBUG((KERN_INFO "free_blocks: %08x\n", + fs32_to_cpu(sbi, sb->sb_free_blocks))); + QNX6DEBUG((KERN_INFO "inode_levels: %02x\n", + sb->Inode.levels)); +} +#endif + +enum { + Opt_mmifs, + Opt_err +}; + +static const match_table_t tokens = { + {Opt_mmifs, "mmi_fs"}, + {Opt_err, NULL} +}; + +static int qnx6_parse_options(char *options, struct super_block *sb) +{ + char *p; + struct qnx6_sb_info *sbi = QNX6_SB(sb); + substring_t args[MAX_OPT_ARGS]; + + if (!options) + return 1; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_mmifs: + set_opt(sbi->s_mount_opt, MMI_FS); + break; + default: + return 0; + } + } + return 1; +} + +static struct buffer_head *qnx6_check_first_superblock(struct super_block *s, + int offset, int silent) +{ + struct qnx6_sb_info *sbi = QNX6_SB(s); + struct buffer_head *bh; + struct qnx6_super_block *sb; + + /* Check the superblock signatures + start with the first superblock */ + bh = sb_bread(s, offset); + if (!bh) { + printk(KERN_ERR "qnx6: unable to read the first superblock\n"); + return NULL; + } + sb = (struct qnx6_super_block *)bh->b_data; + if (fs32_to_cpu(sbi, sb->sb_magic) != QNX6_SUPER_MAGIC) { + sbi->s_bytesex = BYTESEX_BE; + if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) { + /* we got a big endian fs */ + QNX6DEBUG((KERN_INFO "qnx6: fs got different" + " endianess.\n")); + return bh; + } else + sbi->s_bytesex = BYTESEX_LE; + if (!silent) { + if (offset == 0) { + printk(KERN_ERR "qnx6: wrong signature (magic)" + " in superblock #1.\n"); + } else { + printk(KERN_INFO "qnx6: wrong signature (magic)" + " at position (0x%lx) - will try" + " alternative position (0x0000).\n", + offset * s->s_blocksize); + } + } + brelse(bh); + return NULL; + } + return bh; +} + +static struct inode *qnx6_private_inode(struct super_block *s, + struct qnx6_root_node *p); + +static int qnx6_fill_super(struct super_block *s, void *data, int silent) +{ + struct buffer_head *bh1 = NULL, *bh2 = NULL; + struct qnx6_super_block *sb1 = NULL, *sb2 = NULL; + struct qnx6_sb_info *sbi; + struct inode *root; + const char *errmsg; + struct qnx6_sb_info *qs; + int ret = -EINVAL; + u64 offset; + int bootblock_offset = QNX6_BOOTBLOCK_SIZE; + + qs = kzalloc(sizeof(struct qnx6_sb_info), GFP_KERNEL); + if (!qs) + return -ENOMEM; + s->s_fs_info = qs; + + /* Superblock always is 512 Byte long */ + if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) { + printk(KERN_ERR "qnx6: unable to set blocksize\n"); + goto outnobh; + } + + /* parse the mount-options */ + if (!qnx6_parse_options((char *) data, s)) { + printk(KERN_ERR "qnx6: invalid mount options.\n"); + goto outnobh; + } + if (test_opt(s, MMI_FS)) { + sb1 = qnx6_mmi_fill_super(s, silent); + if (sb1) + goto mmi_success; + else + goto outnobh; + } + sbi = QNX6_SB(s); + sbi->s_bytesex = BYTESEX_LE; + /* Check the superblock signatures + start with the first superblock */ + bh1 = qnx6_check_first_superblock(s, + bootblock_offset / QNX6_SUPERBLOCK_SIZE, silent); + if (!bh1) { + /* try again without bootblock offset */ + bh1 = qnx6_check_first_superblock(s, 0, silent); + if (!bh1) { + printk(KERN_ERR "qnx6: unable to read the first superblock\n"); + goto outnobh; + } + /* seems that no bootblock at partition start */ + bootblock_offset = 0; + } + sb1 = (struct qnx6_super_block *)bh1->b_data; + +#ifdef CONFIG_QNX6FS_DEBUG + qnx6_superblock_debug(sb1, s); +#endif + + /* checksum check - start at byte 8 and end at byte 512 */ + if (fs32_to_cpu(sbi, sb1->sb_checksum) != + crc32_be(0, (char *)(bh1->b_data + 8), 504)) { + printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); + goto out; + } + + /* set new blocksize */ + if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) { + printk(KERN_ERR "qnx6: unable to set blocksize\n"); + goto out; + } + /* blocksize invalidates bh - pull it back in */ + brelse(bh1); + bh1 = sb_bread(s, bootblock_offset >> s->s_blocksize_bits); + if (!bh1) + goto outnobh; + sb1 = (struct qnx6_super_block *)bh1->b_data; + + /* calculate second superblock blocknumber */ + offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) + + (bootblock_offset >> s->s_blocksize_bits) + + (QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits); + + /* set bootblock offset */ + sbi->s_blks_off = (bootblock_offset >> s->s_blocksize_bits) + + (QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits); + + /* next the second superblock */ + bh2 = sb_bread(s, offset); + if (!bh2) { + printk(KERN_ERR "qnx6: unable to read the second superblock\n"); + goto out; + } + sb2 = (struct qnx6_super_block *)bh2->b_data; + if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) { + if (!silent) + printk(KERN_ERR "qnx6: wrong signature (magic)" + " in superblock #2.\n"); + goto out; + } + + /* checksum check - start at byte 8 and end at byte 512 */ + if (fs32_to_cpu(sbi, sb2->sb_checksum) != + crc32_be(0, (char *)(bh2->b_data + 8), 504)) { + printk(KERN_ERR "qnx6: superblock #2 checksum error\n"); + goto out; + } + + if (fs64_to_cpu(sbi, sb1->sb_serial) >= + fs64_to_cpu(sbi, sb2->sb_serial)) { + /* superblock #1 active */ + sbi->sb_buf = bh1; + sbi->sb = (struct qnx6_super_block *)bh1->b_data; + brelse(bh2); + printk(KERN_INFO "qnx6: superblock #1 active\n"); + } else { + /* superblock #2 active */ + sbi->sb_buf = bh2; + sbi->sb = (struct qnx6_super_block *)bh2->b_data; + brelse(bh1); + printk(KERN_INFO "qnx6: superblock #2 active\n"); + } +mmi_success: + /* sanity check - limit maximum indirect pointer levels */ + if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) { + printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n", + QNX6_PTR_MAX_LEVELS, sb1->Inode.levels); + goto out; + } + if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) { + printk(KERN_ERR "qnx6: too many longfilename levels" + " (max %i, sb %i)\n", + QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels); + goto out; + } + s->s_op = &qnx6_sops; + s->s_magic = QNX6_SUPER_MAGIC; + s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ + + /* ease the later tree level calculations */ + sbi = QNX6_SB(s); + sbi->s_ptrbits = ilog2(s->s_blocksize / 4); + sbi->inodes = qnx6_private_inode(s, &sb1->Inode); + if (!sbi->inodes) + goto out; + sbi->longfile = qnx6_private_inode(s, &sb1->Longfile); + if (!sbi->longfile) + goto out1; + + /* prefetch root inode */ + root = qnx6_iget(s, QNX6_ROOT_INO); + if (IS_ERR(root)) { + printk(KERN_ERR "qnx6: get inode failed\n"); + ret = PTR_ERR(root); + goto out2; + } + + ret = -ENOMEM; + s->s_root = d_make_root(root); + if (!s->s_root) + goto out2; + + ret = -EINVAL; + errmsg = qnx6_checkroot(s); + if (errmsg != NULL) { + if (!silent) + printk(KERN_ERR "qnx6: %s\n", errmsg); + goto out3; + } + return 0; + +out3: + dput(s->s_root); + s->s_root = NULL; +out2: + iput(sbi->longfile); +out1: + iput(sbi->inodes); +out: + if (bh1) + brelse(bh1); + if (bh2) + brelse(bh2); +outnobh: + kfree(qs); + s->s_fs_info = NULL; + return ret; +} + +static void qnx6_put_super(struct super_block *sb) +{ + struct qnx6_sb_info *qs = QNX6_SB(sb); + brelse(qs->sb_buf); + iput(qs->longfile); + iput(qs->inodes); + kfree(qs); + sb->s_fs_info = NULL; + return; +} + +static sector_t qnx6_bmap(struct address_space *mapping, sector_t block) +{ + return generic_block_bmap(mapping, block, qnx6_get_block); +} +static const struct address_space_operations qnx6_aops = { + .readpage = qnx6_readpage, + .readpages = qnx6_readpages, + .bmap = qnx6_bmap +}; + +static struct inode *qnx6_private_inode(struct super_block *s, + struct qnx6_root_node *p) +{ + struct inode *inode = new_inode(s); + if (inode) { + struct qnx6_inode_info *ei = QNX6_I(inode); + struct qnx6_sb_info *sbi = QNX6_SB(s); + inode->i_size = fs64_to_cpu(sbi, p->size); + memcpy(ei->di_block_ptr, p->ptr, sizeof(p->ptr)); + ei->di_filelevels = p->levels; + inode->i_mode = S_IFREG | S_IRUSR; /* probably wrong */ + inode->i_mapping->a_ops = &qnx6_aops; + } + return inode; +} + +struct inode *qnx6_iget(struct super_block *sb, unsigned ino) +{ + struct qnx6_sb_info *sbi = QNX6_SB(sb); + struct qnx6_inode_entry *raw_inode; + struct inode *inode; + struct qnx6_inode_info *ei; + struct address_space *mapping; + struct page *page; + u32 n, offs; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = QNX6_I(inode); + + inode->i_mode = 0; + + if (ino == 0) { + printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is " + "out of range\n", + sb->s_id, ino); + iget_failed(inode); + return ERR_PTR(-EIO); + } + n = (ino - 1) >> (PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS); + offs = (ino - 1) & (~PAGE_CACHE_MASK >> QNX6_INODE_SIZE_BITS); + mapping = sbi->inodes->i_mapping; + page = read_mapping_page(mapping, n, NULL); + if (IS_ERR(page)) { + printk(KERN_ERR "qnx6: major problem: unable to read inode from " + "dev %s\n", sb->s_id); + iget_failed(inode); + return ERR_CAST(page); + } + kmap(page); + raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs; + + inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode); + inode->i_uid = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid); + inode->i_gid = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid); + inode->i_size = fs64_to_cpu(sbi, raw_inode->di_size); + inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_mtime); + inode->i_mtime.tv_nsec = 0; + inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_atime); + inode->i_atime.tv_nsec = 0; + inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_ctime); + inode->i_ctime.tv_nsec = 0; + + /* calc blocks based on 512 byte blocksize */ + inode->i_blocks = (inode->i_size + 511) >> 9; + + memcpy(&ei->di_block_ptr, &raw_inode->di_block_ptr, + sizeof(raw_inode->di_block_ptr)); + ei->di_filelevels = raw_inode->di_filelevels; + + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &generic_ro_fops; + inode->i_mapping->a_ops = &qnx6_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &qnx6_dir_inode_operations; + inode->i_fop = &qnx6_dir_operations; + inode->i_mapping->a_ops = &qnx6_aops; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &qnx6_aops; + } else + init_special_inode(inode, inode->i_mode, 0); + qnx6_put_page(page); + unlock_new_inode(inode); + return inode; +} + +static struct kmem_cache *qnx6_inode_cachep; + +static struct inode *qnx6_alloc_inode(struct super_block *sb) +{ + struct qnx6_inode_info *ei; + ei = kmem_cache_alloc(qnx6_inode_cachep, GFP_KERNEL); + if (!ei) + return NULL; + return &ei->vfs_inode; +} + +static void qnx6_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode)); +} + +static void qnx6_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, qnx6_i_callback); +} + +static void init_once(void *foo) +{ + struct qnx6_inode_info *ei = (struct qnx6_inode_info *) foo; + + inode_init_once(&ei->vfs_inode); +} + +static int init_inodecache(void) +{ + qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache", + sizeof(struct qnx6_inode_info), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once); + if (!qnx6_inode_cachep) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + kmem_cache_destroy(qnx6_inode_cachep); +} + +static struct dentry *qnx6_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, qnx6_fill_super); +} + +static struct file_system_type qnx6_fs_type = { + .owner = THIS_MODULE, + .name = "qnx6", + .mount = qnx6_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_qnx6_fs(void) +{ + int err; + + err = init_inodecache(); + if (err) + return err; + + err = register_filesystem(&qnx6_fs_type); + if (err) { + destroy_inodecache(); + return err; + } + + printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n"); + return 0; +} + +static void __exit exit_qnx6_fs(void) +{ + unregister_filesystem(&qnx6_fs_type); + destroy_inodecache(); +} + +module_init(init_qnx6_fs) +module_exit(exit_qnx6_fs) +MODULE_LICENSE("GPL"); diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c new file mode 100644 index 000000000000..8a97289e04ad --- /dev/null +++ b/fs/qnx6/namei.c @@ -0,0 +1,42 @@ +/* + * QNX6 file system, Linux implementation. + * + * Version : 1.0.0 + * + * History : + * + * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release. + * 16-02-2012 pagemap extension by Al Viro + * + */ + +#include "qnx6.h" + +struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + unsigned ino; + struct page *page; + struct inode *foundinode = NULL; + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + + if (len > QNX6_LONG_NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + + ino = qnx6_find_entry(len, dir, name, &page); + if (ino) { + foundinode = qnx6_iget(dir->i_sb, ino); + qnx6_put_page(page); + if (IS_ERR(foundinode)) { + QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> " + " error %ld\n", PTR_ERR(foundinode))); + return ERR_CAST(foundinode); + } + } else { + QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name)); + return NULL; + } + d_add(dentry, foundinode); + return NULL; +} diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h new file mode 100644 index 000000000000..6c5e02a0b6a8 --- /dev/null +++ b/fs/qnx6/qnx6.h @@ -0,0 +1,135 @@ +/* + * QNX6 file system, Linux implementation. + * + * Version : 1.0.0 + * + * History : + * + * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release. + * 16-02-2012 page map extension by Al Viro + * + */ + +#include <linux/fs.h> +#include <linux/pagemap.h> + +typedef __u16 __bitwise __fs16; +typedef __u32 __bitwise __fs32; +typedef __u64 __bitwise __fs64; + +#include <linux/qnx6_fs.h> + +#ifdef CONFIG_QNX6FS_DEBUG +#define QNX6DEBUG(X) printk X +#else +#define QNX6DEBUG(X) (void) 0 +#endif + +struct qnx6_sb_info { + struct buffer_head *sb_buf; /* superblock buffer */ + struct qnx6_super_block *sb; /* our superblock */ + int s_blks_off; /* blkoffset fs-startpoint */ + int s_ptrbits; /* indirect pointer bitfield */ + unsigned long s_mount_opt; /* all mount options */ + int s_bytesex; /* holds endianess info */ + struct inode * inodes; + struct inode * longfile; +}; + +struct qnx6_inode_info { + __fs32 di_block_ptr[QNX6_NO_DIRECT_POINTERS]; + __u8 di_filelevels; + __u32 i_dir_start_lookup; + struct inode vfs_inode; +}; + +extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino); +extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd); + +#ifdef CONFIG_QNX6FS_DEBUG +extern void qnx6_superblock_debug(struct qnx6_super_block *, + struct super_block *); +#endif + +extern const struct inode_operations qnx6_dir_inode_operations; +extern const struct file_operations qnx6_dir_operations; + +static inline struct qnx6_sb_info *QNX6_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct qnx6_inode_info *QNX6_I(struct inode *inode) +{ + return container_of(inode, struct qnx6_inode_info, vfs_inode); +} + +#define clear_opt(o, opt) (o &= ~(QNX6_MOUNT_##opt)) +#define set_opt(o, opt) (o |= (QNX6_MOUNT_##opt)) +#define test_opt(sb, opt) (QNX6_SB(sb)->s_mount_opt & \ + QNX6_MOUNT_##opt) +enum { + BYTESEX_LE, + BYTESEX_BE, +}; + +static inline __u64 fs64_to_cpu(struct qnx6_sb_info *sbi, __fs64 n) +{ + if (sbi->s_bytesex == BYTESEX_LE) + return le64_to_cpu((__force __le64)n); + else + return be64_to_cpu((__force __be64)n); +} + +static inline __fs64 cpu_to_fs64(struct qnx6_sb_info *sbi, __u64 n) +{ + if (sbi->s_bytesex == BYTESEX_LE) + return (__force __fs64)cpu_to_le64(n); + else + return (__force __fs64)cpu_to_be64(n); +} + +static inline __u32 fs32_to_cpu(struct qnx6_sb_info *sbi, __fs32 n) +{ + if (sbi->s_bytesex == BYTESEX_LE) + return le32_to_cpu((__force __le32)n); + else + return be32_to_cpu((__force __be32)n); +} + +static inline __fs32 cpu_to_fs32(struct qnx6_sb_info *sbi, __u32 n) +{ + if (sbi->s_bytesex == BYTESEX_LE) + return (__force __fs32)cpu_to_le32(n); + else + return (__force __fs32)cpu_to_be32(n); +} + +static inline __u16 fs16_to_cpu(struct qnx6_sb_info *sbi, __fs16 n) +{ + if (sbi->s_bytesex == BYTESEX_LE) + return le16_to_cpu((__force __le16)n); + else + return be16_to_cpu((__force __be16)n); +} + +static inline __fs16 cpu_to_fs16(struct qnx6_sb_info *sbi, __u16 n) +{ + if (sbi->s_bytesex == BYTESEX_LE) + return (__force __fs16)cpu_to_le16(n); + else + return (__force __fs16)cpu_to_be16(n); +} + +extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, + int silent); + +static inline void qnx6_put_page(struct page *page) +{ + kunmap(page); + page_cache_release(page); +} + +extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name, + struct page **res_page); diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c new file mode 100644 index 000000000000..29c32cba62d6 --- /dev/null +++ b/fs/qnx6/super_mmi.c @@ -0,0 +1,150 @@ +/* + * QNX6 file system, Linux implementation. + * + * Version : 1.0.0 + * + * History : + * + * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release. + * + */ + +#include <linux/buffer_head.h> +#include <linux/slab.h> +#include <linux/crc32.h> +#include "qnx6.h" + +static void qnx6_mmi_copy_sb(struct qnx6_super_block *qsb, + struct qnx6_mmi_super_block *sb) +{ + qsb->sb_magic = sb->sb_magic; + qsb->sb_checksum = sb->sb_checksum; + qsb->sb_serial = sb->sb_serial; + qsb->sb_blocksize = sb->sb_blocksize; + qsb->sb_num_inodes = sb->sb_num_inodes; + qsb->sb_free_inodes = sb->sb_free_inodes; + qsb->sb_num_blocks = sb->sb_num_blocks; + qsb->sb_free_blocks = sb->sb_free_blocks; + + /* the rest of the superblock is the same */ + memcpy(&qsb->Inode, &sb->Inode, sizeof(sb->Inode)); + memcpy(&qsb->Bitmap, &sb->Bitmap, sizeof(sb->Bitmap)); + memcpy(&qsb->Longfile, &sb->Longfile, sizeof(sb->Longfile)); +} + +struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent) +{ + struct buffer_head *bh1, *bh2 = NULL; + struct qnx6_mmi_super_block *sb1, *sb2; + struct qnx6_super_block *qsb = NULL; + struct qnx6_sb_info *sbi; + __u64 offset; + + /* Check the superblock signatures + start with the first superblock */ + bh1 = sb_bread(s, 0); + if (!bh1) { + printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n"); + return NULL; + } + sb1 = (struct qnx6_mmi_super_block *)bh1->b_data; + sbi = QNX6_SB(s); + if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) { + if (!silent) { + printk(KERN_ERR "qnx6: wrong signature (magic) in" + " superblock #1.\n"); + goto out; + } + } + + /* checksum check - start at byte 8 and end at byte 512 */ + if (fs32_to_cpu(sbi, sb1->sb_checksum) != + crc32_be(0, (char *)(bh1->b_data + 8), 504)) { + printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); + goto out; + } + + /* calculate second superblock blocknumber */ + offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) + QNX6_SUPERBLOCK_AREA / + fs32_to_cpu(sbi, sb1->sb_blocksize); + + /* set new blocksize */ + if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) { + printk(KERN_ERR "qnx6: unable to set blocksize\n"); + goto out; + } + /* blocksize invalidates bh - pull it back in */ + brelse(bh1); + bh1 = sb_bread(s, 0); + if (!bh1) + goto out; + sb1 = (struct qnx6_mmi_super_block *)bh1->b_data; + + /* read second superblock */ + bh2 = sb_bread(s, offset); + if (!bh2) { + printk(KERN_ERR "qnx6: unable to read the second superblock\n"); + goto out; + } + sb2 = (struct qnx6_mmi_super_block *)bh2->b_data; + if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) { + if (!silent) + printk(KERN_ERR "qnx6: wrong signature (magic) in" + " superblock #2.\n"); + goto out; + } + + /* checksum check - start at byte 8 and end at byte 512 */ + if (fs32_to_cpu(sbi, sb2->sb_checksum) + != crc32_be(0, (char *)(bh2->b_data + 8), 504)) { + printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); + goto out; + } + + qsb = kmalloc(sizeof(*qsb), GFP_KERNEL); + if (!qsb) { + printk(KERN_ERR "qnx6: unable to allocate memory.\n"); + goto out; + } + + if (fs64_to_cpu(sbi, sb1->sb_serial) > + fs64_to_cpu(sbi, sb2->sb_serial)) { + /* superblock #1 active */ + qnx6_mmi_copy_sb(qsb, sb1); +#ifdef CONFIG_QNX6FS_DEBUG + qnx6_superblock_debug(qsb, s); +#endif + memcpy(bh1->b_data, qsb, sizeof(struct qnx6_super_block)); + + sbi->sb_buf = bh1; + sbi->sb = (struct qnx6_super_block *)bh1->b_data; + brelse(bh2); + printk(KERN_INFO "qnx6: superblock #1 active\n"); + } else { + /* superblock #2 active */ + qnx6_mmi_copy_sb(qsb, sb2); +#ifdef CONFIG_QNX6FS_DEBUG + qnx6_superblock_debug(qsb, s); +#endif + memcpy(bh2->b_data, qsb, sizeof(struct qnx6_super_block)); + + sbi->sb_buf = bh2; + sbi->sb = (struct qnx6_super_block *)bh2->b_data; + brelse(bh1); + printk(KERN_INFO "qnx6: superblock #2 active\n"); + } + kfree(qsb); + + /* offset for mmi_fs is just SUPERBLOCK_AREA bytes */ + sbi->s_blks_off = QNX6_SUPERBLOCK_AREA / s->s_blocksize; + + /* success */ + return sbi->sb; + +out: + if (bh1 != NULL) + brelse(bh1); + if (bh2 != NULL) + brelse(bh2); + return NULL; +} diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 46741970371b..8b4f12b33f57 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -71,6 +71,7 @@ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/security.h> +#include <linux/sched.h> #include <linux/kmod.h> #include <linux/namei.h> #include <linux/capability.h> diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 7898cd688a00..fc2c4388d126 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -292,11 +292,26 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, } } +/* Return 1 if 'cmd' will block on frozen filesystem */ +static int quotactl_cmd_write(int cmd) +{ + switch (cmd) { + case Q_GETFMT: + case Q_GETINFO: + case Q_SYNC: + case Q_XGETQSTAT: + case Q_XGETQUOTA: + case Q_XQUOTASYNC: + return 0; + } + return 1; +} + /* * look up a superblock on which quota ops will be performed * - use the name of a block device to find the superblock thereon */ -static struct super_block *quotactl_block(const char __user *special) +static struct super_block *quotactl_block(const char __user *special, int cmd) { #ifdef CONFIG_BLOCK struct block_device *bdev; @@ -309,7 +324,10 @@ static struct super_block *quotactl_block(const char __user *special) putname(tmp); if (IS_ERR(bdev)) return ERR_CAST(bdev); - sb = get_super(bdev); + if (quotactl_cmd_write(cmd)) + sb = get_super_thawed(bdev); + else + sb = get_super(bdev); bdput(bdev); if (!sb) return ERR_PTR(-ENODEV); @@ -361,7 +379,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, pathp = &path; } - sb = quotactl_block(special); + sb = quotactl_block(special, cmds); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto out; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index aec766abe3af..a1fdabe21dec 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -209,22 +209,19 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts) int ramfs_fill_super(struct super_block *sb, void *data, int silent) { struct ramfs_fs_info *fsi; - struct inode *inode = NULL; - struct dentry *root; + struct inode *inode; int err; save_mount_options(sb, data); fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); sb->s_fs_info = fsi; - if (!fsi) { - err = -ENOMEM; - goto fail; - } + if (!fsi) + return -ENOMEM; err = ramfs_parse_options(data, &fsi->mount_opts); if (err) - goto fail; + return err; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; @@ -234,24 +231,11 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); - if (!inode) { - err = -ENOMEM; - goto fail; - } - - root = d_alloc_root(inode); - sb->s_root = root; - if (!root) { - err = -ENOMEM; - goto fail; - } + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; return 0; -fail: - kfree(fsi); - sb->s_fs_info = NULL; - iput(inode); - return err; } struct dentry *ramfs_mount(struct file_system_type *fs_type, diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h new file mode 100644 index 000000000000..f096b80e73d8 --- /dev/null +++ b/fs/reiserfs/acl.h @@ -0,0 +1,76 @@ +#include <linux/init.h> +#include <linux/posix_acl.h> + +#define REISERFS_ACL_VERSION 0x0001 + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} reiserfs_acl_entry; + +typedef struct { + __le16 e_tag; + __le16 e_perm; +} reiserfs_acl_entry_short; + +typedef struct { + __le32 a_version; +} reiserfs_acl_header; + +static inline size_t reiserfs_acl_size(int count) +{ + if (count <= 4) { + return sizeof(reiserfs_acl_header) + + count * sizeof(reiserfs_acl_entry_short); + } else { + return sizeof(reiserfs_acl_header) + + 4 * sizeof(reiserfs_acl_entry_short) + + (count - 4) * sizeof(reiserfs_acl_entry); + } +} + +static inline int reiserfs_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(reiserfs_acl_header); + s = size - 4 * sizeof(reiserfs_acl_entry_short); + if (s < 0) { + if (size % sizeof(reiserfs_acl_entry_short)) + return -1; + return size / sizeof(reiserfs_acl_entry_short); + } else { + if (s % sizeof(reiserfs_acl_entry)) + return -1; + return s / sizeof(reiserfs_acl_entry) + 4; + } +} + +#ifdef CONFIG_REISERFS_FS_POSIX_ACL +struct posix_acl *reiserfs_get_acl(struct inode *inode, int type); +int reiserfs_acl_chmod(struct inode *inode); +int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, + struct inode *dir, struct dentry *dentry, + struct inode *inode); +int reiserfs_cache_default_acl(struct inode *dir); +extern const struct xattr_handler reiserfs_posix_acl_default_handler; +extern const struct xattr_handler reiserfs_posix_acl_access_handler; + +#else + +#define reiserfs_cache_default_acl(inode) 0 +#define reiserfs_get_acl NULL + +static inline int reiserfs_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int +reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, + const struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + return 0; +} +#endif diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 70de42f09f1d..4c0c7d163d15 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -4,14 +4,12 @@ /* Reiserfs block (de)allocator, bitmap-based. */ #include <linux/time.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/errno.h> #include <linux/buffer_head.h> #include <linux/kernel.h> #include <linux/pagemap.h> #include <linux/vmalloc.h> -#include <linux/reiserfs_fs_sb.h> -#include <linux/reiserfs_fs_i.h> #include <linux/quotaops.h> #include <linux/seq_file.h> diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 133e9355dc6f..66c53b642a88 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -5,7 +5,7 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/fs.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/stat.h> #include <linux/buffer_head.h> #include <linux/slab.h> diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 60c080440661..2b7882b508db 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c @@ -17,7 +17,7 @@ #include <asm/uaccess.h> #include <linux/time.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/buffer_head.h> #include <linux/kernel.h> diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index ace635053a36..8375c922c0d5 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -3,9 +3,9 @@ */ #include <linux/time.h> -#include <linux/reiserfs_fs.h> -#include <linux/reiserfs_acl.h> -#include <linux/reiserfs_xattr.h> +#include "reiserfs.h" +#include "acl.h" +#include "xattr.h" #include <asm/uaccess.h> #include <linux/pagemap.h> #include <linux/swap.h> diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 1e4250bc3a6f..430e0658704c 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -37,7 +37,7 @@ #include <linux/time.h> #include <linux/slab.h> #include <linux/string.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/buffer_head.h> /* To make any changes in the tree we find a node, that contains item diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c index 6471c670743e..91b0cc1242a2 100644 --- a/fs/reiserfs/hashes.c +++ b/fs/reiserfs/hashes.c @@ -19,7 +19,7 @@ // #include <linux/kernel.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <asm/types.h> #define DELTA 0x9E3779B9 diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c index 2074fd95046b..e1978fd895f5 100644 --- a/fs/reiserfs/ibalance.c +++ b/fs/reiserfs/ibalance.c @@ -5,7 +5,7 @@ #include <asm/uaccess.h> #include <linux/string.h> #include <linux/time.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/buffer_head.h> /* this is one and only function that is used outside (do_balance.c) */ diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9e8cd5acd79c..494c315c7417 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -4,9 +4,9 @@ #include <linux/time.h> #include <linux/fs.h> -#include <linux/reiserfs_fs.h> -#include <linux/reiserfs_acl.h> -#include <linux/reiserfs_xattr.h> +#include "reiserfs.h" +#include "acl.h" +#include "xattr.h" #include <linux/exportfs.h> #include <linux/pagemap.h> #include <linux/highmem.h> diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 950e3d1b5c9e..0c2185042d5f 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -5,7 +5,7 @@ #include <linux/capability.h> #include <linux/fs.h> #include <linux/mount.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/time.h> #include <asm/uaccess.h> #include <linux/pagemap.h> diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index 72cb1cc51b87..ee382ef3d300 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c @@ -3,7 +3,7 @@ */ #include <linux/time.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" // this contains item handlers for old item types: sd, direct, // indirect, directory diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c3cf54fd4de3..cf9f4de00a95 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -37,7 +37,7 @@ #include <linux/time.h> #include <linux/semaphore.h> #include <linux/vmalloc.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fcntl.h> diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 03d85cbf90bf..79e5a8b4c226 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c @@ -5,7 +5,7 @@ #include <asm/uaccess.h> #include <linux/string.h> #include <linux/time.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/buffer_head.h> /* these are used in do_balance.c */ @@ -975,7 +975,7 @@ static int leaf_cut_entries(struct buffer_head *bh, remove */ RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); RFALSE(I_ENTRY_COUNT(ih) < from + del_count, - "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", + "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", I_ENTRY_COUNT(ih), from, del_count); if (del_count == 0) diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c index 7df1ce48203a..d735bc8470e3 100644 --- a/fs/reiserfs/lock.c +++ b/fs/reiserfs/lock.c @@ -1,4 +1,4 @@ -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/mutex.h> /* diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 146378865239..84e8a69cee9d 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -14,9 +14,9 @@ #include <linux/time.h> #include <linux/bitops.h> #include <linux/slab.h> -#include <linux/reiserfs_fs.h> -#include <linux/reiserfs_acl.h> -#include <linux/reiserfs_xattr.h> +#include "reiserfs.h" +#include "acl.h" +#include "xattr.h" #include <linux/quotaops.h> #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c index 3a6de810bd61..f732d6a5251d 100644 --- a/fs/reiserfs/objectid.c +++ b/fs/reiserfs/objectid.c @@ -5,8 +5,7 @@ #include <linux/string.h> #include <linux/random.h> #include <linux/time.h> -#include <linux/reiserfs_fs.h> -#include <linux/reiserfs_fs_sb.h> +#include "reiserfs.h" // find where objectid map starts #define objectid_map(s,rs) (old_format_only (s) ? \ diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 45de98b59466..c0b1112ab7e3 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -4,7 +4,7 @@ #include <linux/time.h> #include <linux/fs.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/string.h> #include <linux/buffer_head.h> @@ -329,7 +329,7 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it pointless complexity): - panics in reiserfs_fs.h have numbers from 1000 to 1999 + panics in reiserfs.h have numbers from 1000 to 1999 super.c 2000 to 2999 preserve.c (unused) 3000 to 3999 bitmap.c 4000 to 4999 diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 7a9981196c1c..2c1ade692cc8 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -12,8 +12,7 @@ #include <linux/time.h> #include <linux/seq_file.h> #include <asm/uaccess.h> -#include <linux/reiserfs_fs.h> -#include <linux/reiserfs_fs_sb.h> +#include "reiserfs.h" #include <linux/init.h> #include <linux/proc_fs.h> diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h new file mode 100644 index 000000000000..445d768eea44 --- /dev/null +++ b/fs/reiserfs/reiserfs.h @@ -0,0 +1,2922 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#include <linux/reiserfs_fs.h> + +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/workqueue.h> +#include <asm/unaligned.h> +#include <linux/bitops.h> +#include <linux/proc_fs.h> +#include <linux/buffer_head.h> + +/* the 32 bit compat definitions with int argument */ +#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) +#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION +#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION + +struct reiserfs_journal_list; + +/** bitmasks for i_flags field in reiserfs-specific part of inode */ +typedef enum { + /** this says what format of key do all items (but stat data) of + an object have. If this is set, that format is 3.6 otherwise + - 3.5 */ + i_item_key_version_mask = 0x0001, + /** If this is unset, object has 3.5 stat data, otherwise, it has + 3.6 stat data with 64bit size, 32bit nlink etc. */ + i_stat_data_version_mask = 0x0002, + /** file might need tail packing on close */ + i_pack_on_close_mask = 0x0004, + /** don't pack tail of file */ + i_nopack_mask = 0x0008, + /** If those is set, "safe link" was created for this file during + truncate or unlink. Safe link is used to avoid leakage of disk + space on crash with some files open, but unlinked. */ + i_link_saved_unlink_mask = 0x0010, + i_link_saved_truncate_mask = 0x0020, + i_has_xattr_dir = 0x0040, + i_data_log = 0x0080, +} reiserfs_inode_flags; + +struct reiserfs_inode_info { + __u32 i_key[4]; /* key is still 4 32 bit integers */ + /** transient inode flags that are never stored on disk. Bitmasks + for this field are defined above. */ + __u32 i_flags; + + __u32 i_first_direct_byte; // offset of first byte stored in direct item. + + /* copy of persistent inode flags read from sd_attrs. */ + __u32 i_attrs; + + int i_prealloc_block; /* first unused block of a sequence of unused blocks */ + int i_prealloc_count; /* length of that sequence */ + struct list_head i_prealloc_list; /* per-transaction list of inodes which + * have preallocated blocks */ + + unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks + * for the contents of this directory should be + * displaced */ + + /* we use these for fsync or O_SYNC to decide which transaction + ** needs to be committed in order for this inode to be properly + ** flushed */ + unsigned int i_trans_id; + struct reiserfs_journal_list *i_jl; + atomic_t openers; + struct mutex tailpack; +#ifdef CONFIG_REISERFS_FS_XATTR + struct rw_semaphore i_xattr_sem; +#endif + struct inode vfs_inode; +}; + +typedef enum { + reiserfs_attrs_cleared = 0x00000001, +} reiserfs_super_block_flags; + +/* struct reiserfs_super_block accessors/mutators + * since this is a disk structure, it will always be in + * little endian format. */ +#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) +#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) +#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) +#define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v)) +#define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block)) +#define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v)) + +#define sb_jp_journal_1st_block(sbp) \ + (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block)) +#define set_sb_jp_journal_1st_block(sbp,v) \ + ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v)) +#define sb_jp_journal_dev(sbp) \ + (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev)) +#define set_sb_jp_journal_dev(sbp,v) \ + ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v)) +#define sb_jp_journal_size(sbp) \ + (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size)) +#define set_sb_jp_journal_size(sbp,v) \ + ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v)) +#define sb_jp_journal_trans_max(sbp) \ + (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max)) +#define set_sb_jp_journal_trans_max(sbp,v) \ + ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v)) +#define sb_jp_journal_magic(sbp) \ + (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic)) +#define set_sb_jp_journal_magic(sbp,v) \ + ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v)) +#define sb_jp_journal_max_batch(sbp) \ + (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch)) +#define set_sb_jp_journal_max_batch(sbp,v) \ + ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v)) +#define sb_jp_jourmal_max_commit_age(sbp) \ + (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age)) +#define set_sb_jp_journal_max_commit_age(sbp,v) \ + ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v)) + +#define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize)) +#define set_sb_blocksize(sbp,v) ((sbp)->s_v1.s_blocksize = cpu_to_le16(v)) +#define sb_oid_maxsize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_maxsize)) +#define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v)) +#define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize)) +#define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v)) +#define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state)) +#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v)) +#define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state)) +#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v)) +#define sb_hash_function_code(sbp) \ + (le32_to_cpu((sbp)->s_v1.s_hash_function_code)) +#define set_sb_hash_function_code(sbp,v) \ + ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v)) +#define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height)) +#define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v)) +#define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr)) +#define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v)) +#define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version)) +#define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v)) + +#define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count)) +#define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v)) + +#define sb_reserved_for_journal(sbp) \ + (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal)) +#define set_sb_reserved_for_journal(sbp,v) \ + ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v)) + +/* LOGGING -- */ + +/* These all interelate for performance. +** +** If the journal block count is smaller than n transactions, you lose speed. +** I don't know what n is yet, I'm guessing 8-16. +** +** typical transaction size depends on the application, how often fsync is +** called, and how many metadata blocks you dirty in a 30 second period. +** The more small files (<16k) you use, the larger your transactions will +** be. +** +** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal +** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough +** to prevent wrapping before dirty meta blocks get to disk. +** +** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal +** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. +** +** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. +** +*/ + +/* don't mess with these for a while */ + /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ +#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ +#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ +#define JOURNAL_HASH_SIZE 8192 +#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ + +/* One of these for every block in every transaction +** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a +** hash of all the in memory transactions. +** next and prev are used by the current transaction (journal_hash). +** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash +** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging +** to a given transaction. +*/ +struct reiserfs_journal_cnode { + struct buffer_head *bh; /* real buffer head */ + struct super_block *sb; /* dev of real buffer head */ + __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */ + unsigned long state; + struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */ + struct reiserfs_journal_cnode *next; /* next in transaction list */ + struct reiserfs_journal_cnode *prev; /* prev in transaction list */ + struct reiserfs_journal_cnode *hprev; /* prev in hash list */ + struct reiserfs_journal_cnode *hnext; /* next in hash list */ +}; + +struct reiserfs_bitmap_node { + int id; + char *data; + struct list_head list; +}; + +struct reiserfs_list_bitmap { + struct reiserfs_journal_list *journal_list; + struct reiserfs_bitmap_node **bitmaps; +}; + +/* +** one of these for each transaction. The most important part here is the j_realblock. +** this list of cnodes is used to hash all the blocks in all the commits, to mark all the +** real buffer heads dirty once all the commits hit the disk, +** and to make sure every real block in a transaction is on disk before allowing the log area +** to be overwritten */ +struct reiserfs_journal_list { + unsigned long j_start; + unsigned long j_state; + unsigned long j_len; + atomic_t j_nonzerolen; + atomic_t j_commit_left; + atomic_t j_older_commits_done; /* all commits older than this on disk */ + struct mutex j_commit_mutex; + unsigned int j_trans_id; + time_t j_timestamp; + struct reiserfs_list_bitmap *j_list_bitmap; + struct buffer_head *j_commit_bh; /* commit buffer head */ + struct reiserfs_journal_cnode *j_realblock; + struct reiserfs_journal_cnode *j_freedlist; /* list of buffers that were freed during this trans. free each of these on flush */ + /* time ordered list of all active transactions */ + struct list_head j_list; + + /* time ordered list of all transactions we haven't tried to flush yet */ + struct list_head j_working_list; + + /* list of tail conversion targets in need of flush before commit */ + struct list_head j_tail_bh_list; + /* list of data=ordered buffers in need of flush before commit */ + struct list_head j_bh_list; + int j_refcount; +}; + +struct reiserfs_journal { + struct buffer_head **j_ap_blocks; /* journal blocks on disk */ + struct reiserfs_journal_cnode *j_last; /* newest journal block */ + struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */ + + struct block_device *j_dev_bd; + fmode_t j_dev_mode; + int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ + + unsigned long j_state; + unsigned int j_trans_id; + unsigned long j_mount_id; + unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */ + unsigned long j_len; /* length of current waiting commit */ + unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */ + atomic_t j_wcount; /* count of writers for current commit */ + unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */ + unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ + unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */ + struct buffer_head *j_header_bh; + + time_t j_trans_start_time; /* time this transaction started */ + struct mutex j_mutex; + struct mutex j_flush_mutex; + wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ + atomic_t j_jlock; /* lock for j_join_wait */ + int j_list_bitmap_index; /* number of next list bitmap to use */ + int j_must_wait; /* no more journal begins allowed. MUST sleep on j_join_wait */ + int j_next_full_flush; /* next journal_end will flush all journal list */ + int j_next_async_flush; /* next journal_end will flush all async commits */ + + int j_cnode_used; /* number of cnodes on the used list */ + int j_cnode_free; /* number of cnodes on the free list */ + + unsigned int j_trans_max; /* max number of blocks in a transaction. */ + unsigned int j_max_batch; /* max number of blocks to batch into a trans */ + unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */ + unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */ + unsigned int j_default_max_commit_age; /* the default for the max commit age */ + + struct reiserfs_journal_cnode *j_cnode_free_list; + struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */ + + struct reiserfs_journal_list *j_current_jl; + int j_free_bitmap_nodes; + int j_used_bitmap_nodes; + + int j_num_lists; /* total number of active transactions */ + int j_num_work_lists; /* number that need attention from kreiserfsd */ + + /* debugging to make sure things are flushed in order */ + unsigned int j_last_flush_id; + + /* debugging to make sure things are committed in order */ + unsigned int j_last_commit_id; + + struct list_head j_bitmap_nodes; + struct list_head j_dirty_buffers; + spinlock_t j_dirty_buffers_lock; /* protects j_dirty_buffers */ + + /* list of all active transactions */ + struct list_head j_journal_list; + /* lists that haven't been touched by writeback attempts */ + struct list_head j_working_list; + + struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */ + struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */ + struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all + the transactions */ + struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */ + int j_persistent_trans; + unsigned long j_max_trans_size; + unsigned long j_max_batch_size; + + int j_errno; + + /* when flushing ordered buffers, throttle new ordered writers */ + struct delayed_work j_work; + struct super_block *j_work_sb; + atomic_t j_async_throttle; +}; + +enum journal_state_bits { + J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ + J_WRITERS_QUEUED, /* set when log is full due to too many writers */ + J_ABORTED, /* set when log is aborted */ +}; + +#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ + +typedef __u32(*hashf_t) (const signed char *, int); + +struct reiserfs_bitmap_info { + __u32 free_count; +}; + +struct proc_dir_entry; + +#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) +typedef unsigned long int stat_cnt_t; +typedef struct reiserfs_proc_info_data { + spinlock_t lock; + int exiting; + int max_hash_collisions; + + stat_cnt_t breads; + stat_cnt_t bread_miss; + stat_cnt_t search_by_key; + stat_cnt_t search_by_key_fs_changed; + stat_cnt_t search_by_key_restarted; + + stat_cnt_t insert_item_restarted; + stat_cnt_t paste_into_item_restarted; + stat_cnt_t cut_from_item_restarted; + stat_cnt_t delete_solid_item_restarted; + stat_cnt_t delete_item_restarted; + + stat_cnt_t leaked_oid; + stat_cnt_t leaves_removable; + + /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */ + stat_cnt_t balance_at[5]; /* XXX */ + /* sbk == search_by_key */ + stat_cnt_t sbk_read_at[5]; /* XXX */ + stat_cnt_t sbk_fs_changed[5]; + stat_cnt_t sbk_restarted[5]; + stat_cnt_t items_at[5]; /* XXX */ + stat_cnt_t free_at[5]; /* XXX */ + stat_cnt_t can_node_be_removed[5]; /* XXX */ + long int lnum[5]; /* XXX */ + long int rnum[5]; /* XXX */ + long int lbytes[5]; /* XXX */ + long int rbytes[5]; /* XXX */ + stat_cnt_t get_neighbors[5]; + stat_cnt_t get_neighbors_restart[5]; + stat_cnt_t need_l_neighbor[5]; + stat_cnt_t need_r_neighbor[5]; + + stat_cnt_t free_block; + struct __scan_bitmap_stats { + stat_cnt_t call; + stat_cnt_t wait; + stat_cnt_t bmap; + stat_cnt_t retry; + stat_cnt_t in_journal_hint; + stat_cnt_t in_journal_nohint; + stat_cnt_t stolen; + } scan_bitmap; + struct __journal_stats { + stat_cnt_t in_journal; + stat_cnt_t in_journal_bitmap; + stat_cnt_t in_journal_reusable; + stat_cnt_t lock_journal; + stat_cnt_t lock_journal_wait; + stat_cnt_t journal_being; + stat_cnt_t journal_relock_writers; + stat_cnt_t journal_relock_wcount; + stat_cnt_t mark_dirty; + stat_cnt_t mark_dirty_already; + stat_cnt_t mark_dirty_notjournal; + stat_cnt_t restore_prepared; + stat_cnt_t prepare; + stat_cnt_t prepare_retry; + } journal; +} reiserfs_proc_info_data_t; +#else +typedef struct reiserfs_proc_info_data { +} reiserfs_proc_info_data_t; +#endif + +/* reiserfs union of in-core super block data */ +struct reiserfs_sb_info { + struct buffer_head *s_sbh; /* Buffer containing the super block */ + /* both the comment and the choice of + name are unclear for s_rs -Hans */ + struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */ + struct reiserfs_bitmap_info *s_ap_bitmap; + struct reiserfs_journal *s_journal; /* pointer to journal information */ + unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ + + /* Serialize writers access, replace the old bkl */ + struct mutex lock; + /* Owner of the lock (can be recursive) */ + struct task_struct *lock_owner; + /* Depth of the lock, start from -1 like the bkl */ + int lock_depth; + + /* Comment? -Hans */ + void (*end_io_handler) (struct buffer_head *, int); + hashf_t s_hash_function; /* pointer to function which is used + to sort names in directory. Set on + mount */ + unsigned long s_mount_opt; /* reiserfs's mount options are set + here (currently - NOTAIL, NOLOG, + REPLAYONLY) */ + + struct { /* This is a structure that describes block allocator options */ + unsigned long bits; /* Bitfield for enable/disable kind of options */ + unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */ + int border; /* percentage of disk, border takes */ + int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ + int preallocsize; /* Number of blocks we try to prealloc when file + reaches preallocmin size (in blocks) or + prealloc_list is empty. */ + } s_alloc_options; + + /* Comment? -Hans */ + wait_queue_head_t s_wait; + /* To be obsoleted soon by per buffer seals.. -Hans */ + atomic_t s_generation_counter; // increased by one every time the + // tree gets re-balanced + unsigned long s_properties; /* File system properties. Currently holds + on-disk FS format */ + + /* session statistics */ + int s_disk_reads; + int s_disk_writes; + int s_fix_nodes; + int s_do_balance; + int s_unneeded_left_neighbor; + int s_good_search_by_key_reada; + int s_bmaps; + int s_bmaps_without_search; + int s_direct2indirect; + int s_indirect2direct; + /* set up when it's ok for reiserfs_read_inode2() to read from + disk inode with nlink==0. Currently this is only used during + finish_unfinished() processing at mount time */ + int s_is_unlinked_ok; + reiserfs_proc_info_data_t s_proc_info_data; + struct proc_dir_entry *procdir; + int reserved_blocks; /* amount of blocks reserved for further allocations */ + spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ + struct dentry *priv_root; /* root of /.reiserfs_priv */ + struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ + int j_errno; +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; +#endif + char *s_jdev; /* Stored jdev for mount option showing */ +#ifdef CONFIG_REISERFS_CHECK + + struct tree_balance *cur_tb; /* + * Detects whether more than one + * copy of tb exists per superblock + * as a means of checking whether + * do_balance is executing concurrently + * against another tree reader/writer + * on a same mount point. + */ +#endif +}; + +/* Definitions of reiserfs on-disk properties: */ +#define REISERFS_3_5 0 +#define REISERFS_3_6 1 +#define REISERFS_OLD_FORMAT 2 + +enum reiserfs_mount_options { +/* Mount options */ + REISERFS_LARGETAIL, /* large tails will be created in a session */ + REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ + REPLAYONLY, /* replay journal and return 0. Use by fsck */ + REISERFS_CONVERT, /* -o conv: causes conversion of old + format super block to the new + format. If not specified - old + partition will be dealt with in a + manner of 3.5.x */ + +/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting +** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option +** is not required. If the normal autodection code can't determine which +** hash to use (because both hashes had the same value for a file) +** use this option to force a specific hash. It won't allow you to override +** the existing hash on the FS, so if you have a tea hash disk, and mount +** with -o hash=rupasov, the mount will fail. +*/ + FORCE_TEA_HASH, /* try to force tea hash on mount */ + FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ + FORCE_R5_HASH, /* try to force rupasov hash on mount */ + FORCE_HASH_DETECT, /* try to detect hash function on mount */ + + REISERFS_DATA_LOG, + REISERFS_DATA_ORDERED, + REISERFS_DATA_WRITEBACK, + +/* used for testing experimental features, makes benchmarking new + features with and without more convenient, should never be used by + users in any code shipped to users (ideally) */ + + REISERFS_NO_BORDER, + REISERFS_NO_UNHASHED_RELOCATION, + REISERFS_HASHED_RELOCATION, + REISERFS_ATTRS, + REISERFS_XATTRS_USER, + REISERFS_POSIXACL, + REISERFS_EXPOSE_PRIVROOT, + REISERFS_BARRIER_NONE, + REISERFS_BARRIER_FLUSH, + + /* Actions on error */ + REISERFS_ERROR_PANIC, + REISERFS_ERROR_RO, + REISERFS_ERROR_CONTINUE, + + REISERFS_USRQUOTA, /* User quota option specified */ + REISERFS_GRPQUOTA, /* Group quota option specified */ + + REISERFS_TEST1, + REISERFS_TEST2, + REISERFS_TEST3, + REISERFS_TEST4, + REISERFS_UNSUPPORTED_OPT, +}; + +#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH)) +#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH)) +#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH)) +#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT)) +#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER)) +#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION)) +#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) +#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4)) + +#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL)) +#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL)) +#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY)) +#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS)) +#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5)) +#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT)) +#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG)) +#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) +#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) +#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) +#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) +#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT)) +#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) +#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) +#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) + +#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC)) +#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO)) + +void reiserfs_file_buffer(struct buffer_head *bh, int list); +extern struct file_system_type reiserfs_fs_type; +int reiserfs_resize(struct super_block *, unsigned long); + +#define CARRY_ON 0 +#define SCHEDULE_OCCURRED 1 + +#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh) +#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal) +#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block) +#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) +#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap) + +#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->) + +/* A safe version of the "bdevname", which returns the "s_id" field of + * a superblock or else "Null superblock" if the super block is NULL. + */ +static inline char *reiserfs_bdevname(struct super_block *s) +{ + return (s == NULL) ? "Null superblock" : s->s_id; +} + +#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal))) +static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal + *journal) +{ + return test_bit(J_ABORTED, &journal->j_state); +} + +/* + * Locking primitives. The write lock is a per superblock + * special mutex that has properties close to the Big Kernel Lock + * which was used in the previous locking scheme. + */ +void reiserfs_write_lock(struct super_block *s); +void reiserfs_write_unlock(struct super_block *s); +int reiserfs_write_lock_once(struct super_block *s); +void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); + +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *s); +#else +static inline void reiserfs_lock_check_recursive(struct super_block *s) { } +#endif + +/* + * Several mutexes depend on the write lock. + * However sometimes we want to relax the write lock while we hold + * these mutexes, according to the release/reacquire on schedule() + * properties of the Bkl that were used. + * Reiserfs performances and locking were based on this scheme. + * Now that the write lock is a mutex and not the bkl anymore, doing so + * may result in a deadlock: + * + * A acquire write_lock + * A acquire j_commit_mutex + * A release write_lock and wait for something + * B acquire write_lock + * B can't acquire j_commit_mutex and sleep + * A can't acquire write lock anymore + * deadlock + * + * What we do here is avoiding such deadlock by playing the same game + * than the Bkl: if we can't acquire a mutex that depends on the write lock, + * we release the write lock, wait a bit and then retry. + * + * The mutexes concerned by this hack are: + * - The commit mutex of a journal list + * - The flush mutex + * - The journal lock + * - The inode mutex + */ +static inline void reiserfs_mutex_lock_safe(struct mutex *m, + struct super_block *s) +{ + reiserfs_lock_check_recursive(s); + reiserfs_write_unlock(s); + mutex_lock(m); + reiserfs_write_lock(s); +} + +static inline void +reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, + struct super_block *s) +{ + reiserfs_lock_check_recursive(s); + reiserfs_write_unlock(s); + mutex_lock_nested(m, subclass); + reiserfs_write_lock(s); +} + +static inline void +reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) +{ + reiserfs_lock_check_recursive(s); + reiserfs_write_unlock(s); + down_read(sem); + reiserfs_write_lock(s); +} + +/* + * When we schedule, we usually want to also release the write lock, + * according to the previous bkl based locking scheme of reiserfs. + */ +static inline void reiserfs_cond_resched(struct super_block *s) +{ + if (need_resched()) { + reiserfs_write_unlock(s); + schedule(); + reiserfs_write_lock(s); + } +} + +struct fid; + +/* in reading the #defines, it may help to understand that they employ + the following abbreviations: + + B = Buffer + I = Item header + H = Height within the tree (should be changed to LEV) + N = Number of the item in the node + STAT = stat data + DEH = Directory Entry Header + EC = Entry Count + E = Entry number + UL = Unsigned Long + BLKH = BLocK Header + UNFM = UNForMatted node + DC = Disk Child + P = Path + + These #defines are named by concatenating these abbreviations, + where first comes the arguments, and last comes the return value, + of the macro. + +*/ + +#define USE_INODE_GENERATION_COUNTER + +#define REISERFS_PREALLOCATE +#define DISPLACE_NEW_PACKING_LOCALITIES +#define PREALLOCATION_SIZE 9 + +/* n must be power of 2 */ +#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) + +// to be ok for alpha and others we have to align structures to 8 byte +// boundary. +// FIXME: do not change 4 by anything else: there is code which relies on that +#define ROUND_UP(x) _ROUND_UP(x,8LL) + +/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug +** messages. +*/ +#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ + +void __reiserfs_warning(struct super_block *s, const char *id, + const char *func, const char *fmt, ...); +#define reiserfs_warning(s, id, fmt, args...) \ + __reiserfs_warning(s, id, __func__, fmt, ##args) +/* assertions handling */ + +/** always check a condition and panic if it's false. */ +#define __RASSERT(cond, scond, format, args...) \ +do { \ + if (!(cond)) \ + reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ + __FILE__ ":%i:%s: " format "\n", \ + in_interrupt() ? -1 : task_pid_nr(current), \ + __LINE__, __func__ , ##args); \ +} while (0) + +#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args) + +#if defined( CONFIG_REISERFS_CHECK ) +#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args) +#else +#define RFALSE( cond, format, args... ) do {;} while( 0 ) +#endif + +#define CONSTF __attribute_const__ +/* + * Disk Data Structures + */ + +/***************************************************************************/ +/* SUPER BLOCK */ +/***************************************************************************/ + +/* + * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs + * the version in RAM is part of a larger structure containing fields never written to disk. + */ +#define UNSET_HASH 0 // read_super will guess about, what hash names + // in directories were sorted with +#define TEA_HASH 1 +#define YURA_HASH 2 +#define R5_HASH 3 +#define DEFAULT_HASH R5_HASH + +struct journal_params { + __le32 jp_journal_1st_block; /* where does journal start from on its + * device */ + __le32 jp_journal_dev; /* journal device st_rdev */ + __le32 jp_journal_size; /* size of the journal */ + __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */ + __le32 jp_journal_magic; /* random value made on fs creation (this + * was sb_journal_block_count) */ + __le32 jp_journal_max_batch; /* max number of blocks to batch into a + * trans */ + __le32 jp_journal_max_commit_age; /* in seconds, how old can an async + * commit be */ + __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction + * be */ +}; + +/* this is the super from 3.5.X, where X >= 10 */ +struct reiserfs_super_block_v1 { + __le32 s_block_count; /* blocks count */ + __le32 s_free_blocks; /* free blocks count */ + __le32 s_root_block; /* root block number */ + struct journal_params s_journal; + __le16 s_blocksize; /* block size */ + __le16 s_oid_maxsize; /* max size of object id array, see + * get_objectid() commentary */ + __le16 s_oid_cursize; /* current size of object id array */ + __le16 s_umount_state; /* this is set to 1 when filesystem was + * umounted, to 2 - when not */ + char s_magic[10]; /* reiserfs magic string indicates that + * file system is reiserfs: + * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ + __le16 s_fs_state; /* it is set to used by fsck to mark which + * phase of rebuilding is done */ + __le32 s_hash_function_code; /* indicate, what hash function is being use + * to sort names in a directory*/ + __le16 s_tree_height; /* height of disk tree */ + __le16 s_bmap_nr; /* amount of bitmap blocks needed to address + * each block of file system */ + __le16 s_version; /* this field is only reliable on filesystem + * with non-standard journal */ + __le16 s_reserved_for_journal; /* size in blocks of journal area on main + * device, we need to keep after + * making fs with non-standard journal */ +} __attribute__ ((__packed__)); + +#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) + +/* this is the on disk super block */ +struct reiserfs_super_block { + struct reiserfs_super_block_v1 s_v1; + __le32 s_inode_generation; + __le32 s_flags; /* Right now used only by inode-attributes, if enabled */ + unsigned char s_uuid[16]; /* filesystem unique identifier */ + unsigned char s_label[16]; /* filesystem volume label */ + __le16 s_mnt_count; /* Count of mounts since last fsck */ + __le16 s_max_mnt_count; /* Maximum mounts before check */ + __le32 s_lastcheck; /* Timestamp of last fsck */ + __le32 s_check_interval; /* Interval between checks */ + char s_unused[76]; /* zero filled by mkreiserfs and + * reiserfs_convert_objectid_map_v1() + * so any additions must be updated + * there as well. */ +} __attribute__ ((__packed__)); + +#define SB_SIZE (sizeof(struct reiserfs_super_block)) + +#define REISERFS_VERSION_1 0 +#define REISERFS_VERSION_2 2 + +// on-disk super block fields converted to cpu form +#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) +#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) +#define SB_BLOCKSIZE(s) \ + le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize)) +#define SB_BLOCK_COUNT(s) \ + le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count)) +#define SB_FREE_BLOCKS(s) \ + le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks)) +#define SB_REISERFS_MAGIC(s) \ + (SB_V1_DISK_SUPER_BLOCK(s)->s_magic) +#define SB_ROOT_BLOCK(s) \ + le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block)) +#define SB_TREE_HEIGHT(s) \ + le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height)) +#define SB_REISERFS_STATE(s) \ + le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state)) +#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version)) +#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr)) + +#define PUT_SB_BLOCK_COUNT(s, val) \ + do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0) +#define PUT_SB_FREE_BLOCKS(s, val) \ + do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0) +#define PUT_SB_ROOT_BLOCK(s, val) \ + do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0) +#define PUT_SB_TREE_HEIGHT(s, val) \ + do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0) +#define PUT_SB_REISERFS_STATE(s, val) \ + do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0) +#define PUT_SB_VERSION(s, val) \ + do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0) +#define PUT_SB_BMAP_NR(s, val) \ + do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0) + +#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal) +#define SB_ONDISK_JOURNAL_SIZE(s) \ + le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size)) +#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \ + le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block)) +#define SB_ONDISK_JOURNAL_DEVICE(s) \ + le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev)) +#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \ + le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal)) + +#define is_block_in_log_or_reserved_area(s, block) \ + block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \ + && block < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \ + ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \ + SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))) + +int is_reiserfs_3_5(struct reiserfs_super_block *rs); +int is_reiserfs_3_6(struct reiserfs_super_block *rs); +int is_reiserfs_jr(struct reiserfs_super_block *rs); + +/* ReiserFS leaves the first 64k unused, so that partition labels have + enough space. If someone wants to write a fancy bootloader that + needs more than 64k, let us know, and this will be increased in size. + This number must be larger than than the largest block size on any + platform, or code will break. -Hans */ +#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) +#define REISERFS_FIRST_BLOCK unused_define +#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES + +/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */ +#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) + +/* reiserfs internal error code (used by search_by_key and fix_nodes)) */ +#define CARRY_ON 0 +#define REPEAT_SEARCH -1 +#define IO_ERROR -2 +#define NO_DISK_SPACE -3 +#define NO_BALANCING_NEEDED (-4) +#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) +#define QUOTA_EXCEEDED -6 + +typedef __u32 b_blocknr_t; +typedef __le32 unp_t; + +struct unfm_nodeinfo { + unp_t unfm_nodenum; + unsigned short unfm_freespace; +}; + +/* there are two formats of keys: 3.5 and 3.6 + */ +#define KEY_FORMAT_3_5 0 +#define KEY_FORMAT_3_6 1 + +/* there are two stat datas */ +#define STAT_DATA_V1 0 +#define STAT_DATA_V2 1 + +static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode) +{ + return container_of(inode, struct reiserfs_inode_info, vfs_inode); +} + +static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) +{ + return sb->s_fs_info; +} + +/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 + * which overflows on large file systems. */ +static inline __u32 reiserfs_bmap_count(struct super_block *sb) +{ + return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; +} + +static inline int bmap_would_wrap(unsigned bmap_nr) +{ + return bmap_nr > ((1LL << 16) - 1); +} + +/** this says about version of key of all items (but stat data) the + object consists of */ +#define get_inode_item_key_version( inode ) \ + ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) + +#define set_inode_item_key_version( inode, version ) \ + ({ if((version)==KEY_FORMAT_3_6) \ + REISERFS_I(inode)->i_flags |= i_item_key_version_mask; \ + else \ + REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; }) + +#define get_inode_sd_version(inode) \ + ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1) + +#define set_inode_sd_version(inode, version) \ + ({ if((version)==STAT_DATA_V2) \ + REISERFS_I(inode)->i_flags |= i_stat_data_version_mask; \ + else \ + REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) + +/* This is an aggressive tail suppression policy, I am hoping it + improves our benchmarks. The principle behind it is that percentage + space saving is what matters, not absolute space saving. This is + non-intuitive, but it helps to understand it if you consider that the + cost to access 4 blocks is not much more than the cost to access 1 + block, if you have to do a seek and rotate. A tail risks a + non-linear disk access that is significant as a percentage of total + time cost for a 4 block file and saves an amount of space that is + less significant as a percentage of space, or so goes the hypothesis. + -Hans */ +#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ +(\ + (!(n_tail_size)) || \ + (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ + ( (n_file_size) >= (n_block_size) * 4 ) || \ + ( ( (n_file_size) >= (n_block_size) * 3 ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \ + ( ( (n_file_size) >= (n_block_size) * 2 ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \ + ( ( (n_file_size) >= (n_block_size) ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ +) + +/* Another strategy for tails, this one means only create a tail if all the + file would fit into one DIRECT item. + Primary intention for this one is to increase performance by decreasing + seeking. +*/ +#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ +(\ + (!(n_tail_size)) || \ + (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \ +) + +/* + * values for s_umount_state field + */ +#define REISERFS_VALID_FS 1 +#define REISERFS_ERROR_FS 2 + +// +// there are 5 item types currently +// +#define TYPE_STAT_DATA 0 +#define TYPE_INDIRECT 1 +#define TYPE_DIRECT 2 +#define TYPE_DIRENTRY 3 +#define TYPE_MAXTYPE 3 +#define TYPE_ANY 15 // FIXME: comment is required + +/***************************************************************************/ +/* KEY & ITEM HEAD */ +/***************************************************************************/ + +// +// directories use this key as well as old files +// +struct offset_v1 { + __le32 k_offset; + __le32 k_uniqueness; +} __attribute__ ((__packed__)); + +struct offset_v2 { + __le64 v; +} __attribute__ ((__packed__)); + +static inline __u16 offset_v2_k_type(const struct offset_v2 *v2) +{ + __u8 type = le64_to_cpu(v2->v) >> 60; + return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY; +} + +static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type) +{ + v2->v = + (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60); +} + +static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2) +{ + return le64_to_cpu(v2->v) & (~0ULL >> 4); +} + +static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset) +{ + offset &= (~0ULL >> 4); + v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); +} + +/* Key of an item determines its location in the S+tree, and + is composed of 4 components */ +struct reiserfs_key { + __le32 k_dir_id; /* packing locality: by default parent + directory object id */ + __le32 k_objectid; /* object identifier */ + union { + struct offset_v1 k_offset_v1; + struct offset_v2 k_offset_v2; + } __attribute__ ((__packed__)) u; +} __attribute__ ((__packed__)); + +struct in_core_key { + __u32 k_dir_id; /* packing locality: by default parent + directory object id */ + __u32 k_objectid; /* object identifier */ + __u64 k_offset; + __u8 k_type; +}; + +struct cpu_key { + struct in_core_key on_disk_key; + int version; + int key_length; /* 3 in all cases but direct2indirect and + indirect2direct conversion */ +}; + +/* Our function for comparing keys can compare keys of different + lengths. It takes as a parameter the length of the keys it is to + compare. These defines are used in determining what is to be passed + to it as that parameter. */ +#define REISERFS_FULL_KEY_LEN 4 +#define REISERFS_SHORT_KEY_LEN 2 + +/* The result of the key compare */ +#define FIRST_GREATER 1 +#define SECOND_GREATER -1 +#define KEYS_IDENTICAL 0 +#define KEY_FOUND 1 +#define KEY_NOT_FOUND 0 + +#define KEY_SIZE (sizeof(struct reiserfs_key)) +#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32)) + +/* return values for search_by_key and clones */ +#define ITEM_FOUND 1 +#define ITEM_NOT_FOUND 0 +#define ENTRY_FOUND 1 +#define ENTRY_NOT_FOUND 0 +#define DIRECTORY_NOT_FOUND -1 +#define REGULAR_FILE_FOUND -2 +#define DIRECTORY_FOUND -3 +#define BYTE_FOUND 1 +#define BYTE_NOT_FOUND 0 +#define FILE_NOT_FOUND -1 + +#define POSITION_FOUND 1 +#define POSITION_NOT_FOUND 0 + +// return values for reiserfs_find_entry and search_by_entry_key +#define NAME_FOUND 1 +#define NAME_NOT_FOUND 0 +#define GOTO_PREVIOUS_ITEM 2 +#define NAME_FOUND_INVISIBLE 3 + +/* Everything in the filesystem is stored as a set of items. The + item head contains the key of the item, its free space (for + indirect items) and specifies the location of the item itself + within the block. */ + +struct item_head { + /* Everything in the tree is found by searching for it based on + * its key.*/ + struct reiserfs_key ih_key; + union { + /* The free space in the last unformatted node of an + indirect item if this is an indirect item. This + equals 0xFFFF iff this is a direct item or stat data + item. Note that the key, not this field, is used to + determine the item type, and thus which field this + union contains. */ + __le16 ih_free_space_reserved; + /* Iff this is a directory item, this field equals the + number of directory entries in the directory item. */ + __le16 ih_entry_count; + } __attribute__ ((__packed__)) u; + __le16 ih_item_len; /* total size of the item body */ + __le16 ih_item_location; /* an offset to the item body + * within the block */ + __le16 ih_version; /* 0 for all old items, 2 for new + ones. Highest bit is set by fsck + temporary, cleaned after all + done */ +} __attribute__ ((__packed__)); +/* size of item header */ +#define IH_SIZE (sizeof(struct item_head)) + +#define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved) +#define ih_version(ih) le16_to_cpu((ih)->ih_version) +#define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count) +#define ih_location(ih) le16_to_cpu((ih)->ih_item_location) +#define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len) + +#define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0) +#define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0) +#define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0) +#define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0) +#define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0) + +#define unreachable_item(ih) (ih_version(ih) & (1 << 15)) + +#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) +#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) + +/* these operate on indirect items, where you've got an array of ints +** at a possibly unaligned location. These are a noop on ia32 +** +** p is the array of __u32, i is the index into the array, v is the value +** to store there. +*/ +#define get_block_num(p, i) get_unaligned_le32((p) + (i)) +#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) + +// +// in old version uniqueness field shows key type +// +#define V1_SD_UNIQUENESS 0 +#define V1_INDIRECT_UNIQUENESS 0xfffffffe +#define V1_DIRECT_UNIQUENESS 0xffffffff +#define V1_DIRENTRY_UNIQUENESS 500 +#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required + +// +// here are conversion routines +// +static inline int uniqueness2type(__u32 uniqueness) CONSTF; +static inline int uniqueness2type(__u32 uniqueness) +{ + switch ((int)uniqueness) { + case V1_SD_UNIQUENESS: + return TYPE_STAT_DATA; + case V1_INDIRECT_UNIQUENESS: + return TYPE_INDIRECT; + case V1_DIRECT_UNIQUENESS: + return TYPE_DIRECT; + case V1_DIRENTRY_UNIQUENESS: + return TYPE_DIRENTRY; + case V1_ANY_UNIQUENESS: + default: + return TYPE_ANY; + } +} + +static inline __u32 type2uniqueness(int type) CONSTF; +static inline __u32 type2uniqueness(int type) +{ + switch (type) { + case TYPE_STAT_DATA: + return V1_SD_UNIQUENESS; + case TYPE_INDIRECT: + return V1_INDIRECT_UNIQUENESS; + case TYPE_DIRECT: + return V1_DIRECT_UNIQUENESS; + case TYPE_DIRENTRY: + return V1_DIRENTRY_UNIQUENESS; + case TYPE_ANY: + default: + return V1_ANY_UNIQUENESS; + } +} + +// +// key is pointer to on disk key which is stored in le, result is cpu, +// there is no way to get version of object from key, so, provide +// version to these defines +// +static inline loff_t le_key_k_offset(int version, + const struct reiserfs_key *key) +{ + return (version == KEY_FORMAT_3_5) ? + le32_to_cpu(key->u.k_offset_v1.k_offset) : + offset_v2_k_offset(&(key->u.k_offset_v2)); +} + +static inline loff_t le_ih_k_offset(const struct item_head *ih) +{ + return le_key_k_offset(ih_version(ih), &(ih->ih_key)); +} + +static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) +{ + return (version == KEY_FORMAT_3_5) ? + uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) : + offset_v2_k_type(&(key->u.k_offset_v2)); +} + +static inline loff_t le_ih_k_type(const struct item_head *ih) +{ + return le_key_k_type(ih_version(ih), &(ih->ih_key)); +} + +static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, + loff_t offset) +{ + (version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) : /* jdm check */ + (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset)); +} + +static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) +{ + set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); +} + +static inline void set_le_key_k_type(int version, struct reiserfs_key *key, + int type) +{ + (version == KEY_FORMAT_3_5) ? + (void)(key->u.k_offset_v1.k_uniqueness = + cpu_to_le32(type2uniqueness(type))) + : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type)); +} + +static inline void set_le_ih_k_type(struct item_head *ih, int type) +{ + set_le_key_k_type(ih_version(ih), &(ih->ih_key), type); +} + +static inline int is_direntry_le_key(int version, struct reiserfs_key *key) +{ + return le_key_k_type(version, key) == TYPE_DIRENTRY; +} + +static inline int is_direct_le_key(int version, struct reiserfs_key *key) +{ + return le_key_k_type(version, key) == TYPE_DIRECT; +} + +static inline int is_indirect_le_key(int version, struct reiserfs_key *key) +{ + return le_key_k_type(version, key) == TYPE_INDIRECT; +} + +static inline int is_statdata_le_key(int version, struct reiserfs_key *key) +{ + return le_key_k_type(version, key) == TYPE_STAT_DATA; +} + +// +// item header has version. +// +static inline int is_direntry_le_ih(struct item_head *ih) +{ + return is_direntry_le_key(ih_version(ih), &ih->ih_key); +} + +static inline int is_direct_le_ih(struct item_head *ih) +{ + return is_direct_le_key(ih_version(ih), &ih->ih_key); +} + +static inline int is_indirect_le_ih(struct item_head *ih) +{ + return is_indirect_le_key(ih_version(ih), &ih->ih_key); +} + +static inline int is_statdata_le_ih(struct item_head *ih) +{ + return is_statdata_le_key(ih_version(ih), &ih->ih_key); +} + +// +// key is pointer to cpu key, result is cpu +// +static inline loff_t cpu_key_k_offset(const struct cpu_key *key) +{ + return key->on_disk_key.k_offset; +} + +static inline loff_t cpu_key_k_type(const struct cpu_key *key) +{ + return key->on_disk_key.k_type; +} + +static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset) +{ + key->on_disk_key.k_offset = offset; +} + +static inline void set_cpu_key_k_type(struct cpu_key *key, int type) +{ + key->on_disk_key.k_type = type; +} + +static inline void cpu_key_k_offset_dec(struct cpu_key *key) +{ + key->on_disk_key.k_offset--; +} + +#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) +#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) +#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) +#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) + +/* are these used ? */ +#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) +#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) +#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) +#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) + +#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \ + (!COMP_SHORT_KEYS(ih, key) && \ + I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize)) + +/* maximal length of item */ +#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) +#define MIN_ITEM_LEN 1 + +/* object identifier for root dir */ +#define REISERFS_ROOT_OBJECTID 2 +#define REISERFS_ROOT_PARENT_OBJECTID 1 + +extern struct reiserfs_key root_key; + +/* + * Picture represents a leaf of the S+tree + * ______________________________________________________ + * | | Array of | | | + * |Block | Object-Item | F r e e | Objects- | + * | head | Headers | S p a c e | Items | + * |______|_______________|___________________|___________| + */ + +/* Header of a disk block. More precisely, header of a formatted leaf + or internal node, and not the header of an unformatted node. */ +struct block_head { + __le16 blk_level; /* Level of a block in the tree. */ + __le16 blk_nr_item; /* Number of keys/items in a block. */ + __le16 blk_free_space; /* Block free space in bytes. */ + __le16 blk_reserved; + /* dump this in v4/planA */ + struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */ +}; + +#define BLKH_SIZE (sizeof(struct block_head)) +#define blkh_level(p_blkh) (le16_to_cpu((p_blkh)->blk_level)) +#define blkh_nr_item(p_blkh) (le16_to_cpu((p_blkh)->blk_nr_item)) +#define blkh_free_space(p_blkh) (le16_to_cpu((p_blkh)->blk_free_space)) +#define blkh_reserved(p_blkh) (le16_to_cpu((p_blkh)->blk_reserved)) +#define set_blkh_level(p_blkh,val) ((p_blkh)->blk_level = cpu_to_le16(val)) +#define set_blkh_nr_item(p_blkh,val) ((p_blkh)->blk_nr_item = cpu_to_le16(val)) +#define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val)) +#define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val)) +#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) +#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) + +/* + * values for blk_level field of the struct block_head + */ + +#define FREE_LEVEL 0 /* when node gets removed from the tree its + blk_level is set to FREE_LEVEL. It is then + used to see whether the node is still in the + tree */ + +#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ + +/* Given the buffer head of a formatted node, resolve to the block head of that node. */ +#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) +/* Number of items that are in buffer. */ +#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) +#define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh))) +#define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh))) + +#define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0) +#define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0) +#define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0) + +/* Get right delimiting key. -- little endian */ +#define B_PRIGHT_DELIM_KEY(bh) (&(blk_right_delim_key(B_BLK_HEAD(bh)))) + +/* Does the buffer contain a disk leaf. */ +#define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL) + +/* Does the buffer contain a disk internal node */ +#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ + && B_LEVEL(bh) <= MAX_HEIGHT) + +/***************************************************************************/ +/* STAT DATA */ +/***************************************************************************/ + +// +// old stat data is 32 bytes long. We are going to distinguish new one by +// different size +// +struct stat_data_v1 { + __le16 sd_mode; /* file type, permissions */ + __le16 sd_nlink; /* number of hard links */ + __le16 sd_uid; /* owner */ + __le16 sd_gid; /* group */ + __le32 sd_size; /* file size */ + __le32 sd_atime; /* time of last access */ + __le32 sd_mtime; /* time file was last modified */ + __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + union { + __le32 sd_rdev; + __le32 sd_blocks; /* number of blocks file uses */ + } __attribute__ ((__packed__)) u; + __le32 sd_first_direct_byte; /* first byte of file which is stored + in a direct item: except that if it + equals 1 it is a symlink and if it + equals ~(__u32)0 there is no + direct item. The existence of this + field really grates on me. Let's + replace it with a macro based on + sd_size and our tail suppression + policy. Someday. -Hans */ +} __attribute__ ((__packed__)); + +#define SD_V1_SIZE (sizeof(struct stat_data_v1)) +#define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5) +#define sd_v1_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) +#define set_sd_v1_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) +#define sd_v1_nlink(sdp) (le16_to_cpu((sdp)->sd_nlink)) +#define set_sd_v1_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le16(v)) +#define sd_v1_uid(sdp) (le16_to_cpu((sdp)->sd_uid)) +#define set_sd_v1_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le16(v)) +#define sd_v1_gid(sdp) (le16_to_cpu((sdp)->sd_gid)) +#define set_sd_v1_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le16(v)) +#define sd_v1_size(sdp) (le32_to_cpu((sdp)->sd_size)) +#define set_sd_v1_size(sdp,v) ((sdp)->sd_size = cpu_to_le32(v)) +#define sd_v1_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) +#define set_sd_v1_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) +#define sd_v1_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) +#define set_sd_v1_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) +#define sd_v1_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) +#define set_sd_v1_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) +#define sd_v1_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) +#define set_sd_v1_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) +#define sd_v1_blocks(sdp) (le32_to_cpu((sdp)->u.sd_blocks)) +#define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v)) +#define sd_v1_first_direct_byte(sdp) \ + (le32_to_cpu((sdp)->sd_first_direct_byte)) +#define set_sd_v1_first_direct_byte(sdp,v) \ + ((sdp)->sd_first_direct_byte = cpu_to_le32(v)) + +/* inode flags stored in sd_attrs (nee sd_reserved) */ + +/* we want common flags to have the same values as in ext2, + so chattr(1) will work without problems */ +#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL +#define REISERFS_APPEND_FL FS_APPEND_FL +#define REISERFS_SYNC_FL FS_SYNC_FL +#define REISERFS_NOATIME_FL FS_NOATIME_FL +#define REISERFS_NODUMP_FL FS_NODUMP_FL +#define REISERFS_SECRM_FL FS_SECRM_FL +#define REISERFS_UNRM_FL FS_UNRM_FL +#define REISERFS_COMPR_FL FS_COMPR_FL +#define REISERFS_NOTAIL_FL FS_NOTAIL_FL + +/* persistent flags that file inherits from the parent directory */ +#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ + REISERFS_SYNC_FL | \ + REISERFS_NOATIME_FL | \ + REISERFS_NODUMP_FL | \ + REISERFS_SECRM_FL | \ + REISERFS_COMPR_FL | \ + REISERFS_NOTAIL_FL ) + +/* Stat Data on disk (reiserfs version of UFS disk inode minus the + address blocks) */ +struct stat_data { + __le16 sd_mode; /* file type, permissions */ + __le16 sd_attrs; /* persistent inode flags */ + __le32 sd_nlink; /* number of hard links */ + __le64 sd_size; /* file size */ + __le32 sd_uid; /* owner */ + __le32 sd_gid; /* group */ + __le32 sd_atime; /* time of last access */ + __le32 sd_mtime; /* time file was last modified */ + __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + __le32 sd_blocks; + union { + __le32 sd_rdev; + __le32 sd_generation; + //__le32 sd_first_direct_byte; + /* first byte of file which is stored in a + direct item: except that if it equals 1 + it is a symlink and if it equals + ~(__u32)0 there is no direct item. The + existence of this field really grates + on me. Let's replace it with a macro + based on sd_size and our tail + suppression policy? */ + } __attribute__ ((__packed__)) u; +} __attribute__ ((__packed__)); +// +// this is 44 bytes long +// +#define SD_SIZE (sizeof(struct stat_data)) +#define SD_V2_SIZE SD_SIZE +#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) +#define sd_v2_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) +#define set_sd_v2_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) +/* sd_reserved */ +/* set_sd_reserved */ +#define sd_v2_nlink(sdp) (le32_to_cpu((sdp)->sd_nlink)) +#define set_sd_v2_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le32(v)) +#define sd_v2_size(sdp) (le64_to_cpu((sdp)->sd_size)) +#define set_sd_v2_size(sdp,v) ((sdp)->sd_size = cpu_to_le64(v)) +#define sd_v2_uid(sdp) (le32_to_cpu((sdp)->sd_uid)) +#define set_sd_v2_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le32(v)) +#define sd_v2_gid(sdp) (le32_to_cpu((sdp)->sd_gid)) +#define set_sd_v2_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le32(v)) +#define sd_v2_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) +#define set_sd_v2_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) +#define sd_v2_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) +#define set_sd_v2_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) +#define sd_v2_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) +#define set_sd_v2_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) +#define sd_v2_blocks(sdp) (le32_to_cpu((sdp)->sd_blocks)) +#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v)) +#define sd_v2_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) +#define set_sd_v2_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) +#define sd_v2_generation(sdp) (le32_to_cpu((sdp)->u.sd_generation)) +#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v)) +#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) +#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) + +/***************************************************************************/ +/* DIRECTORY STRUCTURE */ +/***************************************************************************/ +/* + Picture represents the structure of directory items + ________________________________________________ + | Array of | | | | | | + | directory |N-1| N-2 | .... | 1st |0th| + | entry headers | | | | | | + |_______________|___|_____|________|_______|___| + <---- directory entries ------> + + First directory item has k_offset component 1. We store "." and ".." + in one item, always, we never split "." and ".." into differing + items. This makes, among other things, the code for removing + directories simpler. */ +#define SD_OFFSET 0 +#define SD_UNIQUENESS 0 +#define DOT_OFFSET 1 +#define DOT_DOT_OFFSET 2 +#define DIRENTRY_UNIQUENESS 500 + +/* */ +#define FIRST_ITEM_OFFSET 1 + +/* + Q: How to get key of object pointed to by entry from entry? + + A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key + of object, entry points to */ + +/* NOT IMPLEMENTED: + Directory will someday contain stat data of object */ + +struct reiserfs_de_head { + __le32 deh_offset; /* third component of the directory entry key */ + __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced + by directory entry */ + __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ + __le16 deh_location; /* offset of name in the whole item */ + __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether + entry is hidden (unlinked) */ +} __attribute__ ((__packed__)); +#define DEH_SIZE sizeof(struct reiserfs_de_head) +#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) +#define deh_dir_id(p_deh) (le32_to_cpu((p_deh)->deh_dir_id)) +#define deh_objectid(p_deh) (le32_to_cpu((p_deh)->deh_objectid)) +#define deh_location(p_deh) (le16_to_cpu((p_deh)->deh_location)) +#define deh_state(p_deh) (le16_to_cpu((p_deh)->deh_state)) + +#define put_deh_offset(p_deh,v) ((p_deh)->deh_offset = cpu_to_le32((v))) +#define put_deh_dir_id(p_deh,v) ((p_deh)->deh_dir_id = cpu_to_le32((v))) +#define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v))) +#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v))) +#define put_deh_state(p_deh,v) ((p_deh)->deh_state = cpu_to_le16((v))) + +/* empty directory contains two entries "." and ".." and their headers */ +#define EMPTY_DIR_SIZE \ +(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen (".."))) + +/* old format directories have this size when empty */ +#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) + +#define DEH_Statdata 0 /* not used now */ +#define DEH_Visible 2 + +/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */ +#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__) +# define ADDR_UNALIGNED_BITS (3) +#endif + +/* These are only used to manipulate deh_state. + * Because of this, we'll use the ext2_ bit routines, + * since they are little endian */ +#ifdef ADDR_UNALIGNED_BITS + +# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) +# define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3) + +# define set_bit_unaligned(nr, addr) \ + __test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) +# define clear_bit_unaligned(nr, addr) \ + __test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) +# define test_bit_unaligned(nr, addr) \ + test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) + +#else + +# define set_bit_unaligned(nr, addr) __test_and_set_bit_le(nr, addr) +# define clear_bit_unaligned(nr, addr) __test_and_clear_bit_le(nr, addr) +# define test_bit_unaligned(nr, addr) test_bit_le(nr, addr) + +#endif + +#define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state)) +#define mark_de_hidden(deh) clear_bit_unaligned (DEH_Visible, &((deh)->deh_state)) + +#define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) +#define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) + +extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, + __le32 par_dirid, __le32 par_objid); +extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, + __le32 par_dirid, __le32 par_objid); + +/* array of the entry headers */ + /* get item body */ +#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) ) +#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih))) + +/* length of the directory entry in directory item. This define + calculates length of i-th directory entry using directory entry + locations from dir entry head. When it calculates length of 0-th + directory entry, it uses length of whole item in place of entry + location of the non-existent following entry in the calculation. + See picture above.*/ +/* +#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ +((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh)))) +*/ +static inline int entry_length(const struct buffer_head *bh, + const struct item_head *ih, int pos_in_item) +{ + struct reiserfs_de_head *deh; + + deh = B_I_DEH(bh, ih) + pos_in_item; + if (pos_in_item) + return deh_location(deh - 1) - deh_location(deh); + + return ih_item_len(ih) - deh_location(deh); +} + +/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */ +#define I_ENTRY_COUNT(ih) (ih_entry_count((ih))) + +/* name by bh, ih and entry_num */ +#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num)))) + +// two entries per block (at least) +#define REISERFS_MAX_NAME(block_size) 255 + +/* this structure is used for operations on directory entries. It is + not a disk structure. */ +/* When reiserfs_find_entry or search_by_entry_key find directory + entry, they return filled reiserfs_dir_entry structure */ +struct reiserfs_dir_entry { + struct buffer_head *de_bh; + int de_item_num; + struct item_head *de_ih; + int de_entry_num; + struct reiserfs_de_head *de_deh; + int de_entrylen; + int de_namelen; + char *de_name; + unsigned long *de_gen_number_bit_string; + + __u32 de_dir_id; + __u32 de_objectid; + + struct cpu_key de_entry_key; +}; + +/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ + +/* pointer to file name, stored in entry */ +#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh)) + +/* length of name */ +#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ +(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0)) + +/* hash value occupies bits from 7 up to 30 */ +#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) +/* generation number occupies 7 bits starting from 0 up to 6 */ +#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) +#define MAX_GENERATION_NUMBER 127 + +#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) + +/* + * Picture represents an internal node of the reiserfs tree + * ______________________________________________________ + * | | Array of | Array of | Free | + * |block | keys | pointers | space | + * | head | N | N+1 | | + * |______|_______________|___________________|___________| + */ + +/***************************************************************************/ +/* DISK CHILD */ +/***************************************************************************/ +/* Disk child pointer: The pointer from an internal node of the tree + to a node that is on disk. */ +struct disk_child { + __le32 dc_block_number; /* Disk child's block number. */ + __le16 dc_size; /* Disk child's used space. */ + __le16 dc_reserved; +}; + +#define DC_SIZE (sizeof(struct disk_child)) +#define dc_block_number(dc_p) (le32_to_cpu((dc_p)->dc_block_number)) +#define dc_size(dc_p) (le16_to_cpu((dc_p)->dc_size)) +#define put_dc_block_number(dc_p, val) do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0) +#define put_dc_size(dc_p, val) do { (dc_p)->dc_size = cpu_to_le16(val); } while(0) + +/* Get disk child by buffer header and position in the tree node. */ +#define B_N_CHILD(bh, n_pos) ((struct disk_child *)\ +((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos))) + +/* Get disk child number by buffer header and position in the tree node. */ +#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos))) +#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \ + (put_dc_block_number(B_N_CHILD(bh, n_pos), val)) + + /* maximal value of field child_size in structure disk_child */ + /* child size is the combined size of all items and their headers */ +#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) + +/* amount of used space in buffer (not including block head) */ +#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur))) + +/* max and min number of keys in internal node */ +#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) +#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) + +/***************************************************************************/ +/* PATH STRUCTURES AND DEFINES */ +/***************************************************************************/ + +/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the + key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it + does not find them in the cache it reads them from disk. For each node search_by_key finds using + reiserfs_bread it then uses bin_search to look through that node. bin_search will find the + position of the block_number of the next node if it is looking through an internal node. If it + is looking through a leaf node bin_search will find the position of the item which has key either + equal to given key, or which is the maximal key less than the given key. */ + +struct path_element { + struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */ + int pe_position; /* Position in the tree node which is placed in the */ + /* buffer above. */ +}; + +#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ +#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ +#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ + +#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ +#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ + +/* We need to keep track of who the ancestors of nodes are. When we + perform a search we record which nodes were visited while + descending the tree looking for the node we searched for. This list + of nodes is called the path. This information is used while + performing balancing. Note that this path information may become + invalid, and this means we must check it when using it to see if it + is still valid. You'll need to read search_by_key and the comments + in it, especially about decrement_counters_in_path(), to understand + this structure. + +Paths make the code so much harder to work with and debug.... An +enormous number of bugs are due to them, and trying to write or modify +code that uses them just makes my head hurt. They are based on an +excessive effort to avoid disturbing the precious VFS code.:-( The +gods only know how we are going to SMP the code that uses them. +znodes are the way! */ + +#define PATH_READA 0x1 /* do read ahead */ +#define PATH_READA_BACK 0x2 /* read backwards */ + +struct treepath { + int path_length; /* Length of the array above. */ + int reada; + struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ + int pos_in_item; +}; + +#define pos_in_item(path) ((path)->pos_in_item) + +#define INITIALIZE_PATH(var) \ +struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} + +/* Get path element by path and path position. */ +#define PATH_OFFSET_PELEMENT(path, n_offset) ((path)->path_elements + (n_offset)) + +/* Get buffer header at the path by path and path position. */ +#define PATH_OFFSET_PBUFFER(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer) + +/* Get position in the element at the path by path and path position. */ +#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) + +#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) + /* you know, to the person who didn't + write this the macro name does not + at first suggest what it does. + Maybe POSITION_FROM_PATH_END? Or + maybe we should just focus on + dumping paths... -Hans */ +#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) + +#define PATH_PITEM_HEAD(path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)) + +/* in do_balance leaf has h == 0 in contrast with path structure, + where root has level == 0. That is why we need these defines */ +#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h)) /* tb->S[h] */ +#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ +#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) +#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ + +#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) + +#define get_last_bh(path) PATH_PLAST_BUFFER(path) +#define get_ih(path) PATH_PITEM_HEAD(path) +#define get_item_pos(path) PATH_LAST_POSITION(path) +#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path))) +#define item_moved(ih,path) comp_items(ih, path) +#define path_changed(ih,path) comp_items (ih, path) + +/***************************************************************************/ +/* MISC */ +/***************************************************************************/ + +/* Size of pointer to the unformatted node. */ +#define UNFM_P_SIZE (sizeof(unp_t)) +#define UNFM_P_SHIFT 2 + +// in in-core inode key is stored on le form +#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) + +#define MAX_UL_INT 0xffffffff +#define MAX_INT 0x7ffffff +#define MAX_US_INT 0xffff + +// reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset +#define U32_MAX (~(__u32)0) + +static inline loff_t max_reiserfs_offset(struct inode *inode) +{ + if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) + return (loff_t) U32_MAX; + + return (loff_t) ((~(__u64) 0) >> 4); +} + +/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/ +#define MAX_KEY_OBJECTID MAX_UL_INT + +#define MAX_B_NUM MAX_UL_INT +#define MAX_FC_NUM MAX_US_INT + +/* the purpose is to detect overflow of an unsigned short */ +#define REISERFS_LINK_MAX (MAX_US_INT - 1000) + +/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ +#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ +#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ + +#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) +#define get_generation(s) atomic_read (&fs_generation(s)) +#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) +#define __fs_changed(gen,s) (gen != get_generation (s)) +#define fs_changed(gen,s) \ +({ \ + reiserfs_cond_resched(s); \ + __fs_changed(gen, s); \ +}) + +/***************************************************************************/ +/* FIXATE NODES */ +/***************************************************************************/ + +#define VI_TYPE_LEFT_MERGEABLE 1 +#define VI_TYPE_RIGHT_MERGEABLE 2 + +/* To make any changes in the tree we always first find node, that + contains item to be changed/deleted or place to insert a new + item. We call this node S. To do balancing we need to decide what + we will shift to left/right neighbor, or to a new node, where new + item will be etc. To make this analysis simpler we build virtual + node. Virtual node is an array of items, that will replace items of + node S. (For instance if we are going to delete an item, virtual + node does not contain it). Virtual node keeps information about + item sizes and types, mergeability of first and last items, sizes + of all entries in directory item. We use this array of items when + calculating what we can shift to neighbors and how many nodes we + have to have if we do not any shiftings, if we shift to left/right + neighbor or to both. */ +struct virtual_item { + int vi_index; // index in the array of item operations + unsigned short vi_type; // left/right mergeability + unsigned short vi_item_len; /* length of item that it will have after balancing */ + struct item_head *vi_ih; + const char *vi_item; // body of item (old or new) + const void *vi_new_data; // 0 always but paste mode + void *vi_uarea; // item specific area +}; + +struct virtual_node { + char *vn_free_ptr; /* this is a pointer to the free space in the buffer */ + unsigned short vn_nr_item; /* number of items in virtual node */ + short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ + short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ + short vn_affected_item_num; + short vn_pos_in_item; + struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */ + const void *vn_data; + struct virtual_item *vn_vi; /* array of items (including a new one, excluding item to be deleted) */ +}; + +/* used by directory items when creating virtual nodes */ +struct direntry_uarea { + int flags; + __u16 entry_count; + __u16 entry_sizes[1]; +} __attribute__ ((__packed__)); + +/***************************************************************************/ +/* TREE BALANCE */ +/***************************************************************************/ + +/* This temporary structure is used in tree balance algorithms, and + constructed as we go to the extent that its various parts are + needed. It contains arrays of nodes that can potentially be + involved in the balancing of node S, and parameters that define how + each of the nodes must be balanced. Note that in these algorithms + for balancing the worst case is to need to balance the current node + S and the left and right neighbors and all of their parents plus + create a new node. We implement S1 balancing for the leaf nodes + and S0 balancing for the internal nodes (S1 and S0 are defined in + our papers.)*/ + +#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ + +/* maximum number of FEB blocknrs on a single level */ +#define MAX_AMOUNT_NEEDED 2 + +/* someday somebody will prefix every field in this struct with tb_ */ +struct tree_balance { + int tb_mode; + int need_balance_dirty; + struct super_block *tb_sb; + struct reiserfs_transaction_handle *transaction_handle; + struct treepath *tb_path; + struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ + struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */ + struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ + struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ + struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ + struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ + + struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals + cur_blknum. */ + struct buffer_head *used[MAX_FEB_SIZE]; + struct buffer_head *thrown[MAX_FEB_SIZE]; + int lnum[MAX_HEIGHT]; /* array of number of items which must be + shifted to the left in order to balance the + current node; for leaves includes item that + will be partially shifted; for internal + nodes, it is the number of child pointers + rather than items. It includes the new item + being created. The code sometimes subtracts + one to get the number of wholly shifted + items for other purposes. */ + int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ + int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and + S[h] to its item number within the node CFL[h] */ + int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ + int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from + S[h]. A negative value means removing. */ + int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after + balancing on the level h of the tree. If 0 then S is + being deleted, if 1 then S is remaining and no new nodes + are being created, if 2 or 3 then 1 or 2 new nodes is + being created */ + + /* fields that are used only for balancing leaves of the tree */ + int cur_blknum; /* number of empty blocks having been already allocated */ + int s0num; /* number of items that fall into left most node when S[0] splits */ + int s1num; /* number of items that fall into first new node when S[0] splits */ + int s2num; /* number of items that fall into second new node when S[0] splits */ + int lbytes; /* number of bytes which can flow to the left neighbor from the left */ + /* most liquid item that cannot be shifted from S[0] entirely */ + /* if -1 then nothing will be partially shifted */ + int rbytes; /* number of bytes which will flow to the right neighbor from the right */ + /* most liquid item that cannot be shifted from S[0] entirely */ + /* if -1 then nothing will be partially shifted */ + int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ + /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ + int s2bytes; + struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ + char *vn_buf; /* kmalloced memory. Used to create + virtual node and keep map of + dirtied bitmap blocks */ + int vn_buf_size; /* size of the vn_buf */ + struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */ + + int fs_gen; /* saved value of `reiserfs_generation' counter + see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ +#ifdef DISPLACE_NEW_PACKING_LOCALITIES + struct in_core_key key; /* key pointer, to pass to block allocator or + another low-level subsystem */ +#endif +}; + +/* These are modes of balancing */ + +/* When inserting an item. */ +#define M_INSERT 'i' +/* When inserting into (directories only) or appending onto an already + existent item. */ +#define M_PASTE 'p' +/* When deleting an item. */ +#define M_DELETE 'd' +/* When truncating an item or removing an entry from a (directory) item. */ +#define M_CUT 'c' + +/* used when balancing on leaf level skipped (in reiserfsck) */ +#define M_INTERNAL 'n' + +/* When further balancing is not needed, then do_balance does not need + to be called. */ +#define M_SKIP_BALANCING 's' +#define M_CONVERT 'v' + +/* modes of leaf_move_items */ +#define LEAF_FROM_S_TO_L 0 +#define LEAF_FROM_S_TO_R 1 +#define LEAF_FROM_R_TO_L 2 +#define LEAF_FROM_L_TO_R 3 +#define LEAF_FROM_S_TO_SNEW 4 + +#define FIRST_TO_LAST 0 +#define LAST_TO_FIRST 1 + +/* used in do_balance for passing parent of node information that has + been gotten from tb struct */ +struct buffer_info { + struct tree_balance *tb; + struct buffer_head *bi_bh; + struct buffer_head *bi_parent; + int bi_position; +}; + +static inline struct super_block *sb_from_tb(struct tree_balance *tb) +{ + return tb ? tb->tb_sb : NULL; +} + +static inline struct super_block *sb_from_bi(struct buffer_info *bi) +{ + return bi ? sb_from_tb(bi->tb) : NULL; +} + +/* there are 4 types of items: stat data, directory item, indirect, direct. ++-------------------+------------+--------------+------------+ +| | k_offset | k_uniqueness | mergeable? | ++-------------------+------------+--------------+------------+ +| stat data | 0 | 0 | no | ++-------------------+------------+--------------+------------+ +| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no | +| non 1st directory | hash value | | yes | +| item | | | | ++-------------------+------------+--------------+------------+ +| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object ++-------------------+------------+--------------+------------+ +| direct item | offset + 1 |TYPE_DIRECT | if not this is not the first direct item of the object ++-------------------+------------+--------------+------------+ +*/ + +struct item_operations { + int (*bytes_number) (struct item_head * ih, int block_size); + void (*decrement_key) (struct cpu_key *); + int (*is_left_mergeable) (struct reiserfs_key * ih, + unsigned long bsize); + void (*print_item) (struct item_head *, char *item); + void (*check_item) (struct item_head *, char *item); + + int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, + int is_affected, int insert_size); + int (*check_left) (struct virtual_item * vi, int free, + int start_skip, int end_skip); + int (*check_right) (struct virtual_item * vi, int free); + int (*part_size) (struct virtual_item * vi, int from, int to); + int (*unit_num) (struct virtual_item * vi); + void (*print_vi) (struct virtual_item * vi); +}; + +extern struct item_operations *item_ops[TYPE_ANY + 1]; + +#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) +#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) +#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item) +#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item) +#define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size) +#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip) +#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free) +#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to) +#define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi) +#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi) + +#define COMP_SHORT_KEYS comp_short_keys + +/* number of blocks pointed to by the indirect item */ +#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) + +/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ +#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) + +/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ + +/* get the item header */ +#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) ) + +/* get key */ +#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) ) + +/* get the key */ +#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) ) + +/* get item body */ +#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num)))) + +/* get the stat data by the buffer header and the item order */ +#define B_N_STAT_DATA(bh,nr) \ +( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) ) + + /* following defines use reiserfs buffer header and item header */ + +/* get stat-data */ +#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) + +// this is 3976 for size==4096 +#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) + +/* indirect items consist of entries which contain blocknrs, pos + indicates which entry, and B_I_POS_UNFM_POINTER resolves to the + blocknr contained by the entry pos points to */ +#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos))) +#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) + +struct reiserfs_iget_args { + __u32 objectid; + __u32 dirid; +}; + +/***************************************************************************/ +/* FUNCTION DECLARATIONS */ +/***************************************************************************/ + +#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) + +#define journal_trans_half(blocksize) \ + ((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32)) + +/* journal.c see journal.c for all the comments here */ + +/* first block written in a commit. */ +struct reiserfs_journal_desc { + __le32 j_trans_id; /* id of commit */ + __le32 j_len; /* length of commit. len +1 is the commit block */ + __le32 j_mount_id; /* mount id of this trans */ + __le32 j_realblock[1]; /* real locations for each block */ +}; + +#define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id) +#define get_desc_trans_len(d) le32_to_cpu((d)->j_len) +#define get_desc_mount_id(d) le32_to_cpu((d)->j_mount_id) + +#define set_desc_trans_id(d,val) do { (d)->j_trans_id = cpu_to_le32 (val); } while (0) +#define set_desc_trans_len(d,val) do { (d)->j_len = cpu_to_le32 (val); } while (0) +#define set_desc_mount_id(d,val) do { (d)->j_mount_id = cpu_to_le32 (val); } while (0) + +/* last block written in a commit */ +struct reiserfs_journal_commit { + __le32 j_trans_id; /* must match j_trans_id from the desc block */ + __le32 j_len; /* ditto */ + __le32 j_realblock[1]; /* real locations for each block */ +}; + +#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id) +#define get_commit_trans_len(c) le32_to_cpu((c)->j_len) +#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id) + +#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) +#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) + +/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the +** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, +** and this transaction does not need to be replayed. +*/ +struct reiserfs_journal_header { + __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */ + __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */ + __le32 j_mount_id; + /* 12 */ struct journal_params jh_journal; +}; + +/* biggest tunable defines are right here */ +#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ +#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ +#define JOURNAL_TRANS_MIN_DEFAULT 256 +#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ +#define JOURNAL_MIN_RATIO 2 +#define JOURNAL_MAX_COMMIT_AGE 30 +#define JOURNAL_MAX_TRANS_AGE 30 +#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) +#define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \ + 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \ + REISERFS_QUOTA_TRANS_BLOCKS(sb))) + +#ifdef CONFIG_QUOTA +#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA)) +/* We need to update data and inode (atime) */ +#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0) +/* 1 balancing, 1 bitmap, 1 data per write + stat data update */ +#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ +(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0) +/* same as with INIT */ +#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ +(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0) +#else +#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0 +#define REISERFS_QUOTA_INIT_BLOCKS(s) 0 +#define REISERFS_QUOTA_DEL_BLOCKS(s) 0 +#endif + +/* both of these can be as low as 1, or as high as you want. The min is the +** number of 4k bitmap nodes preallocated on mount. New nodes are allocated +** as needed, and released when transactions are committed. On release, if +** the current number of nodes is > max, the node is freed, otherwise, +** it is put on a free list for faster use later. +*/ +#define REISERFS_MIN_BITMAP_NODES 10 +#define REISERFS_MAX_BITMAP_NODES 100 + +#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ +#define JBH_HASH_MASK 8191 + +#define _jhashfn(sb,block) \ + (((unsigned long)sb>>L1_CACHE_SHIFT) ^ \ + (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) +#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) + +// We need these to make journal.c code more readable +#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) +#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) +#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) + +enum reiserfs_bh_state_bits { + BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ + BH_JDirty_wait, + BH_JNew, /* disk block was taken off free list before + * being in a finished transaction, or + * written to disk. Can be reused immed. */ + BH_JPrepared, + BH_JRestore_dirty, + BH_JTest, // debugging only will go away +}; + +BUFFER_FNS(JDirty, journaled); +TAS_BUFFER_FNS(JDirty, journaled); +BUFFER_FNS(JDirty_wait, journal_dirty); +TAS_BUFFER_FNS(JDirty_wait, journal_dirty); +BUFFER_FNS(JNew, journal_new); +TAS_BUFFER_FNS(JNew, journal_new); +BUFFER_FNS(JPrepared, journal_prepared); +TAS_BUFFER_FNS(JPrepared, journal_prepared); +BUFFER_FNS(JRestore_dirty, journal_restore_dirty); +TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty); +BUFFER_FNS(JTest, journal_test); +TAS_BUFFER_FNS(JTest, journal_test); + +/* +** transaction handle which is passed around for all journal calls +*/ +struct reiserfs_transaction_handle { + struct super_block *t_super; /* super for this FS when journal_begin was + called. saves calls to reiserfs_get_super + also used by nested transactions to make + sure they are nesting on the right FS + _must_ be first in the handle + */ + int t_refcount; + int t_blocks_logged; /* number of blocks this writer has logged */ + int t_blocks_allocated; /* number of blocks this writer allocated */ + unsigned int t_trans_id; /* sanity check, equals the current trans id */ + void *t_handle_save; /* save existing current->journal_info */ + unsigned displace_new_blocks:1; /* if new block allocation occurres, that block + should be displaced from others */ + struct list_head t_list; +}; + +/* used to keep track of ordered and tail writes, attached to the buffer + * head through b_journal_head. + */ +struct reiserfs_jh { + struct reiserfs_journal_list *jl; + struct buffer_head *bh; + struct list_head list; +}; + +void reiserfs_free_jh(struct buffer_head *bh); +int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); +int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); +int journal_mark_dirty(struct reiserfs_transaction_handle *, + struct super_block *, struct buffer_head *bh); + +static inline int reiserfs_file_data_log(struct inode *inode) +{ + if (reiserfs_data_log(inode->i_sb) || + (REISERFS_I(inode)->i_flags & i_data_log)) + return 1; + return 0; +} + +static inline int reiserfs_transaction_running(struct super_block *s) +{ + struct reiserfs_transaction_handle *th = current->journal_info; + if (th && th->t_super == s) + return 1; + if (th && th->t_super == NULL) + BUG(); + return 0; +} + +static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th) +{ + return th->t_blocks_allocated - th->t_blocks_logged; +} + +struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct + super_block + *, + int count); +int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); +int reiserfs_commit_page(struct inode *inode, struct page *page, + unsigned from, unsigned to); +int reiserfs_flush_old_commits(struct super_block *); +int reiserfs_commit_for_inode(struct inode *); +int reiserfs_inode_needs_commit(struct inode *); +void reiserfs_update_inode_transaction(struct inode *); +void reiserfs_wait_on_write_block(struct super_block *s); +void reiserfs_block_writes(struct reiserfs_transaction_handle *th); +void reiserfs_allow_writes(struct super_block *s); +void reiserfs_check_lock_depth(struct super_block *s, char *caller); +int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, + int wait); +void reiserfs_restore_prepared_buffer(struct super_block *, + struct buffer_head *bh); +int journal_init(struct super_block *, const char *j_dev_name, int old_format, + unsigned int); +int journal_release(struct reiserfs_transaction_handle *, struct super_block *); +int journal_release_error(struct reiserfs_transaction_handle *, + struct super_block *); +int journal_end(struct reiserfs_transaction_handle *, struct super_block *, + unsigned long); +int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, + unsigned long); +int journal_mark_freed(struct reiserfs_transaction_handle *, + struct super_block *, b_blocknr_t blocknr); +int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); +int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr, + int bit_nr, int searchall, b_blocknr_t *next); +int journal_begin(struct reiserfs_transaction_handle *, + struct super_block *sb, unsigned long); +int journal_join_abort(struct reiserfs_transaction_handle *, + struct super_block *sb, unsigned long); +void reiserfs_abort_journal(struct super_block *sb, int errno); +void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); +int reiserfs_allocate_list_bitmaps(struct super_block *s, + struct reiserfs_list_bitmap *, unsigned int); + +void add_save_link(struct reiserfs_transaction_handle *th, + struct inode *inode, int truncate); +int remove_save_link(struct inode *inode, int truncate); + +/* objectid.c */ +__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th); +void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, + __u32 objectid_to_release); +int reiserfs_convert_objectid_map_v1(struct super_block *); + +/* stree.c */ +int B_IS_IN_TREE(const struct buffer_head *); +extern void copy_item_head(struct item_head *to, + const struct item_head *from); + +// first key is in cpu form, second - le +extern int comp_short_keys(const struct reiserfs_key *le_key, + const struct cpu_key *cpu_key); +extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); + +// both are in le form +extern int comp_le_keys(const struct reiserfs_key *, + const struct reiserfs_key *); +extern int comp_short_le_keys(const struct reiserfs_key *, + const struct reiserfs_key *); + +// +// get key version from on disk key - kludge +// +static inline int le_key_version(const struct reiserfs_key *key) +{ + int type; + + type = offset_v2_k_type(&(key->u.k_offset_v2)); + if (type != TYPE_DIRECT && type != TYPE_INDIRECT + && type != TYPE_DIRENTRY) + return KEY_FORMAT_3_5; + + return KEY_FORMAT_3_6; + +} + +static inline void copy_key(struct reiserfs_key *to, + const struct reiserfs_key *from) +{ + memcpy(to, from, KEY_SIZE); +} + +int comp_items(const struct item_head *stored_ih, const struct treepath *path); +const struct reiserfs_key *get_rkey(const struct treepath *chk_path, + const struct super_block *sb); +int search_by_key(struct super_block *, const struct cpu_key *, + struct treepath *, int); +#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) +int search_for_position_by_key(struct super_block *sb, + const struct cpu_key *cpu_key, + struct treepath *search_path); +extern void decrement_bcount(struct buffer_head *bh); +void decrement_counters_in_path(struct treepath *search_path); +void pathrelse(struct treepath *search_path); +int reiserfs_check_path(struct treepath *p); +void pathrelse_and_restore(struct super_block *s, struct treepath *search_path); + +int reiserfs_insert_item(struct reiserfs_transaction_handle *th, + struct treepath *path, + const struct cpu_key *key, + struct item_head *ih, + struct inode *inode, const char *body); + +int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, + struct treepath *path, + const struct cpu_key *key, + struct inode *inode, + const char *body, int paste_size); + +int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, + struct treepath *path, + struct cpu_key *key, + struct inode *inode, + struct page *page, loff_t new_file_size); + +int reiserfs_delete_item(struct reiserfs_transaction_handle *th, + struct treepath *path, + const struct cpu_key *key, + struct inode *inode, struct buffer_head *un_bh); + +void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, + struct inode *inode, struct reiserfs_key *key); +int reiserfs_delete_object(struct reiserfs_transaction_handle *th, + struct inode *inode); +int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, + struct inode *inode, struct page *, + int update_timestamps); + +#define i_block_size(inode) ((inode)->i_sb->s_blocksize) +#define file_size(inode) ((inode)->i_size) +#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1)) + +#define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\ +!STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 ) + +void padd_item(char *item, int total_length, int length); + +/* inode.c */ +/* args for the create parameter of reiserfs_get_block */ +#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ +#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ +#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ +#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ +#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ +#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ + +void reiserfs_read_locked_inode(struct inode *inode, + struct reiserfs_iget_args *args); +int reiserfs_find_actor(struct inode *inode, void *p); +int reiserfs_init_locked_inode(struct inode *inode, void *p); +void reiserfs_evict_inode(struct inode *inode); +int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); +int reiserfs_get_block(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create); +struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); +struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); +int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, + int connectable); + +int reiserfs_truncate_file(struct inode *, int update_timestamps); +void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, + int type, int key_length); +void make_le_item_head(struct item_head *ih, const struct cpu_key *key, + int version, + loff_t offset, int type, int length, int entry_count); +struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key); + +struct reiserfs_security_handle; +int reiserfs_new_inode(struct reiserfs_transaction_handle *th, + struct inode *dir, umode_t mode, + const char *symname, loff_t i_size, + struct dentry *dentry, struct inode *inode, + struct reiserfs_security_handle *security); + +void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, + struct inode *inode, loff_t size); + +static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, + struct inode *inode) +{ + reiserfs_update_sd_size(th, inode, inode->i_size); +} + +void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); +void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); +int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); + +int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); + +/* namei.c */ +void set_de_name_and_namelen(struct reiserfs_dir_entry *de); +int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, + struct treepath *path, struct reiserfs_dir_entry *de); +struct dentry *reiserfs_get_parent(struct dentry *); + +#ifdef CONFIG_REISERFS_PROC_INFO +int reiserfs_proc_info_init(struct super_block *sb); +int reiserfs_proc_info_done(struct super_block *sb); +int reiserfs_proc_info_global_init(void); +int reiserfs_proc_info_global_done(void); + +#define PROC_EXP( e ) e + +#define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data +#define PROC_INFO_MAX( sb, field, value ) \ + __PINFO( sb ).field = \ + max( REISERFS_SB( sb ) -> s_proc_info_data.field, value ) +#define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) ) +#define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) ) +#define PROC_INFO_BH_STAT( sb, bh, level ) \ + PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] ); \ + PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \ + PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) ) +#else +static inline int reiserfs_proc_info_init(struct super_block *sb) +{ + return 0; +} + +static inline int reiserfs_proc_info_done(struct super_block *sb) +{ + return 0; +} + +static inline int reiserfs_proc_info_global_init(void) +{ + return 0; +} + +static inline int reiserfs_proc_info_global_done(void) +{ + return 0; +} + +#define PROC_EXP( e ) +#define VOID_V ( ( void ) 0 ) +#define PROC_INFO_MAX( sb, field, value ) VOID_V +#define PROC_INFO_INC( sb, field ) VOID_V +#define PROC_INFO_ADD( sb, field, val ) VOID_V +#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V +#endif + +/* dir.c */ +extern const struct inode_operations reiserfs_dir_inode_operations; +extern const struct inode_operations reiserfs_symlink_inode_operations; +extern const struct inode_operations reiserfs_special_inode_operations; +extern const struct file_operations reiserfs_dir_operations; +int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *); + +/* tail_conversion.c */ +int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, + struct treepath *, struct buffer_head *, loff_t); +int indirect2direct(struct reiserfs_transaction_handle *, struct inode *, + struct page *, struct treepath *, const struct cpu_key *, + loff_t, char *); +void reiserfs_unmap_buffer(struct buffer_head *); + +/* file.c */ +extern const struct inode_operations reiserfs_file_inode_operations; +extern const struct file_operations reiserfs_file_operations; +extern const struct address_space_operations reiserfs_address_space_operations; + +/* fix_nodes.c */ + +int fix_nodes(int n_op_mode, struct tree_balance *tb, + struct item_head *ins_ih, const void *); +void unfix_nodes(struct tree_balance *); + +/* prints.c */ +void __reiserfs_panic(struct super_block *s, const char *id, + const char *function, const char *fmt, ...) + __attribute__ ((noreturn)); +#define reiserfs_panic(s, id, fmt, args...) \ + __reiserfs_panic(s, id, __func__, fmt, ##args) +void __reiserfs_error(struct super_block *s, const char *id, + const char *function, const char *fmt, ...); +#define reiserfs_error(s, id, fmt, args...) \ + __reiserfs_error(s, id, __func__, fmt, ##args) +void reiserfs_info(struct super_block *s, const char *fmt, ...); +void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...); +void print_indirect_item(struct buffer_head *bh, int item_num); +void store_print_tb(struct tree_balance *tb); +void print_cur_tb(char *mes); +void print_de(struct reiserfs_dir_entry *de); +void print_bi(struct buffer_info *bi, char *mes); +#define PRINT_LEAF_ITEMS 1 /* print all items */ +#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ +#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ +void print_block(struct buffer_head *bh, ...); +void print_bmap(struct super_block *s, int silent); +void print_bmap_block(int i, char *data, int size, int silent); +/*void print_super_block (struct super_block * s, char * mes);*/ +void print_objectid_map(struct super_block *s); +void print_block_head(struct buffer_head *bh, char *mes); +void check_leaf(struct buffer_head *bh); +void check_internal(struct buffer_head *bh); +void print_statistics(struct super_block *s); +char *reiserfs_hashname(int code); + +/* lbalance.c */ +int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, + int mov_bytes, struct buffer_head *Snew); +int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes); +int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes); +void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first, + int del_num, int del_bytes); +void leaf_insert_into_buf(struct buffer_info *bi, int before, + struct item_head *inserted_item_ih, + const char *inserted_item_body, int zeros_number); +void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num, + int pos_in_item, int paste_size, const char *body, + int zeros_number); +void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, + int pos_in_item, int cut_size); +void leaf_paste_entries(struct buffer_info *bi, int item_num, int before, + int new_entry_count, struct reiserfs_de_head *new_dehs, + const char *records, int paste_size); +/* ibalance.c */ +int balance_internal(struct tree_balance *, int, int, struct item_head *, + struct buffer_head **); + +/* do_balance.c */ +void do_balance_mark_leaf_dirty(struct tree_balance *tb, + struct buffer_head *bh, int flag); +#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty +#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty + +void do_balance(struct tree_balance *tb, struct item_head *ih, + const char *body, int flag); +void reiserfs_invalidate_buffer(struct tree_balance *tb, + struct buffer_head *bh); + +int get_left_neighbor_position(struct tree_balance *tb, int h); +int get_right_neighbor_position(struct tree_balance *tb, int h); +void replace_key(struct tree_balance *tb, struct buffer_head *, int, + struct buffer_head *, int); +void make_empty_node(struct buffer_info *); +struct buffer_head *get_FEB(struct tree_balance *); + +/* bitmap.c */ + +/* structure contains hints for block allocator, and it is a container for + * arguments, such as node, search path, transaction_handle, etc. */ +struct __reiserfs_blocknr_hint { + struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */ + sector_t block; /* file offset, in blocks */ + struct in_core_key key; + struct treepath *path; /* search path, used by allocator to deternine search_start by + * various ways */ + struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and + * bitmap blocks changes */ + b_blocknr_t beg, end; + b_blocknr_t search_start; /* a field used to transfer search start value (block number) + * between different block allocator procedures + * (determine_search_start() and others) */ + int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed + * function that do actual allocation */ + + unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for + * formatted/unformatted blocks with/without preallocation */ + unsigned preallocate:1; +}; + +typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t; + +int reiserfs_parse_alloc_options(struct super_block *, char *); +void reiserfs_init_alloc_options(struct super_block *s); + +/* + * given a directory, this will tell you what packing locality + * to use for a new object underneat it. The locality is returned + * in disk byte order (le). + */ +__le32 reiserfs_choose_packing(struct inode *dir); + +int reiserfs_init_bitmap_cache(struct super_block *sb); +void reiserfs_free_bitmap_cache(struct super_block *sb); +void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); +struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap); +int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); +void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *, + b_blocknr_t, int for_unformatted); +int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int, + int); +static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb, + b_blocknr_t * new_blocknrs, + int amount_needed) +{ + reiserfs_blocknr_hint_t hint = { + .th = tb->transaction_handle, + .path = tb->tb_path, + .inode = NULL, + .key = tb->key, + .block = 0, + .formatted_node = 1 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, + 0); +} + +static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle + *th, struct inode *inode, + b_blocknr_t * new_blocknrs, + struct treepath *path, + sector_t block) +{ + reiserfs_blocknr_hint_t hint = { + .th = th, + .path = path, + .inode = inode, + .block = block, + .formatted_node = 0, + .preallocate = 0 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); +} + +#ifdef REISERFS_PREALLOCATE +static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle + *th, struct inode *inode, + b_blocknr_t * new_blocknrs, + struct treepath *path, + sector_t block) +{ + reiserfs_blocknr_hint_t hint = { + .th = th, + .path = path, + .inode = inode, + .block = block, + .formatted_node = 0, + .preallocate = 1 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); +} + +void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, + struct inode *inode); +void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th); +#endif + +/* hashes.c */ +__u32 keyed_hash(const signed char *msg, int len); +__u32 yura_hash(const signed char *msg, int len); +__u32 r5_hash(const signed char *msg, int len); + +#define reiserfs_set_le_bit __set_bit_le +#define reiserfs_test_and_set_le_bit __test_and_set_bit_le +#define reiserfs_clear_le_bit __clear_bit_le +#define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le +#define reiserfs_test_le_bit test_bit_le +#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le + +/* sometimes reiserfs_truncate may require to allocate few new blocks + to perform indirect2direct conversion. People probably used to + think, that truncate should work without problems on a filesystem + without free disk space. They may complain that they can not + truncate due to lack of free disk space. This spare space allows us + to not worry about it. 500 is probably too much, but it should be + absolutely safe */ +#define SPARE_SPACE 500 + +/* prototypes from ioctl.c */ +long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +long reiserfs_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); +int reiserfs_unpack(struct inode *inode, struct file *filp); diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 7483279b482d..9a17f63c3fd7 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -13,8 +13,7 @@ #include <linux/vmalloc.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/reiserfs_fs.h> -#include <linux/reiserfs_fs_sb.h> +#include "reiserfs.h" #include <linux/buffer_head.h> int reiserfs_resize(struct super_block *s, unsigned long block_count_new) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 313d39d639eb..f8afa4b162b8 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -51,7 +51,7 @@ #include <linux/time.h> #include <linux/string.h> #include <linux/pagemap.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/buffer_head.h> #include <linux/quotaops.h> @@ -1284,12 +1284,12 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, ** -clm */ - data = kmap_atomic(un_bh->b_page, KM_USER0); + data = kmap_atomic(un_bh->b_page); off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); memcpy(data + off, B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), ret_value); - kunmap_atomic(data, KM_USER0); + kunmap_atomic(data); } /* Perform balancing after all resources have been collected at once. */ do_balance(&s_del_balance, NULL, NULL, M_DELETE); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index e12d8b97cd4d..8b7616ef06d8 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -16,9 +16,9 @@ #include <linux/vmalloc.h> #include <linux/time.h> #include <asm/uaccess.h> -#include <linux/reiserfs_fs.h> -#include <linux/reiserfs_acl.h> -#include <linux/reiserfs_xattr.h> +#include "reiserfs.h" +#include "acl.h" +#include "xattr.h" #include <linux/init.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> @@ -1874,11 +1874,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) unlock_new_inode(root_inode); } - s->s_root = d_alloc_root(root_inode); - if (!s->s_root) { - iput(root_inode); + s->s_root = d_make_root(root_inode); + if (!s->s_root) goto error; - } // define and initialize hash function sbi->s_hash_function = hash_function(s); if (sbi->s_hash_function == NULL) { diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index d7f6e51bef2a..5e2624d12f70 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c @@ -5,7 +5,7 @@ #include <linux/time.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" /* access to tail : when one is going to read tail it must make sure, that is not running. direct2indirect and indirect2direct can not run concurrently */ @@ -128,9 +128,9 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, if (up_to_date_bh) { unsigned pgoff = (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); - char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0); + char *kaddr = kmap_atomic(up_to_date_bh->b_page); memset(kaddr + pgoff, 0, blk_size - total_tail); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } REISERFS_I(inode)->i_first_direct_byte = U32_MAX; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index c24deda8a8bc..46fc1c20a6b1 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -33,7 +33,7 @@ * The xattrs themselves are protected by the xattr_sem. */ -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/capability.h> #include <linux/dcache.h> #include <linux/namei.h> @@ -43,8 +43,8 @@ #include <linux/file.h> #include <linux/pagemap.h> #include <linux/xattr.h> -#include <linux/reiserfs_xattr.h> -#include <linux/reiserfs_acl.h> +#include "xattr.h" +#include "acl.h" #include <asm/uaccess.h> #include <net/checksum.h> #include <linux/stat.h> diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h new file mode 100644 index 000000000000..f59626c5d33b --- /dev/null +++ b/fs/reiserfs/xattr.h @@ -0,0 +1,122 @@ +#include <linux/reiserfs_xattr.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/rwsem.h> + +struct inode; +struct dentry; +struct iattr; +struct super_block; +struct nameidata; + +int reiserfs_xattr_register_handlers(void) __init; +void reiserfs_xattr_unregister_handlers(void); +int reiserfs_xattr_init(struct super_block *sb, int mount_flags); +int reiserfs_lookup_privroot(struct super_block *sb); +int reiserfs_delete_xattrs(struct inode *inode); +int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); +int reiserfs_permission(struct inode *inode, int mask); + +#ifdef CONFIG_REISERFS_FS_XATTR +#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) +ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size); +int reiserfs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); +int reiserfs_removexattr(struct dentry *dentry, const char *name); + +int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); +int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); +int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *, + struct inode *, const char *, const void *, + size_t, int); + +extern const struct xattr_handler reiserfs_xattr_user_handler; +extern const struct xattr_handler reiserfs_xattr_trusted_handler; +extern const struct xattr_handler reiserfs_xattr_security_handler; +#ifdef CONFIG_REISERFS_FS_SECURITY +int reiserfs_security_init(struct inode *dir, struct inode *inode, + const struct qstr *qstr, + struct reiserfs_security_handle *sec); +int reiserfs_security_write(struct reiserfs_transaction_handle *th, + struct inode *inode, + struct reiserfs_security_handle *sec); +void reiserfs_security_free(struct reiserfs_security_handle *sec); +#endif + +static inline int reiserfs_xattrs_initialized(struct super_block *sb) +{ + return REISERFS_SB(sb)->priv_root != NULL; +} + +#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header)) +static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size) +{ + loff_t ret = 0; + if (reiserfs_file_data_log(inode)) { + ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize); + ret >>= inode->i_sb->s_blocksize_bits; + } + return ret; +} + +/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file. + * Let's try to be smart about it. + * xattr root: We cache it. If it's not cached, we may need to create it. + * xattr dir: If anything has been loaded for this inode, we can set a flag + * saying so. + * xattr file: Since we don't cache xattrs, we can't tell. We always include + * blocks for it. + * + * However, since root and dir can be created between calls - YOU MUST SAVE + * THIS VALUE. + */ +static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode) +{ + size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); + + if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) { + nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); + if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode) + nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); + } + + return nblocks; +} + +static inline void reiserfs_init_xattr_rwsem(struct inode *inode) +{ + init_rwsem(&REISERFS_I(inode)->i_xattr_sem); +} + +#else + +#define reiserfs_getxattr NULL +#define reiserfs_setxattr NULL +#define reiserfs_listxattr NULL +#define reiserfs_removexattr NULL + +static inline void reiserfs_init_xattr_rwsem(struct inode *inode) +{ +} +#endif /* CONFIG_REISERFS_FS_XATTR */ + +#ifndef CONFIG_REISERFS_FS_SECURITY +static inline int reiserfs_security_init(struct inode *dir, + struct inode *inode, + const struct qstr *qstr, + struct reiserfs_security_handle *sec) +{ + return 0; +} +static inline int +reiserfs_security_write(struct reiserfs_transaction_handle *th, + struct inode *inode, + struct reiserfs_security_handle *sec) +{ + return 0; +} +static inline void reiserfs_security_free(struct reiserfs_security_handle *sec) +{} +#endif diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 6da0396e5052..44474f9b990d 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -1,14 +1,14 @@ #include <linux/capability.h> #include <linux/fs.h> #include <linux/posix_acl.h> -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/errno.h> #include <linux/pagemap.h> #include <linux/xattr.h> #include <linux/slab.h> #include <linux/posix_acl_xattr.h> -#include <linux/reiserfs_xattr.h> -#include <linux/reiserfs_acl.h> +#include "xattr.h" +#include "acl.h" #include <asm/uaccess.h> static int reiserfs_set_acl(struct reiserfs_transaction_handle *th, diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 534668fa41be..800a3cef6f62 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -1,10 +1,10 @@ -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/errno.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/xattr.h> #include <linux/slab.h> -#include <linux/reiserfs_xattr.h> +#include "xattr.h" #include <linux/security.h> #include <asm/uaccess.h> diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index 9883736ce3ec..a0035719f66b 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -1,10 +1,10 @@ -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/capability.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/xattr.h> -#include <linux/reiserfs_xattr.h> +#include "xattr.h" #include <asm/uaccess.h> static int diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 45ae1a00013a..8667491ae7c3 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -1,9 +1,9 @@ -#include <linux/reiserfs_fs.h> +#include "reiserfs.h" #include <linux/errno.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/xattr.h> -#include <linux/reiserfs_xattr.h> +#include "xattr.h" #include <asm/uaccess.h> static int diff --git a/fs/romfs/super.c b/fs/romfs/super.c index bb36ab74eb45..e64f6b5f7ae5 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -538,14 +538,12 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(root)) goto error; - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) - goto error_i; + goto error; return 0; -error_i: - iput(root); error: return -EINVAL; error_rsb_inval: diff --git a/fs/select.c b/fs/select.c index d33418fdc858..e782258d0de3 100644 --- a/fs/select.c +++ b/fs/select.c @@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block) } SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, - long, timeout_msecs) + int, timeout_msecs) { struct timespec end_time, *to = NULL; int ret; diff --git a/fs/seq_file.c b/fs/seq_file.c index 4023d6be939b..aa242dc99373 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -140,9 +140,21 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) mutex_lock(&m->lock); + /* + * seq_file->op->..m_start/m_stop/m_next may do special actions + * or optimisations based on the file->f_version, so we want to + * pass the file->f_version to those methods. + * + * seq_file->version is just copy of f_version, and seq_file + * methods can treat it simply as file version. + * It is copied in first and copied out after all operations. + * It is convenient to have it as part of structure to avoid the + * need of passing another argument to all the seq_file methods. + */ + m->version = file->f_version; + /* Don't assume *ppos is where we left it */ if (unlikely(*ppos != m->read_pos)) { - m->read_pos = *ppos; while ((err = traverse(m, *ppos)) == -EAGAIN) ; if (err) { @@ -152,21 +164,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) m->index = 0; m->count = 0; goto Done; + } else { + m->read_pos = *ppos; } } - /* - * seq_file->op->..m_start/m_stop/m_next may do special actions - * or optimisations based on the file->f_version, so we want to - * pass the file->f_version to those methods. - * - * seq_file->version is just copy of f_version, and seq_file - * methods can treat it simply as file version. - * It is copied in first and copied out after all operations. - * It is convenient to have it as part of structure to avoid the - * need of passing another argument to all the seq_file methods. - */ - m->version = file->f_version; /* grab buffer if we didn't have one */ if (!m->buf) { m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); diff --git a/fs/signalfd.c b/fs/signalfd.c index 492465b451dd..7ae2a574cb25 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -30,6 +30,21 @@ #include <linux/signalfd.h> #include <linux/syscalls.h> +void signalfd_cleanup(struct sighand_struct *sighand) +{ + wait_queue_head_t *wqh = &sighand->signalfd_wqh; + /* + * The lockless check can race with remove_wait_queue() in progress, + * but in this case its caller should run under rcu_read_lock() and + * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return. + */ + if (likely(!waitqueue_active(wqh))) + return; + + /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */ + wake_up_poll(wqh, POLLHUP | POLLFREE); +} + struct signalfd_ctx { sigset_t sigmask; }; diff --git a/fs/splice.c b/fs/splice.c index 1ec0493266b3..f16402ed915c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -737,15 +737,12 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, goto out; if (buf->page != page) { - /* - * Careful, ->map() uses KM_USER0! - */ char *src = buf->ops->map(pipe, buf, 1); - char *dst = kmap_atomic(page, KM_USER1); + char *dst = kmap_atomic(page); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); - kunmap_atomic(dst, KM_USER1); + kunmap_atomic(dst); buf->ops->unmap(pipe, buf, src); } ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 38bb1c640559..8ca62c28fe12 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -464,10 +464,10 @@ static int squashfs_readpage(struct file *file, struct page *page) if (PageUptodate(push_page)) goto skip_page; - pageaddr = kmap_atomic(push_page, KM_USER0); + pageaddr = kmap_atomic(push_page); squashfs_copy_data(pageaddr, buffer, offset, avail); memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); - kunmap_atomic(pageaddr, KM_USER0); + kunmap_atomic(pageaddr); flush_dcache_page(push_page); SetPageUptodate(push_page); skip_page: @@ -484,9 +484,9 @@ skip_page: error_out: SetPageError(page); out: - pageaddr = kmap_atomic(page, KM_USER0); + pageaddr = kmap_atomic(page); memset(pageaddr, 0, PAGE_CACHE_SIZE); - kunmap_atomic(pageaddr, KM_USER0); + kunmap_atomic(pageaddr); flush_dcache_page(page); if (!PageError(page)) SetPageUptodate(page); diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index ecaa2f7bdb8f..970b1167e7cb 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -316,11 +316,10 @@ check_directory_table: } insert_inode_hash(root); - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (sb->s_root == NULL) { ERROR("Root inode create failed\n"); err = -ENOMEM; - iput(root); goto failed_mount; } diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c index 1191817264cc..12806dffb345 100644 --- a/fs/squashfs/symlink.c +++ b/fs/squashfs/symlink.c @@ -90,14 +90,14 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page) goto error_out; } - pageaddr = kmap_atomic(page, KM_USER0); + pageaddr = kmap_atomic(page); copied = squashfs_copy_data(pageaddr + bytes, entry, offset, length - bytes); if (copied == length - bytes) memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length); else block = entry->next_index; - kunmap_atomic(pageaddr, KM_USER0); + kunmap_atomic(pageaddr); squashfs_cache_put(entry); } diff --git a/fs/stat.c b/fs/stat.c index 8806b8997d2e..86f13563a463 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -307,7 +307,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, if (inode->i_op->readlink) { error = security_inode_readlink(path.dentry); if (!error) { - touch_atime(path.mnt, path.dentry); + touch_atime(&path); error = inode->i_op->readlink(path.dentry, buf, bufsiz); } diff --git a/fs/super.c b/fs/super.c index 6015c02296b7..7fcb1354c554 100644 --- a/fs/super.c +++ b/fs/super.c @@ -32,6 +32,7 @@ #include <linux/backing-dev.h> #include <linux/rculist_bl.h> #include <linux/cleancache.h> +#include <linux/fsnotify.h> #include "internal.h" @@ -250,7 +251,7 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { - cleancache_flush_fs(s); + cleancache_invalidate_fs(s); fs->kill_sb(s); /* caches are now gone, we can safely kill the shrinker now */ @@ -634,6 +635,28 @@ rescan: EXPORT_SYMBOL(get_super); /** + * get_super_thawed - get thawed superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device. The superblock is returned once it is thawed + * (or immediately if it was not frozen). %NULL is returned if no match + * is found. + */ +struct super_block *get_super_thawed(struct block_device *bdev) +{ + while (1) { + struct super_block *s = get_super(bdev); + if (!s || s->s_frozen == SB_UNFROZEN) + return s; + up_read(&s->s_umount); + vfs_check_frozen(s, SB_FREEZE_WRITE); + put_super(s); + } +} +EXPORT_SYMBOL(get_super_thawed); + +/** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for * diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 7fdf6a7b7436..2a7a3f5d1ca6 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -22,76 +22,103 @@ #include <linux/mutex.h> #include <linux/slab.h> #include <linux/security.h> +#include <linux/hash.h> #include "sysfs.h" DEFINE_MUTEX(sysfs_mutex); DEFINE_SPINLOCK(sysfs_assoc_lock); +#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb); + static DEFINE_SPINLOCK(sysfs_ino_lock); static DEFINE_IDA(sysfs_ino_ida); /** - * sysfs_link_sibling - link sysfs_dirent into sibling list + * sysfs_name_hash + * @ns: Namespace tag to hash + * @name: Null terminated string to hash + * + * Returns 31 bit hash of ns + name (so it fits in an off_t ) + */ +static unsigned int sysfs_name_hash(const void *ns, const char *name) +{ + unsigned long hash = init_name_hash(); + unsigned int len = strlen(name); + while (len--) + hash = partial_name_hash(*name++, hash); + hash = ( end_name_hash(hash) ^ hash_ptr( (void *)ns, 31 ) ); + hash &= 0x7fffffffU; + /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ + if (hash < 1) + hash += 2; + if (hash >= INT_MAX) + hash = INT_MAX - 1; + return hash; +} + +static int sysfs_name_compare(unsigned int hash, const void *ns, + const char *name, const struct sysfs_dirent *sd) +{ + if (hash != sd->s_hash) + return hash - sd->s_hash; + if (ns != sd->s_ns) + return ns - sd->s_ns; + return strcmp(name, sd->s_name); +} + +static int sysfs_sd_compare(const struct sysfs_dirent *left, + const struct sysfs_dirent *right) +{ + return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name, + right); +} + +/** + * sysfs_link_subling - link sysfs_dirent into sibling rbtree * @sd: sysfs_dirent of interest * - * Link @sd into its sibling list which starts from + * Link @sd into its sibling rbtree which starts from * sd->s_parent->s_dir.children. * * Locking: * mutex_lock(sysfs_mutex) + * + * RETURNS: + * 0 on susccess -EEXIST on failure. */ -static void sysfs_link_sibling(struct sysfs_dirent *sd) +static int sysfs_link_sibling(struct sysfs_dirent *sd) { - struct sysfs_dirent *parent_sd = sd->s_parent; - - struct rb_node **p; - struct rb_node *parent; + struct rb_node **node = &sd->s_parent->s_dir.children.rb_node; + struct rb_node *parent = NULL; if (sysfs_type(sd) == SYSFS_DIR) - parent_sd->s_dir.subdirs++; - - p = &parent_sd->s_dir.inode_tree.rb_node; - parent = NULL; - while (*p) { - parent = *p; -#define node rb_entry(parent, struct sysfs_dirent, inode_node) - if (sd->s_ino < node->s_ino) { - p = &node->inode_node.rb_left; - } else if (sd->s_ino > node->s_ino) { - p = &node->inode_node.rb_right; - } else { - printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n", - (unsigned long) sd->s_ino); - BUG(); - } -#undef node - } - rb_link_node(&sd->inode_node, parent, p); - rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree); - - p = &parent_sd->s_dir.name_tree.rb_node; - parent = NULL; - while (*p) { - int c; - parent = *p; -#define node rb_entry(parent, struct sysfs_dirent, name_node) - c = strcmp(sd->s_name, node->s_name); - if (c < 0) { - p = &node->name_node.rb_left; - } else { - p = &node->name_node.rb_right; - } -#undef node + sd->s_parent->s_dir.subdirs++; + + while (*node) { + struct sysfs_dirent *pos; + int result; + + pos = to_sysfs_dirent(*node); + parent = *node; + result = sysfs_sd_compare(sd, pos); + if (result < 0) + node = &pos->s_rb.rb_left; + else if (result > 0) + node = &pos->s_rb.rb_right; + else + return -EEXIST; } - rb_link_node(&sd->name_node, parent, p); - rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree); + /* add new node and rebalance the tree */ + rb_link_node(&sd->s_rb, parent, node); + rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children); + return 0; } /** - * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list + * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree * @sd: sysfs_dirent of interest * - * Unlink @sd from its sibling list which starts from + * Unlink @sd from its sibling rbtree which starts from * sd->s_parent->s_dir.children. * * Locking: @@ -102,8 +129,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) if (sysfs_type(sd) == SYSFS_DIR) sd->s_parent->s_dir.subdirs--; - rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree); - rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree); + rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); } /** @@ -198,7 +224,7 @@ static void sysfs_deactivate(struct sysfs_dirent *sd) rwsem_release(&sd->dep_map, 1, _RET_IP_); } -static int sysfs_alloc_ino(ino_t *pino) +static int sysfs_alloc_ino(unsigned int *pino) { int ino, rc; @@ -217,7 +243,7 @@ static int sysfs_alloc_ino(ino_t *pino) return rc; } -static void sysfs_free_ino(ino_t ino) +static void sysfs_free_ino(unsigned int ino) { spin_lock(&sysfs_ino_lock); ida_remove(&sysfs_ino_ida, ino); @@ -402,6 +428,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { struct sysfs_inode_attrs *ps_iattr; + int ret; if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", @@ -410,12 +437,12 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) return -EINVAL; } - if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) - return -EEXIST; - + sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); sd->s_parent = sysfs_get(acxt->parent_sd); - sysfs_link_sibling(sd); + ret = sysfs_link_sibling(sd); + if (ret) + return ret; /* Update timestamps on the parent */ ps_iattr = acxt->parent_sd->s_iattr; @@ -565,8 +592,8 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, const void *ns, const unsigned char *name) { - struct rb_node *p = parent_sd->s_dir.name_tree.rb_node; - struct sysfs_dirent *found = NULL; + struct rb_node *node = parent_sd->s_dir.children.rb_node; + unsigned int hash; if (!!sysfs_ns_type(parent_sd) != !!ns) { WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", @@ -575,33 +602,21 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, return NULL; } - while (p) { - int c; -#define node rb_entry(p, struct sysfs_dirent, name_node) - c = strcmp(name, node->s_name); - if (c < 0) { - p = node->name_node.rb_left; - } else if (c > 0) { - p = node->name_node.rb_right; - } else { - found = node; - p = node->name_node.rb_left; - } -#undef node - } - - if (found) { - while (found->s_ns != ns) { - p = rb_next(&found->name_node); - if (!p) - return NULL; - found = rb_entry(p, struct sysfs_dirent, name_node); - if (strcmp(name, found->s_name)) - return NULL; - } + hash = sysfs_name_hash(ns, name); + while (node) { + struct sysfs_dirent *sd; + int result; + + sd = to_sysfs_dirent(node); + result = sysfs_name_compare(hash, ns, name, sd); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return sd; } - - return found; + return NULL; } /** @@ -804,9 +819,9 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); sysfs_addrm_start(&acxt, dir_sd); - pos = rb_first(&dir_sd->s_dir.inode_tree); + pos = rb_first(&dir_sd->s_dir.children); while (pos) { - struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node); + struct sysfs_dirent *sd = to_sysfs_dirent(pos); pos = rb_next(pos); if (sysfs_type(sd) != SYSFS_DIR) sysfs_remove_one(&acxt, sd); @@ -863,6 +878,7 @@ int sysfs_rename(struct sysfs_dirent *sd, dup_name = sd->s_name; sd->s_name = new_name; + sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); } /* Move to the appropriate place in the appropriate directories rbtree. */ @@ -919,38 +935,36 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp) } static struct sysfs_dirent *sysfs_dir_pos(const void *ns, - struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) + struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos) { if (pos) { int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && pos->s_parent == parent_sd && - ino == pos->s_ino; + hash == pos->s_hash; sysfs_put(pos); if (!valid) pos = NULL; } - if (!pos && (ino > 1) && (ino < INT_MAX)) { - struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node; - while (p) { -#define node rb_entry(p, struct sysfs_dirent, inode_node) - if (ino < node->s_ino) { - pos = node; - p = node->inode_node.rb_left; - } else if (ino > node->s_ino) { - p = node->inode_node.rb_right; - } else { - pos = node; + if (!pos && (hash > 1) && (hash < INT_MAX)) { + struct rb_node *node = parent_sd->s_dir.children.rb_node; + while (node) { + pos = to_sysfs_dirent(node); + + if (hash < pos->s_hash) + node = node->rb_left; + else if (hash > pos->s_hash) + node = node->rb_right; + else break; - } -#undef node } } + /* Skip over entries in the wrong namespace */ while (pos && pos->s_ns != ns) { - struct rb_node *p = rb_next(&pos->inode_node); - if (!p) + struct rb_node *node = rb_next(&pos->s_rb); + if (!node) pos = NULL; else - pos = rb_entry(p, struct sysfs_dirent, inode_node); + pos = to_sysfs_dirent(node); } return pos; } @@ -960,11 +974,11 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, { pos = sysfs_dir_pos(ns, parent_sd, ino, pos); if (pos) do { - struct rb_node *p = rb_next(&pos->inode_node); - if (!p) + struct rb_node *node = rb_next(&pos->s_rb); + if (!node) pos = NULL; else - pos = rb_entry(p, struct sysfs_dirent, inode_node); + pos = to_sysfs_dirent(node); } while (pos && pos->s_ns != ns); return pos; } @@ -1006,7 +1020,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) len = strlen(name); ino = pos->s_ino; type = dt_type(pos); - filp->f_pos = ino; + filp->f_pos = pos->s_hash; filp->private_data = sysfs_get(pos); mutex_unlock(&sysfs_mutex); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 85eb81683a29..feb2d69396cf 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec void *old_secdata; size_t old_secdata_len; - iattrs = sd->s_iattr; - if (!iattrs) - iattrs = sysfs_init_inode_attrs(sd); - if (!iattrs) - return -ENOMEM; + if (!sd->s_iattr) { + sd->s_iattr = sysfs_init_inode_attrs(sd); + if (!sd->s_iattr) + return -ENOMEM; + } + iattrs = sd->s_iattr; old_secdata = iattrs->ia_secdata; old_secdata_len = iattrs->ia_secdata_len; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index e34f0d99ea4e..52c3bdb66a84 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -36,7 +36,7 @@ struct sysfs_dirent sysfs_root = { .s_name = "", .s_count = ATOMIC_INIT(1), .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), - .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + .s_mode = S_IFDIR | S_IRUGO | S_IXUGO, .s_ino = 1, }; @@ -61,10 +61,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) } /* instantiate and link root dentry */ - root = d_alloc_root(inode); + root = d_make_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n",__func__); - iput(inode); return -ENOMEM; } root->d_fsdata = &sysfs_root; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 7484a36ee678..661a9639570b 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -20,9 +20,8 @@ struct sysfs_elem_dir { struct kobject *kobj; unsigned long subdirs; - - struct rb_root inode_tree; - struct rb_root name_tree; + /* children rbtree starts here and goes through sd->s_rb */ + struct rb_root children; }; struct sysfs_elem_symlink { @@ -62,8 +61,7 @@ struct sysfs_dirent { struct sysfs_dirent *s_parent; const char *s_name; - struct rb_node inode_node; - struct rb_node name_node; + struct rb_node s_rb; union { struct completion *completion; @@ -71,6 +69,7 @@ struct sysfs_dirent { } u; const void *s_ns; /* namespace tag */ + unsigned int s_hash; /* ns + name hash */ union { struct sysfs_elem_dir s_dir; struct sysfs_elem_symlink s_symlink; @@ -78,9 +77,9 @@ struct sysfs_dirent { struct sysfs_elem_bin_attr s_bin_attr; }; - unsigned int s_flags; + unsigned short s_flags; umode_t s_mode; - ino_t s_ino; + unsigned int s_ino; struct sysfs_inode_attrs *s_iattr; }; @@ -95,11 +94,11 @@ struct sysfs_dirent { #define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) /* identify any namespace tag on sysfs_dirents */ -#define SYSFS_NS_TYPE_MASK 0xff00 +#define SYSFS_NS_TYPE_MASK 0xf00 #define SYSFS_NS_TYPE_SHIFT 8 #define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) -#define SYSFS_FLAG_REMOVED 0x020000 +#define SYSFS_FLAG_REMOVED 0x02000 static inline unsigned int sysfs_type(struct sysfs_dirent *sd) { diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index b217797e621b..d7466e293614 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -121,9 +121,6 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, { struct inode *inode = old_dentry->d_inode; - if (inode->i_nlink >= SYSV_SB(inode->i_sb)->s_link_max) - return -EMLINK; - inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); ihold(inode); @@ -134,10 +131,8 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode) { struct inode * inode; - int err = -EMLINK; + int err; - if (dir->i_nlink >= SYSV_SB(dir->i_sb)->s_link_max) - goto out; inode_inc_link_count(dir); inode = sysv_new_inode(dir, S_IFDIR|mode); @@ -251,11 +246,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) - goto out_dir; - } err = sysv_add_link(new_dentry, old_inode); if (err) goto out_dir; diff --git a/fs/sysv/super.c b/fs/sysv/super.c index f60c196913ea..7491c33b6468 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -44,7 +44,7 @@ enum { JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60 }; -static void detected_xenix(struct sysv_sb_info *sbi) +static void detected_xenix(struct sysv_sb_info *sbi, unsigned *max_links) { struct buffer_head *bh1 = sbi->s_bh1; struct buffer_head *bh2 = sbi->s_bh2; @@ -59,7 +59,7 @@ static void detected_xenix(struct sysv_sb_info *sbi) sbd2 = (struct xenix_super_block *) (bh2->b_data - 512); } - sbi->s_link_max = XENIX_LINK_MAX; + *max_links = XENIX_LINK_MAX; sbi->s_fic_size = XENIX_NICINOD; sbi->s_flc_size = XENIX_NICFREE; sbi->s_sbd1 = (char *)sbd1; @@ -75,7 +75,7 @@ static void detected_xenix(struct sysv_sb_info *sbi) sbi->s_nzones = fs32_to_cpu(sbi, sbd1->s_fsize); } -static void detected_sysv4(struct sysv_sb_info *sbi) +static void detected_sysv4(struct sysv_sb_info *sbi, unsigned *max_links) { struct sysv4_super_block * sbd; struct buffer_head *bh1 = sbi->s_bh1; @@ -86,7 +86,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi) else sbd = (struct sysv4_super_block *) bh2->b_data; - sbi->s_link_max = SYSV_LINK_MAX; + *max_links = SYSV_LINK_MAX; sbi->s_fic_size = SYSV_NICINOD; sbi->s_flc_size = SYSV_NICFREE; sbi->s_sbd1 = (char *)sbd; @@ -103,7 +103,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi) sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); } -static void detected_sysv2(struct sysv_sb_info *sbi) +static void detected_sysv2(struct sysv_sb_info *sbi, unsigned *max_links) { struct sysv2_super_block *sbd; struct buffer_head *bh1 = sbi->s_bh1; @@ -114,7 +114,7 @@ static void detected_sysv2(struct sysv_sb_info *sbi) else sbd = (struct sysv2_super_block *) bh2->b_data; - sbi->s_link_max = SYSV_LINK_MAX; + *max_links = SYSV_LINK_MAX; sbi->s_fic_size = SYSV_NICINOD; sbi->s_flc_size = SYSV_NICFREE; sbi->s_sbd1 = (char *)sbd; @@ -131,14 +131,14 @@ static void detected_sysv2(struct sysv_sb_info *sbi) sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); } -static void detected_coherent(struct sysv_sb_info *sbi) +static void detected_coherent(struct sysv_sb_info *sbi, unsigned *max_links) { struct coh_super_block * sbd; struct buffer_head *bh1 = sbi->s_bh1; sbd = (struct coh_super_block *) bh1->b_data; - sbi->s_link_max = COH_LINK_MAX; + *max_links = COH_LINK_MAX; sbi->s_fic_size = COH_NICINOD; sbi->s_flc_size = COH_NICFREE; sbi->s_sbd1 = (char *)sbd; @@ -154,12 +154,12 @@ static void detected_coherent(struct sysv_sb_info *sbi) sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); } -static void detected_v7(struct sysv_sb_info *sbi) +static void detected_v7(struct sysv_sb_info *sbi, unsigned *max_links) { struct buffer_head *bh2 = sbi->s_bh2; struct v7_super_block *sbd = (struct v7_super_block *)bh2->b_data; - sbi->s_link_max = V7_LINK_MAX; + *max_links = V7_LINK_MAX; sbi->s_fic_size = V7_NICINOD; sbi->s_flc_size = V7_NICFREE; sbi->s_sbd1 = (char *)sbd; @@ -290,7 +290,7 @@ static char *flavour_names[] = { [FSTYPE_AFS] = "AFS", }; -static void (*flavour_setup[])(struct sysv_sb_info *) = { +static void (*flavour_setup[])(struct sysv_sb_info *, unsigned *) = { [FSTYPE_XENIX] = detected_xenix, [FSTYPE_SYSV4] = detected_sysv4, [FSTYPE_SYSV2] = detected_sysv2, @@ -310,7 +310,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) sbi->s_firstinodezone = 2; - flavour_setup[sbi->s_type](sbi); + flavour_setup[sbi->s_type](sbi, &sb->s_max_links); sbi->s_truncate = 1; sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone; @@ -341,9 +341,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size) printk("SysV FS: get root inode failed\n"); return 0; } - sb->s_root = d_alloc_root(root_inode); + sb->s_root = d_make_root(root_inode); if (!sb->s_root) { - iput(root_inode); printk("SysV FS: get root dentry failed\n"); return 0; } diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 0e4b821c5691..11b07672f6c5 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -24,7 +24,6 @@ struct sysv_sb_info { char s_bytesex; /* bytesex (le/be/pdp) */ char s_truncate; /* if 1: names > SYSV_NAMELEN chars are truncated */ /* if 0: they are disallowed (ENAMETOOLONG) */ - nlink_t s_link_max; /* max number of hard links to a file */ unsigned int s_inodes_per_block; /* number of inodes per block */ unsigned int s_inodes_per_block_1; /* inodes_per_block - 1 */ unsigned int s_inodes_per_block_bits; /* log2(inodes_per_block) */ diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index f9c234bf33d3..5c8f6dc1d28b 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1042,10 +1042,10 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (i_size > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 63765d58445b..76e4e0566ad6 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2076,15 +2076,13 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out_umount; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) - goto out_iput; + goto out_umount; mutex_unlock(&c->umount_mutex); return 0; -out_iput: - iput(root); out_umount: ubifs_umount(c); out_unlock: diff --git a/fs/udf/file.c b/fs/udf/file.c index dca0c3881e82..7f3f7ba3df6e 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -87,10 +87,10 @@ static int udf_adinicb_write_end(struct file *file, char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset, kaddr + offset, copied); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return simple_write_end(file, mapping, pos, len, copied, page, fsdata); } @@ -201,12 +201,10 @@ out: static int udf_release_file(struct inode *inode, struct file *filp) { if (filp->f_mode & FMODE_WRITE) { - mutex_lock(&inode->i_mutex); down_write(&UDF_I(inode)->i_data_sem); udf_discard_prealloc(inode); udf_truncate_tail_extent(inode); up_write(&UDF_I(inode)->i_data_sem); - mutex_unlock(&inode->i_mutex); } return 0; } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 08bf46edf9c4..38de8f234b94 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -32,8 +32,6 @@ #include <linux/crc-itu-t.h> #include <linux/exportfs.h> -enum { UDF_MAX_LINKS = 0xffff }; - static inline int udf_match(int len1, const unsigned char *name1, int len2, const unsigned char *name2) { @@ -649,10 +647,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct udf_inode_info *dinfo = UDF_I(dir); struct udf_inode_info *iinfo; - err = -EMLINK; - if (dir->i_nlink >= UDF_MAX_LINKS) - goto out; - err = -EIO; inode = udf_new_inode(dir, S_IFDIR | mode, &err); if (!inode) @@ -1032,9 +1026,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, struct fileIdentDesc cfi, *fi; int err; - if (inode->i_nlink >= UDF_MAX_LINKS) - return -EMLINK; - fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); if (!fi) { return err; @@ -1126,10 +1117,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) != old_dir->i_ino) goto end_rename; - - retval = -EMLINK; - if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS) - goto end_rename; } if (!nfi) { nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi, diff --git a/fs/udf/super.c b/fs/udf/super.c index c09a84daaf50..85067b4c7e14 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -75,6 +75,8 @@ #define UDF_DEFAULT_BLOCKSIZE 2048 +enum { UDF_MAX_LINKS = 0xffff }; + /* These are the "meat" - everything else is stuffing */ static int udf_fill_super(struct super_block *, void *, int); static void udf_put_super(struct super_block *); @@ -2035,13 +2037,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) } /* Allocate a dentry for the root inode */ - sb->s_root = d_alloc_root(inode); + sb->s_root = d_make_root(inode); if (!sb->s_root) { udf_err(sb, "Couldn't allocate root dentry\n"); - iput(inode); goto error_out; } sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_max_links = UDF_MAX_LINKS; return 0; error_out: diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 38cac199edff..a2281cadefa1 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -166,10 +166,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, int error; lock_ufs(dir->i_sb); - if (inode->i_nlink >= UFS_LINK_MAX) { - unlock_ufs(dir->i_sb); - return -EMLINK; - } inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); @@ -183,10 +179,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) { struct inode * inode; - int err = -EMLINK; - - if (dir->i_nlink >= UFS_LINK_MAX) - goto out; + int err; lock_ufs(dir->i_sb); inode_inc_link_count(dir); @@ -305,11 +298,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= UFS_LINK_MAX) - goto out_dir; - } err = ufs_add_link(new_dentry, old_inode); if (err) goto out_dir; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 5246ee3e5607..f636f6b460d0 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1157,16 +1157,17 @@ magic_found: "fast symlink size (%u)\n", uspi->s_maxsymlinklen); uspi->s_maxsymlinklen = maxsymlen; } + sb->s_max_links = UFS_LINK_MAX; inode = ufs_iget(sb, UFS_ROOTINO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto failed; } - sb->s_root = d_alloc_root(inode); + sb->s_root = d_make_root(inode); if (!sb->s_root) { ret = -ENOMEM; - goto dalloc_failed; + goto failed; } ufs_setup_cstotal(sb); @@ -1180,8 +1181,6 @@ magic_found: UFSD("EXIT\n"); return 0; -dalloc_failed: - iput(inode); failed: if (ubh) ubh_brelse_uspi (uspi); diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 292eff198030..ab7c53fe346e 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone) extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -static inline int -kmem_shake_allow(gfp_t gfp_mask) -{ - return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); -} - #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index b4ff40b5f918..53db20ee3e77 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -63,82 +63,6 @@ int xfs_dqerror_mod = 33; static struct lock_class_key xfs_dquot_other_class; /* - * Allocate and initialize a dquot. We don't always allocate fresh memory; - * we try to reclaim a free dquot if the number of incore dquots are above - * a threshold. - * The only field inside the core that gets initialized at this point - * is the d_id field. The idea is to fill in the entire q_core - * when we read in the on disk dquot. - */ -STATIC xfs_dquot_t * -xfs_qm_dqinit( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type) -{ - xfs_dquot_t *dqp; - boolean_t brandnewdquot; - - brandnewdquot = xfs_qm_dqalloc_incore(&dqp); - dqp->dq_flags = type; - dqp->q_core.d_id = cpu_to_be32(id); - dqp->q_mount = mp; - - /* - * No need to re-initialize these if this is a reclaimed dquot. - */ - if (brandnewdquot) { - INIT_LIST_HEAD(&dqp->q_freelist); - mutex_init(&dqp->q_qlock); - init_waitqueue_head(&dqp->q_pinwait); - - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&dqp->q_flush); - complete(&dqp->q_flush); - - trace_xfs_dqinit(dqp); - } else { - /* - * Only the q_core portion was zeroed in dqreclaim_one(). - * So, we need to reset others. - */ - dqp->q_nrefs = 0; - dqp->q_blkno = 0; - INIT_LIST_HEAD(&dqp->q_mplist); - INIT_LIST_HEAD(&dqp->q_hashlist); - dqp->q_bufoffset = 0; - dqp->q_fileoffset = 0; - dqp->q_transp = NULL; - dqp->q_gdquot = NULL; - dqp->q_res_bcount = 0; - dqp->q_res_icount = 0; - dqp->q_res_rtbcount = 0; - atomic_set(&dqp->q_pincount, 0); - dqp->q_hash = NULL; - ASSERT(list_empty(&dqp->q_freelist)); - - trace_xfs_dqreuse(dqp); - } - - /* - * In either case we need to make sure group quotas have a different - * lock class than user quotas, to make sure lockdep knows we can - * locks of one of each at the same time. - */ - if (!(type & XFS_DQ_USER)) - lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); - - /* - * log item gets initialized later - */ - return (dqp); -} - -/* * This is called to free all the memory associated with a dquot */ void @@ -215,10 +139,10 @@ xfs_qm_adjust_dqtimers( if (!d->d_btimer) { if ((d->d_blk_softlimit && - (be64_to_cpu(d->d_bcount) >= + (be64_to_cpu(d->d_bcount) > be64_to_cpu(d->d_blk_softlimit))) || (d->d_blk_hardlimit && - (be64_to_cpu(d->d_bcount) >= + (be64_to_cpu(d->d_bcount) > be64_to_cpu(d->d_blk_hardlimit)))) { d->d_btimer = cpu_to_be32(get_seconds() + mp->m_quotainfo->qi_btimelimit); @@ -227,10 +151,10 @@ xfs_qm_adjust_dqtimers( } } else { if ((!d->d_blk_softlimit || - (be64_to_cpu(d->d_bcount) < + (be64_to_cpu(d->d_bcount) <= be64_to_cpu(d->d_blk_softlimit))) && (!d->d_blk_hardlimit || - (be64_to_cpu(d->d_bcount) < + (be64_to_cpu(d->d_bcount) <= be64_to_cpu(d->d_blk_hardlimit)))) { d->d_btimer = 0; } @@ -238,10 +162,10 @@ xfs_qm_adjust_dqtimers( if (!d->d_itimer) { if ((d->d_ino_softlimit && - (be64_to_cpu(d->d_icount) >= + (be64_to_cpu(d->d_icount) > be64_to_cpu(d->d_ino_softlimit))) || (d->d_ino_hardlimit && - (be64_to_cpu(d->d_icount) >= + (be64_to_cpu(d->d_icount) > be64_to_cpu(d->d_ino_hardlimit)))) { d->d_itimer = cpu_to_be32(get_seconds() + mp->m_quotainfo->qi_itimelimit); @@ -250,10 +174,10 @@ xfs_qm_adjust_dqtimers( } } else { if ((!d->d_ino_softlimit || - (be64_to_cpu(d->d_icount) < + (be64_to_cpu(d->d_icount) <= be64_to_cpu(d->d_ino_softlimit))) && (!d->d_ino_hardlimit || - (be64_to_cpu(d->d_icount) < + (be64_to_cpu(d->d_icount) <= be64_to_cpu(d->d_ino_hardlimit)))) { d->d_itimer = 0; } @@ -261,10 +185,10 @@ xfs_qm_adjust_dqtimers( if (!d->d_rtbtimer) { if ((d->d_rtb_softlimit && - (be64_to_cpu(d->d_rtbcount) >= + (be64_to_cpu(d->d_rtbcount) > be64_to_cpu(d->d_rtb_softlimit))) || (d->d_rtb_hardlimit && - (be64_to_cpu(d->d_rtbcount) >= + (be64_to_cpu(d->d_rtbcount) > be64_to_cpu(d->d_rtb_hardlimit)))) { d->d_rtbtimer = cpu_to_be32(get_seconds() + mp->m_quotainfo->qi_rtbtimelimit); @@ -273,10 +197,10 @@ xfs_qm_adjust_dqtimers( } } else { if ((!d->d_rtb_softlimit || - (be64_to_cpu(d->d_rtbcount) < + (be64_to_cpu(d->d_rtbcount) <= be64_to_cpu(d->d_rtb_softlimit))) && (!d->d_rtb_hardlimit || - (be64_to_cpu(d->d_rtbcount) < + (be64_to_cpu(d->d_rtbcount) <= be64_to_cpu(d->d_rtb_hardlimit)))) { d->d_rtbtimer = 0; } @@ -567,7 +491,32 @@ xfs_qm_dqread( int error; int cancelflags = 0; - dqp = xfs_qm_dqinit(mp, id, type); + + dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); + + dqp->dq_flags = type; + dqp->q_core.d_id = cpu_to_be32(id); + dqp->q_mount = mp; + INIT_LIST_HEAD(&dqp->q_freelist); + mutex_init(&dqp->q_qlock); + init_waitqueue_head(&dqp->q_pinwait); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + + /* + * Make sure group quotas have a different lock class than user + * quotas. + */ + if (!(type & XFS_DQ_USER)) + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); + + atomic_inc(&xfs_Gqm->qm_totaldquots); trace_xfs_dqread(dqp); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 541a508adea1..0ed9ee77937c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1489,7 +1489,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); + ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; @@ -1981,7 +1981,7 @@ xfs_qm_dqcheck( if (!errs && ddq->d_id) { if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) >= + be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit)) { if (!ddq->d_btimer) { if (flags & XFS_QMOPT_DOWARN) @@ -1992,7 +1992,7 @@ xfs_qm_dqcheck( } } if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) >= + be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit)) { if (!ddq->d_itimer) { if (flags & XFS_QMOPT_DOWARN) @@ -2003,7 +2003,7 @@ xfs_qm_dqcheck( } } if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) >= + be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit)) { if (!ddq->d_rtbtimer) { if (flags & XFS_QMOPT_DOWARN) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 671f37eae1c7..c436def733bf 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -50,7 +50,6 @@ */ struct mutex xfs_Gqm_lock; struct xfs_qm *xfs_Gqm; -uint ndquot; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; @@ -93,7 +92,6 @@ xfs_Gqm_init(void) goto out_free_udqhash; hsize /= sizeof(xfs_dqhash_t); - ndquot = hsize << 8; xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); xqm->qm_dqhashmask = hsize - 1; @@ -137,7 +135,6 @@ xfs_Gqm_init(void) xqm->qm_dqtrxzone = qm_dqtrxzone; atomic_set(&xqm->qm_totaldquots, 0); - xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; xqm->qm_nrefs = 0; return xqm; @@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos( return 0; } +STATIC void +xfs_qm_dqfree_one( + struct xfs_dquot *dqp) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_quotainfo *qi = mp->m_quotainfo; + mutex_lock(&dqp->q_hash->qh_lock); + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; + mutex_unlock(&dqp->q_hash->qh_lock); -/* - * Pop the least recently used dquot off the freelist and recycle it. - */ -STATIC struct xfs_dquot * -xfs_qm_dqreclaim_one(void) + mutex_lock(&qi->qi_dqlist_lock); + list_del_init(&dqp->q_mplist); + qi->qi_dquots--; + qi->qi_dqreclaims++; + mutex_unlock(&qi->qi_dqlist_lock); + + xfs_qm_dqdestroy(dqp); +} + +STATIC void +xfs_qm_dqreclaim_one( + struct xfs_dquot *dqp, + struct list_head *dispose_list) { - struct xfs_dquot *dqp; - int restarts = 0; + struct xfs_mount *mp = dqp->q_mount; + int error; - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); -restart: - list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { - struct xfs_mount *mp = dqp->q_mount; + if (!xfs_dqlock_nowait(dqp)) + goto out_busy; - if (!xfs_dqlock_nowait(dqp)) - continue; + /* + * This dquot has acquired a reference in the meantime remove it from + * the freelist and try again. + */ + if (dqp->q_nrefs) { + xfs_dqunlock(dqp); - /* - * This dquot has already been grabbed by dqlookup. - * Remove it from the freelist and try again. - */ - if (dqp->q_nrefs) { - trace_xfs_dqreclaim_want(dqp); - XQM_STATS_INC(xqmstats.xs_qm_dqwants); - - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - restarts++; - goto dqunlock; - } + trace_xfs_dqreclaim_want(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqwants); - ASSERT(dqp->q_hash); - ASSERT(!list_empty(&dqp->q_mplist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + return; + } - /* - * Try to grab the flush lock. If this dquot is in the process - * of getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto dqunlock; + ASSERT(dqp->q_hash); + ASSERT(!list_empty(&dqp->q_mplist)); - /* - * We have the flush lock so we know that this is not in the - * process of being flushed. So, if this is dirty, flush it - * DELWRI so that we don't get a freelist infested with - * dirty dquots. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; + /* + * Try to grab the flush lock. If this dquot is in the process of + * getting flushed to disk, we don't want to reclaim it. + */ + if (!xfs_dqflock_nowait(dqp)) + goto out_busy; - trace_xfs_dqreclaim_dirty(dqp); + /* + * We have the flush lock so we know that this is not in the + * process of being flushed. So, if this is dirty, flush it + * DELWRI so that we don't get a freelist infested with + * dirty dquots. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + trace_xfs_dqreclaim_dirty(dqp); - /* - * We flush it delayed write, so don't bother - * releasing the freelist lock. - */ - error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - } - goto dqunlock; + /* + * We flush it delayed write, so don't bother releasing the + * freelist lock. + */ + error = xfs_qm_dqflush(dqp, 0); + if (error) { + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); } - xfs_dqfunlock(dqp); /* - * Prevent lookup now that we are going to reclaim the dquot. - * Once XFS_DQ_FREEING is set lookup won't touch the dquot, - * thus we can drop the lock now. + * Give the dquot another try on the freelist, as the + * flushing will take some time. */ - dqp->dq_flags |= XFS_DQ_FREEING; - xfs_dqunlock(dqp); - - mutex_lock(&dqp->q_hash->qh_lock); - list_del_init(&dqp->q_hashlist); - dqp->q_hash->qh_version++; - mutex_unlock(&dqp->q_hash->qh_lock); - - mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dquots--; - mp->m_quotainfo->qi_dqreclaims++; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + goto out_busy; + } + xfs_dqfunlock(dqp); - ASSERT(dqp->q_nrefs == 0); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; + /* + * Prevent lookups now that we are past the point of no return. + */ + dqp->dq_flags |= XFS_DQ_FREEING; + xfs_dqunlock(dqp); - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return dqp; -dqunlock: - xfs_dqunlock(dqp); - if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - break; - goto restart; - } + ASSERT(dqp->q_nrefs == 0); + list_move_tail(&dqp->q_freelist, dispose_list); + xfs_Gqm->qm_dqfrlist_cnt--; - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return NULL; -} + trace_xfs_dqreclaim_done(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); + return; -/* - * Traverse the freelist of dquots and attempt to reclaim a maximum of - * 'howmany' dquots. This operation races with dqlookup(), and attempts to - * favor the lookup function ... - */ -STATIC int -xfs_qm_shake_freelist( - int howmany) -{ - int nreclaimed = 0; - xfs_dquot_t *dqp; +out_busy: + xfs_dqunlock(dqp); - if (howmany <= 0) - return 0; + /* + * Move the dquot to the tail of the list so that we don't spin on it. + */ + list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); - while (nreclaimed < howmany) { - dqp = xfs_qm_dqreclaim_one(); - if (!dqp) - return nreclaimed; - xfs_qm_dqdestroy(dqp); - nreclaimed++; - } - return nreclaimed; + trace_xfs_dqreclaim_busy(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); } -/* - * The kmem_shake interface is invoked when memory is running low. - */ -/* ARGSUSED */ STATIC int xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) + struct shrinker *shrink, + struct shrink_control *sc) { - int ndqused, nfree, n; - gfp_t gfp_mask = sc->gfp_mask; - - if (!kmem_shake_allow(gfp_mask)) - return 0; - if (!xfs_Gqm) - return 0; - - nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ - /* incore dquots in all f/s's */ - ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; - - ASSERT(ndqused >= 0); + int nr_to_scan = sc->nr_to_scan; + LIST_HEAD (dispose_list); + struct xfs_dquot *dqp; - if (nfree <= ndqused && nfree < ndquot) + if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) return 0; + if (!nr_to_scan) + goto out; - ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ - n = nfree - ndqused - ndquot; /* # over target */ - - return xfs_qm_shake_freelist(MAX(nfree, n)); -} - - -/*------------------------------------------------------------------*/ - -/* - * Return a new incore dquot. Depending on the number of - * dquots in the system, we either allocate a new one on the kernel heap, - * or reclaim a free one. - * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed - * to reclaim an existing one from the freelist. - */ -boolean_t -xfs_qm_dqalloc_incore( - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - - /* - * Check against high water mark to see if we want to pop - * a nincompoop dquot off the freelist. - */ - if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { - /* - * Try to recycle a dquot from the freelist. - */ - if ((dqp = xfs_qm_dqreclaim_one())) { - XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); - /* - * Just zero the core here. The rest will get - * reinitialized by caller. XXX we shouldn't even - * do this zero ... - */ - memset(&dqp->q_core, 0, sizeof(dqp->q_core)); - *O_dqpp = dqp; - return B_FALSE; - } - XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { + if (nr_to_scan-- <= 0) + break; + dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, + q_freelist); + xfs_qm_dqreclaim_one(dqp, &dispose_list); } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - /* - * Allocate a brand new dquot on the kernel heap and return it - * to the caller to initialize. - */ - ASSERT(xfs_Gqm->qm_dqzone != NULL); - *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); - atomic_inc(&xfs_Gqm->qm_totaldquots); - - return B_TRUE; + while (!list_empty(&dispose_list)) { + dqp = list_first_entry(&dispose_list, struct xfs_dquot, + q_freelist); + list_del_init(&dqp->q_freelist); + xfs_qm_dqfree_one(dqp); + } +out: + return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; } - /* * Start a transaction and write the incore superblock changes to * disk. flags parameter indicates which fields have changed. diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 9b4f3adefbc5..9a9b997e1a0a 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -26,24 +26,12 @@ struct xfs_qm; struct xfs_inode; -extern uint ndquot; extern struct mutex xfs_Gqm_lock; extern struct xfs_qm *xfs_Gqm; extern kmem_zone_t *qm_dqzone; extern kmem_zone_t *qm_dqtrxzone; /* - * Ditto, for xfs_qm_dqreclaim_one. - */ -#define XFS_QM_RECLAIM_MAX_RESTARTS 4 - -/* - * Ideal ratio of free to in use dquots. Quota manager makes an attempt - * to keep this balance. - */ -#define XFS_QM_DQFREE_RATIO 2 - -/* * Dquot hashtable constants/threshold values. */ #define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) @@ -74,7 +62,6 @@ typedef struct xfs_qm { int qm_dqfrlist_cnt; atomic_t qm_totaldquots; /* total incore dquots */ uint qm_nrefs; /* file systems with quota on */ - int qm_dqfree_ratio;/* ratio of free to inuse dquots */ kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ } xfs_qm_t; @@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); /* dquot stuff */ -extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c index 8671a0b32644..5729ba570877 100644 --- a/fs/xfs/xfs_qm_stats.c +++ b/fs/xfs/xfs_qm_stats.c @@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v) { /* maximum; incore; ratio free to inuse; freelist */ seq_printf(m, "%d\t%d\t%d\t%u\n", - ndquot, + 0, xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, - xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, + 0, xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); return 0; } diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index eafbcff81f3a..711a86e39ff0 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -813,11 +813,11 @@ xfs_qm_export_dquot( (XFS_IS_OQUOTA_ENFORCED(mp) && (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && dst->d_id != 0) { - if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && + if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) && (dst->d_blk_softlimit > 0)) { ASSERT(dst->d_btimer != 0); } - if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && + if (((int) dst->d_icount > (int) dst->d_ino_softlimit) && (dst->d_ino_softlimit > 0)) { ASSERT(dst->d_itimer != 0); } diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 866de277079a..e44ef7ee8ce8 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -118,17 +118,6 @@ xfs_rename( new_parent = (src_dp != target_dp); src_is_directory = S_ISDIR(src_ip->i_d.di_mode); - if (src_is_directory) { - /* - * Check for link count overflow on target_dp - */ - if (target_ip == NULL && new_parent && - target_dp->i_d.di_nlink >= XFS_MAXLINK) { - error = XFS_ERROR(EMLINK); - goto std_return; - } - } - xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, inodes, &num_inodes); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ee5b695c99a7..baf40e378d35 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1341,6 +1341,7 @@ xfs_fs_fill_super( sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); + sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; set_posix_acl_flag(sb); @@ -1361,10 +1362,10 @@ xfs_fs_fill_super( error = EINVAL; goto out_syncd_stop; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { error = ENOMEM; - goto out_iput; + goto out_syncd_stop; } return 0; @@ -1383,8 +1384,6 @@ xfs_fs_fill_super( out: return -error; - out_iput: - iput(root); out_syncd_stop: xfs_syncd_stop(mp); out_unmount: diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 6b6df5802e95..bb134a819930 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \ DEFINE_DQUOT_EVENT(xfs_dqadjust); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_done); DEFINE_DQUOT_EVENT(xfs_dqattach_found); DEFINE_DQUOT_EVENT(xfs_dqattach_get); -DEFINE_DQUOT_EVENT(xfs_dqinit); -DEFINE_DQUOT_EVENT(xfs_dqreuse); DEFINE_DQUOT_EVENT(xfs_dqalloc); DEFINE_DQUOT_EVENT(xfs_dqtobp_read); DEFINE_DQUOT_EVENT(xfs_dqread); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 329b06aba1c2..7adcdf15ae0c 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1151,8 +1151,8 @@ xfs_trans_add_item( { struct xfs_log_item_desc *lidp; - ASSERT(lip->li_mountp = tp->t_mountp); - ASSERT(lip->li_ailp = tp->t_mountp->m_ail); + ASSERT(lip->li_mountp == tp->t_mountp); + ASSERT(lip->li_ailp == tp->t_mountp->m_ail); lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS); diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 4d00ee67792d..c4ba366d24e6 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -649,12 +649,12 @@ xfs_trans_dqresv( * nblks. */ if (hardlimit > 0ULL && - hardlimit <= nblks + *resbcountp) { + hardlimit < nblks + *resbcountp) { xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); goto error_return; } if (softlimit > 0ULL && - softlimit <= nblks + *resbcountp) { + softlimit < nblks + *resbcountp) { if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { xfs_quota_warn(mp, dqp, @@ -677,11 +677,13 @@ xfs_trans_dqresv( if (!softlimit) softlimit = q->qi_isoftlimit; - if (hardlimit > 0ULL && count >= hardlimit) { + if (hardlimit > 0ULL && + hardlimit < ninos + count) { xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); goto error_return; } - if (softlimit > 0ULL && count >= softlimit) { + if (softlimit > 0ULL && + softlimit < ninos + count) { if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { xfs_quota_warn(mp, dqp, diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 89dbb4a50872..79c05ac85bfe 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -296,8 +296,6 @@ xfs_bumplink( xfs_trans_t *tp, xfs_inode_t *ip) { - if (ip->i_d.di_nlink >= XFS_MAXLINK) - return XFS_ERROR(EMLINK); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); ASSERT(ip->i_d.di_nlink > 0); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index ebdb88840a47..64981d7e7375 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -917,14 +917,6 @@ xfs_create( xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); unlock_dp_on_error = B_TRUE; - /* - * Check for directory link count overflow. - */ - if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) { - error = XFS_ERROR(EMLINK); - goto out_trans_cancel; - } - xfs_bmap_init(&free_list, &first_block); /* @@ -1429,14 +1421,6 @@ xfs_link( xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); /* - * If the source has too many links, we can't make any more to it. - */ - if (sip->i_d.di_nlink >= XFS_MAXLINK) { - error = XFS_ERROR(EMLINK); - goto error_return; - } - - /* * If we are using project inheritance, we only allow hard link * creation in our tree when the project IDs are the same; else * the tree quota mechanism could be circumvented. |