diff options
Diffstat (limited to 'fs')
95 files changed, 1341 insertions, 1723 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 9ee534159cc6..42e102e2e74a 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -823,28 +823,21 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ERR(dfid)) return ERR_CAST(dfid); - name = dentry->d_name.name; - fid = p9_client_walk(dfid, 1, &name, 1); - if (IS_ERR(fid)) { - if (fid == ERR_PTR(-ENOENT)) { - d_add(dentry, NULL); - return NULL; - } - return ERR_CAST(fid); - } /* * Make sure we don't use a wrong inode due to parallel * unlink. For cached mode create calls request for new * inode. But with cache disabled, lookup should do this. */ - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) + name = dentry->d_name.name; + fid = p9_client_walk(dfid, 1, &name, 1); + if (fid == ERR_PTR(-ENOENT)) + inode = NULL; + else if (IS_ERR(fid)) + inode = ERR_CAST(fid); + else if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); else inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); - if (IS_ERR(inode)) { - p9_client_clunk(fid); - return ERR_CAST(inode); - } /* * If we had a rename on the server and a parallel lookup * for the new name, then make sure we instantiate with @@ -853,12 +846,14 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, * k/b. */ res = d_splice_alias(inode, dentry); - if (!res) - v9fs_fid_add(dentry, fid); - else if (!IS_ERR(res)) - v9fs_fid_add(res, fid); - else - p9_client_clunk(fid); + if (!IS_ERR(fid)) { + if (!res) + v9fs_fid_add(dentry, fid); + else if (!IS_ERR(res)) + v9fs_fid_add(res, fid); + else + p9_client_clunk(fid); + } return res; } diff --git a/fs/Kconfig b/fs/Kconfig index bc821a86d965..ac4ac908f001 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -196,7 +196,7 @@ config HUGETLBFS help hugetlbfs is a filesystem backing for HugeTLB pages, based on ramfs. For architectures that support it, say Y here and read - <file:Documentation/vm/hugetlbpage.txt> for details. + <file:Documentation/admin-guide/mm/hugetlbpage.rst> for details. If unsure, say N. diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 29444c83da48..e18eff854e1a 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -146,20 +146,6 @@ adfs_dir_lookup_byname(struct inode *inode, const struct qstr *name, struct obje obj->parent_id = inode->i_ino; - /* - * '.' is handled by reserved_lookup() in fs/namei.c - */ - if (name->len == 2 && name->name[0] == '.' && name->name[1] == '.') { - /* - * Currently unable to fill in the rest of 'obj', - * but this is better than nothing. We need to - * ascend one level to find it's parent. - */ - obj->name_len = 0; - obj->file_id = obj->parent_id; - goto free_out; - } - read_lock(&adfs_dir_lock); ret = ops->setpos(&dir, 0); @@ -266,17 +252,17 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); if (error == 0) { - error = -EACCES; /* * This only returns NULL if get_empty_inode * fails. */ inode = adfs_iget(dir->i_sb, &obj); - if (inode) - error = 0; + if (!inode) + inode = ERR_PTR(-EACCES); + } else if (error != -ENOENT) { + inode = ERR_PTR(error); } - d_add(dentry, inode); - return ERR_PTR(error); + return d_splice_alias(inode, dentry); } /* diff --git a/fs/affs/super.c b/fs/affs/super.c index e602619aed9d..d1ad11a8a4a5 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -241,6 +241,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, affs_set_opt(*mount_opts, SF_NO_TRUNCATE); break; case Opt_prefix: + kfree(*prefix); *prefix = match_strdup(&args[0]); if (!*prefix) return 0; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 839a22280606..3aad32762989 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -62,7 +62,6 @@ static const struct file_operations afs_proc_rootcell_fops = { .llseek = no_llseek, }; -static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos); static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, loff_t *pos); @@ -76,15 +75,6 @@ static const struct seq_operations afs_proc_cell_volumes_ops = { .show = afs_proc_cell_volumes_show, }; -static const struct file_operations afs_proc_cell_volumes_fops = { - .open = afs_proc_cell_volumes_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int afs_proc_cell_vlservers_open(struct inode *inode, - struct file *file); static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos); static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *pos); @@ -98,14 +88,6 @@ static const struct seq_operations afs_proc_cell_vlservers_ops = { .show = afs_proc_cell_vlservers_show, }; -static const struct file_operations afs_proc_cell_vlservers_fops = { - .open = afs_proc_cell_vlservers_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int afs_proc_servers_open(struct inode *inode, struct file *file); static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos); static void *afs_proc_servers_next(struct seq_file *p, void *v, loff_t *pos); @@ -119,13 +101,6 @@ static const struct seq_operations afs_proc_servers_ops = { .show = afs_proc_servers_show, }; -static const struct file_operations afs_proc_servers_fops = { - .open = afs_proc_servers_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int afs_proc_sysname_open(struct inode *inode, struct file *file); static int afs_proc_sysname_release(struct inode *inode, struct file *file); static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos); @@ -152,7 +127,7 @@ static const struct file_operations afs_proc_sysname_fops = { .write = afs_proc_sysname_write, }; -static const struct file_operations afs_proc_stats_fops; +static int afs_proc_stats_show(struct seq_file *m, void *v); /* * initialise the /proc/fs/afs/ directory @@ -167,8 +142,8 @@ int afs_proc_init(struct afs_net *net) if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || - !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) || - !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) || + !proc_create_seq("servers", 0644, net->proc_afs, &afs_proc_servers_ops) || + !proc_create_single("stats", 0644, net->proc_afs, afs_proc_stats_show) || !proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops)) goto error_tree; @@ -196,16 +171,7 @@ void afs_proc_cleanup(struct afs_net *net) */ static int afs_proc_cells_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - - ret = seq_open(file, &afs_proc_cells_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = PDE_DATA(inode); - return 0; + return seq_open(file, &afs_proc_cells_ops); } /* @@ -430,10 +396,11 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) if (!dir) goto error_dir; - if (!proc_create_data("vlservers", 0, dir, - &afs_proc_cell_vlservers_fops, cell) || - !proc_create_data("volumes", 0, dir, - &afs_proc_cell_volumes_fops, cell)) + if (!proc_create_seq_data("vlservers", 0, dir, + &afs_proc_cell_vlservers_ops, cell)) + goto error_tree; + if (!proc_create_seq_data("volumes", 0, dir, &afs_proc_cell_volumes_ops, + cell)) goto error_tree; _leave(" = 0"); @@ -459,36 +426,13 @@ void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell) } /* - * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells - */ -static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file) -{ - struct afs_cell *cell; - struct seq_file *m; - int ret; - - cell = PDE_DATA(inode); - if (!cell) - return -ENOENT; - - ret = seq_open(file, &afs_proc_cell_volumes_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = cell; - - return 0; -} - -/* * set up the iterator to start reading from the cells list and return the * first item */ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) __acquires(cell->proc_lock) { - struct afs_cell *cell = m->private; + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); _enter("cell=%p pos=%Ld", cell, *_pos); @@ -502,7 +446,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, loff_t *_pos) { - struct afs_cell *cell = p->private; + struct afs_cell *cell = PDE_DATA(file_inode(p->file)); _enter("cell=%p pos=%Ld", cell, *_pos); return seq_list_next(v, &cell->proc_volumes, _pos); @@ -514,7 +458,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) __releases(cell->proc_lock) { - struct afs_cell *cell = p->private; + struct afs_cell *cell = PDE_DATA(file_inode(p->file)); read_unlock(&cell->proc_lock); } @@ -530,7 +474,7 @@ static const char afs_vol_types[3][3] = { */ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) { - struct afs_cell *cell = m->private; + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); /* Display header on line 1 */ @@ -547,30 +491,6 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) } /* - * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume - * location server - */ -static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file) -{ - struct afs_cell *cell; - struct seq_file *m; - int ret; - - cell = PDE_DATA(inode); - if (!cell) - return -ENOENT; - - ret = seq_open(file, &afs_proc_cell_vlservers_ops); - if (ret<0) - return ret; - - m = file->private_data; - m->private = cell; - - return 0; -} - -/* * set up the iterator to start reading from the cells list and return the * first item */ @@ -578,7 +498,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { struct afs_addr_list *alist; - struct afs_cell *cell = m->private; + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); loff_t pos = *_pos; rcu_read_lock(); @@ -603,7 +523,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *_pos) { struct afs_addr_list *alist; - struct afs_cell *cell = p->private; + struct afs_cell *cell = PDE_DATA(file_inode(p->file)); loff_t pos; alist = rcu_dereference(cell->vl_addrs); @@ -644,15 +564,6 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) } /* - * open "/proc/fs/afs/servers" which provides a summary of active - * servers - */ -static int afs_proc_servers_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &afs_proc_servers_ops); -} - -/* * Set up the iterator to start reading from the server list and return the * first item. */ @@ -931,18 +842,3 @@ static int afs_proc_stats_show(struct seq_file *m, void *v) atomic_long_read(&net->n_store_bytes)); return 0; } - -/* - * Open "/proc/fs/afs/stats" to allow reading of the stat counters. - */ -static int afs_proc_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, afs_proc_stats_show, NULL); -} - -static const struct file_operations afs_proc_stats_fops = { - .open = afs_proc_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; diff --git a/fs/afs/security.c b/fs/afs/security.c index 1992b0ffa543..81dfedb7879f 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -372,18 +372,14 @@ int afs_permission(struct inode *inode, int mask) mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file"); if (S_ISDIR(inode->i_mode)) { - if (mask & MAY_EXEC) { + if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; - } else if (mask & MAY_READ) { - if (!(access & AFS_ACE_LOOKUP)) - goto permission_denied; - } else if (mask & MAY_WRITE) { + } + if (mask & MAY_WRITE) { if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */ AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */ goto permission_denied; - } else { - BUG(); } } else { if (!(access & AFS_ACE_LOOKUP)) diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 1ed7e2fd2f35..c3b740813fc7 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) struct afs_uvldbentry__xdr *uvldb; struct afs_vldb_entry *entry; bool new_only = false; - u32 tmp, nr_servers; + u32 tmp, nr_servers, vlflags; int i, ret; _enter(""); @@ -55,6 +55,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) new_only = true; } + vlflags = ntohl(uvldb->flags); for (i = 0; i < nr_servers; i++) { struct afs_uuid__xdr *xdr; struct afs_uuid *uuid; @@ -64,12 +65,13 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) if (tmp & AFS_VLSF_DONTUSE || (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) continue; - if (tmp & AFS_VLSF_RWVOL) + if (tmp & AFS_VLSF_RWVOL) { entry->fs_mask[i] |= AFS_VOL_VTM_RW; + if (vlflags & AFS_VLF_BACKEXISTS) + entry->fs_mask[i] |= AFS_VOL_VTM_BAK; + } if (tmp & AFS_VLSF_ROVOL) entry->fs_mask[i] |= AFS_VOL_VTM_RO; - if (tmp & AFS_VLSF_BACKVOL) - entry->fs_mask[i] |= AFS_VOL_VTM_BAK; if (!entry->fs_mask[i]) continue; @@ -89,15 +91,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) for (i = 0; i < AFS_MAXTYPES; i++) entry->vid[i] = ntohl(uvldb->volumeId[i]); - tmp = ntohl(uvldb->flags); - if (tmp & AFS_VLF_RWEXISTS) + if (vlflags & AFS_VLF_RWEXISTS) __set_bit(AFS_VLDB_HAS_RW, &entry->flags); - if (tmp & AFS_VLF_ROEXISTS) + if (vlflags & AFS_VLF_ROEXISTS) __set_bit(AFS_VLDB_HAS_RO, &entry->flags); - if (tmp & AFS_VLF_BACKEXISTS) + if (vlflags & AFS_VLF_BACKEXISTS) __set_bit(AFS_VLDB_HAS_BAK, &entry->flags); - if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { + if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { entry->error = -ENOMEDIUM; __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags); } @@ -5,6 +5,7 @@ * Implements an efficient asynchronous io interface. * * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. + * Copyright 2018 Christoph Hellwig. * * See ../COPYING for licensing terms. */ @@ -46,6 +47,8 @@ #include "internal.h" +#define KIOCB_KEY 0 + #define AIO_RING_MAGIC 0xa10a10a1 #define AIO_RING_COMPAT_FEATURES 1 #define AIO_RING_INCOMPAT_FEATURES 0 @@ -156,21 +159,29 @@ struct kioctx { unsigned id; }; -/* - * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either - * cancelled or completed (this makes a certain amount of sense because - * successful cancellation - io_cancel() - does deliver the completion to - * userspace). - * - * And since most things don't implement kiocb cancellation and we'd really like - * kiocb completion to be lockless when possible, we use ki_cancel to - * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED - * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). - */ -#define KIOCB_CANCELLED ((void *) (~0ULL)) +struct fsync_iocb { + struct work_struct work; + struct file *file; + bool datasync; +}; + +struct poll_iocb { + struct file *file; + __poll_t events; + struct wait_queue_head *head; + + union { + struct wait_queue_entry wait; + struct work_struct work; + }; +}; struct aio_kiocb { - struct kiocb common; + union { + struct kiocb rw; + struct fsync_iocb fsync; + struct poll_iocb poll; + }; struct kioctx *ki_ctx; kiocb_cancel_fn *ki_cancel; @@ -264,9 +275,6 @@ static int __init aio_setup(void) kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); - - pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); - return 0; } __initcall(aio_setup); @@ -552,42 +560,20 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common); + struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); struct kioctx *ctx = req->ki_ctx; unsigned long flags; - spin_lock_irqsave(&ctx->ctx_lock, flags); - - if (!req->ki_list.next) - list_add(&req->ki_list, &ctx->active_reqs); + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) + return; + spin_lock_irqsave(&ctx->ctx_lock, flags); + list_add_tail(&req->ki_list, &ctx->active_reqs); req->ki_cancel = cancel; - spin_unlock_irqrestore(&ctx->ctx_lock, flags); } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct aio_kiocb *kiocb) -{ - kiocb_cancel_fn *old, *cancel; - - /* - * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it - * actually has a cancel function, hence the cmpxchg() - */ - - cancel = READ_ONCE(kiocb->ki_cancel); - do { - if (!cancel || cancel == KIOCB_CANCELLED) - return -EINVAL; - - old = cancel; - cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); - } while (cancel != old); - - return cancel(&kiocb->common); -} - /* * free_ioctx() should be RCU delayed to synchronize against the RCU * protected lookup_ioctx() and also needs process context to call @@ -634,9 +620,8 @@ static void free_ioctx_users(struct percpu_ref *ref) while (!list_empty(&ctx->active_reqs)) { req = list_first_entry(&ctx->active_reqs, struct aio_kiocb, ki_list); - + req->ki_cancel(&req->rw); list_del_init(&req->ki_list); - kiocb_cancel(req); } spin_unlock_irq(&ctx->ctx_lock); @@ -1042,7 +1027,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) goto out_put; percpu_ref_get(&ctx->reqs); - + INIT_LIST_HEAD(&req->ki_list); req->ki_ctx = ctx; return req; out_put: @@ -1050,15 +1035,6 @@ out_put: return NULL; } -static void kiocb_free(struct aio_kiocb *req) -{ - if (req->common.ki_filp) - fput(req->common.ki_filp); - if (req->ki_eventfd != NULL) - eventfd_ctx_put(req->ki_eventfd); - kmem_cache_free(kiocb_cachep, req); -} - static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct aio_ring __user *ring = (void __user *)ctx_id; @@ -1089,44 +1065,14 @@ out: /* aio_complete * Called when the io request on the given iocb is complete. */ -static void aio_complete(struct kiocb *kiocb, long res, long res2) +static void aio_complete(struct aio_kiocb *iocb, long res, long res2) { - struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common); struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; struct io_event *ev_page, *event; unsigned tail, pos, head; unsigned long flags; - if (kiocb->ki_flags & IOCB_WRITE) { - struct file *file = kiocb->ki_filp; - - /* - * Tell lockdep we inherited freeze protection from submission - * thread. - */ - if (S_ISREG(file_inode(file)->i_mode)) - __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); - file_end_write(file); - } - - /* - * Special case handling for sync iocbs: - * - events go directly into the iocb for fast handling - * - the sync task with the iocb in its stack holds the single iocb - * ref, no other paths have a way to get another ref - * - the sync task helpfully left a reference to itself in the iocb - */ - BUG_ON(is_sync_kiocb(kiocb)); - - if (iocb->ki_list.next) { - unsigned long flags; - - spin_lock_irqsave(&ctx->ctx_lock, flags); - list_del(&iocb->ki_list); - spin_unlock_irqrestore(&ctx->ctx_lock, flags); - } - /* * Add a completion event to the ring buffer. Must be done holding * ctx->completion_lock to prevent other code from messing with the tail @@ -1180,11 +1126,12 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2) * eventfd. The eventfd_signal() function is safe to be called * from IRQ context. */ - if (iocb->ki_eventfd != NULL) + if (iocb->ki_eventfd) { eventfd_signal(iocb->ki_eventfd, 1); + eventfd_ctx_put(iocb->ki_eventfd); + } - /* everything turned out well, dispose of the aiocb. */ - kiocb_free(iocb); + kmem_cache_free(kiocb_cachep, iocb); /* * We have to order our ring_info tail store above and test @@ -1250,14 +1197,13 @@ static long aio_read_events_ring(struct kioctx *ctx, if (head == tail) break; - avail = min(avail, nr - ret); - avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - - ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE)); - pos = head + AIO_EVENTS_OFFSET; page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; pos %= AIO_EVENTS_PER_PAGE; + avail = min(avail, nr - ret); + avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos); + ev = kmap(page); copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail); @@ -1328,10 +1274,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, wait_event_interruptible_hrtimeout(ctx->wait, aio_read_events(ctx, min_nr, nr, event, &ret), until); - - if (!ret && signal_pending(current)) - ret = -EINTR; - return ret; } @@ -1447,6 +1389,58 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) return -EINVAL; } +static void aio_remove_iocb(struct aio_kiocb *iocb) +{ + struct kioctx *ctx = iocb->ki_ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->ctx_lock, flags); + list_del(&iocb->ki_list); + spin_unlock_irqrestore(&ctx->ctx_lock, flags); +} + +static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) +{ + struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw); + + if (!list_empty_careful(&iocb->ki_list)) + aio_remove_iocb(iocb); + + if (kiocb->ki_flags & IOCB_WRITE) { + struct inode *inode = file_inode(kiocb->ki_filp); + + /* + * Tell lockdep we inherited freeze protection from submission + * thread. + */ + if (S_ISREG(inode->i_mode)) + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + file_end_write(kiocb->ki_filp); + } + + fput(kiocb->ki_filp); + aio_complete(iocb, res, res2); +} + +static int aio_prep_rw(struct kiocb *req, struct iocb *iocb) +{ + int ret; + + req->ki_filp = fget(iocb->aio_fildes); + if (unlikely(!req->ki_filp)) + return -EBADF; + req->ki_complete = aio_complete_rw; + req->ki_pos = iocb->aio_offset; + req->ki_flags = iocb_flags(req->ki_filp); + if (iocb->aio_flags & IOCB_FLAG_RESFD) + req->ki_flags |= IOCB_EVENTFD; + req->ki_hint = file_write_hint(req->ki_filp); + ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags); + if (unlikely(ret)) + fput(req->ki_filp); + return ret; +} + static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec, bool vectored, bool compat, struct iov_iter *iter) { @@ -1466,11 +1460,11 @@ static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec, return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter); } -static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret) +static inline void aio_rw_done(struct kiocb *req, ssize_t ret) { switch (ret) { case -EIOCBQUEUED: - return ret; + break; case -ERESTARTSYS: case -ERESTARTNOINTR: case -ERESTARTNOHAND: @@ -1482,85 +1476,270 @@ static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret) ret = -EINTR; /*FALLTHRU*/ default: - aio_complete(req, ret, 0); - return 0; + aio_complete_rw(req, ret, 0); } } static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, bool compat) { - struct file *file = req->ki_filp; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; + struct file *file; ssize_t ret; + ret = aio_prep_rw(req, iocb); + if (ret) + return ret; + file = req->ki_filp; + + ret = -EBADF; if (unlikely(!(file->f_mode & FMODE_READ))) - return -EBADF; + goto out_fput; + ret = -EINVAL; if (unlikely(!file->f_op->read_iter)) - return -EINVAL; + goto out_fput; ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); if (ret) - return ret; + goto out_fput; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) - ret = aio_ret(req, call_read_iter(file, req, &iter)); + aio_rw_done(req, call_read_iter(file, req, &iter)); kfree(iovec); +out_fput: + if (unlikely(ret)) + fput(file); return ret; } static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, bool compat) { - struct file *file = req->ki_filp; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; + struct file *file; ssize_t ret; + ret = aio_prep_rw(req, iocb); + if (ret) + return ret; + file = req->ki_filp; + + ret = -EBADF; if (unlikely(!(file->f_mode & FMODE_WRITE))) - return -EBADF; + goto out_fput; + ret = -EINVAL; if (unlikely(!file->f_op->write_iter)) - return -EINVAL; + goto out_fput; ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); if (ret) - return ret; + goto out_fput; ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) { - req->ki_flags |= IOCB_WRITE; - file_start_write(file); - ret = aio_ret(req, call_write_iter(file, req, &iter)); /* - * We release freeze protection in aio_complete(). Fool lockdep - * by telling it the lock got released so that it doesn't - * complain about held lock when we return to userspace. + * Open-code file_start_write here to grab freeze protection, + * which will be released by another thread in + * aio_complete_rw(). Fool lockdep by telling it the lock got + * released so that it doesn't complain about the held lock when + * we return to userspace. */ - if (S_ISREG(file_inode(file)->i_mode)) + if (S_ISREG(file_inode(file)->i_mode)) { + __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); + } + req->ki_flags |= IOCB_WRITE; + aio_rw_done(req, call_write_iter(file, req, &iter)); } kfree(iovec); +out_fput: + if (unlikely(ret)) + fput(file); return ret; } +static void aio_fsync_work(struct work_struct *work) +{ + struct fsync_iocb *req = container_of(work, struct fsync_iocb, work); + int ret; + + ret = vfs_fsync(req->file, req->datasync); + fput(req->file); + aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); +} + +static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) +{ + if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || + iocb->aio_rw_flags)) + return -EINVAL; + req->file = fget(iocb->aio_fildes); + if (unlikely(!req->file)) + return -EBADF; + if (unlikely(!req->file->f_op->fsync)) { + fput(req->file); + return -EINVAL; + } + + req->datasync = datasync; + INIT_WORK(&req->work, aio_fsync_work); + schedule_work(&req->work); + return 0; +} + +/* need to use list_del_init so we can check if item was present */ +static inline bool __aio_poll_remove(struct poll_iocb *req) +{ + if (list_empty(&req->wait.entry)) + return false; + list_del_init(&req->wait.entry); + return true; +} + +static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) +{ + fput(iocb->poll.file); + aio_complete(iocb, mangle_poll(mask), 0); +} + +static void aio_poll_work(struct work_struct *work) +{ + struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work); + + if (!list_empty_careful(&iocb->ki_list)) + aio_remove_iocb(iocb); + __aio_poll_complete(iocb, iocb->poll.events); +} + +static int aio_poll_cancel(struct kiocb *iocb) +{ + struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); + struct poll_iocb *req = &aiocb->poll; + struct wait_queue_head *head = req->head; + bool found = false; + + spin_lock(&head->lock); + found = __aio_poll_remove(req); + spin_unlock(&head->lock); + + if (found) { + req->events = 0; + INIT_WORK(&req->work, aio_poll_work); + schedule_work(&req->work); + } + return 0; +} + +static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, + void *key) +{ + struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); + struct file *file = req->file; + __poll_t mask = key_to_poll(key); + + assert_spin_locked(&req->head->lock); + + /* for instances that support it check for an event match first: */ + if (mask && !(mask & req->events)) + return 0; + + mask = file->f_op->poll_mask(file, req->events); + if (!mask) + return 0; + + __aio_poll_remove(req); + + /* + * Try completing without a context switch if we can acquire ctx_lock + * without spinning. Otherwise we need to defer to a workqueue to + * avoid a deadlock due to the lock order. + */ + if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { + list_del_init(&iocb->ki_list); + spin_unlock(&iocb->ki_ctx->ctx_lock); + + __aio_poll_complete(iocb, mask); + } else { + req->events = mask; + INIT_WORK(&req->work, aio_poll_work); + schedule_work(&req->work); + } + + return 1; +} + +static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) +{ + struct kioctx *ctx = aiocb->ki_ctx; + struct poll_iocb *req = &aiocb->poll; + __poll_t mask; + + /* reject any unknown events outside the normal event mask. */ + if ((u16)iocb->aio_buf != iocb->aio_buf) + return -EINVAL; + /* reject fields that are not defined for poll */ + if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags) + return -EINVAL; + + req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; + req->file = fget(iocb->aio_fildes); + if (unlikely(!req->file)) + return -EBADF; + if (!file_has_poll_mask(req->file)) + goto out_fail; + + req->head = req->file->f_op->get_poll_head(req->file, req->events); + if (!req->head) + goto out_fail; + if (IS_ERR(req->head)) { + mask = EPOLLERR; + goto done; + } + + init_waitqueue_func_entry(&req->wait, aio_poll_wake); + aiocb->ki_cancel = aio_poll_cancel; + + spin_lock_irq(&ctx->ctx_lock); + spin_lock(&req->head->lock); + mask = req->file->f_op->poll_mask(req->file, req->events); + if (!mask) { + __add_wait_queue(req->head, &req->wait); + list_add_tail(&aiocb->ki_list, &ctx->active_reqs); + } + spin_unlock(&req->head->lock); + spin_unlock_irq(&ctx->ctx_lock); +done: + if (mask) + __aio_poll_complete(aiocb, mask); + return 0; +out_fail: + fput(req->file); + return -EINVAL; /* same as no support for IOCB_CMD_POLL */ +} + static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb, bool compat) + bool compat) { struct aio_kiocb *req; - struct file *file; + struct iocb iocb; ssize_t ret; + if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) + return -EFAULT; + /* enforce forwards compatibility on users */ - if (unlikely(iocb->aio_reserved2)) { + if (unlikely(iocb.aio_reserved2)) { pr_debug("EINVAL: reserve field set\n"); return -EINVAL; } /* prevent overflows */ if (unlikely( - (iocb->aio_buf != (unsigned long)iocb->aio_buf) || - (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) || - ((ssize_t)iocb->aio_nbytes < 0) + (iocb.aio_buf != (unsigned long)iocb.aio_buf) || + (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) || + ((ssize_t)iocb.aio_nbytes < 0) )) { pr_debug("EINVAL: overflow check\n"); return -EINVAL; @@ -1570,37 +1749,19 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!req)) return -EAGAIN; - req->common.ki_filp = file = fget(iocb->aio_fildes); - if (unlikely(!req->common.ki_filp)) { - ret = -EBADF; - goto out_put_req; - } - req->common.ki_pos = iocb->aio_offset; - req->common.ki_complete = aio_complete; - req->common.ki_flags = iocb_flags(req->common.ki_filp); - req->common.ki_hint = file_write_hint(file); - - if (iocb->aio_flags & IOCB_FLAG_RESFD) { + if (iocb.aio_flags & IOCB_FLAG_RESFD) { /* * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an * instance of the file* now. The file descriptor must be * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. */ - req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); + req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd); if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); req->ki_eventfd = NULL; goto out_put_req; } - - req->common.ki_flags |= IOCB_EVENTFD; - } - - ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags); - if (unlikely(ret)) { - pr_debug("EINVAL: aio_rw_flags\n"); - goto out_put_req; } ret = put_user(KIOCB_KEY, &user_iocb->aio_key); @@ -1610,41 +1771,67 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } req->ki_user_iocb = user_iocb; - req->ki_user_data = iocb->aio_data; + req->ki_user_data = iocb.aio_data; - get_file(file); - switch (iocb->aio_lio_opcode) { + switch (iocb.aio_lio_opcode) { case IOCB_CMD_PREAD: - ret = aio_read(&req->common, iocb, false, compat); + ret = aio_read(&req->rw, &iocb, false, compat); break; case IOCB_CMD_PWRITE: - ret = aio_write(&req->common, iocb, false, compat); + ret = aio_write(&req->rw, &iocb, false, compat); break; case IOCB_CMD_PREADV: - ret = aio_read(&req->common, iocb, true, compat); + ret = aio_read(&req->rw, &iocb, true, compat); break; case IOCB_CMD_PWRITEV: - ret = aio_write(&req->common, iocb, true, compat); + ret = aio_write(&req->rw, &iocb, true, compat); + break; + case IOCB_CMD_FSYNC: + ret = aio_fsync(&req->fsync, &iocb, false); + break; + case IOCB_CMD_FDSYNC: + ret = aio_fsync(&req->fsync, &iocb, true); + break; + case IOCB_CMD_POLL: + ret = aio_poll(req, &iocb); break; default: - pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); + pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); ret = -EINVAL; break; } - fput(file); - if (ret && ret != -EIOCBQUEUED) + /* + * If ret is 0, we'd either done aio_complete() ourselves or have + * arranged for that to be done asynchronously. Anything non-zero + * means that we need to destroy req ourselves. + */ + if (ret) goto out_put_req; return 0; out_put_req: put_reqs_available(ctx, 1); percpu_ref_put(&ctx->reqs); - kiocb_free(req); + if (req->ki_eventfd) + eventfd_ctx_put(req->ki_eventfd); + kmem_cache_free(kiocb_cachep, req); return ret; } -static long do_io_submit(aio_context_t ctx_id, long nr, - struct iocb __user *__user *iocbpp, bool compat) +/* sys_io_submit: + * Queue the nr iocbs pointed to by iocbpp for processing. Returns + * the number of iocbs queued. May return -EINVAL if the aio_context + * specified by ctx_id is invalid, if nr is < 0, if the iocb at + * *iocbpp[0] is not properly initialized, if the operation specified + * is invalid for the file descriptor in the iocb. May fail with + * -EFAULT if any of the data structures point to invalid data. May + * fail with -EBADF if the file descriptor specified in the first + * iocb is invalid. May fail with -EAGAIN if insufficient resources + * are available to queue any iocbs. Will return 0 if nr is 0. Will + * fail with -ENOSYS if not implemented. + */ +SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, + struct iocb __user * __user *, iocbpp) { struct kioctx *ctx; long ret = 0; @@ -1654,39 +1841,25 @@ static long do_io_submit(aio_context_t ctx_id, long nr, if (unlikely(nr < 0)) return -EINVAL; - if (unlikely(nr > LONG_MAX/sizeof(*iocbpp))) - nr = LONG_MAX/sizeof(*iocbpp); - - if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) - return -EFAULT; - ctx = lookup_ioctx(ctx_id); if (unlikely(!ctx)) { pr_debug("EINVAL: invalid context id\n"); return -EINVAL; } - blk_start_plug(&plug); + if (nr > ctx->nr_events) + nr = ctx->nr_events; - /* - * AKPM: should this return a partial result if some of the IOs were - * successfully submitted? - */ - for (i=0; i<nr; i++) { + blk_start_plug(&plug); + for (i = 0; i < nr; i++) { struct iocb __user *user_iocb; - struct iocb tmp; - if (unlikely(__get_user(user_iocb, iocbpp + i))) { + if (unlikely(get_user(user_iocb, iocbpp + i))) { ret = -EFAULT; break; } - if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) { - ret = -EFAULT; - break; - } - - ret = io_submit_one(ctx, user_iocb, &tmp, compat); + ret = io_submit_one(ctx, user_iocb, false); if (ret) break; } @@ -1696,59 +1869,44 @@ static long do_io_submit(aio_context_t ctx_id, long nr, return i ? i : ret; } -/* sys_io_submit: - * Queue the nr iocbs pointed to by iocbpp for processing. Returns - * the number of iocbs queued. May return -EINVAL if the aio_context - * specified by ctx_id is invalid, if nr is < 0, if the iocb at - * *iocbpp[0] is not properly initialized, if the operation specified - * is invalid for the file descriptor in the iocb. May fail with - * -EFAULT if any of the data structures point to invalid data. May - * fail with -EBADF if the file descriptor specified in the first - * iocb is invalid. May fail with -EAGAIN if insufficient resources - * are available to queue any iocbs. Will return 0 if nr is 0. Will - * fail with -ENOSYS if not implemented. - */ -SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, - struct iocb __user * __user *, iocbpp) -{ - return do_io_submit(ctx_id, nr, iocbpp, 0); -} - #ifdef CONFIG_COMPAT -static inline long -copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) +COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, + int, nr, compat_uptr_t __user *, iocbpp) { - compat_uptr_t uptr; - int i; + struct kioctx *ctx; + long ret = 0; + int i = 0; + struct blk_plug plug; - for (i = 0; i < nr; ++i) { - if (get_user(uptr, ptr32 + i)) - return -EFAULT; - if (put_user(compat_ptr(uptr), ptr64 + i)) - return -EFAULT; + if (unlikely(nr < 0)) + return -EINVAL; + + ctx = lookup_ioctx(ctx_id); + if (unlikely(!ctx)) { + pr_debug("EINVAL: invalid context id\n"); + return -EINVAL; } - return 0; -} -#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) + if (nr > ctx->nr_events) + nr = ctx->nr_events; -COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, - int, nr, u32 __user *, iocb) -{ - struct iocb __user * __user *iocb64; - long ret; + blk_start_plug(&plug); + for (i = 0; i < nr; i++) { + compat_uptr_t user_iocb; - if (unlikely(nr < 0)) - return -EINVAL; + if (unlikely(get_user(user_iocb, iocbpp + i))) { + ret = -EFAULT; + break; + } - if (nr > MAX_AIO_SUBMITS) - nr = MAX_AIO_SUBMITS; + ret = io_submit_one(ctx, compat_ptr(user_iocb), true); + if (ret) + break; + } + blk_finish_plug(&plug); - iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); - ret = copy_iocb(nr, iocb, iocb64); - if (!ret) - ret = do_io_submit(ctx_id, nr, iocb64, 1); - return ret; + percpu_ref_put(&ctx->users); + return i ? i : ret; } #endif @@ -1756,15 +1914,12 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, * Finds a given iocb for cancellation. */ static struct aio_kiocb * -lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key) +lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb) { struct aio_kiocb *kiocb; assert_spin_locked(&ctx->ctx_lock); - if (key != KIOCB_KEY) - return NULL; - /* TODO: use a hash or array, this sucks. */ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { if (kiocb->ki_user_iocb == iocb) @@ -1788,25 +1943,24 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, { struct kioctx *ctx; struct aio_kiocb *kiocb; + int ret = -EINVAL; u32 key; - int ret; - ret = get_user(key, &iocb->aio_key); - if (unlikely(ret)) + if (unlikely(get_user(key, &iocb->aio_key))) return -EFAULT; + if (unlikely(key != KIOCB_KEY)) + return -EINVAL; ctx = lookup_ioctx(ctx_id); if (unlikely(!ctx)) return -EINVAL; spin_lock_irq(&ctx->ctx_lock); - - kiocb = lookup_kiocb(ctx, iocb, key); - if (kiocb) - ret = kiocb_cancel(kiocb); - else - ret = -EINVAL; - + kiocb = lookup_kiocb(ctx, iocb); + if (kiocb) { + ret = kiocb->ki_cancel(&kiocb->rw); + list_del_init(&kiocb->ki_list); + } spin_unlock_irq(&ctx->ctx_lock); if (!ret) { @@ -1861,13 +2015,60 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, struct timespec __user *, timeout) { struct timespec64 ts; + int ret; + + if (timeout && unlikely(get_timespec64(&ts, timeout))) + return -EFAULT; + + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); + if (!ret && signal_pending(current)) + ret = -EINTR; + return ret; +} + +SYSCALL_DEFINE6(io_pgetevents, + aio_context_t, ctx_id, + long, min_nr, + long, nr, + struct io_event __user *, events, + struct timespec __user *, timeout, + const struct __aio_sigset __user *, usig) +{ + struct __aio_sigset ksig = { NULL, }; + sigset_t ksigmask, sigsaved; + struct timespec64 ts; + int ret; + + if (timeout && unlikely(get_timespec64(&ts, timeout))) + return -EFAULT; + + if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) + return -EFAULT; - if (timeout) { - if (unlikely(get_timespec64(&ts, timeout))) + if (ksig.sigmask) { + if (ksig.sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask))) return -EFAULT; + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); + if (signal_pending(current)) { + if (ksig.sigmask) { + current->saved_sigmask = sigsaved; + set_restore_sigmask(); + } + + if (!ret) + ret = -ERESTARTNOHAND; + } else { + if (ksig.sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); } - return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); + return ret; } #ifdef CONFIG_COMPAT @@ -1878,13 +2079,64 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, struct compat_timespec __user *, timeout) { struct timespec64 t; + int ret; + + if (timeout && compat_get_timespec64(&t, timeout)) + return -EFAULT; + + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); + if (!ret && signal_pending(current)) + ret = -EINTR; + return ret; +} + + +struct __compat_aio_sigset { + compat_sigset_t __user *sigmask; + compat_size_t sigsetsize; +}; + +COMPAT_SYSCALL_DEFINE6(io_pgetevents, + compat_aio_context_t, ctx_id, + compat_long_t, min_nr, + compat_long_t, nr, + struct io_event __user *, events, + struct compat_timespec __user *, timeout, + const struct __compat_aio_sigset __user *, usig) +{ + struct __compat_aio_sigset ksig = { NULL, }; + sigset_t ksigmask, sigsaved; + struct timespec64 t; + int ret; + + if (timeout && compat_get_timespec64(&t, timeout)) + return -EFAULT; + + if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) + return -EFAULT; - if (timeout) { - if (compat_get_timespec64(&t, timeout)) + if (ksig.sigmask) { + if (ksig.sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (get_compat_sigset(&ksigmask, ksig.sigmask)) return -EFAULT; + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); + if (signal_pending(current)) { + if (ksig.sigmask) { + current->saved_sigmask = sigsaved; + set_restore_sigmask(); + } + if (!ret) + ret = -ERESTARTNOHAND; + } else { + if (ksig.sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); } - return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); + return ret; } #endif diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index ee832ca5f734..f32f21c3bbc7 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -21,10 +21,9 @@ #define dprintf(x...) #endif -static int bfs_add_entry(struct inode *dir, const unsigned char *name, - int namelen, int ino); +static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino); static struct buffer_head *bfs_find_entry(struct inode *dir, - const unsigned char *name, int namelen, + const struct qstr *child, struct bfs_dirent **res_dir); static int bfs_readdir(struct file *f, struct dir_context *ctx) @@ -111,8 +110,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, mark_inode_dirty(inode); bfs_dump_imap("create", s); - err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, - inode->i_ino); + err = bfs_add_entry(dir, &dentry->d_name, inode->i_ino); if (err) { inode_dec_link_count(inode); mutex_unlock(&info->bfs_lock); @@ -136,19 +134,14 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENAMETOOLONG); mutex_lock(&info->bfs_lock); - bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); + bh = bfs_find_entry(dir, &dentry->d_name, &de); if (bh) { unsigned long ino = (unsigned long)le16_to_cpu(de->ino); brelse(bh); inode = bfs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) { - mutex_unlock(&info->bfs_lock); - return ERR_CAST(inode); - } } mutex_unlock(&info->bfs_lock); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static int bfs_link(struct dentry *old, struct inode *dir, @@ -159,8 +152,7 @@ static int bfs_link(struct dentry *old, struct inode *dir, int err; mutex_lock(&info->bfs_lock); - err = bfs_add_entry(dir, new->d_name.name, new->d_name.len, - inode->i_ino); + err = bfs_add_entry(dir, &new->d_name, inode->i_ino); if (err) { mutex_unlock(&info->bfs_lock); return err; @@ -183,7 +175,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry) struct bfs_sb_info *info = BFS_SB(inode->i_sb); mutex_lock(&info->bfs_lock); - bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); + bh = bfs_find_entry(dir, &dentry->d_name, &de); if (!bh || (le16_to_cpu(de->ino) != inode->i_ino)) goto out_brelse; @@ -228,27 +220,21 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, info = BFS_SB(old_inode->i_sb); mutex_lock(&info->bfs_lock); - old_bh = bfs_find_entry(old_dir, - old_dentry->d_name.name, - old_dentry->d_name.len, &old_de); + old_bh = bfs_find_entry(old_dir, &old_dentry->d_name, &old_de); if (!old_bh || (le16_to_cpu(old_de->ino) != old_inode->i_ino)) goto end_rename; error = -EPERM; new_inode = d_inode(new_dentry); - new_bh = bfs_find_entry(new_dir, - new_dentry->d_name.name, - new_dentry->d_name.len, &new_de); + new_bh = bfs_find_entry(new_dir, &new_dentry->d_name, &new_de); if (new_bh && !new_inode) { brelse(new_bh); new_bh = NULL; } if (!new_bh) { - error = bfs_add_entry(new_dir, - new_dentry->d_name.name, - new_dentry->d_name.len, + error = bfs_add_entry(new_dir, &new_dentry->d_name, old_inode->i_ino); if (error) goto end_rename; @@ -278,9 +264,10 @@ const struct inode_operations bfs_dir_inops = { .rename = bfs_rename, }; -static int bfs_add_entry(struct inode *dir, const unsigned char *name, - int namelen, int ino) +static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino) { + const unsigned char *name = child->name; + int namelen = child->len; struct buffer_head *bh; struct bfs_dirent *de; int block, sblock, eblock, off, pos; @@ -332,12 +319,14 @@ static inline int bfs_namecmp(int len, const unsigned char *name, } static struct buffer_head *bfs_find_entry(struct inode *dir, - const unsigned char *name, int namelen, + const struct qstr *child, struct bfs_dirent **res_dir) { unsigned long block = 0, offset = 0; struct buffer_head *bh = NULL; struct bfs_dirent *de; + const unsigned char *name = child->name; + int namelen = child->len; *res_dir = NULL; if (namelen > BFS_NAMELEN) diff --git a/fs/block_dev.c b/fs/block_dev.c index 7ec920e27065..bef6934b6189 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -272,7 +272,7 @@ struct blkdev_dio { struct bio bio; }; -static struct bio_set *blkdev_dio_pool __read_mostly; +static struct bio_set blkdev_dio_pool; static void blkdev_bio_end_io(struct bio *bio) { @@ -334,7 +334,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool); + bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); bio_get(bio); /* extra ref for the completion handler */ dio = container_of(bio, struct blkdev_dio, bio); @@ -432,10 +432,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static __init int blkdev_init(void) { - blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS); - if (!blkdev_dio_pool) - return -ENOMEM; - return 0; + return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS); } module_init(blkdev_init); @@ -1322,27 +1319,30 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty) * check_disk_size_change - checks for disk size change and adjusts bdev size. * @disk: struct gendisk to check * @bdev: struct bdev to adjust. + * @verbose: if %true log a message about a size change if there is any * * This routine checks to see if the bdev size does not match the disk size * and adjusts it if it differs. When shrinking the bdev size, its all caches * are freed. */ -void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) +void check_disk_size_change(struct gendisk *disk, struct block_device *bdev, + bool verbose) { loff_t disk_size, bdev_size; disk_size = (loff_t)get_capacity(disk) << 9; bdev_size = i_size_read(bdev->bd_inode); if (disk_size != bdev_size) { - printk(KERN_INFO - "%s: detected capacity change from %lld to %lld\n", - disk->disk_name, bdev_size, disk_size); + if (verbose) { + printk(KERN_INFO + "%s: detected capacity change from %lld to %lld\n", + disk->disk_name, bdev_size, disk_size); + } i_size_write(bdev->bd_inode, disk_size); if (bdev_size > disk_size) flush_disk(bdev, false); } } -EXPORT_SYMBOL(check_disk_size_change); /** * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back @@ -1364,7 +1364,7 @@ int revalidate_disk(struct gendisk *disk) return ret; mutex_lock(&bdev->bd_mutex); - check_disk_size_change(disk, bdev); + check_disk_size_change(disk, bdev, ret == 0); bdev->bd_invalidated = 0; mutex_unlock(&bdev->bd_mutex); bdput(bdev); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index af2f0408c6e4..51fc015c7d2c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -26,7 +26,7 @@ static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; -static struct bio_set *btrfs_bioset; +static struct bio_set btrfs_bioset; static inline bool extent_state_in_tree(const struct extent_state *state) { @@ -162,20 +162,18 @@ int __init extent_io_init(void) if (!extent_buffer_cache) goto free_state_cache; - btrfs_bioset = bioset_create(BIO_POOL_SIZE, - offsetof(struct btrfs_io_bio, bio), - BIOSET_NEED_BVECS); - if (!btrfs_bioset) + if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE, + offsetof(struct btrfs_io_bio, bio), + BIOSET_NEED_BVECS)) goto free_buffer_cache; - if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE)) + if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE)) goto free_bioset; return 0; free_bioset: - bioset_free(btrfs_bioset); - btrfs_bioset = NULL; + bioset_exit(&btrfs_bioset); free_buffer_cache: kmem_cache_destroy(extent_buffer_cache); @@ -198,8 +196,7 @@ void __cold extent_io_exit(void) rcu_barrier(); kmem_cache_destroy(extent_state_cache); kmem_cache_destroy(extent_buffer_cache); - if (btrfs_bioset) - bioset_free(btrfs_bioset); + bioset_exit(&btrfs_bioset); } void extent_io_tree_init(struct extent_io_tree *tree, @@ -2679,7 +2676,7 @@ struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte) { struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset); bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_byte >> 9; btrfs_io_bio_init(btrfs_io_bio(bio)); @@ -2692,7 +2689,7 @@ struct bio *btrfs_bio_clone(struct bio *bio) struct bio *new; /* Bio allocation backed by a bioset does not fail */ - new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset); + new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset); btrfs_bio = btrfs_io_bio(new); btrfs_io_bio_init(btrfs_bio); btrfs_bio->iter = bio->bi_iter; @@ -2704,7 +2701,7 @@ struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs) struct bio *bio; /* Bio allocation backed by a bioset does not fail */ - bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset); + bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset); btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; } @@ -2715,7 +2712,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size) struct btrfs_io_bio *btrfs_bio; /* this will never fail when it's backed by a bioset */ - bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset); + bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset); ASSERT(bio); btrfs_bio = btrfs_io_bio(bio); diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c index 125b90f6c796..0ce1aa56b67f 100644 --- a/fs/cachefiles/proc.c +++ b/fs/cachefiles/proc.c @@ -85,21 +85,6 @@ static const struct seq_operations cachefiles_histogram_ops = { }; /* - * open "/proc/fs/cachefiles/XXX" which provide statistics summaries - */ -static int cachefiles_histogram_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &cachefiles_histogram_ops); -} - -static const struct file_operations cachefiles_histogram_fops = { - .open = cachefiles_histogram_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -/* * initialise the /proc/fs/cachefiles/ directory */ int __init cachefiles_proc_init(void) @@ -109,8 +94,8 @@ int __init cachefiles_proc_init(void) if (!proc_mkdir("fs/cachefiles", NULL)) goto error_dir; - if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL, - &cachefiles_histogram_fops)) + if (!proc_create_seq("fs/cachefiles/histogram", S_IFREG | 0444, NULL, + &cachefiles_histogram_ops)) goto error_histogram; _leave(" = 0"); diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index d61e2de8d0eb..5f132d59dfc2 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -197,7 +197,7 @@ config CIFS_SMB311 config CIFS_SMB_DIRECT bool "SMB Direct support (Experimental)" - depends on CIFS=m && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND_ADDR_TRANS=y + depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y help Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 9d69ea433330..4bc4a7ac61d9 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -314,18 +314,6 @@ skip_rdma: return 0; } -static int cifs_debug_data_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, cifs_debug_data_proc_show, NULL); -} - -static const struct file_operations cifs_debug_data_proc_fops = { - .open = cifs_debug_data_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #ifdef CONFIG_CIFS_STATS static ssize_t cifs_stats_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) @@ -497,7 +485,8 @@ cifs_proc_init(void) if (proc_fs_cifs == NULL) return; - proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops); + proc_create_single("DebugData", 0, proc_fs_cifs, + cifs_debug_data_proc_show); #ifdef CONFIG_CIFS_STATS proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 925844343038..9eb03e8b1ada 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -780,21 +780,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { free_xid(xid); - return (struct dentry *)tlink; + return ERR_CAST(tlink); } pTcon = tlink_tcon(tlink); rc = check_name(direntry, pTcon); - if (rc) - goto lookup_out; + if (unlikely(rc)) { + cifs_put_tlink(tlink); + free_xid(xid); + return ERR_PTR(rc); + } /* can not grab the rename sem here since it would deadlock in the cases (beginning of sys_rename itself) in which we already have the sb rename sem */ full_path = build_path_from_dentry(direntry); if (full_path == NULL) { - rc = -ENOMEM; - goto lookup_out; + cifs_put_tlink(tlink); + free_xid(xid); + return ERR_PTR(-ENOMEM); } if (d_really_is_positive(direntry)) { @@ -813,29 +817,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, parent_dir_inode->i_sb, xid, NULL); } - if ((rc == 0) && (newInode != NULL)) { - d_add(direntry, newInode); + if (rc == 0) { /* since paths are not looked up by component - the parent directories are presumed to be good here */ renew_parental_timestamps(direntry); - } else if (rc == -ENOENT) { - rc = 0; cifs_set_time(direntry, jiffies); - d_add(direntry, NULL); - /* if it was once a directory (but how can we tell?) we could do - shrink_dcache_parent(direntry); */ - } else if (rc != -EACCES) { - cifs_dbg(FYI, "Unexpected lookup error %d\n", rc); - /* We special case check for Access Denied - since that - is a common return code */ + newInode = NULL; + } else { + if (rc != -EACCES) { + cifs_dbg(FYI, "Unexpected lookup error %d\n", rc); + /* We special case check for Access Denied - since that + is a common return code */ + } + newInode = ERR_PTR(rc); } - -lookup_out: kfree(full_path); cifs_put_tlink(tlink); free_xid(xid); - return ERR_PTR(rc); + return d_splice_alias(newInode, direntry); } static int diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 124b093d14e5..c4fb9ad7c808 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -808,10 +808,7 @@ static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, un } out: mutex_unlock(&read_mutex); - if (IS_ERR(inode)) - return ERR_CAST(inode); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static int cramfs_readpage(struct file *file, struct page *page) @@ -677,7 +677,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, * downgrading page table protection not changing it to point * to a new page. * - * See Documentation/vm/mmu_notifier.txt + * See Documentation/vm/mmu_notifier.rst */ if (pmdp) { #ifdef CONFIG_FS_DAX_PMD diff --git a/fs/dcache.c b/fs/dcache.c index 2acfc69878f5..0e8e5de3c48a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -580,6 +580,7 @@ static void __dentry_kill(struct dentry *dentry) spin_unlock(&dentry->d_lock); if (likely(can_free)) dentry_free(dentry); + cond_resched(); } static struct dentry *__lock_parent(struct dentry *dentry) @@ -827,30 +828,24 @@ static inline bool fast_dput(struct dentry *dentry) */ void dput(struct dentry *dentry) { - if (unlikely(!dentry)) - return; + while (dentry) { + might_sleep(); -repeat: - might_sleep(); + rcu_read_lock(); + if (likely(fast_dput(dentry))) { + rcu_read_unlock(); + return; + } - rcu_read_lock(); - if (likely(fast_dput(dentry))) { + /* Slow case: now with the dentry lock held */ rcu_read_unlock(); - return; - } - - /* Slow case: now with the dentry lock held */ - rcu_read_unlock(); - if (likely(retain_dentry(dentry))) { - spin_unlock(&dentry->d_lock); - return; - } + if (likely(retain_dentry(dentry))) { + spin_unlock(&dentry->d_lock); + return; + } - dentry = dentry_kill(dentry); - if (dentry) { - cond_resched(); - goto repeat; + dentry = dentry_kill(dentry); } } EXPORT_SYMBOL(dput); @@ -907,6 +902,35 @@ repeat: } EXPORT_SYMBOL(dget_parent); +static struct dentry * __d_find_any_alias(struct inode *inode) +{ + struct dentry *alias; + + if (hlist_empty(&inode->i_dentry)) + return NULL; + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); + __dget(alias); + return alias; +} + +/** + * d_find_any_alias - find any alias for a given inode + * @inode: inode to find an alias for + * + * If any aliases exist for the given inode, take and return a + * reference for one of them. If no aliases exist, return %NULL. + */ +struct dentry *d_find_any_alias(struct inode *inode) +{ + struct dentry *de; + + spin_lock(&inode->i_lock); + de = __d_find_any_alias(inode); + spin_unlock(&inode->i_lock); + return de; +} +EXPORT_SYMBOL(d_find_any_alias); + /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question @@ -923,34 +947,19 @@ EXPORT_SYMBOL(dget_parent); */ static struct dentry *__d_find_alias(struct inode *inode) { - struct dentry *alias, *discon_alias; + struct dentry *alias; + + if (S_ISDIR(inode->i_mode)) + return __d_find_any_alias(inode); -again: - discon_alias = NULL; hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); - if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { - if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) { - discon_alias = alias; - } else { - __dget_dlock(alias); - spin_unlock(&alias->d_lock); - return alias; - } - } - spin_unlock(&alias->d_lock); - } - if (discon_alias) { - alias = discon_alias; - spin_lock(&alias->d_lock); - if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { + if (!d_unhashed(alias)) { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } spin_unlock(&alias->d_lock); - goto again; } return NULL; } @@ -1052,8 +1061,6 @@ static void shrink_dentry_list(struct list_head *list) while (!list_empty(list)) { struct dentry *dentry, *parent; - cond_resched(); - dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); rcu_read_lock(); @@ -1230,13 +1237,11 @@ enum d_walk_ret { * @parent: start of walk * @data: data passed to @enter() and @finish() * @enter: callback when first entering the dentry - * @finish: callback when successfully finished the walk * - * The @enter() and @finish() callbacks are called with d_lock held. + * The @enter() callbacks are called with d_lock held. */ static void d_walk(struct dentry *parent, void *data, - enum d_walk_ret (*enter)(void *, struct dentry *), - void (*finish)(void *)) + enum d_walk_ret (*enter)(void *, struct dentry *)) { struct dentry *this_parent; struct list_head *next; @@ -1325,8 +1330,6 @@ ascend: if (need_seqretry(&rename_lock, seq)) goto rename_retry; rcu_read_unlock(); - if (finish) - finish(data); out_unlock: spin_unlock(&this_parent->d_lock); @@ -1375,7 +1378,7 @@ int path_has_submounts(const struct path *parent) struct check_mount data = { .mnt = parent->mnt, .mounted = 0 }; read_seqlock_excl(&mount_lock); - d_walk(parent->dentry, &data, path_check_mount, NULL); + d_walk(parent->dentry, &data, path_check_mount); read_sequnlock_excl(&mount_lock); return data.mounted; @@ -1483,11 +1486,16 @@ void shrink_dcache_parent(struct dentry *parent) data.start = parent; data.found = 0; - d_walk(parent, &data, select_collect, NULL); + d_walk(parent, &data, select_collect); + + if (!list_empty(&data.dispose)) { + shrink_dentry_list(&data.dispose); + continue; + } + + cond_resched(); if (!data.found) break; - - shrink_dentry_list(&data.dispose); } } EXPORT_SYMBOL(shrink_dcache_parent); @@ -1518,7 +1526,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) static void do_one_tree(struct dentry *dentry) { shrink_dcache_parent(dentry); - d_walk(dentry, dentry, umount_check, NULL); + d_walk(dentry, dentry, umount_check); d_drop(dentry); dput(dentry); } @@ -1542,78 +1550,48 @@ void shrink_dcache_for_umount(struct super_block *sb) } } -struct detach_data { - struct select_data select; - struct dentry *mountpoint; -}; -static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry) +static enum d_walk_ret find_submount(void *_data, struct dentry *dentry) { - struct detach_data *data = _data; - + struct dentry **victim = _data; if (d_mountpoint(dentry)) { __dget_dlock(dentry); - data->mountpoint = dentry; + *victim = dentry; return D_WALK_QUIT; } - - return select_collect(&data->select, dentry); -} - -static void check_and_drop(void *_data) -{ - struct detach_data *data = _data; - - if (!data->mountpoint && list_empty(&data->select.dispose)) - __d_drop(data->select.start); + return D_WALK_CONTINUE; } /** * d_invalidate - detach submounts, prune dcache, and drop * @dentry: dentry to invalidate (aka detach, prune and drop) - * - * no dcache lock. - * - * The final d_drop is done as an atomic operation relative to - * rename_lock ensuring there are no races with d_set_mounted. This - * ensures there are no unhashed dentries on the path to a mountpoint. */ void d_invalidate(struct dentry *dentry) { - /* - * If it's already been dropped, return OK. - */ + bool had_submounts = false; spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); return; } + __d_drop(dentry); spin_unlock(&dentry->d_lock); /* Negative dentries can be dropped without further checks */ - if (!dentry->d_inode) { - d_drop(dentry); + if (!dentry->d_inode) return; - } + shrink_dcache_parent(dentry); for (;;) { - struct detach_data data; - - data.mountpoint = NULL; - INIT_LIST_HEAD(&data.select.dispose); - data.select.start = dentry; - data.select.found = 0; - - d_walk(dentry, &data, detach_and_collect, check_and_drop); - - if (!list_empty(&data.select.dispose)) - shrink_dentry_list(&data.select.dispose); - else if (!data.mountpoint) + struct dentry *victim = NULL; + d_walk(dentry, &victim, find_submount); + if (!victim) { + if (had_submounts) + shrink_dcache_parent(dentry); return; - - if (data.mountpoint) { - detach_mounts(data.mountpoint); - dput(data.mountpoint); } + had_submounts = true; + detach_mounts(victim); + dput(victim); } } EXPORT_SYMBOL(d_invalidate); @@ -1963,35 +1941,6 @@ struct dentry *d_make_root(struct inode *root_inode) } EXPORT_SYMBOL(d_make_root); -static struct dentry * __d_find_any_alias(struct inode *inode) -{ - struct dentry *alias; - - if (hlist_empty(&inode->i_dentry)) - return NULL; - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); - __dget(alias); - return alias; -} - -/** - * d_find_any_alias - find any alias for a given inode - * @inode: inode to find an alias for - * - * If any aliases exist for the given inode, take and return a - * reference for one of them. If no aliases exist, return %NULL. - */ -struct dentry *d_find_any_alias(struct inode *inode) -{ - struct dentry *de; - - spin_lock(&inode->i_lock); - de = __d_find_any_alias(inode); - spin_unlock(&inode->i_lock); - return de; -} -EXPORT_SYMBOL(d_find_any_alias); - static struct dentry *__d_instantiate_anon(struct dentry *dentry, struct inode *inode, bool disconnected) @@ -3134,7 +3083,7 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) void d_genocide(struct dentry *parent) { - d_walk(parent, parent, d_genocide_kill, NULL); + d_walk(parent, parent, d_genocide_kill); } EXPORT_SYMBOL(d_genocide); diff --git a/fs/direct-io.c b/fs/direct-io.c index 874607bb6e02..093fb54cd316 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -432,8 +432,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, struct bio *bio; /* - * bio_alloc() is guaranteed to return a bio when called with - * __GFP_RECLAIM and we request a valid number of vectors. + * bio_alloc() is guaranteed to return a bio when allowed to sleep and + * we request a valid number of vectors. */ bio = bio_alloc(GFP_KERNEL, nr_vecs); diff --git a/fs/eventfd.c b/fs/eventfd.c index 08d3bd602f73..61c9514da5e9 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -101,14 +101,20 @@ static int eventfd_release(struct inode *inode, struct file *file) return 0; } -static __poll_t eventfd_poll(struct file *file, poll_table *wait) +static struct wait_queue_head * +eventfd_get_poll_head(struct file *file, __poll_t events) +{ + struct eventfd_ctx *ctx = file->private_data; + + return &ctx->wqh; +} + +static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) { struct eventfd_ctx *ctx = file->private_data; __poll_t events = 0; u64 count; - poll_wait(file, &ctx->wqh, wait); - /* * All writes to ctx->count occur within ctx->wqh.lock. This read * can be done outside ctx->wqh.lock because we know that poll_wait @@ -305,7 +311,8 @@ static const struct file_operations eventfd_fops = { .show_fdinfo = eventfd_show_fdinfo, #endif .release = eventfd_release, - .poll = eventfd_poll, + .get_poll_head = eventfd_get_poll_head, + .poll_mask = eventfd_poll_mask, .read = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 602ca4285b2e..67db22fe99c5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -884,8 +884,7 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, pt->_key = epi->event.events; if (!is_file_epoll(epi->ffd.file)) - return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & - epi->event.events; + return vfs_poll(epi->ffd.file, pt) & epi->event.events; ep = epi->ffd.file->private_data; poll_wait(epi->ffd.file, &ep->poll_wait, pt); @@ -2025,7 +2024,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, /* The target file descriptor must support poll */ error = -EPERM; - if (!tf.file->f_op->poll) + if (!file_can_poll(tf.file)) goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 3c6a9c156b7a..ddbf87246898 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -790,7 +790,7 @@ int ore_create(struct ore_io_state *ios) for (i = 0; i < ios->oc->numdevs; i++) { struct osd_request *or; - or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, i)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -815,7 +815,7 @@ int ore_remove(struct ore_io_state *ios) for (i = 0; i < ios->oc->numdevs; i++) { struct osd_request *or; - or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, i)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -847,7 +847,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, dev)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -966,7 +966,7 @@ int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp) return 0; /* Just an empty slot */ first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; - or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, first_dev)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; @@ -1060,7 +1060,7 @@ static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, cur_comp)); if (unlikely(!or)) { ORE_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 179cd5c2f52a..719a3152da80 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -229,7 +229,7 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, u64 offset, void *p, unsigned length) { - struct osd_request *or = osd_start_request(od, GFP_KERNEL); + struct osd_request *or = osd_start_request(od); /* struct osd_sense_info osi = {.key = 0};*/ int ret; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a42e71203e53..229ea4da6785 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2390,7 +2390,7 @@ extern int ext4_init_inode_table(struct super_block *sb, extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); /* mballoc.c */ -extern const struct file_operations ext4_seq_mb_groups_fops; +extern const struct seq_operations ext4_mb_seq_groups_ops; extern long ext4_mb_stats; extern long ext4_mb_max_to_scan; extern int ext4_mb_init(struct super_block *); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 769a62708b1c..6884e81c1465 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2254,7 +2254,7 @@ out: static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { - struct super_block *sb = seq->private; + struct super_block *sb = PDE_DATA(file_inode(seq->file)); ext4_group_t group; if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) @@ -2265,7 +2265,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { - struct super_block *sb = seq->private; + struct super_block *sb = PDE_DATA(file_inode(seq->file)); ext4_group_t group; ++*pos; @@ -2277,7 +2277,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { - struct super_block *sb = seq->private; + struct super_block *sb = PDE_DATA(file_inode(seq->file)); ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; int err, buddy_loaded = 0; @@ -2330,34 +2330,13 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v) { } -static const struct seq_operations ext4_mb_seq_groups_ops = { +const struct seq_operations ext4_mb_seq_groups_ops = { .start = ext4_mb_seq_groups_start, .next = ext4_mb_seq_groups_next, .stop = ext4_mb_seq_groups_stop, .show = ext4_mb_seq_groups_show, }; -static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) -{ - struct super_block *sb = PDE_DATA(inode); - int rc; - - rc = seq_open(file, &ext4_mb_seq_groups_ops); - if (rc == 0) { - struct seq_file *m = file->private_data; - m->private = sb; - } - return rc; - -} - -const struct file_operations ext4_seq_mb_groups_fops = { - .open = ext4_mb_seq_groups_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) { int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 9ebd26c957c2..f34da0bb8f17 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -346,39 +346,9 @@ static struct kobject *ext4_root; static struct kobject *ext4_feat; -#define PROC_FILE_SHOW_DEFN(name) \ -static int name##_open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \ -} \ -\ -static const struct file_operations ext4_seq_##name##_fops = { \ - .open = name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -} - -#define PROC_FILE_LIST(name) \ - { __stringify(name), &ext4_seq_##name##_fops } - -PROC_FILE_SHOW_DEFN(es_shrinker_info); -PROC_FILE_SHOW_DEFN(options); - -static const struct ext4_proc_files { - const char *name; - const struct file_operations *fops; -} proc_files[] = { - PROC_FILE_LIST(options), - PROC_FILE_LIST(es_shrinker_info), - PROC_FILE_LIST(mb_groups), - { NULL, NULL }, -}; - int ext4_register_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - const struct ext4_proc_files *p; int err; init_completion(&sbi->s_kobj_unregister); @@ -392,11 +362,14 @@ int ext4_register_sysfs(struct super_block *sb) if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); - if (sbi->s_proc) { - for (p = proc_files; p->name; p++) - proc_create_data(p->name, S_IRUGO, sbi->s_proc, - p->fops, sb); + proc_create_single_data("options", S_IRUGO, sbi->s_proc, + ext4_seq_options_show, sb); + proc_create_single_data("es_shrinker_info", S_IRUGO, + sbi->s_proc, ext4_seq_es_shrinker_info_show, + sb); + proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_groups_ops, sb); } return 0; } @@ -404,13 +377,9 @@ int ext4_register_sysfs(struct super_block *sb) void ext4_unregister_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - const struct ext4_proc_files *p; - if (sbi->s_proc) { - for (p = proc_files; p->name; p++) - remove_proc_entry(p->name, sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } + if (sbi->s_proc) + remove_proc_subtree(sb->s_id, ext4_proc_root); kobject_del(&sbi->s_kobj); } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index f33a56d6e6dd..4b47ca6296a7 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -572,23 +572,6 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset) return 0; } -#define F2FS_PROC_FILE_DEF(_name) \ -static int _name##_open_fs(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ -} \ - \ -static const struct file_operations f2fs_seq_##_name##_fops = { \ - .open = _name##_open_fs, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -}; - -F2FS_PROC_FILE_DEF(segment_info); -F2FS_PROC_FILE_DEF(segment_bits); -F2FS_PROC_FILE_DEF(iostat_info); - int __init f2fs_init_sysfs(void) { int ret; @@ -632,12 +615,12 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); if (sbi->s_proc) { - proc_create_data("segment_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_info_fops, sb); - proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_bits_fops, sb); - proc_create_data("iostat_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_iostat_info_fops, sb); + proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc, + segment_info_seq_show, sb); + proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc, + segment_bits_seq_show, sb); + proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc, + iostat_info_seq_show, sb); } return 0; } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 582ca731a6c9..484ce674e0cd 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -314,10 +314,6 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) int err; mutex_lock(&MSDOS_SB(sb)->s_lock); - /* - * Check whether the directory is not in use, then check - * whether it is empty. - */ err = fat_dir_empty(inode); if (err) goto out; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 2649759c478a..4f4362d5a04c 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -697,15 +697,6 @@ static int vfat_find(struct inode *dir, const struct qstr *qname, return fat_search_long(dir, qname->name, len, sinfo); } -/* - * (nfsd's) anonymous disconnected dentry? - * NOTE: !IS_ROOT() is not anonymous (I.e. d_splice_alias() did the job). - */ -static int vfat_d_anon_disconn(struct dentry *dentry) -{ - return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED); -} - static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -738,8 +729,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, * Checking "alias->d_parent == dentry->d_parent" to make sure * FS is not corrupted (especially double linked dir). */ - if (alias && alias->d_parent == dentry->d_parent && - !vfat_d_anon_disconn(alias)) { + if (alias && alias->d_parent == dentry->d_parent) { /* * This inode has non anonymous-DCACHE_DISCONNECTED * dentry. This means, the user did ->lookup() by an @@ -747,7 +737,6 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, * * Switch to new one for reason of locality if possible. */ - BUG_ON(d_unhashed(alias)); if (!S_ISDIR(inode->i_mode)) d_move(alias, dentry); iput(inode); diff --git a/fs/fcntl.c b/fs/fcntl.c index d737ff082472..c42169459298 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -871,9 +871,9 @@ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) if (fa->fa_file != filp) continue; - spin_lock_irq(&fa->fa_lock); + write_lock_irq(&fa->fa_lock); fa->fa_file = NULL; - spin_unlock_irq(&fa->fa_lock); + write_unlock_irq(&fa->fa_lock); *fp = fa->fa_next; call_rcu(&fa->fa_rcu, fasync_free_rcu); @@ -918,13 +918,13 @@ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasy if (fa->fa_file != filp) continue; - spin_lock_irq(&fa->fa_lock); + write_lock_irq(&fa->fa_lock); fa->fa_fd = fd; - spin_unlock_irq(&fa->fa_lock); + write_unlock_irq(&fa->fa_lock); goto out; } - spin_lock_init(&new->fa_lock); + rwlock_init(&new->fa_lock); new->magic = FASYNC_MAGIC; new->fa_file = filp; new->fa_fd = fd; @@ -987,14 +987,13 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; - unsigned long flags; if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } - spin_lock_irqsave(&fa->fa_lock, flags); + read_lock(&fa->fa_lock); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a @@ -1003,7 +1002,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } - spin_unlock_irqrestore(&fa->fa_lock, flags); + read_unlock(&fa->fa_lock); fa = rcu_dereference(fa->fa_next); } } diff --git a/fs/filesystems.c b/fs/filesystems.c index f2728a4a03a1..b03f57b1105b 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -238,21 +238,9 @@ static int filesystems_proc_show(struct seq_file *m, void *v) return 0; } -static int filesystems_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, filesystems_proc_show, NULL); -} - -static const struct file_operations filesystems_proc_fops = { - .open = filesystems_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_filesystems_init(void) { - proc_create("filesystems", 0, NULL, &filesystems_proc_fops); + proc_create_single("filesystems", 0, NULL, filesystems_proc_show); return 0; } module_init(proc_filesystems_init); diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index ce4785fd81c6..a51425634f65 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -193,13 +193,9 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags) return ERR_PTR(-ENAMETOOLONG); ino = vxfs_inode_by_name(dip, dp); - if (ino) { + if (ino) ip = vxfs_iget(dip->i_sb, ino); - if (IS_ERR(ip)) - return ERR_CAST(ip); - } - d_add(dp, ip); - return NULL; + return d_splice_alias(ip, dp); } /** diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c index 15a3d042247e..9a13e9e15b69 100644 --- a/fs/fscache/histogram.c +++ b/fs/fscache/histogram.c @@ -83,24 +83,9 @@ static void fscache_histogram_stop(struct seq_file *m, void *v) { } -static const struct seq_operations fscache_histogram_ops = { +const struct seq_operations fscache_histogram_ops = { .start = fscache_histogram_start, .stop = fscache_histogram_stop, .next = fscache_histogram_next, .show = fscache_histogram_show, }; - -/* - * open "/proc/fs/fscache/histogram" to provide latency data - */ -static int fscache_histogram_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &fscache_histogram_ops); -} - -const struct file_operations fscache_histogram_fops = { - .open = fscache_histogram_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 500650f938fe..f83328a7f048 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -31,6 +31,7 @@ #include <linux/fscache-cache.h> #include <trace/events/fscache.h> #include <linux/sched.h> +#include <linux/seq_file.h> #define FSCACHE_MIN_THREADS 4 #define FSCACHE_MAX_THREADS 32 @@ -84,7 +85,7 @@ static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif) atomic_inc(&histogram[jif]); } -extern const struct file_operations fscache_histogram_fops; +extern const struct seq_operations fscache_histogram_ops; #else #define fscache_hist(hist, start_jif) do {} while (0) @@ -294,7 +295,7 @@ static inline void fscache_stat_d(atomic_t *stat) #define __fscache_stat(stat) (stat) -extern const struct file_operations fscache_stats_fops; +int fscache_stats_show(struct seq_file *m, void *v); #else #define __fscache_stat(stat) (NULL) diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c index 1d9e4951a597..49a8c90414bc 100644 --- a/fs/fscache/proc.c +++ b/fs/fscache/proc.c @@ -26,14 +26,14 @@ int __init fscache_proc_init(void) goto error_dir; #ifdef CONFIG_FSCACHE_STATS - if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL, - &fscache_stats_fops)) + if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL, + fscache_stats_show)) goto error_stats; #endif #ifdef CONFIG_FSCACHE_HISTOGRAM - if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL, - &fscache_histogram_fops)) + if (!proc_create_seq("fs/fscache/histogram", S_IFREG | 0444, NULL, + &fscache_histogram_ops)) goto error_histogram; #endif diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index fcc8c2f2690e..00564a1dfd76 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -138,7 +138,7 @@ atomic_t fscache_n_cache_culled_objects; /* * display the general statistics */ -static int fscache_stats_show(struct seq_file *m, void *v) +int fscache_stats_show(struct seq_file *m, void *v) { seq_puts(m, "FS-Cache statistics\n"); @@ -284,18 +284,3 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cache_culled_objects)); return 0; } - -/* - * open "/proc/fs/fscache/stats" allowing provision of a statistical summary - */ -static int fscache_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, fscache_stats_show, NULL); -} - -const struct file_operations fscache_stats_fops = { - .open = fscache_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 75b254280ff6..3bf2ae0e467c 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -31,21 +31,15 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); res = hfs_brec_read(&fd, &rec, sizeof(rec)); if (res) { - hfs_find_exit(&fd); - if (res == -ENOENT) { - /* No such entry */ - inode = NULL; - goto done; - } - return ERR_PTR(res); + if (res != -ENOENT) + inode = ERR_PTR(res); + } else { + inode = hfs_iget(dir->i_sb, &fd.search_key->cat, &rec); + if (!inode) + inode = ERR_PTR(-EACCES); } - inode = hfs_iget(dir->i_sb, &fd.search_key->cat, &rec); hfs_find_exit(&fd); - if (!inode) - return ERR_PTR(-EACCES); -done: - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } /* diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 2538b49cc349..b3309b83371a 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -543,9 +543,9 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, igrab(dir); hlist_add_fake(&inode->i_hash); mark_inode_dirty(inode); + dont_mount(dentry); out: - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } void hfs_evict_inode(struct inode *inode) diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 15e06fb552da..b5254378f011 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -122,8 +122,7 @@ again: if (S_ISREG(inode->i_mode)) HFSPLUS_I(inode)->linkid = linkid; out: - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); fail: hfs_find_exit(&fd); return ERR_PTR(err); diff --git a/fs/inode.c b/fs/inode.c index 13ceb98c3bd3..3b55391072f3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -178,6 +178,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; + mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; diff --git a/fs/internal.h b/fs/internal.h index e08972db0303..980d005b21b4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -125,6 +125,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); +extern int open_check_o_direct(struct file *f); extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file *filp_clone_open(struct file *); diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index a70907606025..35a5b2a81ae0 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -29,7 +29,6 @@ #ifdef PROC_FS_JFS /* see jfs_debug.h */ -static struct proc_dir_entry *base; #ifdef CONFIG_JFS_DEBUG static int jfs_loglevel_proc_show(struct seq_file *m, void *v) { @@ -66,43 +65,29 @@ static const struct file_operations jfs_loglevel_proc_fops = { }; #endif -static struct { - const char *name; - const struct file_operations *proc_fops; -} Entries[] = { -#ifdef CONFIG_JFS_STATISTICS - { "lmstats", &jfs_lmstats_proc_fops, }, - { "txstats", &jfs_txstats_proc_fops, }, - { "xtstat", &jfs_xtstat_proc_fops, }, - { "mpstat", &jfs_mpstat_proc_fops, }, -#endif -#ifdef CONFIG_JFS_DEBUG - { "TxAnchor", &jfs_txanchor_proc_fops, }, - { "loglevel", &jfs_loglevel_proc_fops } -#endif -}; -#define NPROCENT ARRAY_SIZE(Entries) - void jfs_proc_init(void) { - int i; + struct proc_dir_entry *base; - if (!(base = proc_mkdir("fs/jfs", NULL))) + base = proc_mkdir("fs/jfs", NULL); + if (!base) return; - for (i = 0; i < NPROCENT; i++) - proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); +#ifdef CONFIG_JFS_STATISTICS + proc_create_single("lmstats", 0, base, jfs_lmstats_proc_show); + proc_create_single("txstats", 0, base, jfs_txstats_proc_show); + proc_create_single("xtstat", 0, base, jfs_xtstat_proc_show); + proc_create_single("mpstat", 0, base, jfs_mpstat_proc_show); +#endif +#ifdef CONFIG_JFS_DEBUG + proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show); + proc_create("loglevel", 0, base, &jfs_loglevel_proc_fops); +#endif } void jfs_proc_clean(void) { - int i; - - if (base) { - for (i = 0; i < NPROCENT; i++) - remove_proc_entry(Entries[i].name, base); - remove_proc_entry("fs/jfs", NULL); - } + remove_proc_subtree("fs/jfs", NULL); } #endif /* PROC_FS_JFS */ diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index eafd1300a00b..0d9e35da8462 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h @@ -62,7 +62,7 @@ extern void jfs_proc_clean(void); extern int jfsloglevel; -extern const struct file_operations jfs_txanchor_proc_fops; +int jfs_txanchor_proc_show(struct seq_file *m, void *v); /* information message: e.g., configuration, major event */ #define jfs_info(fmt, arg...) do { \ @@ -105,10 +105,10 @@ extern const struct file_operations jfs_txanchor_proc_fops; * ---------- */ #ifdef CONFIG_JFS_STATISTICS -extern const struct file_operations jfs_lmstats_proc_fops; -extern const struct file_operations jfs_txstats_proc_fops; -extern const struct file_operations jfs_mpstat_proc_fops; -extern const struct file_operations jfs_xtstat_proc_fops; +int jfs_lmstats_proc_show(struct seq_file *m, void *v); +int jfs_txstats_proc_show(struct seq_file *m, void *v); +int jfs_mpstat_proc_show(struct seq_file *m, void *v); +int jfs_xtstat_proc_show(struct seq_file *m, void *v); #define INCREMENT(x) ((x)++) #define DECREMENT(x) ((x)--) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 0e5d412c0b01..6b68df395892 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2493,7 +2493,7 @@ exit: } #ifdef CONFIG_JFS_STATISTICS -static int jfs_lmstats_proc_show(struct seq_file *m, void *v) +int jfs_lmstats_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS Logmgr stats\n" @@ -2510,16 +2510,4 @@ static int jfs_lmstats_proc_show(struct seq_file *m, void *v) lmStat.partial_page); return 0; } - -static int jfs_lmstats_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_lmstats_proc_show, NULL); -} - -const struct file_operations jfs_lmstats_proc_fops = { - .open = jfs_lmstats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_JFS_STATISTICS */ diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 1a3b0cc22ad3..fa2c6824c7f2 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -815,7 +815,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) } #ifdef CONFIG_JFS_STATISTICS -static int jfs_mpstat_proc_show(struct seq_file *m, void *v) +int jfs_mpstat_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS Metapage statistics\n" @@ -828,16 +828,4 @@ static int jfs_mpstat_proc_show(struct seq_file *m, void *v) mpStat.lockwait); return 0; } - -static int jfs_mpstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_mpstat_proc_show, NULL); -} - -const struct file_operations jfs_mpstat_proc_fops = { - .open = jfs_mpstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 4d973524c887..a5663cb621d8 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2998,7 +2998,7 @@ int jfs_sync(void *arg) } #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) -static int jfs_txanchor_proc_show(struct seq_file *m, void *v) +int jfs_txanchor_proc_show(struct seq_file *m, void *v) { char *freewait; char *freelockwait; @@ -3032,22 +3032,10 @@ static int jfs_txanchor_proc_show(struct seq_file *m, void *v) list_empty(&TxAnchor.unlock_queue) ? "" : "not "); return 0; } - -static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_txanchor_proc_show, NULL); -} - -const struct file_operations jfs_txanchor_proc_fops = { - .open = jfs_txanchor_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) -static int jfs_txstats_proc_show(struct seq_file *m, void *v) +int jfs_txstats_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS TxStats\n" @@ -3072,16 +3060,4 @@ static int jfs_txstats_proc_show(struct seq_file *m, void *v) TxStat.txLockAlloc_freelock); return 0; } - -static int jfs_txstats_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_txstats_proc_show, NULL); -} - -const struct file_operations jfs_txstats_proc_fops = { - .open = jfs_txstats_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 5cde6d2fcfca..2c200b5256a6 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -3874,7 +3874,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) } #ifdef CONFIG_JFS_STATISTICS -static int jfs_xtstat_proc_show(struct seq_file *m, void *v) +int jfs_xtstat_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS Xtree statistics\n" @@ -3887,16 +3887,4 @@ static int jfs_xtstat_proc_show(struct seq_file *m, void *v) xtStat.split); return 0; } - -static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, jfs_xtstat_proc_show, NULL); -} - -const struct file_operations jfs_xtstat_proc_fops = { - .open = jfs_xtstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif diff --git a/fs/locks.c b/fs/locks.c index 62bbe8b31f26..05e211be8684 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2788,22 +2788,10 @@ static const struct seq_operations locks_seq_operations = { .show = locks_show, }; -static int locks_open(struct inode *inode, struct file *filp) -{ - return seq_open_private(filp, &locks_seq_operations, - sizeof(struct locks_iterator)); -} - -static const struct file_operations proc_locks_operations = { - .open = locks_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static int __init proc_locks_init(void) { - proc_create("locks", 0, NULL, &proc_locks_operations); + proc_create_seq_private("locks", 0, NULL, &locks_seq_operations, + sizeof(struct locks_iterator), NULL); return 0; } fs_initcall(proc_locks_init); diff --git a/fs/minix/namei.c b/fs/minix/namei.c index ccf0f00030bf..1a6084d2b02e 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -28,13 +28,9 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, un return ERR_PTR(-ENAMETOOLONG); ino = minix_inode_by_name(dentry); - if (ino) { + if (ino) inode = minix_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - } - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) diff --git a/fs/namei.c b/fs/namei.c index 186bd2464fd5..a59968de1636 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1438,10 +1438,8 @@ static int path_parent_directory(struct path *path) static int follow_dotdot(struct nameidata *nd) { while(1) { - if (nd->path.dentry == nd->root.dentry && - nd->path.mnt == nd->root.mnt) { + if (path_equal(&nd->path, &nd->root)) break; - } if (nd->path.dentry != nd->path.mnt->mnt_root) { int ret = path_parent_directory(&nd->path); if (ret) @@ -3367,7 +3365,9 @@ finish_open_created: goto out; *opened |= FILE_OPENED; opened: - error = ima_file_check(file, op->acc_mode, *opened); + error = open_check_o_direct(file); + if (!error) + error = ima_file_check(file, op->acc_mode, *opened); if (!error && will_truncate) error = handle_truncate(file); out: @@ -3447,6 +3447,9 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, error = finish_open(file, child, NULL, opened); if (error) goto out2; + error = open_check_o_direct(file); + if (error) + fput(file); out2: mnt_drop_write(path.mnt); out: @@ -3847,11 +3850,11 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (error) goto out; - shrink_dcache_parent(dentry); error = dir->i_op->rmdir(dir, dentry); if (error) goto out; + shrink_dcache_parent(dentry); dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); @@ -4434,8 +4437,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir->i_nlink >= max_links) goto out; } - if (is_dir && !(flags & RENAME_EXCHANGE) && target) - shrink_dcache_parent(new_dentry); if (!is_dir) { error = try_break_deleg(source, delegated_inode); if (error) @@ -4452,8 +4453,10 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; if (!(flags & RENAME_EXCHANGE) && target) { - if (is_dir) + if (is_dir) { + shrink_dcache_parent(new_dentry); target->i_flags |= S_DEAD; + } dont_mount(new_dentry); detach_mounts(new_dentry); } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b9129e2befea..bbc91d7ca1bd 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1067,7 +1067,6 @@ void nfs_clients_init(struct net *net) } #ifdef CONFIG_PROC_FS -static int nfs_server_list_open(struct inode *inode, struct file *file); static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); static void nfs_server_list_stop(struct seq_file *p, void *v); @@ -1080,14 +1079,6 @@ static const struct seq_operations nfs_server_list_ops = { .show = nfs_server_list_show, }; -static const struct file_operations nfs_server_list_fops = { - .open = nfs_server_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static int nfs_volume_list_open(struct inode *inode, struct file *file); static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos); static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); static void nfs_volume_list_stop(struct seq_file *p, void *v); @@ -1100,23 +1091,6 @@ static const struct seq_operations nfs_volume_list_ops = { .show = nfs_volume_list_show, }; -static const struct file_operations nfs_volume_list_fops = { - .open = nfs_volume_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -/* - * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which - * we're dealing - */ -static int nfs_server_list_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &nfs_server_list_ops, - sizeof(struct seq_net_private)); -} - /* * set up the iterator to start reading from the server list and return the first item */ @@ -1185,15 +1159,6 @@ static int nfs_server_list_show(struct seq_file *m, void *v) } /* - * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes - */ -static int nfs_volume_list_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &nfs_volume_list_ops, - sizeof(struct seq_net_private)); -} - -/* * set up the iterator to start reading from the volume list and return the first item */ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) @@ -1278,14 +1243,14 @@ int nfs_fs_proc_net_init(struct net *net) goto error_0; /* a file of servers with which we're dealing */ - p = proc_create("servers", S_IFREG|S_IRUGO, - nn->proc_nfsfs, &nfs_server_list_fops); + p = proc_create_net("servers", S_IFREG|S_IRUGO, nn->proc_nfsfs, + &nfs_server_list_ops, sizeof(struct seq_net_private)); if (!p) goto error_1; /* a file of volumes that we have mounted */ - p = proc_create("volumes", S_IFREG|S_IRUGO, - nn->proc_nfsfs, &nfs_volume_list_fops); + p = proc_create_net("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs, + &nfs_volume_list_ops, sizeof(struct seq_net_private)); if (!p) goto error_1; return 0; diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 70b8bf781fce..a43dfedd69ec 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -227,7 +227,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, if (!buf) return -ENOMEM; - rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); + rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); if (IS_ERR(rq)) { error = -ENOMEM; goto out_free_buf; diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index b7146526afff..4bee3a72b9f3 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -305,11 +305,10 @@ static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry, ino_t ino = be64_to_cpu(oi->i_head.h_self); brelse(bh); inode = omfs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + } else if (bh != ERR_PTR(-ENOENT)) { + inode = ERR_CAST(bh); } - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } /* sanity check block's self pointer */ diff --git a/fs/open.c b/fs/open.c index c5ee7cd60424..d0e955b558ad 100644 --- a/fs/open.c +++ b/fs/open.c @@ -724,6 +724,16 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) return ksys_fchown(fd, user, group); } +int open_check_o_direct(struct file *f) +{ + /* NB: we're sure to have correct a_ops only after f_op->open */ + if (f->f_flags & O_DIRECT) { + if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) + return -EINVAL; + } + return 0; +} + static int do_dentry_open(struct file *f, struct inode *inode, int (*open)(struct inode *, struct file *), @@ -745,7 +755,7 @@ static int do_dentry_open(struct file *f, if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH; f->f_op = &empty_fops; - goto done; + return 0; } if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { @@ -798,12 +808,7 @@ static int do_dentry_open(struct file *f, f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); -done: - /* NB: we're sure to have correct a_ops only after f_op->open */ - error = -EINVAL; - if ((f->f_flags & O_DIRECT) && - (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)) - goto out_fput; + return 0; cleanup_all: @@ -818,9 +823,6 @@ cleanup_file: f->f_path.dentry = NULL; f->f_inode = NULL; return error; -out_fput: - fput(f); - return error; } /** @@ -918,14 +920,20 @@ struct file *dentry_open(const struct path *path, int flags, BUG_ON(!path->mnt); f = get_empty_filp(); - if (IS_ERR(f)) - return f; - - f->f_flags = flags; - error = vfs_open(path, f, cred); - if (error) { - put_filp(f); - return ERR_PTR(error); + if (!IS_ERR(f)) { + f->f_flags = flags; + error = vfs_open(path, f, cred); + if (!error) { + /* from now on we need fput() to dispose of f */ + error = open_check_o_direct(f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } else { + put_filp(f); + f = ERR_PTR(error); + } } return f; } diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 2200662a9bf1..607092f367ad 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -256,8 +256,7 @@ found: break; } - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static int openpromfs_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 1b5707c44c3f..365cd73d9109 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -110,7 +110,6 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; struct inode *inode; - struct dentry *res; int ret = -EINVAL; /* @@ -158,65 +157,18 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, new_op->downcall.resp.lookup.refn.fs_id, ret); - if (ret < 0) { - if (ret == -ENOENT) { - /* - * if no inode was found, add a negative dentry to - * dcache anyway; if we don't, we don't hold expected - * lookup semantics and we most noticeably break - * during directory renames. - * - * however, if the operation failed or exited, do not - * add the dentry (e.g. in the case that a touch is - * issued on a file that already exists that was - * interrupted during this lookup -- no need to add - * another negative dentry for an existing file) - */ - - gossip_debug(GOSSIP_NAME_DEBUG, - "orangefs_lookup: Adding *negative* dentry " - "%p for %pd\n", - dentry, - dentry); - - d_add(dentry, NULL); - res = NULL; - goto out; - } - + if (ret >= 0) { + orangefs_set_timeout(dentry); + inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn); + } else if (ret == -ENOENT) { + inode = NULL; + } else { /* must be a non-recoverable error */ - res = ERR_PTR(ret); - goto out; - } - - orangefs_set_timeout(dentry); - - inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn); - if (IS_ERR(inode)) { - gossip_debug(GOSSIP_NAME_DEBUG, - "error %ld from iget\n", PTR_ERR(inode)); - res = ERR_CAST(inode); - goto out; + inode = ERR_PTR(ret); } - gossip_debug(GOSSIP_NAME_DEBUG, - "%s:%s:%d " - "Found good inode [%lu] with count [%d]\n", - __FILE__, - __func__, - __LINE__, - inode->i_ino, - (int)atomic_read(&inode->i_count)); - - /* update dentry/inode pair into dcache */ - res = d_splice_alias(inode, dentry); - - gossip_debug(GOSSIP_NAME_DEBUG, - "Lookup success (inode ct = %d)\n", - (int)atomic_read(&inode->i_count)); -out: op_release(new_op); - return res; + return d_splice_alias(inode, dentry); } /* return 0 on success; non-zero otherwise */ diff --git a/fs/pipe.c b/fs/pipe.c index 39d6f431da83..bb0840e234f3 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -509,19 +509,22 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } -/* No kernel lock held - fine */ -static __poll_t -pipe_poll(struct file *filp, poll_table *wait) +static struct wait_queue_head * +pipe_get_poll_head(struct file *filp, __poll_t events) { - __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; - int nrbufs; - poll_wait(filp, &pipe->wait, wait); + return &pipe->wait; +} + +/* No kernel lock held - fine */ +static __poll_t pipe_poll_mask(struct file *filp, __poll_t events) +{ + struct pipe_inode_info *pipe = filp->private_data; + int nrbufs = pipe->nrbufs; + __poll_t mask = 0; /* Reading only -- no need for acquiring the semaphore. */ - nrbufs = pipe->nrbufs; - mask = 0; if (filp->f_mode & FMODE_READ) { mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0; if (!pipe->writers && filp->f_version != pipe->w_counter) @@ -1020,7 +1023,8 @@ const struct file_operations pipefifo_fops = { .llseek = no_llseek, .read_iter = pipe_read, .write_iter = pipe_write, - .poll = pipe_poll, + .get_poll_head = pipe_get_poll_head, + .poll_mask = pipe_poll_mask, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, diff --git a/fs/proc/array.c b/fs/proc/array.c index 72391b3f6927..e6d7f41b6684 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -702,25 +702,22 @@ out: static int children_seq_show(struct seq_file *seq, void *v) { - struct inode *inode = seq->private; - pid_t pid; - - pid = pid_nr_ns(v, inode->i_sb->s_fs_info); - seq_printf(seq, "%d ", pid); + struct inode *inode = file_inode(seq->file); + seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode))); return 0; } static void *children_seq_start(struct seq_file *seq, loff_t *pos) { - return get_children_pid(seq->private, NULL, *pos); + return get_children_pid(file_inode(seq->file), NULL, *pos); } static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pid *pid; - pid = get_children_pid(seq->private, v, *pos + 1); + pid = get_children_pid(file_inode(seq->file), v, *pos + 1); put_pid(v); ++*pos; @@ -741,17 +738,7 @@ static const struct seq_operations children_seq_ops = { static int children_seq_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - - ret = seq_open(file, &children_seq_ops); - if (ret) - return ret; - - m = file->private_data; - m->private = inode; - - return ret; + return seq_open(file, &children_seq_ops); } const struct file_operations proc_tid_children_operations = { diff --git a/fs/proc/base.c b/fs/proc/base.c index 1a76d751cf3c..33ed1746927a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -698,7 +698,7 @@ static bool has_pid_permissions(struct pid_namespace *pid, static int proc_pid_permission(struct inode *inode, int mask) { - struct pid_namespace *pid = inode->i_sb->s_fs_info; + struct pid_namespace *pid = proc_pid_ns(inode); struct task_struct *task; bool has_perms; @@ -733,13 +733,11 @@ static const struct inode_operations proc_def_inode_operations = { static int proc_single_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct pid_namespace *ns; - struct pid *pid; + struct pid_namespace *ns = proc_pid_ns(inode); + struct pid *pid = proc_pid(inode); struct task_struct *task; int ret; - ns = inode->i_sb->s_fs_info; - pid = proc_pid(inode); task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; @@ -1410,7 +1408,7 @@ static const struct file_operations proc_fail_nth_operations = { static int sched_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); struct task_struct *p; p = get_proc_task(inode); @@ -1782,8 +1780,8 @@ int pid_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); + struct pid_namespace *pid = proc_pid_ns(inode); struct task_struct *task; - struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -1809,15 +1807,22 @@ int pid_getattr(const struct path *path, struct kstat *stat, /* dentry stuff */ /* - * Exceptional case: normally we are not allowed to unhash a busy - * directory. In this case, however, we can do it - no aliasing problems - * due to the way we treat inodes. - * + * Set <pid>/... inode ownership (can change due to setuid(), etc.) + */ +void pid_update_inode(struct task_struct *task, struct inode *inode) +{ + task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); + + inode->i_mode &= ~(S_ISUID | S_ISGID); + security_task_to_inode(task, inode); +} + +/* * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. * */ -int pid_revalidate(struct dentry *dentry, unsigned int flags) +static int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; @@ -1829,10 +1834,7 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) task = get_proc_task(inode); if (task) { - task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); - - inode->i_mode &= ~(S_ISUID | S_ISGID); - security_task_to_inode(task, inode); + pid_update_inode(task, inode); put_task_struct(task); return 1; } @@ -1880,8 +1882,8 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, struct dentry *child, *dir = file->f_path.dentry; struct qstr qname = QSTR_INIT(name, len); struct inode *inode; - unsigned type; - ino_t ino; + unsigned type = DT_UNKNOWN; + ino_t ino = 1; child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -1890,22 +1892,23 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, if (IS_ERR(child)) goto end_instantiate; if (d_in_lookup(child)) { - int err = instantiate(d_inode(dir), child, task, ptr); + struct dentry *res; + res = instantiate(child, task, ptr); d_lookup_done(child); - if (err < 0) { - dput(child); + if (IS_ERR(res)) goto end_instantiate; + if (unlikely(res)) { + dput(child); + child = res; } } } inode = d_inode(child); ino = inode->i_ino; type = inode->i_mode >> 12; +end_instantiate: dput(child); return dir_emit(ctx, name, len, ino, type); - -end_instantiate: - return dir_emit(ctx, name, len, 1, DT_UNKNOWN); } /* @@ -2067,19 +2070,19 @@ static const struct inode_operations proc_map_files_link_inode_operations = { .setattr = proc_setattr, }; -static int -proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, +static struct dentry * +proc_map_files_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { fmode_t mode = (fmode_t)(unsigned long)ptr; struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | ((mode & FMODE_READ ) ? S_IRUSR : 0) | ((mode & FMODE_WRITE) ? S_IWUSR : 0)); if (!inode) - return -ENOENT; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); ei->op.proc_get_link = map_files_get_link; @@ -2088,9 +2091,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, inode->i_size = 64; d_set_d_op(dentry, &tid_map_files_dentry_operations); - d_add(dentry, inode); - - return 0; + return d_splice_alias(inode, dentry); } static struct dentry *proc_map_files_lookup(struct inode *dir, @@ -2099,19 +2100,19 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; - int result; + struct dentry *result; struct mm_struct *mm; - result = -ENOENT; + result = ERR_PTR(-ENOENT); task = get_proc_task(dir); if (!task) goto out; - result = -EACCES; + result = ERR_PTR(-EACCES); if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; - result = -ENOENT; + result = ERR_PTR(-ENOENT); if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) goto out_put_task; @@ -2125,7 +2126,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, goto out_no_vma; if (vma->vm_file) - result = proc_map_files_instantiate(dir, dentry, task, + result = proc_map_files_instantiate(dentry, task, (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: @@ -2134,7 +2135,7 @@ out_no_vma: out_put_task: put_task_struct(task); out: - return ERR_PTR(result); + return result; } static const struct inode_operations proc_map_files_inode_operations = { @@ -2337,7 +2338,7 @@ static int proc_timers_open(struct inode *inode, struct file *file) return -ENOMEM; tp->pid = proc_pid(inode); - tp->ns = inode->i_sb->s_fs_info; + tp->ns = proc_pid_ns(inode); return 0; } @@ -2435,16 +2436,16 @@ static const struct file_operations proc_pid_set_timerslack_ns_operations = { .release = single_release, }; -static int proc_pident_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static struct dentry *proc_pident_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; - inode = proc_pid_make_inode(dir->i_sb, task, p->mode); + inode = proc_pid_make_inode(dentry->d_sb, task, p->mode); if (!inode) - goto out; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); if (S_ISDIR(inode->i_mode)) @@ -2454,13 +2455,9 @@ static int proc_pident_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; + pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, 0)) - return 0; -out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static struct dentry *proc_pident_lookup(struct inode *dir, @@ -2468,11 +2465,9 @@ static struct dentry *proc_pident_lookup(struct inode *dir, const struct pid_entry *ents, unsigned int nents) { - int error; struct task_struct *task = get_proc_task(dir); const struct pid_entry *p, *last; - - error = -ENOENT; + struct dentry *res = ERR_PTR(-ENOENT); if (!task) goto out_no_task; @@ -2491,11 +2486,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, if (p >= last) goto out; - error = proc_pident_instantiate(dir, dentry, task, p); + res = proc_pident_instantiate(dentry, task, p); out: put_task_struct(task); out_no_task: - return ERR_PTR(error); + return res; } static int proc_pident_readdir(struct file *file, struct dir_context *ctx, @@ -3138,38 +3133,32 @@ void proc_flush_task(struct task_struct *task) } } -static int proc_pid_instantiate(struct inode *dir, - struct dentry * dentry, +static struct dentry *proc_pid_instantiate(struct dentry * dentry, struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) - goto out; + return ERR_PTR(-ENOENT); inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; set_nlink(inode, nlink_tgid); + pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); - - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, 0)) - return 0; -out: - return -ENOENT; + return d_splice_alias(inode, dentry); } struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - int result = -ENOENT; struct task_struct *task; unsigned tgid; struct pid_namespace *ns; + struct dentry *result = ERR_PTR(-ENOENT); tgid = name_to_int(&dentry->d_name); if (tgid == ~0U) @@ -3184,10 +3173,10 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign if (!task) goto out; - result = proc_pid_instantiate(dir, dentry, task, NULL); + result = proc_pid_instantiate(dentry, task, NULL); put_task_struct(task); out: - return ERR_PTR(result); + return result; } /* @@ -3239,7 +3228,7 @@ retry: int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; - struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(file_inode(file)); loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) @@ -3435,37 +3424,32 @@ static const struct inode_operations proc_tid_base_inode_operations = { .setattr = proc_setattr, }; -static int proc_task_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static struct dentry *proc_task_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); - + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) - goto out; + return ERR_PTR(-ENOENT); + inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; - inode->i_flags|=S_IMMUTABLE; + inode->i_flags |= S_IMMUTABLE; set_nlink(inode, nlink_tid); + pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); - - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, 0)) - return 0; -out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - int result = -ENOENT; struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; struct pid_namespace *ns; + struct dentry *result = ERR_PTR(-ENOENT); if (!leader) goto out_no_task; @@ -3485,13 +3469,13 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (!same_thread_group(leader, task)) goto out_drop_task; - result = proc_task_instantiate(dir, dentry, task, NULL); + result = proc_task_instantiate(dentry, task, NULL); out_drop_task: put_task_struct(task); out: put_task_struct(leader); out_no_task: - return ERR_PTR(result); + return result; } /* @@ -3588,7 +3572,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = inode->i_sb->s_fs_info; + ns = proc_pid_ns(inode); tid = (int)file->f_version; file->f_version = 0; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 8233e7af9389..fa762c5fbcb2 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -11,21 +11,9 @@ static int cmdline_proc_show(struct seq_file *m, void *v) return 0; } -static int cmdline_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, cmdline_proc_show, NULL); -} - -static const struct file_operations cmdline_proc_fops = { - .open = cmdline_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_cmdline_init(void) { - proc_create("cmdline", 0, NULL, &cmdline_proc_fops); + proc_create_single("cmdline", 0, NULL, cmdline_proc_show); return 0; } fs_initcall(proc_cmdline_init); diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index a8ac48aebd59..954caf0b7fee 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -91,21 +91,9 @@ static const struct seq_operations consoles_op = { .show = show_console_dev }; -static int consoles_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &consoles_op); -} - -static const struct file_operations proc_consoles_operations = { - .open = consoles_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_consoles_init(void) { - proc_create("consoles", 0, NULL, &proc_consoles_operations); + proc_create_seq("consoles", 0, NULL, &consoles_op); return 0; } fs_initcall(proc_consoles_init); diff --git a/fs/proc/devices.c b/fs/proc/devices.c index 2c7f22b14489..37d38697eaf8 100644 --- a/fs/proc/devices.c +++ b/fs/proc/devices.c @@ -51,21 +51,9 @@ static const struct seq_operations devinfo_ops = { .show = devinfo_show }; -static int devinfo_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &devinfo_ops); -} - -static const struct file_operations proc_devinfo_operations = { - .open = devinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_devices_init(void) { - proc_create("devices", 0, NULL, &proc_devinfo_operations); + proc_create_seq("devices", 0, NULL, &devinfo_ops); return 0; } fs_initcall(proc_devices_init); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 6b80cd1e419a..05b9893e9a22 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -81,9 +81,41 @@ static const struct file_operations proc_fdinfo_file_operations = { .release = single_release, }; +static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode) +{ + struct files_struct *files = get_files_struct(task); + struct file *file; + + if (!files) + return false; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) + *mode = file->f_mode; + rcu_read_unlock(); + put_files_struct(files); + return !!file; +} + +static void tid_fd_update_inode(struct task_struct *task, struct inode *inode, + fmode_t f_mode) +{ + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); + + if (S_ISLNK(inode->i_mode)) { + unsigned i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; + } + security_task_to_inode(task, inode); +} + static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) { - struct files_struct *files; struct task_struct *task; struct inode *inode; unsigned int fd; @@ -96,35 +128,11 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) fd = proc_fd(inode); if (task) { - files = get_files_struct(task); - if (files) { - struct file *file; - - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - unsigned f_mode = file->f_mode; - - rcu_read_unlock(); - put_files_struct(files); - - task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); - - if (S_ISLNK(inode->i_mode)) { - unsigned i_mode = S_IFLNK; - if (f_mode & FMODE_READ) - i_mode |= S_IRUSR | S_IXUSR; - if (f_mode & FMODE_WRITE) - i_mode |= S_IWUSR | S_IXUSR; - inode->i_mode = i_mode; - } - - security_task_to_inode(task, inode); - put_task_struct(task); - return 1; - } - rcu_read_unlock(); - put_files_struct(files); + fmode_t f_mode; + if (tid_fd_mode(task, fd, &f_mode)) { + tid_fd_update_inode(task, inode, f_mode); + put_task_struct(task); + return 1; } put_task_struct(task); } @@ -166,34 +174,33 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) return ret; } -static int -proc_fd_instantiate(struct inode *dir, struct dentry *dentry, - struct task_struct *task, const void *ptr) +struct fd_data { + fmode_t mode; + unsigned fd; +}; + +static struct dentry *proc_fd_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { - unsigned fd = (unsigned long)ptr; + const struct fd_data *data = ptr; struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK); + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK); if (!inode) - goto out; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); - ei->fd = fd; + ei->fd = data->fd; inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; + tid_fd_update_inode(task, inode, data->mode); d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, 0)) - return 0; - out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static struct dentry *proc_lookupfd_common(struct inode *dir, @@ -201,19 +208,21 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, instantiate_t instantiate) { struct task_struct *task = get_proc_task(dir); - int result = -ENOENT; - unsigned fd = name_to_int(&dentry->d_name); + struct fd_data data = {.fd = name_to_int(&dentry->d_name)}; + struct dentry *result = ERR_PTR(-ENOENT); if (!task) goto out_no_task; - if (fd == ~0U) + if (data.fd == ~0U) + goto out; + if (!tid_fd_mode(task, data.fd, &data.mode)) goto out; - result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); + result = instantiate(dentry, task, &data); out: put_task_struct(task); out_no_task: - return ERR_PTR(result); + return result; } static int proc_readfd_common(struct file *file, struct dir_context *ctx, @@ -236,17 +245,22 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, for (fd = ctx->pos - 2; fd < files_fdtable(files)->max_fds; fd++, ctx->pos++) { + struct file *f; + struct fd_data data; char name[10 + 1]; int len; - if (!fcheck_files(files, fd)) + f = fcheck_files(files, fd); + if (!f) continue; + data.mode = f->f_mode; rcu_read_unlock(); + data.fd = fd; len = snprintf(name, sizeof(name), "%u", fd); if (!proc_fill_cache(file, ctx, name, len, instantiate, p, - (void *)(unsigned long)fd)) + &data)) goto out_fd_loop; cond_resched(); rcu_read_lock(); @@ -304,31 +318,25 @@ const struct inode_operations proc_fd_inode_operations = { .setattr = proc_setattr, }; -static int -proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, - struct task_struct *task, const void *ptr) +static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { - unsigned fd = (unsigned long)ptr; + const struct fd_data *data = ptr; struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFREG | S_IRUSR); + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFREG | S_IRUSR); if (!inode) - goto out; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); - ei->fd = fd; + ei->fd = data->fd; inode->i_fop = &proc_fdinfo_file_operations; + tid_fd_update_inode(task, inode, 0); d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, 0)) - return 0; - out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static struct dentry * diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2078e70e1595..7b4d9714f248 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -25,6 +25,7 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/uaccess.h> +#include <linux/seq_file.h> #include "internal.h" @@ -256,8 +257,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, if (!inode) return ERR_PTR(-ENOMEM); d_set_d_op(dentry, &proc_misc_dentry_ops); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } read_unlock(&proc_subdir_lock); return ERR_PTR(-ENOENT); @@ -346,13 +346,12 @@ static const struct inode_operations proc_dir_inode_operations = { .setattr = proc_notify_change, }; -static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +/* returns the registered entry, or frees dp and returns NULL on failure */ +struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + struct proc_dir_entry *dp) { - int ret; - - ret = proc_alloc_inum(&dp->low_ino); - if (ret) - return ret; + if (proc_alloc_inum(&dp->low_ino)) + goto out_free_entry; write_lock(&proc_subdir_lock); dp->parent = dir; @@ -360,12 +359,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp WARN(1, "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); write_unlock(&proc_subdir_lock); - proc_free_inum(dp->low_ino); - return -EEXIST; + goto out_free_inum; } write_unlock(&proc_subdir_lock); - return 0; + return dp; +out_free_inum: + proc_free_inum(dp->low_ino); +out_free_entry: + pde_free(dp); + return NULL; } static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, @@ -443,10 +446,7 @@ struct proc_dir_entry *proc_symlink(const char *name, if (ent->data) { strcpy((char*)ent->data,dest); ent->proc_iops = &proc_link_inode_operations; - if (proc_register(parent, ent) < 0) { - pde_free(ent); - ent = NULL; - } + ent = proc_register(parent, ent); } else { pde_free(ent); ent = NULL; @@ -470,11 +470,9 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent->proc_fops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; parent->nlink++; - if (proc_register(parent, ent) < 0) { - pde_free(ent); + ent = proc_register(parent, ent); + if (!ent) parent->nlink--; - ent = NULL; - } } return ent; } @@ -505,47 +503,47 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->proc_fops = NULL; ent->proc_iops = NULL; parent->nlink++; - if (proc_register(parent, ent) < 0) { - pde_free(ent); + ent = proc_register(parent, ent); + if (!ent) parent->nlink--; - ent = NULL; - } } return ent; } EXPORT_SYMBOL(proc_create_mount_point); -struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, - struct proc_dir_entry *parent, - const struct file_operations *proc_fops, - void *data) +struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, + struct proc_dir_entry **parent, void *data) { - struct proc_dir_entry *pde; + struct proc_dir_entry *p; + if ((mode & S_IFMT) == 0) mode |= S_IFREG; - - if (!S_ISREG(mode)) { - WARN_ON(1); /* use proc_mkdir() */ + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO; + if (WARN_ON_ONCE(!S_ISREG(mode))) return NULL; + + p = __proc_create(parent, name, mode, 1); + if (p) { + p->proc_iops = &proc_file_inode_operations; + p->data = data; } + return p; +} + +struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops, void *data) +{ + struct proc_dir_entry *p; BUG_ON(proc_fops == NULL); - if ((mode & S_IALLUGO) == 0) - mode |= S_IRUGO; - pde = __proc_create(&parent, name, mode, 1); - if (!pde) - goto out; - pde->proc_fops = proc_fops; - pde->data = data; - pde->proc_iops = &proc_file_inode_operations; - if (proc_register(parent, pde) < 0) - goto out_free; - return pde; -out_free: - pde_free(pde); -out: - return NULL; + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = proc_fops; + return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_data); @@ -557,6 +555,67 @@ struct proc_dir_entry *proc_create(const char *name, umode_t mode, } EXPORT_SYMBOL(proc_create); +static int proc_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *de = PDE(inode); + + if (de->state_size) + return seq_open_private(file, de->seq_ops, de->state_size); + return seq_open(file, de->seq_ops); +} + +static const struct file_operations proc_seq_fops = { + .open = proc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct seq_operations *ops, + unsigned int state_size, void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_seq_fops; + p->seq_ops = ops; + p->state_size = state_size; + return proc_register(parent, p); +} +EXPORT_SYMBOL(proc_create_seq_private); + +static int proc_single_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *de = PDE(inode); + + return single_open(file, de->single_show, de->data); +} + +static const struct file_operations proc_single_fops = { + .open = proc_single_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_single_fops; + p->single_show = show; + return proc_register(parent, p); +} +EXPORT_SYMBOL(proc_create_single_data); + void proc_set_size(struct proc_dir_entry *de, loff_t size) { de->size = size; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 0f1692e63cb6..43c70c9e6b62 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -44,7 +44,12 @@ struct proc_dir_entry { struct completion *pde_unload_completion; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; + union { + const struct seq_operations *seq_ops; + int (*single_show)(struct seq_file *, void *); + }; void *data; + unsigned int state_size; unsigned int low_ino; nlink_t nlink; kuid_t uid; @@ -57,9 +62,9 @@ struct proc_dir_entry { umode_t mode; u8 namelen; #ifdef CONFIG_64BIT -#define SIZEOF_PDE_INLINE_NAME (192-139) +#define SIZEOF_PDE_INLINE_NAME (192-155) #else -#define SIZEOF_PDE_INLINE_NAME (128-87) +#define SIZEOF_PDE_INLINE_NAME (128-95) #endif char inline_name[SIZEOF_PDE_INLINE_NAME]; } __randomize_layout; @@ -147,14 +152,14 @@ extern const struct dentry_operations pid_dentry_operations; extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct dentry *, struct iattr *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); -extern int pid_revalidate(struct dentry *, unsigned int); +extern void pid_update_inode(struct task_struct *, struct inode *); extern int pid_delete_dentry(const struct dentry *); extern int proc_pid_readdir(struct file *, struct dir_context *); extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); extern loff_t mem_lseek(struct file *, loff_t, int); /* Lookups */ -typedef int instantiate_t(struct inode *, struct dentry *, +typedef struct dentry *instantiate_t(struct dentry *, struct task_struct *, const void *); extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, instantiate_t, struct task_struct *, const void *); @@ -162,6 +167,10 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i /* * generic.c */ +struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, + struct proc_dir_entry **parent, void *data); +struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + struct proc_dir_entry *dp); extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c index 6a6bee9c603c..cb0edc7cbf09 100644 --- a/fs/proc/interrupts.c +++ b/fs/proc/interrupts.c @@ -34,21 +34,9 @@ static const struct seq_operations int_seq_ops = { .show = show_interrupts }; -static int interrupts_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &int_seq_ops); -} - -static const struct file_operations proc_interrupts_operations = { - .open = interrupts_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_interrupts_init(void) { - proc_create("interrupts", 0, NULL, &proc_interrupts_operations); + proc_create_seq("interrupts", 0, NULL, &int_seq_ops); return 0; } fs_initcall(proc_interrupts_init); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index b572cc865b92..d06694757201 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -28,21 +28,9 @@ static int loadavg_proc_show(struct seq_file *m, void *v) return 0; } -static int loadavg_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, loadavg_proc_show, NULL); -} - -static const struct file_operations loadavg_proc_fops = { - .open = loadavg_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_loadavg_init(void) { - proc_create("loadavg", 0, NULL, &loadavg_proc_fops); + proc_create_single("loadavg", 0, NULL, loadavg_proc_show); return 0; } fs_initcall(proc_loadavg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 65a72ab57471..2fb04846ed11 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -149,21 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) return 0; } -static int meminfo_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, meminfo_proc_show, NULL); -} - -static const struct file_operations meminfo_proc_fops = { - .open = meminfo_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_meminfo_init(void) { - proc_create("meminfo", 0, NULL, &meminfo_proc_fops); + proc_create_single("meminfo", 0, NULL, meminfo_proc_show); return 0; } fs_initcall(proc_meminfo_init); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 59b17e509f46..dd2b35f78b09 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -87,28 +87,24 @@ static const struct inode_operations proc_ns_link_inode_operations = { .setattr = proc_setattr, }; -static int proc_ns_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static struct dentry *proc_ns_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { const struct proc_ns_operations *ns_ops = ptr; struct inode *inode; struct proc_inode *ei; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | S_IRWXUGO); + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO); if (!inode) - goto out; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); inode->i_op = &proc_ns_link_inode_operations; ei->ns_ops = ns_ops; + pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, 0)) - return 0; -out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) @@ -147,12 +143,10 @@ const struct file_operations proc_ns_dir_operations = { static struct dentry *proc_ns_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - int error; struct task_struct *task = get_proc_task(dir); const struct proc_ns_operations **entry, **last; unsigned int len = dentry->d_name.len; - - error = -ENOENT; + struct dentry *res = ERR_PTR(-ENOENT); if (!task) goto out_no_task; @@ -167,11 +161,11 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, if (entry == last) goto out; - error = proc_ns_instantiate(dir, dentry, task, *entry); + res = proc_ns_instantiate(dentry, task, *entry); out: put_task_struct(task); out_no_task: - return ERR_PTR(error); + return res; } const struct inode_operations proc_ns_dir_inode_operations = { diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 75634379f82e..3b63be64e436 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -113,21 +113,9 @@ static const struct seq_operations proc_nommu_region_list_seqop = { .show = nommu_region_list_show }; -static int proc_nommu_region_list_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &proc_nommu_region_list_seqop); -} - -static const struct file_operations proc_nommu_region_list_operations = { - .open = proc_nommu_region_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_nommu_init(void) { - proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations); + proc_create_seq("maps", S_IRUGO, NULL, &proc_nommu_region_list_seqop); return 0; } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 1763f370489d..7d94fa005b0d 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -38,20 +38,20 @@ static struct net *get_proc_net(const struct inode *inode) return maybe_get_net(PDE_NET(PDE(inode))); } -int seq_open_net(struct inode *ino, struct file *f, - const struct seq_operations *ops, int size) +static int seq_open_net(struct inode *inode, struct file *file) { - struct net *net; + unsigned int state_size = PDE(inode)->state_size; struct seq_net_private *p; + struct net *net; - BUG_ON(size < sizeof(*p)); + WARN_ON_ONCE(state_size < sizeof(*p)); - net = get_proc_net(ino); - if (net == NULL) + net = get_proc_net(inode); + if (!net) return -ENXIO; - p = __seq_open_private(f, ops, size); - if (p == NULL) { + p = __seq_open_private(file, PDE(inode)->seq_ops, state_size); + if (!p) { put_net(net); return -ENOMEM; } @@ -60,51 +60,83 @@ int seq_open_net(struct inode *ino, struct file *f, #endif return 0; } -EXPORT_SYMBOL_GPL(seq_open_net); -int single_open_net(struct inode *inode, struct file *file, - int (*show)(struct seq_file *, void *)) +static int seq_release_net(struct inode *ino, struct file *f) { - int err; - struct net *net; - - err = -ENXIO; - net = get_proc_net(inode); - if (net == NULL) - goto err_net; - - err = single_open(file, show, net); - if (err < 0) - goto err_open; + struct seq_file *seq = f->private_data; + put_net(seq_file_net(seq)); + seq_release_private(ino, f); return 0; +} -err_open: - put_net(net); -err_net: - return err; +static const struct file_operations proc_net_seq_fops = { + .open = seq_open_net, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct seq_operations *ops, + unsigned int state_size, void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_seq_fops; + p->seq_ops = ops; + p->state_size = state_size; + return proc_register(parent, p); } -EXPORT_SYMBOL_GPL(single_open_net); +EXPORT_SYMBOL_GPL(proc_create_net_data); -int seq_release_net(struct inode *ino, struct file *f) +static int single_open_net(struct inode *inode, struct file *file) { - struct seq_file *seq; + struct proc_dir_entry *de = PDE(inode); + struct net *net; + int err; - seq = f->private_data; + net = get_proc_net(inode); + if (!net) + return -ENXIO; - put_net(seq_file_net(seq)); - seq_release_private(ino, f); - return 0; + err = single_open(file, de->single_show, net); + if (err) + put_net(net); + return err; } -EXPORT_SYMBOL_GPL(seq_release_net); -int single_release_net(struct inode *ino, struct file *f) +static int single_release_net(struct inode *ino, struct file *f) { struct seq_file *seq = f->private_data; put_net(seq->private); return single_release(ino, f); } -EXPORT_SYMBOL_GPL(single_release_net); + +static const struct file_operations proc_net_single_fops = { + .open = single_open_net, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + +struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_single_fops; + p->single_show = show; + return proc_register(parent, p); +} +EXPORT_SYMBOL_GPL(proc_create_net_single); static struct net *get_proc_task_net(struct inode *dir) { diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8989936f2995..4d765e5e91ed 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -554,9 +554,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, if (!inode) goto out; - err = NULL; d_set_d_op(dentry, &proc_sys_dentry_operations); - d_add(dentry, inode); + err = d_splice_alias(inode, dentry); out: if (h) @@ -684,6 +683,7 @@ static bool proc_sys_fill_cache(struct file *file, if (IS_ERR(child)) return false; if (d_in_lookup(child)) { + struct dentry *res; inode = proc_sys_make_inode(dir->d_sb, head, table); if (!inode) { d_lookup_done(child); @@ -691,7 +691,16 @@ static bool proc_sys_fill_cache(struct file *file, return false; } d_set_d_op(child, &proc_sys_dentry_operations); - d_add(child, inode); + res = d_splice_alias(inode, child); + d_lookup_done(child); + if (unlikely(res)) { + if (IS_ERR(res)) { + dput(child); + return false; + } + dput(child); + child = res; + } } } inode = d_inode(child); diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index d0cf1c50bb6c..c69ff191e5d8 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -126,18 +126,6 @@ static const struct seq_operations tty_drivers_op = { .show = show_tty_driver }; -static int tty_drivers_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &tty_drivers_op); -} - -static const struct file_operations proc_tty_drivers_operations = { - .open = tty_drivers_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * This function is called by tty_register_driver() to handle * registering the driver's /proc handler into /proc/tty/driver/<foo> @@ -147,11 +135,11 @@ void proc_tty_register_driver(struct tty_driver *driver) struct proc_dir_entry *ent; if (!driver->driver_name || driver->proc_entry || - !driver->ops->proc_fops) + !driver->ops->proc_show) return; - ent = proc_create_data(driver->driver_name, 0, proc_tty_driver, - driver->ops->proc_fops, driver); + ent = proc_create_single_data(driver->driver_name, 0, proc_tty_driver, + driver->ops->proc_show, driver); driver->proc_entry = ent; } @@ -186,6 +174,6 @@ void __init proc_tty_init(void) * entry. */ proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL); - proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops); - proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations); + proc_create_seq("tty/ldiscs", 0, NULL, &tty_ldiscs_seq_ops); + proc_create_seq("tty/drivers", 0, NULL, &tty_drivers_op); } diff --git a/fs/proc/self.c b/fs/proc/self.c index 4d7d061696b3..127265e5c55f 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); pid_t tgid = task_tgid_nr_ns(current, ns); char *name; @@ -36,7 +36,7 @@ static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct pid_namespace *ns = s->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(root_inode); struct dentry *self; inode_lock(root_inode); diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 24072cc06e65..12901dcf57e2 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -25,21 +25,9 @@ static int show_softirqs(struct seq_file *p, void *v) return 0; } -static int softirqs_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_softirqs, NULL); -} - -static const struct file_operations proc_softirqs_operations = { - .open = softirqs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_softirqs_init(void) { - proc_create("softirqs", 0, NULL, &proc_softirqs_operations); + proc_create_single("softirqs", 0, NULL, show_softirqs); return 0; } fs_initcall(proc_softirqs_init); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c486ad4b43f0..a20c6e495bb2 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -937,7 +937,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, /* * The soft-dirty tracker uses #PF-s to catch writes * to pages, so write-protect the pte as well. See the - * Documentation/vm/soft-dirty.txt for full description + * Documentation/admin-guide/mm/soft-dirty.rst for full description * of how soft-dirty works. */ pte_t ptent = *pte; @@ -1421,7 +1421,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, * Bits 0-54 page frame number (PFN) if present * Bits 0-4 swap type if swapped * Bits 5-54 swap offset if swapped - * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt) + * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) * Bit 56 page exclusively mapped * Bits 57-60 zero * Bit 61 page is file-page or shared-anon diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index 9d2efaca499f..b905010ca9eb 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); pid_t tgid = task_tgid_nr_ns(current, ns); pid_t pid = task_pid_nr_ns(current, ns); char *name; @@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct pid_namespace *ns = s->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(root_inode); struct dentry *thread_self; inode_lock(root_inode); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 95a708d83721..3bd12f955867 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -30,21 +30,9 @@ static int uptime_proc_show(struct seq_file *m, void *v) return 0; } -static int uptime_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, uptime_proc_show, NULL); -} - -static const struct file_operations uptime_proc_fops = { - .open = uptime_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_uptime_init(void) { - proc_create("uptime", 0, NULL, &uptime_proc_fops); + proc_create_single("uptime", 0, NULL, uptime_proc_show); return 0; } fs_initcall(proc_uptime_init); diff --git a/fs/proc/version.c b/fs/proc/version.c index 94901e8e700d..b449f186577f 100644 --- a/fs/proc/version.c +++ b/fs/proc/version.c @@ -15,21 +15,9 @@ static int version_proc_show(struct seq_file *m, void *v) return 0; } -static int version_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, version_proc_show, NULL); -} - -static const struct file_operations version_proc_fops = { - .open = version_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_version_init(void) { - proc_create("version", 0, NULL, &version_proc_fops); + proc_create_single("version", 0, NULL, version_proc_show); return 0; } fs_initcall(proc_version_init); diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index eca27878079d..8d72221735d7 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -114,13 +114,9 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned i brelse(bh); foundinode = qnx4_iget(dir->i_sb, ino); - if (IS_ERR(foundinode)) { + if (IS_ERR(foundinode)) QNX4DEBUG((KERN_ERR "qnx4: lookup->iget -> error %ld\n", PTR_ERR(foundinode))); - return ERR_CAST(foundinode); - } out: - d_add(dentry, foundinode); - - return NULL; + return d_splice_alias(foundinode, dentry); } diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c index 72c2770830be..e2e98e653b8d 100644 --- a/fs/qnx6/namei.c +++ b/fs/qnx6/namei.c @@ -29,15 +29,11 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, if (ino) { foundinode = qnx6_iget(dir->i_sb, ino); qnx6_put_page(page); - if (IS_ERR(foundinode)) { + if (IS_ERR(foundinode)) pr_debug("lookup->iget -> error %ld\n", PTR_ERR(foundinode)); - return ERR_CAST(foundinode); - } } else { pr_debug("%s(): not found %s\n", __func__, name); - return NULL; } - d_add(dentry, foundinode); - return NULL; + return d_splice_alias(foundinode, dentry); } diff --git a/fs/read_write.c b/fs/read_write.c index c4eabbfc90df..e83bd9744b5d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -2023,7 +2023,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) ret = mnt_want_write_file(dst_file); if (ret) { info->status = ret; - goto next_loop; + goto next_fdput; } dst_off = info->dest_offset; @@ -2058,9 +2058,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) next_file: mnt_drop_write_file(dst_file); -next_loop: +next_fdput: fdput(dst_fd); - +next_loop: if (fatal_signal_pending(current)) goto out; } diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index fe999157dd97..e39b3910d24d 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -389,27 +389,13 @@ static int show_journal(struct seq_file *m, void *unused) return 0; } -static int r_open(struct inode *inode, struct file *file) -{ - return single_open(file, PDE_DATA(inode), - proc_get_parent_data(inode)); -} - -static const struct file_operations r_file_operations = { - .open = r_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static struct proc_dir_entry *proc_info_root = NULL; static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct super_block *sb, char *name, int (*func) (struct seq_file *, void *)) { - proc_create_data(name, 0, REISERFS_SB(sb)->procdir, - &r_file_operations, func); + proc_create_single_data(name, 0, REISERFS_SB(sb)->procdir, func, sb); } int reiserfs_proc_info_init(struct super_block *sb) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 8f06fd1f3d69..6ccb51993a76 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -213,7 +213,7 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { unsigned long offset, maxoff; - struct inode *inode; + struct inode *inode = NULL; struct romfs_inode ri; const char *name; /* got from dentry */ int len, ret; @@ -233,7 +233,7 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, for (;;) { if (!offset || offset >= maxoff) - goto out0; + break; ret = romfs_dev_read(dir->i_sb, offset, &ri, sizeof(ri)); if (ret < 0) @@ -244,37 +244,19 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, len); if (ret < 0) goto error; - if (ret == 1) + if (ret == 1) { + /* Hard link handling */ + if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD) + offset = be32_to_cpu(ri.spec) & ROMFH_MASK; + inode = romfs_iget(dir->i_sb, offset); break; + } /* next entry */ offset = be32_to_cpu(ri.next) & ROMFH_MASK; } - /* Hard link handling */ - if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD) - offset = be32_to_cpu(ri.spec) & ROMFH_MASK; - - inode = romfs_iget(dir->i_sb, offset); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - goto error; - } - goto outi; - - /* - * it's a bit funky, _lookup needs to return an error code - * (negative) or a NULL, both as a dentry. ENOENT should not - * be returned, instead we need to create a negative dentry by - * d_add(dentry, NULL); and return 0 as no error. - * (Although as I see, it only matters on writable file - * systems). - */ -out0: - inode = NULL; -outi: - d_add(dentry, inode); - ret = 0; + return d_splice_alias(inode, dentry); error: return ERR_PTR(ret); } diff --git a/fs/select.c b/fs/select.c index ba879c51288f..bc3cc0f98896 100644 --- a/fs/select.c +++ b/fs/select.c @@ -34,6 +34,29 @@ #include <linux/uaccess.h> +__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) +{ + if (file->f_op->poll) { + return file->f_op->poll(file, pt); + } else if (file_has_poll_mask(file)) { + unsigned int events = poll_requested_events(pt); + struct wait_queue_head *head; + + if (pt && pt->_qproc) { + head = file->f_op->get_poll_head(file, events); + if (!head) + return DEFAULT_POLLMASK; + if (IS_ERR(head)) + return EPOLLERR; + pt->_qproc(file, head, pt); + } + + return file->f_op->poll_mask(file, events); + } else { + return DEFAULT_POLLMASK; + } +} +EXPORT_SYMBOL_GPL(vfs_poll); /* * Estimate expected accuracy in ns from a timeval. @@ -233,7 +256,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, add_wait_queue(wait_address, &entry->wait); } -int poll_schedule_timeout(struct poll_wqueues *pwq, int state, +static int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack) { int rc = -EINTR; @@ -258,7 +281,6 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, return rc; } -EXPORT_SYMBOL(poll_schedule_timeout); /** * poll_select_set_timeout - helper function to setup the timeout value @@ -503,14 +525,10 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) continue; f = fdget(i); if (f.file) { - const struct file_operations *f_op; - f_op = f.file->f_op; - mask = DEFAULT_POLLMASK; - if (f_op->poll) { - wait_key_set(wait, in, out, - bit, busy_flag); - mask = (*f_op->poll)(f.file, wait); - } + wait_key_set(wait, in, out, bit, + busy_flag); + mask = vfs_poll(f.file, wait); + fdput(f); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; @@ -813,34 +831,29 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, bool *can_busy_poll, __poll_t busy_flag) { - __poll_t mask; - int fd; - - mask = 0; - fd = pollfd->fd; - if (fd >= 0) { - struct fd f = fdget(fd); - mask = EPOLLNVAL; - if (f.file) { - /* userland u16 ->events contains POLL... bitmap */ - __poll_t filter = demangle_poll(pollfd->events) | - EPOLLERR | EPOLLHUP; - mask = DEFAULT_POLLMASK; - if (f.file->f_op->poll) { - pwait->_key = filter; - pwait->_key |= busy_flag; - mask = f.file->f_op->poll(f.file, pwait); - if (mask & busy_flag) - *can_busy_poll = true; - } - /* Mask out unneeded events. */ - mask &= filter; - fdput(f); - } - } + int fd = pollfd->fd; + __poll_t mask = 0, filter; + struct fd f; + + if (fd < 0) + goto out; + mask = EPOLLNVAL; + f = fdget(fd); + if (!f.file) + goto out; + + /* userland u16 ->events contains POLL... bitmap */ + filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP; + pwait->_key = filter | busy_flag; + mask = vfs_poll(f.file, pwait); + if (mask & busy_flag) + *can_busy_poll = true; + mask &= filter; /* Mask out unneeded events. */ + fdput(f); + +out: /* ... and so does ->revents */ pollfd->revents = mangle_poll(mask); - return mask; } diff --git a/fs/super.c b/fs/super.c index 4b5b562176d0..50728d9c1a05 100644 --- a/fs/super.c +++ b/fs/super.c @@ -947,7 +947,7 @@ void emergency_remount(void) static void do_thaw_all_callback(struct super_block *sb) { down_write(&sb->s_umount); - if (sb->s_root && sb->s_flags & MS_BORN) { + if (sb->s_root && sb->s_flags & SB_BORN) { emergency_thaw_bdev(sb); thaw_super_locked(sb); } else { diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 250b0755b908..4d5d20491ffd 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -51,14 +51,9 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, un if (dentry->d_name.len > SYSV_NAMELEN) return ERR_PTR(-ENAMETOOLONG); ino = sysv_inode_by_name(dentry); - - if (ino) { + if (ino) inode = sysv_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - } - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev) diff --git a/fs/timerfd.c b/fs/timerfd.c index cdad49da3ff7..d84a2bee4f82 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -226,21 +226,20 @@ static int timerfd_release(struct inode *inode, struct file *file) kfree_rcu(ctx, rcu); return 0; } - -static __poll_t timerfd_poll(struct file *file, poll_table *wait) + +static struct wait_queue_head *timerfd_get_poll_head(struct file *file, + __poll_t eventmask) { struct timerfd_ctx *ctx = file->private_data; - __poll_t events = 0; - unsigned long flags; - poll_wait(file, &ctx->wqh, wait); + return &ctx->wqh; +} - spin_lock_irqsave(&ctx->wqh.lock, flags); - if (ctx->ticks) - events |= EPOLLIN; - spin_unlock_irqrestore(&ctx->wqh.lock, flags); +static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask) +{ + struct timerfd_ctx *ctx = file->private_data; - return events; + return ctx->ticks ? EPOLLIN : 0; } static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, @@ -364,7 +363,8 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg static const struct file_operations timerfd_fops = { .release = timerfd_release, - .poll = timerfd_poll, + .get_poll_head = timerfd_get_poll_head, + .poll_mask = timerfd_poll_mask, .read = timerfd_read, .llseek = noop_llseek, .show_fdinfo = timerfd_show, diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 9d7fb88e172e..4e267cc21c77 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -214,7 +214,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, int err; union ubifs_key key; struct inode *inode = NULL; - struct ubifs_dent_node *dent; + struct ubifs_dent_node *dent = NULL; struct ubifs_info *c = dir->i_sb->s_fs_info; struct fscrypt_name nm; @@ -229,14 +229,14 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(err); if (fname_len(&nm) > UBIFS_MAX_NLEN) { - err = -ENAMETOOLONG; - goto out_fname; + inode = ERR_PTR(-ENAMETOOLONG); + goto done; } dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); if (!dent) { - err = -ENOMEM; - goto out_fname; + inode = ERR_PTR(-ENOMEM); + goto done; } if (nm.hash) { @@ -250,16 +250,16 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, } if (err) { - if (err == -ENOENT) { + if (err == -ENOENT) dbg_gen("not found"); - goto done; - } - goto out_dent; + else + inode = ERR_PTR(err); + goto done; } if (dbg_check_name(c, dent, &nm)) { - err = -EINVAL; - goto out_dent; + inode = ERR_PTR(-EINVAL); + goto done; } inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum)); @@ -272,7 +272,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, ubifs_err(c, "dead directory entry '%pd', error %d", dentry, err); ubifs_ro_mode(c, err); - goto out_dent; + goto done; } if (ubifs_crypt_is_encrypted(dir) && @@ -280,27 +280,14 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, !fscrypt_has_permitted_context(dir, inode)) { ubifs_warn(c, "Inconsistent encryption contexts: %lu/%lu", dir->i_ino, inode->i_ino); - err = -EPERM; - goto out_inode; + iput(inode); + inode = ERR_PTR(-EPERM); } done: kfree(dent); fscrypt_free_filename(&nm); - /* - * Note, d_splice_alias() would be required instead if we supported - * NFS. - */ - d_add(dentry, inode); - return NULL; - -out_inode: - iput(inode); -out_dent: - kfree(dent); -out_fname: - fscrypt_free_filename(&nm); - return ERR_PTR(err); + return d_splice_alias(inode, dentry); } static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, diff --git a/fs/xattr.c b/fs/xattr.c index 61cd28ba25f3..f9cb1db187b7 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -229,7 +229,7 @@ out: } EXPORT_SYMBOL_GPL(vfs_setxattr); -ssize_t +static ssize_t xattr_getsecurity(struct inode *inode, const char *name, void *value, size_t size) { @@ -254,7 +254,6 @@ out: out_noalloc: return len; } -EXPORT_SYMBOL_GPL(xattr_getsecurity); /* * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr @@ -354,7 +353,6 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size) if (error) return error; if (inode->i_op->listxattr && (inode->i_opflags & IOP_XATTR)) { - error = -EOPNOTSUPP; error = inode->i_op->listxattr(dentry, list, size); } else { error = security_inode_listsecurity(inode, list, size); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0ab824f574ed..102463543db3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -594,7 +594,7 @@ xfs_alloc_ioend( struct xfs_ioend *ioend; struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); xfs_init_bio_from_bh(bio, bh); ioend = container_of(bio, struct xfs_ioend, io_inline_bio); diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 69346d460dfa..694c85b03813 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -18,7 +18,7 @@ #ifndef __XFS_AOPS_H__ #define __XFS_AOPS_H__ -extern struct bio_set *xfs_ioend_bioset; +extern struct bio_set xfs_ioend_bioset; /* * Types of I/O for bmap clustering and I/O completion tracking. diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index a3ed3c811dfa..df42e4cb4dc4 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -260,6 +260,7 @@ xfs_vn_lookup( struct dentry *dentry, unsigned int flags) { + struct inode *inode; struct xfs_inode *cip; struct xfs_name name; int error; @@ -269,14 +270,13 @@ xfs_vn_lookup( xfs_dentry_to_name(&name, dentry); error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); - if (unlikely(error)) { - if (unlikely(error != -ENOENT)) - return ERR_PTR(error); - d_add(dentry, NULL); - return NULL; - } - - return d_splice_alias(VFS_I(cip), dentry); + if (likely(!error)) + inode = VFS_I(cip); + else if (likely(error == -ENOENT)) + inode = NULL; + else + inode = ERR_PTR(error); + return d_splice_alias(inode, dentry); } STATIC struct dentry * diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 056e12b421eb..1cc79907b377 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -113,6 +113,7 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats) } } +#ifdef CONFIG_PROC_FS /* legacy quota interfaces */ #ifdef CONFIG_XFS_QUOTA static int xqm_proc_show(struct seq_file *m, void *v) @@ -124,18 +125,6 @@ static int xqm_proc_show(struct seq_file *m, void *v) return 0; } -static int xqm_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqm_proc_show, NULL); -} - -static const struct file_operations xqm_proc_fops = { - .open = xqm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* legacy quota stats interface no 2 */ static int xqmstat_proc_show(struct seq_file *m, void *v) { @@ -147,22 +136,8 @@ static int xqmstat_proc_show(struct seq_file *m, void *v) seq_putc(m, '\n'); return 0; } - -static int xqmstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqmstat_proc_show, NULL); -} - -static const struct file_operations xqmstat_proc_fops = { - .owner = THIS_MODULE, - .open = xqmstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_XFS_QUOTA */ -#ifdef CONFIG_PROC_FS int xfs_init_procfs(void) { @@ -174,11 +149,9 @@ xfs_init_procfs(void) goto out; #ifdef CONFIG_XFS_QUOTA - if (!proc_create("fs/xfs/xqmstat", 0, NULL, - &xqmstat_proc_fops)) + if (!proc_create_single("fs/xfs/xqmstat", 0, NULL, xqmstat_proc_show)) goto out; - if (!proc_create("fs/xfs/xqm", 0, NULL, - &xqm_proc_fops)) + if (!proc_create_single("fs/xfs/xqm", 0, NULL, xqm_proc_show)) goto out; #endif return 0; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d71424052917..f643d76db516 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -63,7 +63,7 @@ #include <linux/parser.h> static const struct super_operations xfs_super_operations; -struct bio_set *xfs_ioend_bioset; +struct bio_set xfs_ioend_bioset; static struct kset *xfs_kset; /* top-level xfs sysfs dir */ #ifdef DEBUG @@ -1845,10 +1845,9 @@ MODULE_ALIAS_FS("xfs"); STATIC int __init xfs_init_zones(void) { - xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE, + if (bioset_init(&xfs_ioend_bioset, 4 * MAX_BUF_PER_PAGE, offsetof(struct xfs_ioend, io_inline_bio), - BIOSET_NEED_BVECS); - if (!xfs_ioend_bioset) + BIOSET_NEED_BVECS)) goto out; xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), @@ -1997,7 +1996,7 @@ xfs_init_zones(void) out_destroy_log_ticket_zone: kmem_zone_destroy(xfs_log_ticket_zone); out_free_ioend_bioset: - bioset_free(xfs_ioend_bioset); + bioset_exit(&xfs_ioend_bioset); out: return -ENOMEM; } @@ -2029,7 +2028,7 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_btree_cur_zone); kmem_zone_destroy(xfs_bmap_free_item_zone); kmem_zone_destroy(xfs_log_ticket_zone); - bioset_free(xfs_ioend_bioset); + bioset_exit(&xfs_ioend_bioset); } STATIC int __init |