From b92dccf65bab3b6b7deb79ff3321dc256eb0f53b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:03 -0500 Subject: NFS: Fix a busy inodes issue... The nfs_open_context may live longer than the file descriptor that spawned it, so it needs to carry a reference to the vfsmount. If not, then generic_shutdown_super() may end up being called before reads and writes have been flushed out. Make a couple of functions static while we're at it... Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a77ee95b7efb..8d5b6691ae3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -973,7 +973,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) return err; } -struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) +static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) { struct nfs_open_context *ctx; @@ -981,6 +981,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp if (ctx != NULL) { atomic_set(&ctx->count, 1); ctx->dentry = dget(dentry); + ctx->vfsmnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -1011,6 +1012,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->dentry); + mntput(ctx->vfsmnt); kfree(ctx); } } @@ -1019,7 +1021,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { struct inode *inode = filp->f_dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -1051,7 +1053,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c return ctx; } -void nfs_file_clear_open_context(struct file *filp) +static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_dentry->d_inode; struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; @@ -1076,7 +1078,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_dentry, cred); + ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; -- cgit v1.2.3 From cd52ed35535ef443f08bf5cd3331d350272885b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:04 -0500 Subject: NFS: Avoid races between writebacks and truncation Currently, there is no serialisation between NFS asynchronous writebacks and truncation at the page level due to the fact that nfs_sync_inode() cannot lock the pages that it is about to write out. This means that it is possible to be flushing out data (and calling something like set_page_writeback()) while the page cache is busy evicting the page. Oops... Use the hooks provided in try_to_release_page() to ensure that dirty pages are always written back to storage before we evict them. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 13 +++++++++++++ fs/nfs/pagelist.c | 10 ++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7a79fbe9f539..387809f2d188 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -316,6 +316,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse return status; } +static int nfs_invalidate_page(struct page *page, unsigned long offset) +{ + /* FIXME: we really should cancel any unstarted writes on this page */ + return 1; +} + +static int nfs_release_page(struct page *page, gfp_t gfp) +{ + return !nfs_wb_page(page->mapping->host, page); +} + struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -324,6 +335,8 @@ struct address_space_operations nfs_file_aops = { .writepages = nfs_writepages, .prepare_write = nfs_prepare_write, .commit_write = nfs_commit_write, + .invalidatepage = nfs_invalidate_page, + .releasepage = nfs_release_page, #ifdef CONFIG_NFS_DIRECTIO .direct_IO = nfs_direct_IO, #endif diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d53857b148e2..d6e076c9dbe1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -85,6 +85,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); + BUG_ON(PagePrivate(page)); + BUG_ON(!PageLocked(page)); + BUG_ON(page->mapping->host != inode); + SetPagePrivate(page); req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; @@ -147,8 +151,10 @@ void nfs_clear_page_writeback(struct nfs_page *req) */ void nfs_clear_request(struct nfs_page *req) { - if (req->wb_page) { - page_cache_release(req->wb_page); + struct page *page = req->wb_page; + if (page != NULL) { + ClearPagePrivate(page); + page_cache_release(page); req->wb_page = NULL; } } -- cgit v1.2.3 From 1dd594b21b2d98e56f2b1fe92bb222276b28de41 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Mon, 20 Mar 2006 13:44:04 -0500 Subject: NFS: Fix buglet in fs/nfs/write.c I've been reading through fs/nfs/write.c trying to track down a bug that seems to be related to pages loosing a refcount and getting freed too early (you interested in detail??) and I spotted a little bug which the following patch should fix. Signed-off-by: Neil Brown Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9449b6835509..d6ad449041eb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -653,8 +653,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); - if (error < 0) + if (error < 0) { + if (new) + nfs_release_request(new); return ERR_PTR(error); + } continue; } spin_unlock(&nfsi->req_lock); -- cgit v1.2.3 From 755c1e20cd2ad56e5c567fa05769eb98a3eef72b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:06 -0500 Subject: NFS: writes should not clobber utimes() calls Ensure that we flush out writes in the case when someone calls utimes() in order to set the file times. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8d5b6691ae3d..5746dc841a6c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -859,11 +859,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) lock_kernel(); nfs_begin_data_update(inode); - /* Write all dirty data if we're changing file permissions or size */ - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { - filemap_write_and_wait(inode->i_mapping); - nfs_wb_all(inode); - } + /* Write all dirty data */ + filemap_write_and_wait(inode->i_mapping); + nfs_wb_all(inode); /* * Return any delegations if we're going to change ACLs */ -- cgit v1.2.3 From ca62b9c3f7b8679ada4de94d2ab7098c6860c3d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:07 -0500 Subject: NFSv4: Don't invalidate cached attributes if change attribute is unchanged Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5746dc841a6c..0e1ef97bcf54 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1299,34 +1299,35 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; + /* Has the inode gone and changed behind our back? */ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + return -EIO; + } + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - nfsi->change_attr != fattr->change_attr) { + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) { + if (nfsi->change_attr == fattr->change_attr) + goto out; nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } - /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { - return -EIO; - } - - cur_size = i_size_read(inode); - new_isize = nfs_size_to_loff_t(fattr->size); - /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } + + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (nfsi->npages == 0) @@ -1343,6 +1344,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (inode->i_nlink != fattr->nlink) nfsi->cache_validity |= NFS_INO_INVALID_ATTR; +out: if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; @@ -1481,15 +1483,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_change_attribute = jiffies; } - if ((fattr->valid & NFS_ATTR_FATTR_V4) - && nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = jiffies; - } - /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; @@ -1519,6 +1512,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blksize = fattr->du.nfs2.blocksize; } + if ((fattr->valid & NFS_ATTR_FATTR_V4)) { + if (nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + nfsi->change_attr = fattr->change_attr; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = jiffies; + } else + invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA); + } + /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); -- cgit v1.2.3 From c8d149f3dbd582a101aa7da7bdd6c3316efd11b4 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 20 Mar 2006 13:44:07 -0500 Subject: NFS: "const static" vs "static const" in nfs4 My previous "const static" vs "static const" cleanup missed a single case, patch below takes care of it. Signed-off-by: Jesper Juhl Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f8c0066e02e1..305bea201cd3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2958,7 +2958,7 @@ static void nfs4_delegreturn_release(void *calldata) kfree(calldata); } -const static struct rpc_call_ops nfs4_delegreturn_ops = { +static const struct rpc_call_ops nfs4_delegreturn_ops = { .rpc_call_prepare = nfs4_delegreturn_prepare, .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, -- cgit v1.2.3 From fb374d24f225f38f13dbffb65dd7ec72daf08dba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:08 -0500 Subject: NFS: reduce the number of false cache invalidations. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0e1ef97bcf54..24988f430e4b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1328,11 +1328,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (nfsi->npages == 0) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + if (cur_size != new_isize && nfsi->npages == 0) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) -- cgit v1.2.3 From 12de3b35ea549c5819f287508d7afab0bf3ac44d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:09 -0500 Subject: SUNRPC: Ensure that rpc_mkpipe returns a refcounted dentry If not, we cannot guarantee that idmap->idmap_dentry, gss_auth->dentry and clnt->cl_dentry are valid dentries. Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 821edd30333b..32c95a0d93f3 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -132,6 +132,8 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; + dput(idmap->idmap_dentry); + idmap->idmap_dentry = NULL; rpc_unlink(idmap->idmap_path); clp->cl_idmap = NULL; kfree(idmap); -- cgit v1.2.3 From 967b9281361481aecf323563886ef972ee88c681 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:09 -0500 Subject: NFSv4: Do not call rpciod_down() before call to destroy_nfsv4_state() The reason is that the idmapper cleanup may call flush_workqueue() on rpciod_workqueue. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 24988f430e4b..b81149eb26e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2025,10 +2025,11 @@ static void nfs4_kill_super(struct super_block *sb) if (server->client != NULL && !IS_ERR(server->client)) rpc_shutdown_client(server->client); - rpciod_down(); /* release rpciod */ destroy_nfsv4_state(server); + rpciod_down(); + kfree(server->hostname); kfree(server); } -- cgit v1.2.3 From a162a6b804b48c605d1fd35e1861a5d32d00ad3f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:10 -0500 Subject: NFSv4: Kill braindead gcc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nfs4_open_revalidate: 'res' may be used uninitialized nfs4_callback_compound: ‘hdr_res.nops’ may be used uninitialized 'op_nr’ may be used uninitialized encode_getattr_res: ‘savep’ may be used uninitialized Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 28 ++++++++++++++++------------ fs/nfs/nfs4proc.c | 2 +- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 7c33b9a81a94..05c38cf40b69 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res) static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) { - uint32_t *savep; + uint32_t *savep = NULL; unsigned status = res->status; if (unlikely(status != 0)) @@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, struct xdr_stream *xdr_out, void *resp) { - struct callback_op *op; - unsigned int op_nr; + struct callback_op *op = &callback_ops[0]; + unsigned int op_nr = OP_CB_ILLEGAL; unsigned int status = 0; long maxlen; unsigned res; dprintk("%s: start\n", __FUNCTION__); status = decode_op_hdr(xdr_in, &op_nr); - if (unlikely(status != 0)) { - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) { - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - status = htonl(NFS4ERR_OP_ILLEGAL); - } else - op = &callback_ops[op_nr]; + if (likely(status == 0)) { + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + op = &callback_ops[op_nr]; + break; + default: + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + status = htonl(NFS4ERR_OP_ILLEGAL); + } + } maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { @@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp decode_compound_hdr_arg(&xdr_in, &hdr_arg); hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; + hdr_res.nops = NULL; encode_compound_hdr_res(&xdr_out, &hdr_res); for (;;) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 305bea201cd3..77a565eba562 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -908,7 +908,7 @@ out_put_state_owner: static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) { struct nfs4_exception exception = { }; - struct nfs4_state *res; + struct nfs4_state *res = ERR_PTR(-EIO); int err; do { -- cgit v1.2.3 From bd6475454c774bd9dbe6078d94bbf72b1d3b65f4 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 20 Mar 2006 13:44:10 -0500 Subject: NFS: kzalloc conversion in fs/nfs this converts fs/nfs to kzalloc() usage. compile tested with make allyesconfig Signed-off-by: Eric Sesterhenn Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 4 +--- fs/nfs/inode.c | 6 ++---- fs/nfs/unlink.c | 3 +-- fs/nfs/write.c | 6 ++---- 4 files changed, 6 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 32c95a0d93f3..b89d27f93d66 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -101,11 +101,9 @@ nfs_idmap_new(struct nfs4_client *clp) if (clp->cl_idmap != NULL) return; - if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) + if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) return; - memset(idmap, 0, sizeof(*idmap)); - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), "%s/idmap", clp->cl_rpcclient->cl_pathname); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b81149eb26e5..521d1dcb4cf0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1638,10 +1638,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, #endif /* CONFIG_NFS_V3 */ s = ERR_PTR(-ENOMEM); - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) goto out_err; - memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -1942,10 +1941,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, return ERR_PTR(-EINVAL); } - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) return ERR_PTR(-ENOMEM); - memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index a65c7b53d558..0e28189c2151 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry) struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode); int status = -ENOMEM; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out; - memset(data, 0, sizeof(*data)); data->cred = rpcauth_lookupcred(clnt->cl_auth, 0); if (IS_ERR(data->cred)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d6ad449041eb..92ecf24455c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -100,10 +100,8 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) p->pagevec = &p->page_array[0]; else { size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { + p->pagevec = kzalloc(size, GFP_NOFS); + if (!p->pagevec) { mempool_free(p, nfs_commit_mempool); p = NULL; } -- cgit v1.2.3 From c9d5128a10a4974f72674ff3463da4db439e8b04 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Mar 2006 13:44:11 -0500 Subject: NFS: sem2mutex idmap.c semaphore to mutex conversion. the conversion was generated via scripts, and the result was validated automatically via a script as well. build and boot tested. Signed-off-by: Ingo Molnar Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b89d27f93d66..3fab5b0cfc5a 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -35,6 +35,7 @@ */ #include +#include #include #include #include @@ -74,8 +75,8 @@ struct idmap { struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; - struct semaphore idmap_lock; /* Serializes upcalls */ - struct semaphore idmap_im_lock; /* Protects the hashtable */ + struct mutex idmap_lock; /* Serializes upcalls */ + struct mutex idmap_im_lock; /* Protects the hashtable */ struct idmap_hashtable idmap_user_hash; struct idmap_hashtable idmap_group_hash; }; @@ -114,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp) return; } - init_MUTEX(&idmap->idmap_lock); - init_MUTEX(&idmap->idmap_im_lock); + mutex_init(&idmap->idmap_lock); + mutex_init(&idmap->idmap_im_lock); init_waitqueue_head(&idmap->idmap_wq); idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; @@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, if (namelen >= IDMAP_NAMESZ) return -EINVAL; - down(&idmap->idmap_lock); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); he = idmap_lookup_name(h, name, namelen); if (he != NULL) { @@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, } set_current_state(TASK_UNINTERRUPTIBLE); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); schedule(); current->state = TASK_RUNNING; remove_wait_queue(&idmap->idmap_wq, &wq); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { *id = im->im_id; @@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, out: memset(im, 0, sizeof(*im)); - up(&idmap->idmap_im_lock); - up(&idmap->idmap_lock); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); return (ret); } @@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, im = &idmap->idmap_im; - down(&idmap->idmap_lock); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); he = idmap_lookup_id(h, id); if (he != 0) { @@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, } set_current_state(TASK_UNINTERRUPTIBLE); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); schedule(); current->state = TASK_RUNNING; remove_wait_queue(&idmap->idmap_wq, &wq); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) @@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, out: memset(im, 0, sizeof(*im)); - up(&idmap->idmap_im_lock); - up(&idmap->idmap_lock); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); return ret; } @@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (copy_from_user(&im_in, src, mlen) != 0) return (-EFAULT); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); ret = mlen; im->im_status = im_in.im_status; @@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); ret = mlen; out: - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); return ret; } @@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) if (msg->errno >= 0) return; - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); im->im_status = IDMAP_STATUS_LOOKUPFAIL; wake_up(&idmap->idmap_wq); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); } /* -- cgit v1.2.3 From 7a480e250c7ca9187275d8574ae9e48a6b602cb9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:12 -0500 Subject: NFS: show retransmit settings when displaying mount options Sometimes it's important to know the exact RPC retransmit settings the kernel is using for an NFS mount point. Add this facility to the NFS client's show_options method. Test plan: Set various retransmit settings via the mount command, and check that the settings are reflected in /proc/mounts. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 521d1dcb4cf0..48683d4bf0f6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -396,6 +396,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + /* create transport and client */ xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { @@ -629,6 +632,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) proto = buf; } seq_printf(m, ",proto=%s", proto); + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); return 0; @@ -1800,6 +1805,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); -- cgit v1.2.3 From c8bded96aa8735823e53c95a26177987ebb19a90 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:13 -0500 Subject: NFS: clean up some mount options Get rid of "lock" and "posix", and spell out "vers=". Test plan: None. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 48683d4bf0f6..827d69255b1b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -591,10 +591,9 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; @@ -603,7 +602,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) char buf[12]; char *proto; - seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); if (nfss->acregmin != 3*HZ) -- cgit v1.2.3 From d9ef5a8c26aab09762afce43df64736720b4860e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:13 -0500 Subject: NFS: introduce mechanism for tracking NFS client metrics Add a per-superblock performance counter facility to the NFS client. This facility mimics the counters available for block devices and for networking. Expose these new counters via the new /proc/self/mountstats interface. Thanks to Andrew Morton and Trond Myklebust for their review and comments. Test plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 103 +++++++++++++++++++++++++++++++++++--- fs/nfs/iostat.h | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+), 6 deletions(-) create mode 100644 fs/nfs/iostat.h (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 827d69255b1b..86b756f44e27 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -42,6 +42,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -65,6 +66,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -78,6 +80,7 @@ static struct super_operations nfs_sops = { .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -290,6 +293,12 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) } sb->s_root->d_op = server->rpc_ops->dentry_ops; + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + no_root_error = -ENOMEM; + goto out_no_root; + } + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -582,7 +591,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf) } -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { static struct proc_nfs_info { int flag; @@ -598,20 +607,19 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); char buf[12]; char *proto; seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) + if (nfss->acregmin != 3*HZ || showdefaults) seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) + if (nfss->acregmax != 60*HZ || showdefaults) seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) + if (nfss->acdirmin != 30*HZ || showdefaults) seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) + if (nfss->acdirmax != 60*HZ || showdefaults) seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) @@ -633,8 +641,89 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",proto=%s", proto); seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); seq_printf(m, ",retrans=%u", nfss->retrans_count); +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct nfs_iostats *stats; + + if (!cpu_possible(cpu)) + continue; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + return 0; } @@ -1742,6 +1831,7 @@ static struct super_operations nfs4_sops = { .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -2015,6 +2105,7 @@ out_err: out_free: kfree(server->mnt_path); kfree(server->hostname); + nfs_free_iostats(server->io_stats); kfree(server); return s; } diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h new file mode 100644 index 000000000000..dc080e50ec57 --- /dev/null +++ b/fs/nfs/iostat.h @@ -0,0 +1,152 @@ +/* + * linux/fs/nfs/iostat.h + * + * Declarations for NFS client per-mount statistics + * + * Copyright (C) 2005, 2006 Chuck Lever + * + * NFS client per-mount statistics provide information about the health of + * the NFS client and the health of each NFS mount point. Generally these + * are not for detailed problem diagnosis, but simply to indicate that there + * is a problem. + * + * These counters are not meant to be human-readable, but are meant to be + * integrated into system monitoring tools such as "sar" and "iostat". As + * such, the counters are sampled by the tools over time, and are never + * zeroed after a file system is mounted. Moving averages can be computed + * by the tools by taking the difference between two instantaneous samples + * and dividing that by the time between the samples. + */ + +#ifndef _NFS_IOSTAT +#define _NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +/* + * NFS byte counters + * + * 1. SERVER - the number of payload bytes read from or written to the + * server by the NFS client via an NFS READ or WRITE request. + * + * 2. NORMAL - the number of bytes read or written by applications via + * the read(2) and write(2) system call interfaces. + * + * 3. DIRECT - the number of bytes read or written from files opened + * with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out of the NFS + * client. Comparing the number of bytes requested by an application with the + * number of bytes the client requests from the server can provide an + * indication of client efficiency (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods are in + * use. DIRECT by itself shows whether there is any O_DIRECT traffic. + * NORMAL + DIRECT shows how much data is going through the system call + * interface. A large amount of SERVER traffic without much NORMAL or + * DIRECT traffic shows that applications are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + */ +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_READPAGES, + NFSIOS_WRITEPAGES, + __NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client activity + * without enabling NFS trace debugging. The counters show the rate at + * which VFS requests are made, and how often the client invalidates its + * data and attribute caches. This allows system administrators to monitor + * such things as how close-to-open is working, and answer questions such + * as "why are there so many GETATTR requests on the wire?" + * + * They also count anamolous events such as short reads and writes, silly + * renames due to close-after-delete, and operations that change the size + * of a file (such operations can often be the source of data corruption + * if applications aren't using file locking properly). + */ +enum nfs_stat_eventcounters { + NFSIOS_INODEREVALIDATE = 0, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSUPDATEPAGE, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSSETATTR, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_CONGESTIONWAIT, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + __NFSIOS_COUNTSMAX, +}; + +#ifdef __KERNEL__ + +#include +#include + +struct nfs_iostats { + unsigned long long bytes[__NFSIOS_BYTESMAX]; + unsigned long events[__NFSIOS_COUNTSMAX]; +} ____cacheline_aligned; + +static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->events[stat] ++; + put_cpu_no_resched(); +} + +static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats->bytes[stat] += addend; + put_cpu_no_resched(); +} + +static inline struct nfs_iostats *nfs_alloc_iostats(void) +{ + return alloc_percpu(struct nfs_iostats); +} + +static inline void nfs_free_iostats(struct nfs_iostats *stats) +{ + free_percpu(stats); +} + +#endif +#endif -- cgit v1.2.3 From 91d5b47023b608227d605d1e916b29dd0215bff7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:14 -0500 Subject: NFS: add I/O performance counters Invoke the byte and event counter macros where we want to count bytes and events. Clean-up: fix a possible NULL dereference in nfs_lock, and simplify nfs_file_open. Test-plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 ++++++++ fs/nfs/direct.c | 7 +++++++ fs/nfs/file.c | 20 +++++++++----------- fs/nfs/inode.c | 8 ++++++++ fs/nfs/read.c | 12 ++++++++++++ fs/nfs/write.c | 18 ++++++++++++++++++ 6 files changed, 62 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a1554bead692..151b8dd0ac3b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -34,6 +34,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "iostat.h" #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ @@ -507,6 +508,8 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct nfs_fattr fattr; long res; + nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); + lock_kernel(); res = nfs_revalidate_inode(NFS_SERVER(inode), inode); @@ -713,6 +716,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; + nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { @@ -844,6 +848,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru dfprintk(VFS, "NFS: lookup(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); if (dentry->d_name.len > NFS_SERVER(dir)->namelen) @@ -1241,6 +1246,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); #ifdef NFS_PARANOIA if (!dentry->d_inode) @@ -1640,6 +1646,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) struct rpc_cred *cred; int res = 0; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); + if (mask == 0) goto out; /* Is this sys_access() ? */ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4e9b3a1b36c5..fc07ce4885da 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -54,6 +54,8 @@ #include #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_VFS #define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) @@ -67,6 +69,7 @@ struct nfs_direct_req { struct kref kref; /* release manager */ struct list_head list; /* nfs_read_data structs */ wait_queue_head_t wait; /* wait for i/o completion */ + struct inode * inode; /* target file of I/O */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ atomic_t complete, /* i/os we're waiting for */ @@ -357,7 +360,9 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; + nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, file_offset); @@ -572,6 +577,7 @@ static ssize_t nfs_direct_write(struct inode *inode, return page_count; } + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); result = nfs_direct_write_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 0); @@ -581,6 +587,7 @@ static ssize_t nfs_direct_write(struct inode *inode, break; return result; } + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); tot_bytes += result; file_offset += result; if (result < size) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 387809f2d188..1cf07e4ad136 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -32,6 +32,7 @@ #include #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -102,18 +103,15 @@ static int nfs_check_flags(int flags) static int nfs_file_open(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); - int (*open)(struct inode *, struct file *); int res; res = nfs_check_flags(filp->f_flags); if (res) return res; + nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - /* Do NFSv4 open() call */ - if ((open = server->rpc_ops->file_open) != NULL) - res = open(inode, filp); + res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); unlock_kernel(); return res; } @@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp) /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) filemap_fdatawrite(filp->f_mapping); + nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } @@ -199,6 +198,7 @@ nfs_file_flush(struct file *file) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); lock_kernel(); /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); @@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) (unsigned long) count, (unsigned long) pos); result = nfs_revalidate_file(inode, iocb->ki_filp); + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync) dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); lock_kernel(); status = nfs_wb_all(inode); if (!status) { @@ -378,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t if (!count) goto out; + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, buf, count, pos); out: return result; @@ -517,9 +520,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); - - if (!inode) - return -EINVAL; + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && @@ -544,9 +545,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags); - if (!inode) - return -EINVAL; - /* * No BSD flocks over NFS allowed. * Note: we could try to fake a POSIX lock request here by diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 86b756f44e27..8ee74111e519 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -753,6 +753,8 @@ static void nfs_zap_caches_locked(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -940,6 +942,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) struct nfs_fattr fattr; int error; + nfs_inc_stats(inode, NFSIOS_VFSSETATTR); + if (attr->ia_valid & ATTR_SIZE) { if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) attr->ia_valid &= ~ATTR_SIZE; @@ -993,6 +997,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; vmtruncate(inode, attr->ia_size); } @@ -1278,6 +1283,7 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; @@ -1294,6 +1300,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); if (S_ISREG(inode->i_mode)) nfs_sync_mapping(mapping); invalidate_inode_pages2(mapping); @@ -1615,6 +1622,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 05eb43fadf8e..ae3ddd24cf8f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,6 +31,8 @@ #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE static int nfs_pagein_one(struct list_head *, struct inode *); @@ -133,6 +135,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, } count -= result; rdata->args.pgbase += result; + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result); + /* Note: result == 0 should only happen if we're caching * a write that extends the file and punches a hole. */ @@ -458,8 +462,11 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", task->tk_pid, status); + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); + /* Is this a short read? */ if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count != 0) { /* Yes, so retry the read at the end of the data */ @@ -491,6 +498,9 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page->index); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + nfs_add_stats(inode, NFSIOS_READPAGES, 1); + /* * Try to flush any pending writes to the file.. * @@ -570,6 +580,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, inode->i_sb->s_id, (long long)NFS_FILEID(inode), nr_pages); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); if (filp == NULL) { desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); @@ -582,6 +593,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) + nfs_add_stats(inode, NFSIOS_READPAGES, err); ret = err; } put_nfs_open_context(desc.ctx); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 92ecf24455c3..e7c8361cf201 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,6 +63,7 @@ #include #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -134,6 +135,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) return; + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); i_size_write(inode, end); } @@ -223,6 +225,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, wdata->args.pgbase += result; written += result; count -= result; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); } while (count); /* Update file length */ nfs_grow_file(page, offset, written); @@ -279,6 +282,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) int priority = wb_priority(wbc); int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + /* * Note: We need to ensure that we have a reference to the inode * if we are to do asynchronous writes. If not, waiting @@ -343,6 +349,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct inode *inode = mapping->host; int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + err = generic_writepages(mapping, wbc); if (err) return err; @@ -354,6 +362,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); if (err < 0) goto out; + nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); wbc->nr_to_write -= err; if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wait_on_requests(inode, 0, 0); @@ -596,6 +605,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) if (!bdi_write_congested(bdi)) return 0; + + nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); + if (intr) { struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); sigset_t oldset; @@ -749,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page, struct nfs_page *req; int status = 0; + nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); + dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, count, @@ -1152,6 +1166,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1177,6 +1193,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) if (task->tk_status >= 0 && resp->count < argp->count) { static unsigned long complain; + nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + /* Has the server at least made some progress? */ if (resp->count != 0) { /* Was this an NFSv2 write or an NFSv3 stable write? */ -- cgit v1.2.3 From 006ea73e5fa82915d0ac7a3f15ee7c688433236d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:14 -0500 Subject: NFS: add hooks to account for NFSERR_JUKEBOX errors Make an inode or an nfs_server struct available in the logic that handles JUKEBOX/DELAY type errors so the NFS client can account for them. This patch is split out from the main nfs iostat patch to highlight minor architectural changes required to support this statistic. Test plan: None. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/iostat.h | 19 +++++++++++++++---- fs/nfs/nfs3proc.c | 13 ++++++++----- fs/nfs/nfs4proc.c | 5 ++++- 3 files changed, 27 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index dc080e50ec57..7a7495153317 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -103,6 +103,7 @@ enum nfs_stat_eventcounters { NFSIOS_SILLYRENAME, NFSIOS_SHORTREAD, NFSIOS_SHORTWRITE, + NFSIOS_DELAY, __NFSIOS_COUNTSMAX, }; @@ -116,28 +117,38 @@ struct nfs_iostats { unsigned long events[__NFSIOS_COUNTSMAX]; } ____cacheline_aligned; -static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat) { struct nfs_iostats *iostats; int cpu; cpu = get_cpu(); - iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats = per_cpu_ptr(server->io_stats, cpu); iostats->events[stat] ++; put_cpu_no_resched(); } -static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +{ + nfs_inc_server_stats(NFS_SERVER(inode), stat); +} + +static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) { struct nfs_iostats *iostats; int cpu; cpu = get_cpu(); - iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu); + iostats = per_cpu_ptr(server->io_stats, cpu); iostats->bytes[stat] += addend; put_cpu_no_resched(); } +static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + nfs_add_server_stats(NFS_SERVER(inode), stat, addend); +} + static inline struct nfs_iostats *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ed67567f0556..7204ba5b2bf8 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -19,6 +19,8 @@ #include #include +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PROC extern struct rpc_procinfo nfs3_procedures[]; @@ -58,10 +60,11 @@ nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, i nfs3_rpc_wrapper(clnt, msg, flags) static int -nfs3_async_handle_jukebox(struct rpc_task *task) +nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { if (task->tk_status != -EJUKEBOX) return 0; + nfs_inc_stats(inode, NFSIOS_DELAY); task->tk_status = 0; rpc_restart_call(task); rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); @@ -447,7 +450,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task) struct rpc_message *msg = &task->tk_msg; struct nfs_fattr *dir_attr; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, dir->d_inode)) return 1; if (msg->rpc_argp) { dir_attr = (struct nfs_fattr*)msg->rpc_resp; @@ -748,7 +751,7 @@ static void nfs3_read_done(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, data->inode)) return; /* Call back common NFS readpage processing */ if (task->tk_status >= 0) @@ -786,7 +789,7 @@ static void nfs3_write_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, data->inode)) return; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); @@ -833,7 +836,7 @@ static void nfs3_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, data->inode)) return; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 77a565eba562..f1ff4fa6cce5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,6 +51,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -2755,8 +2756,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) rpc_wake_up_task(task); task->tk_status = 0; return -EAGAIN; - case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + nfs_inc_server_stats((struct nfs_server *) server, + NFSIOS_DELAY); + case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; -- cgit v1.2.3 From 67ec9f46b889bfb1ab0a4e307d53929d5f0692bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:15 -0500 Subject: NFS: report how long an NFS file system has been mounted Add a field in nfs_server to record a timestamp when a mount succeeds. Report the number of seconds the file system has been mounted via nfs_show_stats(). Test plan: Mount an NFS file system, watch the mountstats reports and compare with clock time. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8ee74111e519..1b2d782374b5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -299,6 +299,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) goto out_no_root; } + /* mount time stamp, in seconds */ + server->mount_time = jiffies; + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -674,6 +677,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); nfs_show_mount_options(m, nfss, 1); + seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); seq_printf(m, ",wtmult=%d", nfss->wtmult); -- cgit v1.2.3 From 4ece3a2d18fd7fe1d4972284a8c98c569020093f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: NFS: add RPC I/O statistics to /proc/self/mountstats NFS client now shows various RPC I/O metrics in /proc/self/mountstats. Test plan: Mount/umount while doing "cat /proc/self/mountstats", multiple iterations of connectathon locking suite. Test with NFS version 2, 3, and 4. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1b2d782374b5..b9f35ca266c2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -728,6 +729,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, "\n\tbytes:\t"); for (i = 0; i < __NFSIOS_BYTESMAX; i++) seq_printf(m, "%Lu ", totals.bytes[i]); + seq_printf(m, "\n"); + + rpc_print_iostats(m, nfss->client); return 0; } -- cgit v1.2.3 From cc0175c1dc1de8f6af0eb0631dcc5b999a6fcc42 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: SUNRPC: display human-readable procedure name in rpc_iostats output Add fields to the rpc_procinfo struct that allow the display of a human-readable name for each procedure in the rpc_iostats output. Also fix it so that the NFSv4 stats are broken up correctly by sub-procedure number. NFSv4 uses only two real RPC procedures: NULL, and COMPOUND. Test plan: Mount with NFSv2, NFSv3, and NFSv4, and do "cat /proc/self/mountstats". Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 4 ++++ fs/nfs/nfs2xdr.c | 4 +++- fs/nfs/nfs3xdr.c | 6 +++++- fs/nfs/nfs4xdr.c | 2 ++ 4 files changed, 14 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index db99b8f678f8..4a1340358223 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -137,6 +137,8 @@ static struct rpc_procinfo mnt_procedures[] = { .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus, .p_bufsiz = MNT_dirpath_sz << 2, + .p_statidx = MNTPROC_MNT, + .p_name = "MOUNT", }, }; @@ -146,6 +148,8 @@ static struct rpc_procinfo mnt3_procedures[] = { .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, .p_bufsiz = MNT_dirpath_sz << 2, + .p_statidx = MOUNTPROC3_MNT, + .p_name = "MOUNT", }, }; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7fc0560c89c9..8cdc792ff3c7 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat) .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ - .p_timer = timer \ + .p_timer = timer, \ + .p_statidx = NFSPROC_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs_procedures[] = { PROC(GETATTR, fhandle, attrstat, 1), diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b6c0b5012bce..2d8701a230f0 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ - .p_timer = timer \ + .p_timer = timer, \ + .p_statidx = NFS3PROC_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs3_procedures[] = { @@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, .p_timer = 1, + .p_name = "GETACL", }, [ACLPROC3_SETACL] = { .p_proc = ACLPROC3_SETACL, @@ -1157,6 +1160,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, .p_timer = 0, + .p_name = "SETACL", }, }; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4bbf5ef57785..b95675349ba3 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat) .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ + .p_statidx = NFSPROC4_CLNT_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs4_procedures[] = { -- cgit v1.2.3 From dead28da8e3fb32601d38fb32b7021122e0a3d21 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:23 -0500 Subject: SUNRPC: eliminate rpc_call() Clean-up: replace rpc_call() helper with direct call to rpc_call_sync. This makes NFSv2 and NFSv3 synchronous calls more computationally efficient, and reduces stack consumption in functions that used to invoke rpc_call more than once. Test plan: Compile kernel with CONFIG_NFS enabled. Connectathon on NFS version 2, version 3, and version 4 mount points. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 13 +++-- fs/nfs/nfs3acl.c | 16 ++++-- fs/nfs/nfs3proc.c | 140 ++++++++++++++++++++++++++++++++++++++-------------- fs/nfs/proc.c | 107 ++++++++++++++++++++++++++++++--------- 4 files changed, 209 insertions(+), 67 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 4a1340358223..c44d87bdddb3 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, struct mnt_fhstatus result = { .fh = fh }; + struct rpc_message msg = { + .rpc_argp = path, + .rpc_resp = &result, + }; char hostname[32]; int status; - int call; dprintk("NFS: nfs_mount(%08x:%s)\n", (unsigned)ntohl(addr->sin_addr.s_addr), path); @@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, if (IS_ERR(mnt_clnt)) return PTR_ERR(mnt_clnt); - call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT; - status = rpc_call(mnt_clnt, call, path, &result, 0); + if (version == NFS_MNT3_VERSION) + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; + else + msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; + + status = rpc_call_sync(mnt_clnt, &msg, 0); return status < 0? status : (result.status? -EACCES : 0); } diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 6a5bbc0ae941..33287879bd23 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) struct nfs3_getaclres res = { .fattr = &fattr, }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &res, + }; struct posix_acl *acl; int status, count; @@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) return NULL; dprintk("NFS call getacl\n"); - status = rpc_call(server->client_acl, ACLPROC3_GETACL, - &args, &res, 0); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; + status = rpc_call_sync(server->client_acl, &msg, 0); dprintk("NFS reply getacl: %d\n", status); /* pages may have been allocated at the xdr layer. */ @@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, .acl_access = acl, .pages = pages, }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &fattr, + }; int status, count; status = -EOPNOTSUPP; @@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, dprintk("NFS call setacl\n"); nfs_begin_data_update(inode); - status = rpc_call(server->client_acl, ACLPROC3_SETACL, - &args, &fattr, 0); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; + status = rpc_call_sync(server->client_acl, &msg, 0); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; spin_unlock(&inode->i_lock); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7204ba5b2bf8..740f8b1ab04d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -43,21 +43,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) return res; } -static inline int -nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) -{ - struct rpc_message msg = { - .rpc_proc = &clnt->cl_procinfo[proc], - .rpc_argp = argp, - .rpc_resp = resp, - }; - return nfs3_rpc_wrapper(clnt, &msg, flags); -} - -#define rpc_call(clnt, proc, argp, resp, flags) \ - nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags) -#define rpc_call_sync(clnt, msg, flags) \ - nfs3_rpc_wrapper(clnt, msg, flags) +#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags) static int nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) @@ -75,14 +61,21 @@ static int do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("%s: call fsinfo\n", __FUNCTION__); nfs_fattr_init(info->fattr); - status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { - status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_resp = info->fattr; + status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); } return status; @@ -110,12 +103,16 @@ static int nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call(server->client, NFS3PROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -129,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, .fh = NFS_FH(inode), .sattr = sattr, }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; int status; dprintk("NFS call setattr\n"); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); @@ -155,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0); - if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_argp = fhandle; + msg.rpc_resp = fattr; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + } dprintk("NFS reply lookup: %d\n", status); if (status >= 0) status = nfs_refresh_inode(dir, &dir_attr); @@ -183,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = entry->cred + .rpc_cred = entry->cred, }; int mode = entry->mask; int status; @@ -229,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page, .pglen = pglen, .pages = &page }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK], + .rpc_argp = &args, + .rpc_resp = &fattr, + }; int status; dprintk("NFS call readlink\n"); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, - &args, &fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); dprintk("NFS reply readlink: %d\n", status); return status; @@ -330,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; mode_t mode = sattr->ia_mode; int status; @@ -346,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, again: nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); - nfs_post_op_update_inode(dir, &dir_attr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_refresh_inode(dir, &dir_attr); /* If the server doesn't support the exclusive creation semantics, * try again with simple 'guarded' mode. */ @@ -477,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, .fromattr = &old_dir_attr, .toattr = &new_dir_attr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); nfs_fattr_init(&old_dir_attr); nfs_fattr_init(&new_dir_attr); - status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); nfs_post_op_update_inode(old_dir, &old_dir_attr); nfs_post_op_update_inode(new_dir, &new_dir_attr); dprintk("NFS reply rename: %d\n", status); @@ -503,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) .dir_attr = &dir_attr, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call link %s\n", name->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); nfs_post_op_update_inode(inode, &fattr); dprintk("NFS reply link: %d\n", status); @@ -534,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; if (path->len > NFS3_MAXPATHLEN) @@ -541,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, dprintk("NFS call symlink %s -> %s\n", name->name, path->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply symlink: %d\n", status); return status; @@ -563,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int mode = sattr->ia_mode; int status; @@ -572,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; @@ -594,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name) .name = name->name, .len = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR], + .rpc_argp = &arg, + .rpc_resp = &dir_attr, + }; int status; dprintk("NFS call rmdir %s\n", name->name); nfs_fattr_init(&dir_attr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply rmdir: %d\n", status); return status; @@ -675,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fh, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD], + .rpc_argp = &arg, + .rpc_resp = &res, + }; mode_t mode = sattr->ia_mode; int status; @@ -693,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; @@ -710,11 +759,16 @@ static int nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *stat) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT], + .rpc_argp = fhandle, + .rpc_resp = stat, + }; int status; dprintk("NFS call fsstat\n"); nfs_fattr_init(stat->fattr); - status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply statfs: %d\n", status); return status; } @@ -723,11 +777,16 @@ static int nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } @@ -736,11 +795,16 @@ static int nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_pathconf *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("NFS call pathconf\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply pathconf: %d\n", status); return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f5150d71c03d..2b051ab8bea8 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, { struct nfs_fattr *fattr = info->fattr; struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("%s: call getattr\n", __FUNCTION__); nfs_fattr_init(fattr); - status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) return status; dprintk("%s: call statfs\n", __FUNCTION__); - status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); + msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; + msg.rpc_resp = &fsinfo; + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); if (status) return status; @@ -90,12 +97,16 @@ static int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call(server->client, NFSPROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, .fh = NFS_FH(inode), .sattr = sattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; int status; /* Mask out the non-modebit related stuff from attr->ia_mode */ @@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, dprintk("NFS call setattr\n"); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); @@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name, .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page, .pglen = pglen, .pages = &page }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_READLINK], + .rpc_argp = &args, + }; int status; dprintk("NFS call readlink\n"); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); dprintk("NFS reply readlink: %d\n", status); return status; } @@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; nfs_fattr_init(&fattr); dprintk("NFS call create %s\n", dentry->d_name.name); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); dprintk("NFS reply create: %d\n", status); @@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status, mode; dprintk("NFS call mknod %s\n", dentry->d_name.name); @@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == -EINVAL && S_ISFIFO(mode)) { sattr->ia_mode = mode; nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); } if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name) struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], .rpc_argp = &arg, - .rpc_resp = NULL, - .rpc_cred = NULL }; int status; @@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, .toname = new_name->name, .tolen = new_name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_RENAME], + .rpc_argp = &arg, + }; int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); nfs_mark_for_revalidate(old_dir); nfs_mark_for_revalidate(new_dir); dprintk("NFS reply rename: %d\n", status); @@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) .toname = name->name, .tolen = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_LINK], + .rpc_argp = &arg, + }; int status; dprintk("NFS call link %s\n", name->name); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_mark_for_revalidate(inode); nfs_mark_for_revalidate(dir); dprintk("NFS reply link: %d\n", status); @@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, .tolen = path->len, .sattr = sattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK], + .rpc_argp = &arg, + }; int status; if (path->len > NFS2_MAXPATHLEN) @@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, dprintk("NFS call symlink %s -> %s\n", name->name, path->name); nfs_fattr_init(fattr); fhandle->size = 0; - status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); dprintk("NFS reply symlink: %d\n", status); return status; @@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_MKDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name) .name = name->name, .len = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_RMDIR], + .rpc_argp = &arg, + }; int status; dprintk("NFS call rmdir %s\n", name->name); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); dprintk("NFS reply rmdir: %d\n", status); return status; @@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .fh = NFS_FH(dir), .cookie = cookie, .count = count, - .pages = &page + .pages = &page, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READDIR], .rpc_argp = &arg, - .rpc_resp = NULL, - .rpc_cred = cred + .rpc_cred = cred, }; int status; @@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *stat) { struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_STATFS], + .rpc_argp = fhandle, + .rpc_resp = &fsinfo, + }; int status; dprintk("NFS call statfs\n"); nfs_fattr_init(stat->fattr); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply statfs: %d\n", status); if (status) goto out; @@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_STATFS], + .rpc_argp = fhandle, + .rpc_resp = &fsinfo, + }; int status; dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); if (status) goto out; -- cgit v1.2.3 From 1e7cb3dc12dbbac690d78c84f9c7cb11132ed121 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:24 -0500 Subject: NFS: directory trace messages Reuse NFSDBG_DIRCACHE and NFSDBG_LOOKUPCACHE to provide additional diagnostic messages that trace the operation of the NFS client's directory behavior. A few new messages are now generated when NFSDBG_VFS is active, as well, to trace normal VFS activity. This compromise provides better trace debugging for those who use pre-built kernels, without adding a lot of extra noise to the standard debug settings. Test-plan: Enable NFS trace debugging with flags 1, 2, or 4. You should be able to see different types of trace messages with each flag setting. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 96 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 30 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 151b8dd0ac3b..609185a15c99 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -130,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp) { int res = 0; + dfprintk(VFS, "NFS: opendir(%s/%ld)\n", + inode->i_sb->s_id, inode->i_ino); + lock_kernel(); /* Call generic open code in order to cache credentials */ if (!res) @@ -173,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) unsigned long timestamp; int error; - dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); + dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", + __FUNCTION__, (long long)desc->entry->cookie, + page->index); again: timestamp = jiffies; @@ -245,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc) status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", + __FUNCTION__, (unsigned long long)entry->cookie); if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { @@ -253,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: find_dirent() returns %d\n", status); return status; } @@ -277,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc) if (status) break; - dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", + (unsigned long long)entry->cookie, desc->current_index); if (desc->file->f_pos == desc->current_index) { *desc->dir_cookie = entry->cookie; @@ -289,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); return status; } @@ -304,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) struct page *page; int status; - dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); + dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", + __FUNCTION__, desc->page_index, + (long long) *desc->dir_cookie); page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); @@ -325,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) if (status < 0) dir_page_release(desc); out: - dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status); return status; read_error: page_cache_release(page); @@ -347,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) /* Always search-by-index from the beginning of the cache */ if (*desc->dir_cookie == 0) { - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n", + (long long)desc->file->f_pos); desc->page_index = 0; desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; desc->current_index = 0; } else - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); for (;;) { res = find_dirent_page(desc); @@ -366,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res); + + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res); return res; } @@ -391,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", + (unsigned long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -428,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", + (unsigned long long)*desc->dir_cookie, res); return res; } @@ -454,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { @@ -486,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; out: - dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", + __FUNCTION__, status); return status; out_release: dir_page_release(desc); @@ -508,6 +522,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct nfs_fattr fattr; long res; + dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (long long)filp->f_pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); lock_kernel(); @@ -569,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } } unlock_kernel(); - if (res < 0) - return res; - return 0; + if (res > 0) + res = 0; + dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + res); + return res; } loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) @@ -602,6 +622,10 @@ out: */ int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) { + dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); + return 0; } @@ -726,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) } if (is_bad_inode(inode)) { - dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); goto out_bad; } @@ -759,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) out_valid: unlock_kernel(); dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 1; out_zap_parent: nfs_zap_caches(dir); @@ -775,6 +803,9 @@ out_zap_parent: d_drop(dentry); unlock_kernel(); dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 0; } @@ -917,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry struct dentry *res = NULL; int error; + dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + /* Check that we are indeed trying to open this file */ if (!is_atomic_open(dir, nd)) goto no_open; @@ -1124,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, int error; int open_flags = 0; - dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; @@ -1158,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) struct iattr attr; int status; - dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); if (!new_valid_dev(rdev)) return -EINVAL; @@ -1191,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct iattr attr; int error; - dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; @@ -1217,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; - dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); lock_kernel(); nfs_begin_data_update(dir); @@ -1274,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name); sillycounter++; sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); - dfprintk(VFS, "trying to rename %s to %s\n", - dentry->d_name.name, silly); + dfprintk(VFS, "NFS: trying to rename %s to %s\n", + dentry->d_name.name, silly); sdentry = lookup_one_len(silly, dentry->d_parent, slen); /* @@ -1687,13 +1721,15 @@ force_lookup: res = PTR_ERR(cred); unlock_kernel(); out: + dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", + inode->i_sb->s_id, inode->i_ino, mask, res); return res; out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) res = generic_permission(inode, mask, NULL); unlock_kernel(); - return res; + goto out; } /* -- cgit v1.2.3 From 8dc7c3115b611c00006eac3ee5b108296432aab7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: locks,lockd: fix race in nlmsvc_testlock posix_test_lock() returns a pointer to a struct file_lock which is unprotected and can be removed while in use by the caller. Move the conflicting lock from the return to a parameter, and copy the conflicting lock. In most cases the caller ends up putting the copy of the conflicting lock on the stack. On i386, sizeof(struct file_lock) appears to be about 100 bytes. We're assuming that's reasonable. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1cf07e4ad136..ee140c53dba6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -392,15 +392,14 @@ out_swapfile: static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { - struct file_lock *cfl; + struct file_lock cfl; struct inode *inode = filp->f_mapping->host; int status = 0; lock_kernel(); /* Try local locking first */ - cfl = posix_test_lock(filp, fl); - if (cfl != NULL) { - locks_copy_lock(fl, cfl); + if (posix_test_lock(filp, fl, &cfl)) { + locks_copy_lock(fl, &cfl); goto out; } -- cgit v1.2.3 From 788e7a89a03e364855583c0ab4649b94925efbb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS write code in preparation for asynchronous o_direct This patch inverts the callback hierarchy for NFS write calls. Instead of having the NFSv2/v3/v4-specific code set up the RPC callback ops, we allow the original caller to do so. This allows for more flexibility w.r.t. how to set up and tear down the nfs_write_data structure while still allowing the NFSv3/v4 code to perform error handling. The greater flexibility is needed by the asynchronous O_DIRECT code, which wants to be able to hold on to the original nfs_write_data structures after the WRITE RPC call has completed in order to be able to replay them if the COMMIT call determines that the server has rebooted. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 66 ++++++++++----------------------------- fs/nfs/nfs4proc.c | 54 ++++++++------------------------ fs/nfs/proc.c | 24 +++------------ fs/nfs/write.c | 92 +++++++++++++++++++++++++++++++++++++++---------------- 4 files changed, 101 insertions(+), 135 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 740f8b1ab04d..c4f7de8830e9 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -849,29 +849,17 @@ nfs3_proc_read_setup(struct nfs_read_data *data) rpc_call_setup(task, &msg, 0); } -static void nfs3_write_done(struct rpc_task *task, void *calldata) +static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - if (nfs3_async_handle_jukebox(task, data->inode)) - return; + return -EAGAIN; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_write_ops = { - .rpc_call_done = nfs3_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs3_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int stable; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], .rpc_argp = &data->args, @@ -879,45 +867,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how) .rpc_cred = data->cred, }; + data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { - if (!NFS_I(inode)->ncommit) - stable = NFS_FILE_SYNC; - else - stable = NFS_DATA_SYNC; - } else - stable = NFS_UNSTABLE; - data->args.stable = stable; - - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + data->args.stable = NFS_FILE_SYNC; + if (NFS_I(data->inode)->ncommit) + data->args.stable = NFS_DATA_SYNC; + } /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs3_commit_done(struct rpc_task *task, void *calldata) +static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - if (nfs3_async_handle_jukebox(task, data->inode)) - return; + return -EAGAIN; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_commit_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_commit_ops = { - .rpc_call_done = nfs3_commit_done, - .rpc_release = nfs_commit_release, -}; - -static void -nfs3_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], .rpc_argp = &data->args, @@ -925,12 +896,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how) .rpc_cred = data->cred, }; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int @@ -970,7 +936,9 @@ struct nfs_rpc_ops nfs_v3_clientops = { .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, .write_setup = nfs3_proc_write_setup, + .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, + .commit_done = nfs3_commit_done, .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f1ff4fa6cce5..ef4dc315ecc2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2388,32 +2388,23 @@ nfs4_proc_read_setup(struct nfs_read_data *data) rpc_call_setup(task, &msg, 0); } -static void nfs4_write_done(struct rpc_task *task, void *calldata) +static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); nfs_post_op_update_inode(inode, data->res.fattr); } - /* Call back common NFS writeback processing */ - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs4_write_ops = { - .rpc_call_done = nfs4_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs4_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs4_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], .rpc_argp = &data->args, @@ -2423,7 +2414,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) struct inode *inode = data->inode; struct nfs_server *server = NFS_SERVER(inode); int stable; - int flags; if (how & FLUSH_STABLE) { if (!NFS_I(inode)->ncommit) @@ -2438,57 +2428,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) data->timestamp = jiffies; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs4_commit_done(struct rpc_task *task, void *calldata) +static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status >= 0) nfs_post_op_update_inode(inode, data->res.fattr); - /* Call back common NFS writeback processing */ - nfs_commit_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs4_commit_ops = { - .rpc_call_done = nfs4_commit_done, - .rpc_release = nfs_commit_release, -}; - -static void -nfs4_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], .rpc_argp = &data->args, .rpc_resp = &data->res, .rpc_cred = data->cred, }; - struct inode *inode = data->inode; - struct nfs_server *server = NFS_SERVER(inode); - int flags; + struct nfs_server *server = NFS_SERVER(data->inode); data->args.bitmask = server->attr_bitmask; data->res.server = server; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } /* @@ -3648,7 +3618,9 @@ struct nfs_rpc_ops nfs_v4_clientops = { .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, .write_setup = nfs4_proc_write_setup, + .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, + .commit_done = nfs4_commit_done, .file_open = nfs_open, .file_release = nfs_release, .lock = nfs4_proc_lock, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 2b051ab8bea8..608aa5932a1d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -654,26 +654,15 @@ nfs_proc_read_setup(struct nfs_read_data *data) rpc_call_setup(task, &msg, 0); } -static void nfs_write_done(struct rpc_task *task, void *calldata) +static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs_write_ops = { - .rpc_call_done = nfs_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_WRITE], .rpc_argp = &data->args, @@ -684,12 +673,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how) /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ data->args.stable = NFS_FILE_SYNC; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static void @@ -736,6 +721,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .decode_dirent = nfs_decode_dirent, .read_setup = nfs_proc_read_setup, .write_setup = nfs_proc_write_setup, + .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .file_open = nfs_open, .file_release = nfs_release, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e7c8361cf201..5912274ff1a1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -77,12 +77,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct inode *, struct page *, unsigned int, unsigned int); -static void nfs_writeback_done_partial(struct nfs_write_data *, int); -static void nfs_writeback_done_full(struct nfs_write_data *, int); +static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how); +static const struct rpc_call_ops nfs_write_partial_ops; +static const struct rpc_call_ops nfs_write_full_ops; +static const struct rpc_call_ops nfs_commit_ops; static kmem_cache_t *nfs_wdata_cachep; mempool_t *nfs_wdata_mempool; @@ -872,10 +874,12 @@ static inline int flush_task_priority(int how) */ static void nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, int how) { struct inode *inode; + int flags; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -896,6 +900,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); + /* Set up the initial task struct. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); NFS_PROTO(inode)->write_setup(data, how); data->task.tk_priority = flush_task_priority(how); @@ -959,14 +966,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) list_del_init(&data->pages); data->pagevec[0] = page; - data->complete = nfs_writeback_done_partial; if (nbytes > wsize) { - nfs_write_rpcsetup(req, data, wsize, offset, how); + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + wsize, offset, how); offset += wsize; nbytes -= wsize; } else { - nfs_write_rpcsetup(req, data, nbytes, offset, how); + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + nbytes, offset, how); nbytes = 0; } nfs_execute_write(data); @@ -1020,9 +1028,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) } req = nfs_list_entry(data->pages.next); - data->complete = nfs_writeback_done_full; /* Set up the argument struct */ - nfs_write_rpcsetup(req, data, count, 0, how); + nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); nfs_execute_write(data); return 0; @@ -1066,8 +1073,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how) /* * Handle a write reply that flushed part of a page. */ -static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) +static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) { + struct nfs_write_data *data = calldata; struct nfs_page *req = data->req; struct page *page = req->wb_page; @@ -1077,11 +1085,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) req->wb_bytes, (long long)req_offset(req)); - if (status < 0) { + if (nfs_writeback_done(task, data) != 0) + return; + + if (task->tk_status < 0) { ClearPageUptodate(page); SetPageError(page); - req->wb_context->error = status; - dprintk(", error = %d\n", status); + req->wb_context->error = task->tk_status; + dprintk(", error = %d\n", task->tk_status); } else { #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (data->verf.committed < NFS_FILE_SYNC) { @@ -1102,6 +1113,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) nfs_writepage_release(req); } +static const struct rpc_call_ops nfs_write_partial_ops = { + .rpc_call_done = nfs_writeback_done_partial, + .rpc_release = nfs_writedata_release, +}; + /* * Handle a write reply that flushes a whole page. * @@ -1109,11 +1125,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) * writebacks since the page->count is kept > 1 for as long * as the page has a write request pending. */ -static void nfs_writeback_done_full(struct nfs_write_data *data, int status) +static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) { + struct nfs_write_data *data = calldata; struct nfs_page *req; struct page *page; + if (nfs_writeback_done(task, data) != 0) + return; + /* Update attributes as result of writeback. */ while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1126,13 +1146,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) req->wb_bytes, (long long)req_offset(req)); - if (status < 0) { + if (task->tk_status < 0) { ClearPageUptodate(page); SetPageError(page); - req->wb_context->error = status; + req->wb_context->error = task->tk_status; end_page_writeback(page); nfs_inode_remove_request(req); - dprintk(", error = %d\n", status); + dprintk(", error = %d\n", task->tk_status); goto next; } end_page_writeback(page); @@ -1154,18 +1174,28 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) } } +static const struct rpc_call_ops nfs_write_full_ops = { + .rpc_call_done = nfs_writeback_done_full, + .rpc_release = nfs_writedata_release, +}; + + /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task, void *calldata) +static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + int status; dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ + status = NFS_PROTO(data->inode)->write_done(task, data); + if (status != 0) + return status; nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -1210,7 +1240,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) argp->stable = NFS_FILE_SYNC; } rpc_restart_call(task); - return; + return -EAGAIN; } if (time_before(complain, jiffies)) { printk(KERN_WARNING @@ -1221,11 +1251,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } - - /* - * Process the nfs_page list - */ - data->complete(data, task->tk_status); + return 0; } @@ -1239,10 +1265,12 @@ void nfs_commit_release(void *wdata) * Set up the argument/result storage required for the RPC call. */ static void nfs_commit_rpcsetup(struct list_head *head, - struct nfs_write_data *data, int how) + struct nfs_write_data *data, + int how) { struct nfs_page *first; struct inode *inode; + int flags; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -1262,7 +1290,10 @@ static void nfs_commit_rpcsetup(struct list_head *head, data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); - + + /* Set up the initial task struct. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data); NFS_PROTO(inode)->commit_setup(data, how); data->task.tk_priority = flush_task_priority(how); @@ -1303,7 +1334,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) /* * COMMIT call returned */ -void nfs_commit_done(struct rpc_task *task, void *calldata) +static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; struct nfs_page *req; @@ -1312,6 +1343,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; + while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); @@ -1345,6 +1380,11 @@ void nfs_commit_done(struct rpc_task *task, void *calldata) } sub_page_state(nr_unstable,res); } + +static const struct rpc_call_ops nfs_commit_ops = { + .rpc_call_done = nfs_commit_done, + .rpc_release = nfs_commit_release, +}; #endif static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, -- cgit v1.2.3 From ec06c096edec0755534c7126f4caded69de131c2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS read code Same callback hierarchy inversion as for the NFS write calls. This patch is not strictly speaking needed by the O_DIRECT code, but avoids confusing differences between the asynchronous read and write code. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 17 ++++++++++++---- fs/nfs/nfs3proc.c | 27 ++++++-------------------- fs/nfs/nfs4proc.c | 33 +++++++++---------------------- fs/nfs/proc.c | 25 +++++------------------- fs/nfs/read.c | 58 +++++++++++++++++++++++++++++++++++++++---------------- 5 files changed, 74 insertions(+), 86 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fc07ce4885da..3f87a72bd137 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -218,14 +218,17 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int * until the RPCs complete. This could be long *after* we are woken up in * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). */ -static void nfs_direct_read_result(struct nfs_read_data *data, int status) +static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - if (likely(status >= 0)) + if (nfs_readpage_result(task, data) != 0) + return; + if (likely(task->tk_status >= 0)) atomic_add(data->res.count, &dreq->count); else - atomic_set(&dreq->error, status); + atomic_set(&dreq->error, task->tk_status); if (unlikely(atomic_dec_and_test(&dreq->complete))) { nfs_free_user_pages(dreq->pages, dreq->npages, 1); @@ -234,6 +237,11 @@ static void nfs_direct_read_result(struct nfs_read_data *data, int status) } } +static const struct rpc_call_ops nfs_read_direct_ops = { + .rpc_call_done = nfs_direct_read_result, + .rpc_release = nfs_readdata_release, +}; + /** * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read * @dreq: address of nfs_direct_req struct for this request @@ -280,10 +288,11 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, data->res.eof = 0; data->res.count = bytes; + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_read_direct_ops, data); NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long) inode; - data->complete = nfs_direct_read_result; lock_kernel(); rpc_execute(&data->task); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c4f7de8830e9..cf186f0d2b3b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -811,29 +811,18 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); -static void nfs3_read_done(struct rpc_task *task, void *calldata) +static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - if (nfs3_async_handle_jukebox(task, data->inode)) - return; + return -EAGAIN; /* Call back common NFS readpage processing */ if (task->tk_status >= 0) nfs_refresh_inode(data->inode, &data->fattr); - nfs_readpage_result(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_read_ops = { - .rpc_call_done = nfs3_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs3_proc_read_setup(struct nfs_read_data *data) +static void nfs3_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_READ], .rpc_argp = &data->args, @@ -841,12 +830,7 @@ nfs3_proc_read_setup(struct nfs_read_data *data) .rpc_cred = data->cred, }; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -935,6 +919,7 @@ struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef4dc315ecc2..bad1eae5608a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2345,47 +2345,31 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static void nfs4_read_done(struct rpc_task *task, void *calldata) +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(data->inode); - if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + if (nfs4_async_handle_error(task, server) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status > 0) - renew_lease(NFS_SERVER(inode), data->timestamp); - /* Call back common NFS readpage processing */ - nfs_readpage_result(task, calldata); + renew_lease(server, data->timestamp); + return 0; } -static const struct rpc_call_ops nfs4_read_ops = { - .rpc_call_done = nfs4_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs4_proc_read_setup(struct nfs_read_data *data) +static void nfs4_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], .rpc_argp = &data->args, .rpc_resp = &data->res, .rpc_cred = data->cred, }; - struct inode *inode = data->inode; - int flags; data->timestamp = jiffies; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -3617,6 +3601,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .pathconf = nfs4_proc_pathconf, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, + .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 608aa5932a1d..9dd85cac2df0 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -613,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); -static void nfs_read_done(struct rpc_task *task, void *calldata) +static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 @@ -625,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata) if (data->args.offset + data->args.count >= data->res.fattr->size) data->res.eof = 1; } - nfs_readpage_result(task, calldata); + return 0; } -static const struct rpc_call_ops nfs_read_ops = { - .rpc_call_done = nfs_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs_proc_read_setup(struct nfs_read_data *data) +static void nfs_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READ], .rpc_argp = &data->args, @@ -646,12 +635,7 @@ nfs_proc_read_setup(struct nfs_read_data *data) .rpc_cred = data->cred, }; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) @@ -720,6 +704,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs_decode_dirent, .read_setup = nfs_proc_read_setup, + .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ae3ddd24cf8f..2da255f0247f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -36,8 +36,8 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE static int nfs_pagein_one(struct list_head *, struct inode *); -static void nfs_readpage_result_partial(struct nfs_read_data *, int); -static void nfs_readpage_result_full(struct nfs_read_data *, int); +static const struct rpc_call_ops nfs_read_partial_ops; +static const struct rpc_call_ops nfs_read_full_ops; static kmem_cache_t *nfs_rdata_cachep; mempool_t *nfs_rdata_mempool; @@ -200,9 +200,11 @@ static void nfs_readpage_release(struct nfs_page *req) * Set up the NFS read request struct */ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset) { struct inode *inode; + int flags; data->req = req; data->inode = inode = req->wb_context->dentry->d_inode; @@ -220,6 +222,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.eof = 0; nfs_fattr_init(&data->fattr); + /* Set up the initial task struct. */ + flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long)inode; @@ -307,14 +312,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) list_del_init(&data->pages); data->pagevec[0] = page; - data->complete = nfs_readpage_result_partial; if (nbytes > rsize) { - nfs_read_rpcsetup(req, data, rsize, offset); + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + rsize, offset); offset += rsize; nbytes -= rsize; } else { - nfs_read_rpcsetup(req, data, nbytes, offset); + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + nbytes, offset); nbytes = 0; } nfs_execute_read(data); @@ -360,8 +366,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) } req = nfs_list_entry(data->pages.next); - data->complete = nfs_readpage_result_full; - nfs_read_rpcsetup(req, data, count, 0); + nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); nfs_execute_read(data); return 0; @@ -395,12 +400,15 @@ nfs_pagein_list(struct list_head *head, int rpages) /* * Handle a read reply that fills part of a page. */ -static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) +static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; struct nfs_page *req = data->req; struct page *page = req->wb_page; - if (status >= 0) { + if (nfs_readpage_result(task, data) != 0) + return; + if (task->tk_status >= 0) { unsigned int request = data->args.count; unsigned int result = data->res.count; @@ -419,20 +427,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) } } +static const struct rpc_call_ops nfs_read_partial_ops = { + .rpc_call_done = nfs_readpage_result_partial, + .rpc_release = nfs_readdata_release, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -static void nfs_readpage_result_full(struct nfs_read_data *data, int status) +static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; unsigned int count = data->res.count; + if (nfs_readpage_result(task, data) != 0) + return; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); struct page *page = req->wb_page; nfs_list_remove_request(req); - if (status >= 0) { + if (task->tk_status >= 0) { if (count < PAGE_CACHE_SIZE) { if (count < req->wb_bytes) memclear_highpage_flush(page, @@ -448,19 +464,27 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status) } } +static const struct rpc_call_ops nfs_read_full_ops = { + .rpc_call_done = nfs_readpage_result_full, + .rpc_release = nfs_readdata_release, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -void nfs_readpage_result(struct rpc_task *task, void *calldata) +int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; struct nfs_readargs *argp = &data->args; struct nfs_readres *resp = &data->res; - int status = task->tk_status; + int status; dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", - task->tk_pid, status); + task->tk_pid, task->tk_status); + + status = NFS_PROTO(data->inode)->read_done(task, data); + if (status != 0) + return status; nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); @@ -474,14 +498,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata) argp->pgbase += resp->count; argp->count -= resp->count; rpc_restart_call(task); - return; + return -EAGAIN; } task->tk_status = -EIO; } spin_lock(&data->inode->i_lock); NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&data->inode->i_lock); - data->complete(data, status); + return 0; } /* -- cgit v1.2.3 From b8a32e2b8b7fefff994c89d398b6ac920a195b43 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:28 -0500 Subject: NFS: clean up NFS client's a_ops->direct_IO method The NFS client's a_ops->direct_IO method, nfs_direct_IO, is required to be present to allow NFS files to be opened with O_DIRECT, but is never called because the NFS client shunts reads and writes to files opened with O_DIRECT directly to its own routines. Gut the nfs_direct_IO function. This eliminates the only part of the NFS client's direct I/O path that requires support for multi-segment iovs, allowing further simplification in subsequent patches. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 70 +++++++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 47 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3f87a72bd137..8096d326bd79 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -78,6 +78,29 @@ struct nfs_direct_req { }; +/** + * nfs_direct_IO - NFS address space operation for direct I/O + * @rw: direction (read or write) + * @iocb: target I/O control block + * @iov: array of vectors that define I/O buffer + * @pos: offset in file to begin the operation + * @nr_segs: size of iovec array + * + * The presence of this routine in the address space ops vector means + * the NFS client supports direct I/O. However, we shunt off direct + * read and write requests before the VFS gets them, so this method + * should never be called. + */ +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +{ + struct dentry *dentry = iocb->ki_filp->f_dentry; + + dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", + dentry->d_name.name, (long long) pos, nr_segs); + + return -EINVAL; +} + /** * nfs_get_user_pages - find and set up pages underlying user's buffer * rw: direction (read or write) @@ -605,53 +628,6 @@ static ssize_t nfs_direct_write(struct inode *inode, return tot_bytes; } -/** - * nfs_direct_IO - NFS address space operation for direct I/O - * rw: direction (read or write) - * @iocb: target I/O control block - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * - */ -ssize_t -nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t file_offset, unsigned long nr_segs) -{ - ssize_t result = -EINVAL; - struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx; - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; - - /* - * No support for async yet - */ - if (!is_sync_kiocb(iocb)) - return result; - - ctx = (struct nfs_open_context *)file->private_data; - switch (rw) { - case READ: - dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", - dentry->d_name.name, file_offset, nr_segs); - - result = nfs_direct_read(inode, ctx, iov, - file_offset, nr_segs); - break; - case WRITE: - dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n", - dentry->d_name.name, file_offset, nr_segs); - - result = nfs_direct_write(inode, ctx, iov, - file_offset, nr_segs); - break; - default: - break; - } - return result; -} - /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block -- cgit v1.2.3 From d4cc948ba97980c55a308eab167a695109796456 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:28 -0500 Subject: NFS: update comments and function definitions in fs/nfs/direct.c Update to latest coding style standards. Remove block comments on statically defined functions, and place function definitions all on one line. Test plan: Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 125 +++++++------------------------------------------------- 1 file changed, 15 insertions(+), 110 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8096d326bd79..d38c3dc052a7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -101,16 +101,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -/** - * nfs_get_user_pages - find and set up pages underlying user's buffer - * rw: direction (read or write) - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * @pages: returned array of page struct pointers underlying user's buffer - */ -static inline int -nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, - struct page ***pages) +static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) { int result = -ENOMEM; unsigned long page_count; @@ -147,14 +138,7 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, return result; } -/** - * nfs_free_user_pages - tear down page struct array - * @pages: array of page struct pointers underlying target buffer - * @npages: number of pages in the array - * @do_dirty: dirty the pages as we release them - */ -static void -nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) { int i; for (i = 0; i < npages; i++) { @@ -166,22 +150,13 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty) kfree(pages); } -/** - * nfs_direct_req_release - release nfs_direct_req structure for direct read - * @kref: kref object embedded in an nfs_direct_req structure - * - */ static void nfs_direct_req_release(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); kmem_cache_free(nfs_direct_cachep, dreq); } -/** - * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read - * @count: count of bytes for the read request - * @rsize: local rsize setting - * +/* * Note we also set the number of requests we have in the dreq when we are * done. This prevents races with I/O completion so we will always wait * until all requests have been dispatched and completed. @@ -232,11 +207,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int return dreq; } -/** - * nfs_direct_read_result - handle a read reply for a direct read request - * @data: address of NFS READ operation control block - * @status: status of this NFS READ operation - * +/* * We must hold a reference to all the pages in this direct read request * until the RPCs complete. This could be long *after* we are woken up in * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). @@ -265,21 +236,11 @@ static const struct rpc_call_ops nfs_read_direct_ops = { .rpc_release = nfs_readdata_release, }; -/** - * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read - * @dreq: address of nfs_direct_req struct for this request - * @inode: target inode - * @ctx: target file open context - * @user_addr: starting address of this segment of user's buffer - * @count: size of this segment - * @file_offset: offset in file to begin the operation - * +/* * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, - struct inode *inode, struct nfs_open_context *ctx, - unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) { struct list_head *list = &dreq->list; struct page **pages = dreq->pages; @@ -337,11 +298,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, } while (count != 0); } -/** - * nfs_direct_read_wait - wait for I/O completion for direct reads - * @dreq: request on which we are to wait - * @intr: whether or not this wait can be interrupted - * +/* * Collects and returns the final error value/byte-count. */ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) @@ -364,22 +321,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) return (ssize_t) result; } -/** - * nfs_direct_read_seg - Read in one iov segment. Generate separate - * read RPCs for each "rsize" bytes. - * @inode: target inode - * @ctx: target file open context - * @user_addr: starting address of this segment of user's buffer - * @count: size of this segment - * @file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * @nr_pages: number of pages in the array - * - */ -static ssize_t nfs_direct_read_seg(struct inode *inode, - struct nfs_open_context *ctx, unsigned long user_addr, - size_t count, loff_t file_offset, struct page **pages, - unsigned int nr_pages) +static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; @@ -404,22 +346,11 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, return result; } -/** - * nfs_direct_read - For each iov segment, map the user's buffer - * then generate read RPCs. - * @inode: target inode - * @ctx: target file open context - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * +/* * We've already pushed out any non-direct writes so that this read * will see them when we read from the server. */ -static ssize_t -nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, - const struct iovec *iov, loff_t file_offset, - unsigned long nr_segs) +static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { ssize_t tot_bytes = 0; unsigned long seg = 0; @@ -457,21 +388,7 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, return tot_bytes; } -/** - * nfs_direct_write_seg - Write out one iov segment. Generate separate - * write RPCs for each "wsize" bytes, then commit. - * @inode: target inode - * @ctx: target file open context - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * nr_pages: size of pages array - */ -static ssize_t nfs_direct_write_seg(struct inode *inode, - struct nfs_open_context *ctx, unsigned long user_addr, - size_t count, loff_t file_offset, struct page **pages, - int nr_pages) +static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { const unsigned int wsize = NFS_SERVER(inode)->wsize; size_t request; @@ -573,22 +490,12 @@ sync_retry: goto retry; } -/** - * nfs_direct_write - For each iov segment, map the user's buffer - * then generate write and commit RPCs. - * @inode: target inode - * @ctx: target file open context - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * +/* * Upon return, generic_file_direct_IO invalidates any cached pages * that non-direct readers might access, so they will pick up these * writes immediately. */ -static ssize_t nfs_direct_write(struct inode *inode, - struct nfs_open_context *ctx, const struct iovec *iov, - loff_t file_offset, unsigned long nr_segs) +static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { ssize_t tot_bytes = 0; unsigned long seg = 0; @@ -649,8 +556,7 @@ static ssize_t nfs_direct_write(struct inode *inode, * client must read the updated atime from the server back into its * cache. */ -ssize_t -nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; loff_t *ppos = &iocb->ki_pos; @@ -717,8 +623,7 @@ out: * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t -nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; struct file *file = iocb->ki_filp; -- cgit v1.2.3 From 5dd602f20688e08c85ac91e0451c4e6321ed25d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:29 -0500 Subject: NFS: use size_t type for holding rsize bytes in NFS O_DIRECT read path size_t is used for holding byte counts, so use it for variables storing rsize. Note that the write path will be updated as we add support for async O_DIRECT writes. Test plan: Need to verify that existing comparisons against new size_t variables behave correctly. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d38c3dc052a7..8f5d2dfd5a8a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -161,7 +161,7 @@ static void nfs_direct_req_release(struct kref *kref) * done. This prevents races with I/O completion so we will always wait * until all requests have been dispatched and completed. */ -static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize) +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) { struct list_head *list; struct nfs_direct_req *dreq; @@ -244,14 +244,14 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode * { struct list_head *list = &dreq->list; struct page **pages = dreq->pages; + size_t rsize = NFS_SERVER(inode)->rsize; unsigned int curpage, pgbase; - unsigned int rsize = NFS_SERVER(inode)->rsize; curpage = 0; pgbase = user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; - unsigned int bytes; + size_t bytes; bytes = rsize; if (count < rsize) -- cgit v1.2.3 From 0cdd80d07fb0f558dfdb30f6e0b9905f5e5475f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:29 -0500 Subject: NFS: remove support for multi-segment iovs in the direct read path Eliminate the persistent use of automatic storage in all parts of the NFS client's direct read path to pave the way for introducing support for aio against files opened with the O_DIRECT flag. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 66 +++++++++++++-------------------------------------------- 1 file changed, 15 insertions(+), 51 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8f5d2dfd5a8a..6ecde9602f99 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -321,7 +321,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) return (ssize_t) result; } -static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; @@ -346,48 +346,6 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, struct nfs_open_context return result; } -/* - * We've already pushed out any non-direct writes so that this read - * will see them when we read from the server. - */ -static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) -{ - ssize_t tot_bytes = 0; - unsigned long seg = 0; - - while ((seg < nr_segs) && (tot_bytes >= 0)) { - ssize_t result; - int page_count; - struct page **pages; - const struct iovec *vec = &iov[seg++]; - unsigned long user_addr = (unsigned long) vec->iov_base; - size_t size = vec->iov_len; - - page_count = nfs_get_user_pages(READ, user_addr, size, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - if (tot_bytes > 0) - break; - return page_count; - } - - result = nfs_direct_read_seg(inode, ctx, user_addr, size, - file_offset, pages, page_count); - - if (result <= 0) { - if (tot_bytes > 0) - break; - return result; - } - tot_bytes += result; - file_offset += result; - if (result < size) - break; - } - - return tot_bytes; -} - static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { const unsigned int wsize = NFS_SERVER(inode)->wsize; @@ -559,16 +517,13 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; - loff_t *ppos = &iocb->ki_pos; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; struct nfs_open_context *ctx = (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = buf, - .iov_len = count, - }; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, @@ -580,7 +535,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (count < 0) goto out; retval = -EFAULT; - if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len)) + if (!access_ok(VERIFY_WRITE, buf, count)) goto out; retval = 0; if (!count) @@ -590,9 +545,18 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (retval) goto out; - retval = nfs_direct_read(inode, ctx, &iov, pos, 1); + page_count = nfs_get_user_pages(READ, (unsigned long) buf, + count, &pages); + if (page_count < 0) { + nfs_free_user_pages(pages, 0, 0); + retval = page_count; + goto out; + } + + retval = nfs_direct_read(inode, ctx, (unsigned long) buf, count, pos, + pages, page_count); if (retval > 0) - *ppos = pos + retval; + iocb->ki_pos = pos + retval; out: return retval; -- cgit v1.2.3 From 99514f8fdda2beef1ca922b7f9d89c1a2c57fec0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:30 -0500 Subject: NFS: make iocb available everywhere in direct read path Pass the iocb argument all the way down to the direct read request scheduler, and make it available in nfs_direct_read_result. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6ecde9602f99..6cbddc51acbc 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -68,6 +68,8 @@ static kmem_cache_t *nfs_direct_cachep; struct nfs_direct_req { struct kref kref; /* release manager */ struct list_head list; /* nfs_read_data structs */ + struct file * filp; /* file descriptor */ + struct kiocb * iocb; /* controlling i/o request */ wait_queue_head_t wait; /* wait for i/o completion */ struct inode * inode; /* target file of I/O */ struct page ** pages; /* pages in our buffer */ @@ -240,8 +242,12 @@ static const struct rpc_call_ops nfs_read_direct_ops = { * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) { + struct file *file = dreq->filp; + struct inode *inode = file->f_mapping->host; + struct nfs_open_context *ctx = (struct nfs_open_context *) + file->private_data; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; size_t rsize = NFS_SERVER(inode)->rsize; @@ -321,10 +327,11 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) return (ssize_t) result; } -static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; @@ -335,11 +342,11 @@ static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx dreq->pages = pages; dreq->npages = nr_pages; dreq->inode = inode; + dreq->filp = iocb->ki_filp; nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, - file_offset); + nfs_direct_read_schedule(dreq, user_addr, count, file_offset); result = nfs_direct_read_wait(dreq, clnt->cl_intr); rpc_clnt_sigunmask(clnt, &oldset); @@ -520,10 +527,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, int page_count; struct page **pages; struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx = - (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, @@ -553,7 +557,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, goto out; } - retval = nfs_direct_read(inode, ctx, (unsigned long) buf, count, pos, + retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, pages, page_count); if (retval > 0) iocb->ki_pos = pos + retval; -- cgit v1.2.3 From 487b83723ed4d4eaafd5109f36560da4f15c6578 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:30 -0500 Subject: NFS: support EIOCBQUEUED return in direct read path For async iocb's, the NFS direct read path should return EIOCBQUEUED and call aio_complete when all the requested reads are finished. The synchronous part of the NFS direct read path behaves exactly as it was before. Test plan: aio-stress with "-O". OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6cbddc51acbc..094456c3df90 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -177,6 +177,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) kref_init(&dreq->kref); init_waitqueue_head(&dreq->wait); INIT_LIST_HEAD(&dreq->list); + dreq->iocb = NULL; atomic_set(&dreq->count, 0); atomic_set(&dreq->error, 0); @@ -213,6 +214,10 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) * We must hold a reference to all the pages in this direct read request * until the RPCs complete. This could be long *after* we are woken up in * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). + * + * In addition, synchronous I/O uses a stack-allocated iocb. Thus we + * can't trust the iocb is still valid here if this is a synchronous + * request. If the waiter is woken prematurely, the iocb is long gone. */ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { @@ -228,7 +233,13 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (unlikely(atomic_dec_and_test(&dreq->complete))) { nfs_free_user_pages(dreq->pages, dreq->npages, 1); - wake_up(&dreq->wait); + if (dreq->iocb) { + long res = atomic_read(&dreq->error); + if (!res) + res = atomic_read(&dreq->count); + aio_complete(dreq->iocb, res, 0); + } else + wake_up(&dreq->wait); kref_put(&dreq->kref, nfs_direct_req_release); } } @@ -309,8 +320,13 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long */ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) { - int result = 0; + int result = -EIOCBQUEUED; + + /* Async requests don't wait here */ + if (dreq->iocb) + goto out; + result = 0; if (intr) { result = wait_event_interruptible(dreq->wait, (atomic_read(&dreq->complete) == 0)); @@ -323,6 +339,7 @@ static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) if (!result) result = atomic_read(&dreq->count); +out: kref_put(&dreq->kref, nfs_direct_req_release); return (ssize_t) result; } @@ -343,6 +360,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size dreq->npages = nr_pages; dreq->inode = inode; dreq->filp = iocb->ki_filp; + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); @@ -534,8 +553,6 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); - if (!is_sync_kiocb(iocb)) - goto out; if (count < 0) goto out; retval = -EFAULT; -- cgit v1.2.3 From bc0fb201b34b12e2d16e8cbd5bb078c1db936304 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:31 -0500 Subject: NFS: create common routine for waiting for direct I/O to complete We're about to add asynchrony to the NFS direct write path. Begin by abstracting out the common pieces in the read path. The first piece is nfs_direct_read_wait, which works the same whether the process is waiting for a read or a write. Test plan: Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 57 ++++++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 31 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 094456c3df90..2593f47eaff0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -158,6 +158,30 @@ static void nfs_direct_req_release(struct kref *kref) kmem_cache_free(nfs_direct_cachep, dreq); } +/* + * Collects and returns the final error value/byte-count. + */ +static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) +{ + int result = -EIOCBQUEUED; + + /* Async requests don't wait here */ + if (dreq->iocb) + goto out; + + result = wait_event_interruptible(dreq->wait, + (atomic_read(&dreq->complete) == 0)); + + if (!result) + result = atomic_read(&dreq->error); + if (!result) + result = atomic_read(&dreq->count); + +out: + kref_put(&dreq->kref, nfs_direct_req_release); + return (ssize_t) result; +} + /* * Note we also set the number of requests we have in the dreq when we are * done. This prevents races with I/O completion so we will always wait @@ -213,7 +237,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) /* * We must hold a reference to all the pages in this direct read request * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). * * In addition, synchronous I/O uses a stack-allocated iocb. Thus we * can't trust the iocb is still valid here if this is a synchronous @@ -315,35 +339,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long } while (count != 0); } -/* - * Collects and returns the final error value/byte-count. - */ -static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) -{ - int result = -EIOCBQUEUED; - - /* Async requests don't wait here */ - if (dreq->iocb) - goto out; - - result = 0; - if (intr) { - result = wait_event_interruptible(dreq->wait, - (atomic_read(&dreq->complete) == 0)); - } else { - wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); - } - - if (!result) - result = atomic_read(&dreq->error); - if (!result) - result = atomic_read(&dreq->count); - -out: - kref_put(&dreq->kref, nfs_direct_req_release); - return (ssize_t) result; -} - static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) { ssize_t result; @@ -366,7 +361,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); nfs_direct_read_schedule(dreq, user_addr, count, file_offset); - result = nfs_direct_read_wait(dreq, clnt->cl_intr); + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; -- cgit v1.2.3 From 93619e5989173614bef0013b0bb8a3fe3dbd5a95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:31 -0500 Subject: NFS: create common routine for allocating nfs_direct_req Factor out a small common piece of the path that allocate nfs_direct_req structures. Test plan: Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2593f47eaff0..489f736d0f5d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -152,6 +152,24 @@ static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) kfree(pages); } +static inline struct nfs_direct_req *nfs_direct_req_alloc(void) +{ + struct nfs_direct_req *dreq; + + dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + if (!dreq) + return NULL; + + kref_init(&dreq->kref); + init_waitqueue_head(&dreq->wait); + INIT_LIST_HEAD(&dreq->list); + dreq->iocb = NULL; + atomic_set(&dreq->count, 0); + atomic_set(&dreq->error, 0); + + return dreq; +} + static void nfs_direct_req_release(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); @@ -194,17 +212,10 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) unsigned int reads = 0; unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + dreq = nfs_direct_req_alloc(); if (!dreq) return NULL; - kref_init(&dreq->kref); - init_waitqueue_head(&dreq->wait); - INIT_LIST_HEAD(&dreq->list); - dreq->iocb = NULL; - atomic_set(&dreq->count, 0); - atomic_set(&dreq->error, 0); - list = &dreq->list; for(;;) { struct nfs_read_data *data = nfs_readdata_alloc(rpages); -- cgit v1.2.3 From 63ab46abc70b01cb0711301f5ddb08c1c0bb9b1c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:31 -0500 Subject: NFS: create common routine for handling direct I/O completion Factor out the common piece of completing an NFS direct I/O request. Test plan: Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 489f736d0f5d..4df21ce28e17 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -200,6 +200,30 @@ out: return (ssize_t) result; } +/* + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). + * + * In addition, synchronous I/O uses a stack-allocated iocb. Thus we + * can't trust the iocb is still valid here if this is a synchronous + * request. If the waiter is woken prematurely, the iocb is long gone. + */ +static void nfs_direct_complete(struct nfs_direct_req *dreq) +{ + nfs_free_user_pages(dreq->pages, dreq->npages, 1); + + if (dreq->iocb) { + long res = atomic_read(&dreq->error); + if (!res) + res = atomic_read(&dreq->count); + aio_complete(dreq->iocb, res, 0); + } else + wake_up(&dreq->wait); + + kref_put(&dreq->kref, nfs_direct_req_release); +} + /* * Note we also set the number of requests we have in the dreq when we are * done. This prevents races with I/O completion so we will always wait @@ -245,15 +269,6 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) return dreq; } -/* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). - * - * In addition, synchronous I/O uses a stack-allocated iocb. Thus we - * can't trust the iocb is still valid here if this is a synchronous - * request. If the waiter is woken prematurely, the iocb is long gone. - */ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; @@ -266,17 +281,8 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) else atomic_set(&dreq->error, task->tk_status); - if (unlikely(atomic_dec_and_test(&dreq->complete))) { - nfs_free_user_pages(dreq->pages, dreq->npages, 1); - if (dreq->iocb) { - long res = atomic_read(&dreq->error); - if (!res) - res = atomic_read(&dreq->count); - aio_complete(dreq->iocb, res, 0); - } else - wake_up(&dreq->wait); - kref_put(&dreq->kref, nfs_direct_req_release); - } + if (unlikely(atomic_dec_and_test(&dreq->complete))) + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_read_direct_ops = { -- cgit v1.2.3 From 462d5b3296b56289efec426499a83faad4c08d9e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:32 -0500 Subject: NFS: make direct write path generate write requests concurrently Duplicate infrastructure from direct read path that will allow write path to generate multiple write requests concurrently. This will enable us to add support for aio in this path. Temporarily we will lose the ability to do UNSTABLE writes followed by a COMMIT in the direct write path. However, all applications I am aware of that use NFS O_DIRECT currently write in relatively small chunks, so this should not be inconvenient in any way. Test plan: Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 240 +++++++++++++++++++++++++++++++++++++------------------- fs/nfs/write.c | 3 +- 2 files changed, 160 insertions(+), 83 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4df21ce28e17..dea3239cdded 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -384,106 +384,185 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size return result; } -static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) { - const unsigned int wsize = NFS_SERVER(inode)->wsize; - size_t request; - int curpage, need_commit; - ssize_t result, tot_bytes; - struct nfs_writeverf first_verf; - struct nfs_write_data *wdata; - - wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); - if (!wdata) - return -ENOMEM; + struct list_head *list; + struct nfs_direct_req *dreq; + unsigned int writes = 0; + unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - wdata->inode = inode; - wdata->cred = ctx->cred; - wdata->args.fh = NFS_FH(inode); - wdata->args.context = ctx; - wdata->args.stable = NFS_UNSTABLE; - if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) - wdata->args.stable = NFS_FILE_SYNC; - wdata->res.fattr = &wdata->fattr; - wdata->res.verf = &wdata->verf; + dreq = nfs_direct_req_alloc(); + if (!dreq) + return NULL; + + list = &dreq->list; + for(;;) { + struct nfs_write_data *data = nfs_writedata_alloc(wpages); + + if (unlikely(!data)) { + while (!list_empty(list)) { + data = list_entry(list->next, + struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_free(data); + } + kref_put(&dreq->kref, nfs_direct_req_release); + return NULL; + } + + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, list); + + data->req = (struct nfs_page *) dreq; + writes++; + if (nbytes <= wsize) + break; + nbytes -= wsize; + } + kref_get(&dreq->kref); + atomic_set(&dreq->complete, writes); + return dreq; +} + +/* + * Collects and returns the final error value/byte-count. + */ +static ssize_t nfs_direct_write_wait(struct nfs_direct_req *dreq, int intr) +{ + int result = 0; + + if (intr) { + result = wait_event_interruptible(dreq->wait, + (atomic_read(&dreq->complete) == 0)); + } else { + wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); + } + + if (!result) + result = atomic_read(&dreq->error); + if (!result) + result = atomic_read(&dreq->count); + + kref_put(&dreq->kref, nfs_direct_req_release); + return (ssize_t) result; +} + +static void nfs_direct_write_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = task->tk_status; + + if (nfs_writeback_done(task, data) != 0) + return; + /* If the server fell back to an UNSTABLE write, it's an error. */ + if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) + status = -EIO; + + if (likely(status >= 0)) + atomic_add(data->res.count, &dreq->count); + else + atomic_set(&dreq->error, status); + + if (unlikely(atomic_dec_and_test(&dreq->complete))) + nfs_direct_complete(dreq); +} + +static const struct rpc_call_ops nfs_write_direct_ops = { + .rpc_call_done = nfs_direct_write_result, + .rpc_release = nfs_writedata_release, +}; + +/* + * For each nfs_write_data struct that was allocated on the list, dispatch + * an NFS WRITE operation + * + * XXX: For now, support only FILE_SYNC writes. Later we may add + * support for UNSTABLE + COMMIT. + */ +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) +{ + struct list_head *list = &dreq->list; + struct page **pages = dreq->pages; + size_t wsize = NFS_SERVER(inode)->wsize; + unsigned int curpage, pgbase; - nfs_begin_data_update(inode); -retry: - need_commit = 0; - tot_bytes = 0; curpage = 0; - request = count; - wdata->args.pgbase = user_addr & ~PAGE_MASK; - wdata->args.offset = file_offset; + pgbase = user_addr & ~PAGE_MASK; do { - wdata->args.count = request; - if (wdata->args.count > wsize) - wdata->args.count = wsize; - wdata->args.pages = &pages[curpage]; + struct nfs_write_data *data; + size_t bytes; + + bytes = wsize; + if (count < wsize) + bytes = count; + + data = list_entry(list->next, struct nfs_write_data, pages); + list_del_init(&data->pages); + + data->inode = inode; + data->cred = ctx->cred; + data->args.fh = NFS_FH(inode); + data->args.context = ctx; + data->args.offset = file_offset; + data->args.pgbase = pgbase; + data->args.pages = &pages[curpage]; + data->args.count = bytes; + data->res.fattr = &data->fattr; + data->res.count = bytes; + + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_write_direct_ops, data); + NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); - dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", - wdata->args.count, (long long) wdata->args.offset, - user_addr + tot_bytes, wdata->args.pgbase, curpage); + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long) inode; lock_kernel(); - result = NFS_PROTO(inode)->write(wdata); + rpc_execute(&data->task); unlock_kernel(); - if (result <= 0) { - if (tot_bytes > 0) - break; - goto out; - } + dfprintk(VFS, "NFS: %4d initiated direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + bytes, + (unsigned long long)data->args.offset); - if (tot_bytes == 0) - memcpy(&first_verf.verifier, &wdata->verf.verifier, - sizeof(first_verf.verifier)); - if (wdata->verf.committed != NFS_FILE_SYNC) { - need_commit = 1; - if (memcmp(&first_verf.verifier, &wdata->verf.verifier, - sizeof(first_verf.verifier))) - goto sync_retry; - } + file_offset += bytes; + pgbase += bytes; + curpage += pgbase >> PAGE_SHIFT; + pgbase &= ~PAGE_MASK; - tot_bytes += result; + count -= bytes; + } while (count != 0); +} - /* in case of a short write: stop now, let the app recover */ - if (result < wdata->args.count) - break; +static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +{ + ssize_t result; + sigset_t oldset; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_direct_req *dreq; - wdata->args.offset += result; - wdata->args.pgbase += result; - curpage += wdata->args.pgbase >> PAGE_SHIFT; - wdata->args.pgbase &= ~PAGE_MASK; - request -= result; - } while (request != 0); + dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize); + if (!dreq) + return -ENOMEM; - /* - * Commit data written so far, even in the event of an error - */ - if (need_commit) { - wdata->args.count = tot_bytes; - wdata->args.offset = file_offset; + dreq->pages = pages; + dreq->npages = nr_pages; - lock_kernel(); - result = NFS_PROTO(inode)->commit(wdata); - unlock_kernel(); + nfs_begin_data_update(inode); - if (result < 0 || memcmp(&first_verf.verifier, - &wdata->verf.verifier, - sizeof(first_verf.verifier)) != 0) - goto sync_retry; - } - result = tot_bytes; + rpc_clnt_sigmask(clnt, &oldset); + nfs_direct_write_schedule(dreq, inode, ctx, user_addr, count, + file_offset); + result = nfs_direct_write_wait(dreq, clnt->cl_intr); + rpc_clnt_sigunmask(clnt, &oldset); -out: nfs_end_data_update(inode); - nfs_writedata_free(wdata); - return result; -sync_retry: - wdata->args.stable = NFS_FILE_SYNC; - goto retry; + return result; } /* @@ -515,7 +594,6 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); result = nfs_direct_write_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); - nfs_free_user_pages(pages, page_count, 0); if (result <= 0) { if (tot_bytes > 0) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5912274ff1a1..875f5b060533 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -77,7 +77,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct inode *, struct page *, unsigned int, unsigned int); -static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, @@ -1183,7 +1182,7 @@ static const struct rpc_call_ops nfs_write_full_ops = { /* * This function is called when the WRITE call is complete. */ -static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; -- cgit v1.2.3 From 47989d7454398827500d0e73766270986a3b488f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:32 -0500 Subject: NFS: remove support for multi-segment iovs in the direct write path Eliminate the persistent use of automatic storage in all parts of the NFS client's direct write path to pave the way for introducing support for aio against files opened with the O_DIRECT flag. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 68 +++++++++++++++------------------------------------------ 1 file changed, 17 insertions(+), 51 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index dea3239cdded..9a7d45907054 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -510,6 +510,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode data->args.count = bytes; data->res.fattr = &data->fattr; data->res.count = bytes; + data->res.verf = &data->verf; rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs_write_direct_ops, data); @@ -538,7 +539,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode } while (count != 0); } -static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { ssize_t result; sigset_t oldset; @@ -552,6 +553,8 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context dreq->pages = pages; dreq->npages = nr_pages; + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); + nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); @@ -565,50 +568,6 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context return result; } -/* - * Upon return, generic_file_direct_IO invalidates any cached pages - * that non-direct readers might access, so they will pick up these - * writes immediately. - */ -static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) -{ - ssize_t tot_bytes = 0; - unsigned long seg = 0; - - while ((seg < nr_segs) && (tot_bytes >= 0)) { - ssize_t result; - int page_count; - struct page **pages; - const struct iovec *vec = &iov[seg++]; - unsigned long user_addr = (unsigned long) vec->iov_base; - size_t size = vec->iov_len; - - page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - if (tot_bytes > 0) - break; - return page_count; - } - - nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); - result = nfs_direct_write_seg(inode, ctx, user_addr, size, - file_offset, pages, page_count); - - if (result <= 0) { - if (tot_bytes > 0) - break; - return result; - } - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); - tot_bytes += result; - file_offset += result; - if (result < size) - break; - } - return tot_bytes; -} - /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block @@ -701,14 +660,13 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; struct nfs_open_context *ctx = (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = (char __user *)buf, - }; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, @@ -729,17 +687,25 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t retval = 0; if (!count) goto out; - iov.iov_len = count, retval = -EFAULT; - if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + if (!access_ok(VERIFY_READ, buf, count)) goto out; retval = nfs_sync_mapping(mapping); if (retval) goto out; - retval = nfs_direct_write(inode, ctx, &iov, pos, 1); + page_count = nfs_get_user_pages(WRITE, (unsigned long) buf, + count, &pages); + if (page_count < 0) { + nfs_free_user_pages(pages, 0, 0); + retval = page_count; + goto out; + } + + retval = nfs_direct_write(inode, ctx, (unsigned long) buf, count, + pos, pages, page_count); if (mapping->nrpages) invalidate_inode_pages2(mapping); if (retval > 0) -- cgit v1.2.3 From c89f2ee5f9223b864725f7344f24a037dfa76568 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:33 -0500 Subject: NFS: make iocb available everywhere in direct write path Pass the iocb argument all the way down to the direct write request scheduler, and make it available in nfs_direct_write_result. Test plan: Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 46 ++++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9a7d45907054..9d57a299824c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -424,29 +424,6 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize return dreq; } -/* - * Collects and returns the final error value/byte-count. - */ -static ssize_t nfs_direct_write_wait(struct nfs_direct_req *dreq, int intr) -{ - int result = 0; - - if (intr) { - result = wait_event_interruptible(dreq->wait, - (atomic_read(&dreq->complete) == 0)); - } else { - wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); - } - - if (!result) - result = atomic_read(&dreq->error); - if (!result) - result = atomic_read(&dreq->count); - - kref_put(&dreq->kref, nfs_direct_req_release); - return (ssize_t) result; -} - static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -480,8 +457,12 @@ static const struct rpc_call_ops nfs_write_direct_ops = { * XXX: For now, support only FILE_SYNC writes. Later we may add * support for UNSTABLE + COMMIT. */ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) { + struct file *file = dreq->filp; + struct inode *inode = file->f_mapping->host; + struct nfs_open_context *ctx = (struct nfs_open_context *) + file->private_data; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; size_t wsize = NFS_SERVER(inode)->wsize; @@ -539,10 +520,11 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode } while (count != 0); } -static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { ssize_t result; sigset_t oldset; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; @@ -552,15 +534,18 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; + dreq->filp = iocb->ki_filp; + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, inode, ctx, user_addr, count, - file_offset); - result = nfs_direct_write_wait(dreq, clnt->cl_intr); + nfs_direct_write_schedule(dreq, user_addr, count, file_offset); + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); nfs_end_data_update(inode); @@ -663,10 +648,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t int page_count; struct page **pages; struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx = - (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, @@ -704,7 +686,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t goto out; } - retval = nfs_direct_write(inode, ctx, (unsigned long) buf, count, + retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos, pages, page_count); if (mapping->nrpages) invalidate_inode_pages2(mapping); -- cgit v1.2.3 From 9eafa8cc521b489f205bf7b0634c99e34e046606 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:33 -0500 Subject: NFS: support EIOCBQUEUED return in direct write path For async iocb's, the NFS direct write path now returns EIOCBQUEUED, and calls aio_complete when all the requested writes are finished. The synchronous part of the NFS direct write path behaves exactly as it was before. Shared mapped NFS files will have some coherency difficulties when accessed concurrently with aio+dio. Will need to explore how this is handled in the local file system case. Test plan: aio-stress with "-O". OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9d57a299824c..df86e526702f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -441,8 +441,10 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) else atomic_set(&dreq->error, status); - if (unlikely(atomic_dec_and_test(&dreq->complete))) + if (unlikely(atomic_dec_and_test(&dreq->complete))) { + nfs_end_data_update(data->inode); nfs_direct_complete(dreq); + } } static const struct rpc_call_ops nfs_write_direct_ops = { @@ -548,8 +550,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); - nfs_end_data_update(inode); - return result; } @@ -655,10 +655,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); - retval = -EINVAL; - if (!is_sync_kiocb(iocb)) - goto out; - retval = generic_write_checks(file, &pos, &count, 0); if (retval) goto out; @@ -688,8 +684,18 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos, pages, page_count); + + /* + * XXX: nfs_end_data_update() already ensures this file's + * cached data is subsequently invalidated. Do we really + * need to call invalidate_inode_pages2() again here? + * + * For aio writes, this invalidation will almost certainly + * occur before the writes complete. Kind of racey. + */ if (mapping->nrpages) invalidate_inode_pages2(mapping); + if (retval > 0) iocb->ki_pos = pos + retval; -- cgit v1.2.3 From 88467055f7654302c12df74e5fe4d12516656a39 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:34 -0500 Subject: NFS: clean up comments and tab damage in direct.c Clean up tab damage and comments. Replace "file_offset" with more commonly used "pos". Test plan: Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 54 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index df86e526702f..bcbc213b4033 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -7,11 +7,11 @@ * * There are important applications whose performance or correctness * depends on uncached access to file data. Database clusters - * (multiple copies of the same instance running on separate hosts) + * (multiple copies of the same instance running on separate hosts) * implement their own cache coherency protocol that subsumes file - * system cache protocols. Applications that process datasets - * considerably larger than the client's memory do not always benefit - * from a local cache. A streaming video server, for instance, has no + * system cache protocols. Applications that process datasets + * considerably larger than the client's memory do not always benefit + * from a local cache. A streaming video server, for instance, has no * need to cache the contents of a file. * * When an application requests uncached I/O, all read and write requests @@ -34,6 +34,7 @@ * 08 Jun 2003 Port to 2.5 APIs --cel * 31 Mar 2004 Handle direct I/O without VFS support --cel * 15 Sep 2004 Parallel async reads --cel + * 04 May 2005 support O_DIRECT with aio --cel * */ @@ -67,11 +68,11 @@ static kmem_cache_t *nfs_direct_cachep; */ struct nfs_direct_req { struct kref kref; /* release manager */ - struct list_head list; /* nfs_read_data structs */ + struct list_head list; /* nfs_read/write_data structs */ struct file * filp; /* file descriptor */ struct kiocb * iocb; /* controlling i/o request */ wait_queue_head_t wait; /* wait for i/o completion */ - struct inode * inode; /* target file of I/O */ + struct inode * inode; /* target file of i/o */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ atomic_t complete, /* i/os we're waiting for */ @@ -110,7 +111,6 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz size_t array_size; /* set an arbitrary limit to prevent type overflow */ - /* XXX: this can probably be as large as INT_MAX */ if (size > MAX_DIRECTIO_SIZE) { *pages = NULL; return -EFBIG; @@ -294,7 +294,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = { * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct file *file = dreq->filp; struct inode *inode = file->f_mapping->host; @@ -322,7 +322,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; - data->args.offset = file_offset; + data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = &pages[curpage]; data->args.count = bytes; @@ -347,7 +347,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long bytes, (unsigned long long)data->args.offset); - file_offset += bytes; + pos += bytes; pgbase += bytes; curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; @@ -356,7 +356,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long } while (count != 0); } -static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; @@ -377,7 +377,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq, user_addr, count, file_offset); + nfs_direct_read_schedule(dreq, user_addr, count, pos); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); @@ -459,7 +459,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = { * XXX: For now, support only FILE_SYNC writes. Later we may add * support for UNSTABLE + COMMIT. */ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct file *file = dreq->filp; struct inode *inode = file->f_mapping->host; @@ -487,7 +487,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; - data->args.offset = file_offset; + data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = &pages[curpage]; data->args.count = bytes; @@ -513,7 +513,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long bytes, (unsigned long long)data->args.offset); - file_offset += bytes; + pos += bytes; pgbase += bytes; curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; @@ -522,7 +522,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long } while (count != 0); } -static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) { ssize_t result; sigset_t oldset; @@ -546,7 +546,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, user_addr, count, file_offset); + nfs_direct_write_schedule(dreq, user_addr, count, pos); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); @@ -557,18 +557,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @buf: user's buffer into which to read data - * count: number of bytes to read - * pos: byte offset in file where reading starts + * @count: number of bytes to read + * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if * the request starts before the end of the file. For that check * to work, we must generate a GETATTR before each direct read, and * even then there is a window between the GETATTR and the subsequent - * READ where the file size could change. So our preference is simply + * READ where the file size could change. Our preference is simply * to do all reads the application wants, and the server will take * care of managing the end of file boundary. - * + * * This function also eliminates unnecessarily updating the file's * atime locally, as the NFS server sets the file's atime, and this * client must read the updated atime from the server back into its @@ -621,8 +621,8 @@ out: * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @buf: user's buffer from which to write data - * count: number of bytes to write - * pos: byte offset in file where writing starts + * @count: number of bytes to write + * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -703,6 +703,10 @@ out: return retval; } +/** + * nfs_init_directcache - create a slab cache for nfs_direct_req structures + * + */ int nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", @@ -715,6 +719,10 @@ int nfs_init_directcache(void) return 0; } +/** + * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures + * + */ void nfs_destroy_directcache(void) { if (kmem_cache_destroy(nfs_direct_cachep)) -- cgit v1.2.3 From 15ce4a0c1ce0d5e288398cb9e5493fd4e55e2025 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:34 -0500 Subject: NFS: Replace atomic_t variables in nfs_direct_req with a single spin lock Three atomic_t variables cause a lot of bus locking. Because they are all used in the same places in the code, just use a single spin lock. Now that the atomic_t variables are gone, we can remove the request size limitation since the code no longer depends on the limited width of atomic_t on some platforms. Test plan: Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx operations, iozone, OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 81 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 34 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bcbc213b4033..3de7c4b07968 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -58,7 +58,6 @@ #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS -#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty); static kmem_cache_t *nfs_direct_cachep; @@ -68,6 +67,8 @@ static kmem_cache_t *nfs_direct_cachep; */ struct nfs_direct_req { struct kref kref; /* release manager */ + + /* I/O parameters */ struct list_head list; /* nfs_read/write_data structs */ struct file * filp; /* file descriptor */ struct kiocb * iocb; /* controlling i/o request */ @@ -75,12 +76,14 @@ struct nfs_direct_req { struct inode * inode; /* target file of i/o */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ - atomic_t complete, /* i/os we're waiting for */ - count, /* bytes actually processed */ + + /* completion state */ + spinlock_t lock; /* protect completion state */ + int outstanding; /* i/os we're waiting for */ + ssize_t count, /* bytes actually processed */ error; /* any reported error */ }; - /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -110,12 +113,6 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz unsigned long page_count; size_t array_size; - /* set an arbitrary limit to prevent type overflow */ - if (size > MAX_DIRECTIO_SIZE) { - *pages = NULL; - return -EFBIG; - } - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; page_count -= user_addr >> PAGE_SHIFT; @@ -164,8 +161,10 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) init_waitqueue_head(&dreq->wait); INIT_LIST_HEAD(&dreq->list); dreq->iocb = NULL; - atomic_set(&dreq->count, 0); - atomic_set(&dreq->error, 0); + spin_lock_init(&dreq->lock); + dreq->outstanding = 0; + dreq->count = 0; + dreq->error = 0; return dreq; } @@ -181,19 +180,18 @@ static void nfs_direct_req_release(struct kref *kref) */ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) { - int result = -EIOCBQUEUED; + ssize_t result = -EIOCBQUEUED; /* Async requests don't wait here */ if (dreq->iocb) goto out; - result = wait_event_interruptible(dreq->wait, - (atomic_read(&dreq->complete) == 0)); + result = wait_event_interruptible(dreq->wait, (dreq->outstanding == 0)); if (!result) - result = atomic_read(&dreq->error); + result = dreq->error; if (!result) - result = atomic_read(&dreq->count); + result = dreq->count; out: kref_put(&dreq->kref, nfs_direct_req_release); @@ -214,9 +212,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) nfs_free_user_pages(dreq->pages, dreq->npages, 1); if (dreq->iocb) { - long res = atomic_read(&dreq->error); + long res = (long) dreq->error; if (!res) - res = atomic_read(&dreq->count); + res = (long) dreq->count; aio_complete(dreq->iocb, res, 0); } else wake_up(&dreq->wait); @@ -233,7 +231,6 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) { struct list_head *list; struct nfs_direct_req *dreq; - unsigned int reads = 0; unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; dreq = nfs_direct_req_alloc(); @@ -259,13 +256,12 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) list_add(&data->pages, list); data->req = (struct nfs_page *) dreq; - reads++; + dreq->outstanding++; if (nbytes <= rsize) break; nbytes -= rsize; } kref_get(&dreq->kref); - atomic_set(&dreq->complete, reads); return dreq; } @@ -276,13 +272,21 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (nfs_readpage_result(task, data) != 0) return; + + spin_lock(&dreq->lock); + if (likely(task->tk_status >= 0)) - atomic_add(data->res.count, &dreq->count); + dreq->count += data->res.count; else - atomic_set(&dreq->error, task->tk_status); + dreq->error = task->tk_status; + + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; + } - if (unlikely(atomic_dec_and_test(&dreq->complete))) - nfs_direct_complete(dreq); + spin_unlock(&dreq->lock); + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -388,7 +392,6 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize { struct list_head *list; struct nfs_direct_req *dreq; - unsigned int writes = 0; unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; dreq = nfs_direct_req_alloc(); @@ -414,16 +417,19 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize list_add(&data->pages, list); data->req = (struct nfs_page *) dreq; - writes++; + dreq->outstanding++; if (nbytes <= wsize) break; nbytes -= wsize; } kref_get(&dreq->kref); - atomic_set(&dreq->complete, writes); return dreq; } +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -436,15 +442,22 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) status = -EIO; + spin_lock(&dreq->lock); + if (likely(status >= 0)) - atomic_add(data->res.count, &dreq->count); + dreq->count += data->res.count; else - atomic_set(&dreq->error, status); + dreq->error = status; - if (unlikely(atomic_dec_and_test(&dreq->complete))) { - nfs_end_data_update(data->inode); - nfs_direct_complete(dreq); + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; } + + spin_unlock(&dreq->lock); + + nfs_end_data_update(data->inode); + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_write_direct_ops = { -- cgit v1.2.3 From a37ec012d7fd352648c8455d3396ea24001efcd3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:35 -0500 Subject: NFS: fix data_update accounting in NFS direct I/O path ^C against "iozone -I" is hitting the assertion in nfs_clear_inode(). Test plan: "iozone -i0 -I -a -c" against a slow server, then control C. This should not cause an oops. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3de7c4b07968..737990dd4dfe 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -219,6 +219,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) } else wake_up(&dreq->wait); + iput(dreq->inode); kref_put(&dreq->kref, nfs_direct_req_release); } @@ -374,6 +375,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size dreq->pages = pages; dreq->npages = nr_pages; + igrab(inode); dreq->inode = inode; dreq->filp = iocb->ki_filp; if (!is_sync_kiocb(iocb)) @@ -549,6 +551,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz dreq->pages = pages; dreq->npages = nr_pages; + igrab(inode); dreq->inode = inode; dreq->filp = iocb->ki_filp; if (!is_sync_kiocb(iocb)) -- cgit v1.2.3 From e17b1fc4b35399935f00a635206e183d9292fe4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:35 -0500 Subject: NFS: Make nfs_commit_alloc() extern We need to use nfs_commit_alloc() in fs/nfs/direct.c. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 875f5b060533..f7c8be0beccf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -91,7 +91,7 @@ static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) +struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); @@ -112,7 +112,7 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) return p; } -static inline void nfs_commit_free(struct nfs_write_data *p) +void nfs_commit_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); -- cgit v1.2.3 From fad61490419b3e494f300e9b2579810ef3bcda31 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:36 -0500 Subject: nfs: Use UNSTABLE + COMMIT for NFS O_DIRECT writes Currently NFS O_DIRECT writes use FILE_SYNC so that a COMMIT is not necessary. This simplifies the internal logic, but this could be a difficult workload for some servers. Instead, let's send UNSTABLE writes, and after they all complete, send a COMMIT for the dirty range. After the COMMIT returns successfully, then do the wake_up or fire off aio_complete(). Test plan: Async direct I/O tests against Solaris (or any server that requires committed unstable writes). Reboot server during test. Based on an earlier patch by Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 199 insertions(+), 25 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 737990dd4dfe..f0f2053c7a61 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -69,11 +69,15 @@ struct nfs_direct_req { struct kref kref; /* release manager */ /* I/O parameters */ - struct list_head list; /* nfs_read/write_data structs */ + struct list_head list, /* nfs_read/write_data structs */ + rewrite_list; /* saved nfs_write_data structs */ struct file * filp; /* file descriptor */ struct kiocb * iocb; /* controlling i/o request */ wait_queue_head_t wait; /* wait for i/o completion */ struct inode * inode; /* target file of i/o */ + unsigned long user_addr; /* location of user's buffer */ + size_t user_count; /* total bytes to move */ + loff_t pos; /* starting offset in file */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ @@ -82,8 +86,18 @@ struct nfs_direct_req { int outstanding; /* i/os we're waiting for */ ssize_t count, /* bytes actually processed */ error; /* any reported error */ + + /* commit state */ + struct nfs_write_data * commit_data; /* special write_data for commits */ + int flags; +#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ +#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + struct nfs_writeverf verf; /* unstable write verifier */ }; +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); + /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -160,11 +174,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_init(&dreq->kref); init_waitqueue_head(&dreq->wait); INIT_LIST_HEAD(&dreq->list); + INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; spin_lock_init(&dreq->lock); dreq->outstanding = 0; dreq->count = 0; dreq->error = 0; + dreq->flags = 0; return dreq; } @@ -299,7 +315,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = { * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) { struct file *file = dreq->filp; struct inode *inode = file->f_mapping->host; @@ -307,11 +323,13 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long file->private_data; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; size_t rsize = NFS_SERVER(inode)->rsize; unsigned int curpage, pgbase; curpage = 0; - pgbase = user_addr & ~PAGE_MASK; + pgbase = dreq->user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; size_t bytes; @@ -373,6 +391,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size if (!dreq) return -ENOMEM; + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; igrab(inode); @@ -383,13 +404,137 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq, user_addr, count, pos); + nfs_direct_read_schedule(dreq); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; } +static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) +{ + list_splice_init(&dreq->rewrite_list, &dreq->list); + while (!list_empty(&dreq->list)) { + struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_release(data); + } +} + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) +{ + struct list_head *pos; + + list_splice_init(&dreq->rewrite_list, &dreq->list); + list_for_each(pos, &dreq->list) + dreq->outstanding++; + dreq->count = 0; + + nfs_direct_write_schedule(dreq, FLUSH_STABLE); +} + +static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; + if (unlikely(task->tk_status < 0)) { + dreq->error = task->tk_status; + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + dprintk("NFS: %5u commit verify failed\n", task->tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + + dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status); + nfs_direct_write_complete(dreq, data->inode); +} + +static const struct rpc_call_ops nfs_commit_direct_ops = { + .rpc_call_done = nfs_direct_commit_result, + .rpc_release = nfs_commit_release, +}; + +static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) +{ + struct file *file = dreq->filp; + struct nfs_open_context *ctx = (struct nfs_open_context *) + file->private_data; + struct nfs_write_data *data = dreq->commit_data; + struct rpc_task *task = &data->task; + + data->inode = dreq->inode; + data->cred = ctx->cred; + + data->args.fh = NFS_FH(data->inode); + data->args.offset = dreq->pos; + data->args.count = dreq->user_count; + data->res.count = 0; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; + + rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC, + &nfs_commit_direct_ops, data); + NFS_PROTO(data->inode)->commit_setup(data, 0); + + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long)data->inode; + /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ + dreq->commit_data = NULL; + + dprintk("NFS: %5u initiated commit call\n", task->tk_pid); + + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); +} + +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + int flags = dreq->flags; + + dreq->flags = 0; + switch (flags) { + case NFS_ODIRECT_DO_COMMIT: + nfs_direct_commit_schedule(dreq); + break; + case NFS_ODIRECT_RESCHED_WRITES: + nfs_direct_write_reschedule(dreq); + break; + default: + nfs_end_data_update(inode); + if (dreq->commit_data != NULL) + nfs_commit_free(dreq->commit_data); + nfs_direct_free_writedata(dreq); + nfs_direct_complete(dreq); + } +} + +static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = nfs_commit_alloc(0); + if (dreq->commit_data != NULL) + dreq->commit_data->req = (struct nfs_page *) dreq; +} +#else +static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = NULL; +} + +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + nfs_end_data_update(inode); + nfs_direct_free_writedata(dreq); + nfs_direct_complete(dreq); +} +#endif + static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) { struct list_head *list; @@ -424,14 +569,13 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize break; nbytes -= wsize; } + + nfs_alloc_commit_data(dreq); + kref_get(&dreq->kref); return dreq; } -/* - * NB: Return the value of the first error return code. Subsequent - * errors after the first one are ignored. - */ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -440,41 +584,62 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) if (nfs_writeback_done(task, data) != 0) return; - /* If the server fell back to an UNSTABLE write, it's an error. */ - if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) - status = -EIO; spin_lock(&dreq->lock); if (likely(status >= 0)) dreq->count += data->res.count; else - dreq->error = status; + dreq->error = task->tk_status; + if (data->res.verf->committed != NFS_FILE_SYNC) { + switch (dreq->flags) { + case 0: + memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); + dreq->flags = NFS_ODIRECT_DO_COMMIT; + break; + case NFS_ODIRECT_DO_COMMIT: + if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { + dprintk("NFS: %5u write verify failed\n", task->tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + } + } + /* In case we have to resend */ + data->args.stable = NFS_FILE_SYNC; + + spin_unlock(&dreq->lock); +} + +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ +static void nfs_direct_write_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + + spin_lock(&dreq->lock); if (--dreq->outstanding) { spin_unlock(&dreq->lock); return; } - spin_unlock(&dreq->lock); - nfs_end_data_update(data->inode); - nfs_direct_complete(dreq); + nfs_direct_write_complete(dreq, data->inode); } static const struct rpc_call_ops nfs_write_direct_ops = { .rpc_call_done = nfs_direct_write_result, - .rpc_release = nfs_writedata_release, + .rpc_release = nfs_direct_write_release, }; /* * For each nfs_write_data struct that was allocated on the list, dispatch * an NFS WRITE operation - * - * XXX: For now, support only FILE_SYNC writes. Later we may add - * support for UNSTABLE + COMMIT. */ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) { struct file *file = dreq->filp; struct inode *inode = file->f_mapping->host; @@ -482,11 +647,13 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long file->private_data; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int curpage, pgbase; curpage = 0; - pgbase = user_addr & ~PAGE_MASK; + pgbase = dreq->user_addr & ~PAGE_MASK; do { struct nfs_write_data *data; size_t bytes; @@ -496,7 +663,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long bytes = count; data = list_entry(list->next, struct nfs_write_data, pages); - list_del_init(&data->pages); + list_move_tail(&data->pages, &dreq->rewrite_list); data->inode = inode; data->cred = ctx->cred; @@ -512,7 +679,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs_write_direct_ops, data); - NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); + NFS_PROTO(inode)->write_setup(data, sync); data->task.tk_priority = RPC_PRIORITY_NORMAL; data->task.tk_cookie = (unsigned long) inode; @@ -544,11 +711,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; + size_t wsize = NFS_SERVER(inode)->wsize; + int sync = 0; - dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize); + dreq = nfs_direct_write_alloc(count, wsize); if (!dreq) return -ENOMEM; + if (dreq->commit_data == NULL || count < wsize) + sync = FLUSH_STABLE; + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; igrab(inode); @@ -562,7 +736,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, user_addr, count, pos); + nfs_direct_write_schedule(dreq, sync); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); -- cgit v1.2.3 From a8881f5a5c723f82da84b786d3ca83a0df9e0c33 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:36 -0500 Subject: NFS: O_DIRECT async IO may lose context The struct nfs_direct_req currently keeps a pointer to the file descriptor without referencing it. This may cause problems if the parent process is killed. The nfs_open_context should normally have all the information that we're currently using the filp for, and unlike fput(), is safe to release from an rpciod process context. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f0f2053c7a61..e6585b9ff45a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -71,7 +71,7 @@ struct nfs_direct_req { /* I/O parameters */ struct list_head list, /* nfs_read/write_data structs */ rewrite_list; /* saved nfs_write_data structs */ - struct file * filp; /* file descriptor */ + struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ wait_queue_head_t wait; /* wait for i/o completion */ struct inode * inode; /* target file of i/o */ @@ -176,6 +176,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) INIT_LIST_HEAD(&dreq->list); INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; + dreq->ctx = NULL; spin_lock_init(&dreq->lock); dreq->outstanding = 0; dreq->count = 0; @@ -188,6 +189,9 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) static void nfs_direct_req_release(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + + if (dreq->ctx != NULL) + put_nfs_open_context(dreq->ctx); kmem_cache_free(nfs_direct_cachep, dreq); } @@ -235,7 +239,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) } else wake_up(&dreq->wait); - iput(dreq->inode); kref_put(&dreq->kref, nfs_direct_req_release); } @@ -317,10 +320,8 @@ static const struct rpc_call_ops nfs_read_direct_ops = { */ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) { - struct file *file = dreq->filp; - struct inode *inode = file->f_mapping->host; - struct nfs_open_context *ctx = (struct nfs_open_context *) - file->private_data; + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; size_t count = dreq->user_count; @@ -396,9 +397,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; - igrab(inode); dreq->inode = inode; - dreq->filp = iocb->ki_filp; + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -462,14 +462,11 @@ static const struct rpc_call_ops nfs_commit_direct_ops = { static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { - struct file *file = dreq->filp; - struct nfs_open_context *ctx = (struct nfs_open_context *) - file->private_data; struct nfs_write_data *data = dreq->commit_data; struct rpc_task *task = &data->task; data->inode = dreq->inode; - data->cred = ctx->cred; + data->cred = dreq->ctx->cred; data->args.fh = NFS_FH(data->inode); data->args.offset = dreq->pos; @@ -641,10 +638,8 @@ static const struct rpc_call_ops nfs_write_direct_ops = { */ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) { - struct file *file = dreq->filp; - struct inode *inode = file->f_mapping->host; - struct nfs_open_context *ctx = (struct nfs_open_context *) - file->private_data; + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; size_t count = dreq->user_count; @@ -725,9 +720,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; - igrab(inode); dreq->inode = inode; - dreq->filp = iocb->ki_filp; + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; -- cgit v1.2.3 From 5db3a7b2cabe8f0957683f798c4f8fa8605f9ebb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:37 -0500 Subject: NFS: Debugging code for nfs_direct_(read|write)_schedule() Make sure that we're doing our list accounting correctly. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e6585b9ff45a..1e3725e51df1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -339,6 +339,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) if (count < rsize) bytes = count; + BUG_ON(list_empty(list)); data = list_entry(list->next, struct nfs_read_data, pages); list_del_init(&data->pages); @@ -378,6 +379,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) count -= bytes; } while (count != 0); + BUG_ON(!list_empty(list)); } static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) @@ -657,6 +659,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) if (count < wsize) bytes = count; + BUG_ON(list_empty(list)); data = list_entry(list->next, struct nfs_write_data, pages); list_move_tail(&data->pages, &dreq->rewrite_list); @@ -697,6 +700,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) count -= bytes; } while (count != 0); + BUG_ON(!list_empty(list)); } static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) -- cgit v1.2.3 From 3feb2d49394b7874348a6e43c076b780c1d222c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:37 -0500 Subject: NFS: Uninline nfs_writedata_(alloc|free) and nfs_readdata_(alloc|free) Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 32 +++++++++++++++++++++++++++++++- fs/nfs/write.c | 32 +++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2da255f0247f..3961524fd4ab 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -40,10 +40,40 @@ static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; static kmem_cache_t *nfs_rdata_cachep; -mempool_t *nfs_rdata_mempool; +static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) +struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +{ + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_rdata_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_readdata_free(struct nfs_read_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_rdata_mempool); +} + void nfs_readdata_release(void *data) { nfs_readdata_free(data); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f7c8be0beccf..647e3217c2e1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -86,7 +86,7 @@ static const struct rpc_call_ops nfs_write_full_ops; static const struct rpc_call_ops nfs_commit_ops; static kmem_cache_t *nfs_wdata_cachep; -mempool_t *nfs_wdata_mempool; +static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); @@ -119,6 +119,36 @@ void nfs_commit_free(struct nfs_write_data *p) mempool_free(p, nfs_commit_mempool); } +struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +{ + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_wdata_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_writedata_free(struct nfs_write_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_wdata_mempool); +} + void nfs_writedata_release(void *wdata) { nfs_writedata_free(wdata); -- cgit v1.2.3 From 606bbba06b11ebcbdf3a4fcd8cce4507c5bd7a4b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:42 -0500 Subject: NFS: fix compiler warnings on 64-bit platforms Introduced by NFS aio+dio patches. Test plan: Compile kernel with CONFIG_NFS enabled on 64-bit hardware. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1e3725e51df1..b4bbf6d75923 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -365,7 +365,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) rpc_execute(&data->task); unlock_kernel(); - dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -686,7 +686,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) rpc_execute(&data->task); unlock_kernel(); - dfprintk(VFS, "NFS: %4d initiated direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), -- cgit v1.2.3 From 6b45d858ed6821dd687efd3b68929de2e4954fec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:43 -0500 Subject: NFS: Clean up nfs_get_user_pages Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 59 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b4bbf6d75923..58830429e42a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -59,7 +59,6 @@ #define NFSDBG_FACILITY NFSDBG_VFS -static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty); static kmem_cache_t *nfs_direct_cachep; /* @@ -121,6 +120,18 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } +static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +{ + int i; + for (i = 0; i < npages; i++) { + struct page *page = pages[i]; + if (do_dirty && !PageCompound(page)) + set_page_dirty_lock(page); + page_cache_release(page); + } + kfree(pages); +} + static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) { int result = -ENOMEM; @@ -138,31 +149,23 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz page_count, (rw == READ), 0, *pages, NULL); up_read(¤t->mm->mmap_sem); - /* - * If we got fewer pages than expected from get_user_pages(), - * the user buffer runs off the end of a mapping; return EFAULT. - */ - if (result >= 0 && result < page_count) { - nfs_free_user_pages(*pages, result, 0); + if (result != page_count) { + /* + * If we got fewer pages than expected from + * get_user_pages(), the user buffer runs off the + * end of a mapping; return EFAULT. + */ + if (result >= 0) { + nfs_free_user_pages(*pages, result, 0); + result = -EFAULT; + } else + kfree(*pages); *pages = NULL; - result = -EFAULT; } } return result; } -static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) -{ - int i; - for (i = 0; i < npages; i++) { - struct page *page = pages[i]; - if (do_dirty && !PageCompound(page)) - set_page_dirty_lock(page); - page_cache_release(page); - } - kfree(pages); -} - static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -788,13 +791,11 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (retval) goto out; - page_count = nfs_get_user_pages(READ, (unsigned long) buf, + retval = nfs_get_user_pages(READ, (unsigned long) buf, count, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - retval = page_count; + if (retval < 0) goto out; - } + page_count = retval; retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, pages, page_count); @@ -862,13 +863,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t if (retval) goto out; - page_count = nfs_get_user_pages(WRITE, (unsigned long) buf, + retval = nfs_get_user_pages(WRITE, (unsigned long) buf, count, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - retval = page_count; + if (retval < 0) goto out; - } + page_count = retval; retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos, pages, page_count); -- cgit v1.2.3 From d72b7a6b26b9009b7a05117fe2e04b3a73ae4a5c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:43 -0500 Subject: NFS: O_DIRECT needs to use a completion Now that we have aio writes, it is possible for dreq->outstanding to be zero, but for the I/O not to have completed. Convert struct nfs_direct_req to use a completion to signal when the I/O is done. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 58830429e42a..cbef57a16ffb 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -72,7 +72,6 @@ struct nfs_direct_req { rewrite_list; /* saved nfs_write_data structs */ struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ - wait_queue_head_t wait; /* wait for i/o completion */ struct inode * inode; /* target file of i/o */ unsigned long user_addr; /* location of user's buffer */ size_t user_count; /* total bytes to move */ @@ -85,6 +84,7 @@ struct nfs_direct_req { int outstanding; /* i/os we're waiting for */ ssize_t count, /* bytes actually processed */ error; /* any reported error */ + struct completion completion; /* wait for i/o completion */ /* commit state */ struct nfs_write_data * commit_data; /* special write_data for commits */ @@ -175,7 +175,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) return NULL; kref_init(&dreq->kref); - init_waitqueue_head(&dreq->wait); + init_completion(&dreq->completion); INIT_LIST_HEAD(&dreq->list); INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; @@ -209,7 +209,7 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) if (dreq->iocb) goto out; - result = wait_event_interruptible(dreq->wait, (dreq->outstanding == 0)); + result = wait_for_completion_interruptible(&dreq->completion); if (!result) result = dreq->error; @@ -239,8 +239,8 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) if (!res) res = (long) dreq->count; aio_complete(dreq->iocb, res, 0); - } else - wake_up(&dreq->wait); + } + complete_all(&dreq->completion); kref_put(&dreq->kref, nfs_direct_req_release); } -- cgit v1.2.3 From e4cd038a45a46ffbe06a1a72f3f15246e5b041ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:44 -0500 Subject: NLM: Fix nlmclnt_test to not copy private part of locks The struct file_lock does not carry a properly initialised lock, so don't copy it as if it were. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ee140c53dba6..6bcbc4d676c4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -399,7 +399,10 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) lock_kernel(); /* Try local locking first */ if (posix_test_lock(filp, fl, &cfl)) { - locks_copy_lock(fl, &cfl); + fl->fl_start = cfl.fl_start; + fl->fl_end = cfl.fl_end; + fl->fl_type = cfl.fl_type; + fl->fl_pid = cfl.fl_pid; goto out; } -- cgit v1.2.3 From f25bc34967d76610d17bc70769d7c220976eeeb1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:46 -0500 Subject: NFSv4: Ensure nfs_callback_down() calls svc_destroy() Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index fcd97406a778..2c042f8d70b5 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -73,6 +73,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) svc_process(serv, rqstp); } + svc_exit_thread(rqstp); nfs_callback_info.pid = 0; complete(&nfs_callback_info.stopped); unlock_kernel(); -- cgit v1.2.3 From 3e4f6290ca4df7464ee066123f2bca4298c2dab4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:46 -0500 Subject: NFSv4: Send the delegation stateid for SETATTR calls In the case where we hold a delegation stateid, use that in for inside SETATTR calls. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 19 +++++++++++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4proc.c | 27 +++++++++++++-------------- 3 files changed, 33 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c6f07c1c71e6..d3be923d4e43 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) nfs_free_delegation(delegation); } } + +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int res = 0; + + if (nfsi->delegation_state == 0) + return 0; + spin_lock(&clp->cl_lock); + delegation = nfsi->delegation; + if (delegation != NULL) { + memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); + res = 1; + } + spin_unlock(&clp->cl_lock); + return res; +} diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 7a0b2bfce771..3858694652fa 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); static inline int nfs_have_delegation(struct inode *inode, int flags) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bad1eae5608a..62aed077fc2a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1018,12 +1018,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, return res; } -static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, - struct nfs_fh *fhandle, struct iattr *sattr, - struct nfs4_state *state) +static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, + struct iattr *sattr, struct nfs4_state *state) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs_setattrargs arg = { - .fh = fhandle, + .fh = NFS_FH(inode), .iap = sattr, .server = server, .bitmask = server->attr_bitmask, @@ -1042,7 +1042,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, nfs_fattr_init(fattr); - if (state != NULL) { + if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { + /* Use that stateid */ + } else if (state != NULL) { msg.rpc_cred = state->owner->so_cred; nfs4_copy_stateid(&arg.stateid, state, current->files); } else @@ -1054,16 +1056,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, return status; } -static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, - struct nfs_fh *fhandle, struct iattr *sattr, - struct nfs4_state *state) +static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, + struct iattr *sattr, struct nfs4_state *state) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_do_setattr(server, fattr, fhandle, sattr, - state), + _nfs4_do_setattr(inode, fattr, sattr, state), &exception); } while (exception.retry); return err; @@ -1504,8 +1505,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, if (ctx != NULL) state = ctx->state; - status = nfs4_do_setattr(NFS_SERVER(inode), fattr, - NFS_FH(inode), sattr, state); + status = nfs4_do_setattr(inode, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); if (ctx != NULL) @@ -1824,8 +1824,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, d_instantiate(dentry, igrab(state->inode)); if (flags & O_EXCL) { struct nfs_fattr fattr; - status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, - NFS_FH(state->inode), sattr, state); + status = nfs4_do_setattr(state->inode, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); } -- cgit v1.2.3 From 51581f3bf922512880f52a7777923fd6dcfc792b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:47 -0500 Subject: NFSv4: SETCLIENTID_CONFIRM should handle NFS4ERR_DELAY/NFS4ERR_RESOURCE Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 +++++++++++++++++++-- fs/nfs/nfs4state.c | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 62aed077fc2a..31000326aba4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2849,8 +2849,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p return status; } -int -nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) { struct nfs_fsinfo fsinfo; struct rpc_message msg = { @@ -2874,6 +2873,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) return status; } +int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +{ + long timeout; + int err; + do { + err = _nfs4_proc_setclientid_confirm(clp, cred); + switch (err) { + case 0: + return err; + case -NFS4ERR_RESOURCE: + /* The IBM lawyers misread another document! */ + case -NFS4ERR_DELAY: + err = nfs4_delay(clp->cl_rpcclient, &timeout); + } + } while (err == 0); + return err; +} + struct nfs4_delegreturndata { struct nfs4_delegreturnargs args; struct nfs4_delegreturnres res; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index afad0255e7db..96e5b82c153b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -977,6 +977,7 @@ out: out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", NIPQUAD(clp->cl_addr.s_addr), -status); + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } -- cgit v1.2.3 From 01d0ae8beaee75d954900109619b700fe68707d9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:48 -0500 Subject: NFSv4: Fix an oops in nfs4_fill_super The mount statistics patches introduced a call to nfs_free_iostats that is not only redundant, but actually causes an oops. Also fix a memory leak due to the lack of a call to nfs_free_iostats on unmount. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 13 ++++++------- fs/nfs/iostat.h | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b9f35ca266c2..17654bffc3c6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -281,6 +281,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) sb->s_magic = NFS_SUPER_MAGIC; + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + return -ENOMEM; + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); /* Did getting the root inode fail? */ if (IS_ERR(root_inode)) { @@ -294,12 +298,6 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) } sb->s_root->d_op = server->rpc_ops->dentry_ops; - server->io_stats = nfs_alloc_iostats(); - if (!server->io_stats) { - no_root_error = -ENOMEM; - goto out_no_root; - } - /* mount time stamp, in seconds */ server->mount_time = jiffies; @@ -1822,6 +1820,7 @@ static void nfs_kill_super(struct super_block *s) rpciod_down(); /* release rpciod */ + nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); } @@ -2122,7 +2121,6 @@ out_err: out_free: kfree(server->mnt_path); kfree(server->hostname); - nfs_free_iostats(server->io_stats); kfree(server); return s; } @@ -2143,6 +2141,7 @@ static void nfs4_kill_super(struct super_block *sb) rpciod_down(); + nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); } diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 7a7495153317..6350ecbde589 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -156,7 +156,8 @@ static inline struct nfs_iostats *nfs_alloc_iostats(void) static inline void nfs_free_iostats(struct nfs_iostats *stats) { - free_percpu(stats); + if (stats != NULL) + free_percpu(stats); } #endif -- cgit v1.2.3 From 03f28e3a2059fc466761d872122f30acb7be61ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:48 -0500 Subject: NFS: Make nfs_fhget() return appropriate error values Currently it returns NULL, which usually gets interpreted as ENOMEM. In fact it can mean a host of issues. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 +++++----- fs/nfs/inode.c | 15 +++++++-------- fs/nfs/nfs4proc.c | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 609185a15c99..06c48b385c94 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -901,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - res = ERR_PTR(-EACCES); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); - if (!inode) + res = (struct dentry *)inode; + if (IS_ERR(res)) goto out_unlock; no_entry: res = d_add_unique(dentry, inode); @@ -1096,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) return NULL; dentry->d_op = NFS_PROTO(dir)->dentry_ops; inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); - if (!inode) { + if (IS_ERR(inode)) { dput(dentry); return NULL; } @@ -1134,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (error < 0) goto out_err; } - error = -ENOMEM; inode = nfs_fhget(dentry->d_sb, fhandle, fattr); - if (inode == NULL) + error = PTR_ERR(inode); + if (IS_ERR(inode)) goto out_err; d_instantiate(dentry, inode); return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 17654bffc3c6..a0cda53461b3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -241,7 +241,6 @@ static struct inode * nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) { struct nfs_server *server = NFS_SB(sb); - struct inode *rooti; int error; error = server->rpc_ops->getroot(server, rootfh, fsinfo); @@ -250,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f return ERR_PTR(error); } - rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); - if (!rooti) - return ERR_PTR(-ENOMEM); - return rooti; + return nfs_fhget(sb, rootfh, fsinfo->fattr); } /* @@ -853,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) .fh = fh, .fattr = fattr }; - struct inode *inode = NULL; + struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -866,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) hash = nfs_fattr_to_ino_t(fattr); - if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) + inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc); + if (inode == NULL) { + inode = ERR_PTR(-ENOMEM); goto out_no_inode; + } if (inode->i_state & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); @@ -936,7 +935,7 @@ out: return inode; out_no_inode: - printk("nfs_fhget: iget failed\n"); + dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 31000326aba4..02c7d8c04c58 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -336,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (!(data->f_attr.valid & NFS_ATTR_FATTR)) goto out; inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); - if (inode == NULL) + if (IS_ERR(inode)) goto out; state = nfs4_get_open_state(inode, data->owner); if (state == NULL) -- cgit v1.2.3 From a9a801787a761616589a6526d7a29c13f4deb3d8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:48 -0500 Subject: NFS, NLM: Allow blocking locks to respect signals Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6bcbc4d676c4..5263b2864a44 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -443,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; - sigset_t oldset; int status; - rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); /* * Flush all pending writes before doing anything * with locks.. @@ -464,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) else status = do_vfs_lock(filp, fl); unlock_kernel(); - rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); return status; } static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; - sigset_t oldset; int status; - rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); /* * Flush all pending writes before doing anything * with locks.. @@ -507,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) nfs_sync_mapping(filp->f_mapping); nfs_zap_caches(inode); out: - rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); return status; } -- cgit v1.2.3 From 1dd761e9070aa2e543df3db41bd75ed4b8f2fab9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:49 -0500 Subject: NFSv4: Ensure the callback daemon flushes signals If the callback daemon is signalled, but is unable to exit because it still has users, then we need to flush signals. If not, then svc_recv() can never sleep, and so we hang. If we flush signals, then we also have to be prepared to resend them when we want the thread to exit. Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 2c042f8d70b5..99d2cfbce863 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) complete(&nfs_callback_info.started); - while (nfs_callback_info.users != 0 || !signalled()) { + for(;;) { + if (signalled()) { + if (nfs_callback_info.users == 0) + break; + flush_signals(current); + } /* * Listen for a request on the socket */ @@ -135,11 +140,13 @@ int nfs_callback_down(void) lock_kernel(); down(&nfs_callback_sema); - if (--nfs_callback_info.users || nfs_callback_info.pid == 0) - goto out; - kill_proc(nfs_callback_info.pid, SIGKILL, 1); - wait_for_completion(&nfs_callback_info.stopped); -out: + nfs_callback_info.users--; + do { + if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0) + break; + if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0) + break; + } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); up(&nfs_callback_sema); unlock_kernel(); return ret; -- cgit v1.2.3 From deb7d638262019cbac5d15ab74ffd1c29242c7cb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:50 -0500 Subject: NFS: Fix a race with PG_private and nfs_release_page() We don't need to set PG_private for readahead pages, since they never get unlocked while I/O is in progress. However there is a small race in nfs_readpage_release() whereby the page may be unlocked, and have PG_private set. Fix is to have PG_private set only for the case of writes... Also fix a bug in nfs_clear_page_writeback(): Don't attempt to clear the radix_tree tag if we've already deleted the radix tree entry. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 10 +++++----- fs/nfs/write.c | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d6e076c9dbe1..106aca388ebc 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -88,7 +88,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, BUG_ON(PagePrivate(page)); BUG_ON(!PageLocked(page)); BUG_ON(page->mapping->host != inode); - SetPagePrivate(page); req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; @@ -136,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); - spin_lock(&nfsi->req_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); - spin_unlock(&nfsi->req_lock); + if (req->wb_page != NULL) { + spin_lock(&nfsi->req_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + spin_unlock(&nfsi->req_lock); + } nfs_unlock_request(req); } @@ -153,7 +154,6 @@ void nfs_clear_request(struct nfs_page *req) { struct page *page = req->wb_page; if (page != NULL) { - ClearPagePrivate(page); page_cache_release(page); req->wb_page = NULL; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 647e3217c2e1..d9e5ee594572 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -429,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } + SetPagePrivate(req->wb_page); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -445,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfsi->req_lock); + ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { -- cgit v1.2.3 From 7d46a49f512e8d10b23353781a8ba85edd4fa640 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:50 -0500 Subject: NFS: Clean up nfs_flush_list() Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 62 +++++++++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d9e5ee594572..2beb1268bb1b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -963,7 +963,7 @@ static void nfs_execute_write(struct nfs_write_data *data) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) +static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -1032,16 +1032,13 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) +static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; unsigned int count; - if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(head, inode, how); - data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1074,24 +1071,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) return -ENOMEM; } -static int -nfs_flush_list(struct list_head *head, int wpages, int how) +static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) { LIST_HEAD(one_request); - struct nfs_page *req; - int error = 0; - unsigned int pages = 0; + int (*flush_one)(struct inode *, struct list_head *, int); + struct nfs_page *req; + int wpages = NFS_SERVER(inode)->wpages; + int wsize = NFS_SERVER(inode)->wsize; + int error; - while (!list_empty(head)) { - pages += nfs_coalesce_requests(head, &one_request, wpages); + flush_one = nfs_flush_one; + if (wsize < PAGE_CACHE_SIZE) + flush_one = nfs_flush_multi; + /* For single writes, FLUSH_STABLE is more efficient */ + if (npages <= wpages && npages == NFS_I(inode)->npages + && nfs_list_entry(head->next)->wb_bytes <= wsize) + how |= FLUSH_STABLE; + + do { + nfs_coalesce_requests(head, &one_request, wpages); req = nfs_list_entry(one_request.next); - error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how); + error = flush_one(inode, &one_request, how); if (error < 0) - break; - } - if (error >= 0) - return pages; - + goto out_err; + } while (!list_empty(head)); + return 0; +out_err: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -1423,24 +1428,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res, - error = 0; + int res; spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); if (res) { - struct nfs_server *server = NFS_SERVER(inode); - - /* For single writes, FLUSH_STABLE is more efficient */ - if (res == nfsi->npages && nfsi->npages <= server->wpages) { - if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) - how |= FLUSH_STABLE; - } - error = nfs_flush_list(&head, server->wpages, how); + int error = nfs_flush_list(inode, &head, res, how); + if (error < 0) + return error; } - if (error < 0) - return error; return res; } @@ -1449,14 +1446,13 @@ int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res, - error = 0; + int res; spin_lock(&nfsi->req_lock); res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); if (res) { - error = nfs_commit_list(inode, &head, how); + int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; } -- cgit v1.2.3 From c42de9dd67250fe984e0e31c9b542d721af6454b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:51 -0500 Subject: NFS: Fix a race in nfs_sync_inode() Kudos to Neil Brown for spotting the problem: "in nfs_sync_inode, there is effectively the sequence: nfs_wait_on_requests nfs_flush_inode nfs_commit_inode This seems a bit racy to me as if the only requests are on the ->commit list, and nfs_commit_inode is called separately after nfs_wait_on_requests completes, and before nfs_commit_inode start (say: by nfs_write_inode) then none of these function will return >0, yet there will be some pending request that aren't waited for." The solution is to search for requests to wait upon, search for dirty requests, and search for uncommitted requests while holding the nfsi->req_lock The patch also cleans up nfs_sync_inode(), getting rid of the redundant FLUSH_WAIT flag. It turns out that we were always setting it. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++-- fs/nfs/write.c | 72 ++++++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 52 insertions(+), 24 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a0cda53461b3..60aac58270a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -137,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) static int nfs_write_inode(struct inode *inode, int sync) { - int flags = sync ? FLUSH_WAIT : 0; + int flags = sync ? FLUSH_SYNC : 0; int ret; ret = nfs_commit_inode(inode, flags); @@ -1051,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int err; /* Flush out writes to the server in order to update c/mtime */ - nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); + nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT); /* * We may force a getattr if the user cares about atime. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2beb1268bb1b..3f5225404c97 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -539,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req) * * Interruptible by signals only if mounted with intr flag. */ -static int -nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; @@ -553,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int else idx_end = idx_start + npages - 1; - spin_lock(&nfsi->req_lock); next = idx_start; while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) @@ -566,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); + spin_lock(&nfsi->req_lock); if (error < 0) return error; - spin_lock(&nfsi->req_lock); res++; } - spin_unlock(&nfsi->req_lock); return res; } +static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret; + + spin_lock(&nfsi->req_lock); + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + spin_unlock(&nfsi->req_lock); + return ret; +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -626,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st } return res; } +#else +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +{ + return 0; +} #endif static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) @@ -1421,6 +1434,11 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; +#else +static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +{ + return 0; +} #endif static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, @@ -1460,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how) } #endif -int nfs_sync_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) { + struct nfs_inode *nfsi = NFS_I(inode); + LIST_HEAD(head); int nocommit = how & FLUSH_NOCOMMIT; - int wait = how & FLUSH_WAIT; - int error; - - how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT); + int pages, ret; + how &= ~FLUSH_NOCOMMIT; + spin_lock(&nfsi->req_lock); do { - if (wait) { - error = nfs_wait_on_requests(inode, idx_start, npages); - if (error != 0) - continue; - } - error = nfs_flush_inode(inode, idx_start, npages, how); - if (error != 0) + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + if (ret != 0) continue; - if (!nocommit) - error = nfs_commit_inode(inode, how); - } while (error > 0); - return error; + pages = nfs_scan_dirty(inode, &head, idx_start, npages); + if (pages != 0) { + spin_unlock(&nfsi->req_lock); + ret = nfs_flush_list(inode, &head, pages, how); + spin_lock(&nfsi->req_lock); + continue; + } + if (nocommit) + break; + pages = nfs_scan_commit(inode, &head, 0, 0); + if (pages == 0) + break; + spin_unlock(&nfsi->req_lock); + ret = nfs_commit_list(inode, &head, how); + spin_lock(&nfsi->req_lock); + } while (ret >= 0); + spin_unlock(&nfsi->req_lock); + return ret; } int nfs_init_writepagecache(void) -- cgit v1.2.3 From 7a1218a277c45cba1fb8d7089407a1769c645c43 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 18:11:10 -0500 Subject: SUNRPC: Ensure rpc_call_async() always calls tk_ops->rpc_release() Currently this will not happen if we exit before rpc_new_task() was called. Also fix up rpc_run_task() to do the same (for consistency). Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 02c7d8c04c58..4aba15ad1d27 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -605,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) int status; atomic_inc(&data->count); + /* + * If rpc_run_task() ends up calling ->rpc_release(), we + * want to ensure that it takes the 'error' code path. + */ + data->rpc_status = -ENOMEM; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); - if (IS_ERR(task)) { - nfs4_opendata_free(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status != 0) { data->cancelled = 1; @@ -708,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) int status; atomic_inc(&data->count); + /* + * If rpc_run_task() ends up calling ->rpc_release(), we + * want to ensure that it takes the 'error' code path. + */ + data->rpc_status = -ENOMEM; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); - if (IS_ERR(task)) { - nfs4_opendata_free(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status != 0) { data->cancelled = 1; @@ -2959,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data->rpc_status = 0; task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data); - if (IS_ERR(task)) { - nfs4_delegreturn_release(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) { status = data->rpc_status; @@ -3182,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, struct nfs_seqid *seqid) { struct nfs4_unlockdata *data; - struct rpc_task *task; data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); if (data == NULL) { @@ -3192,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, /* Unlock _before_ we do the RPC call */ do_vfs_lock(fl->fl_file, fl); - task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); - if (IS_ERR(task)) - nfs4_locku_release_calldata(data); - return task; + return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); } static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) @@ -3376,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.reclaim = 1; task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC, &nfs4_lock_ops, data); - if (IS_ERR(task)) { - nfs4_lock_release(data); + if (IS_ERR(task)) return PTR_ERR(task); - } ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; -- cgit v1.2.3 From 096455a22acac06fb6d0d75f276170ab72d55ba6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:23:42 -0500 Subject: NFSv4: Dont list system.nfs4_acl for filesystems that don't support it. Thanks to Frank Filz for pointing out that we list system.nfs4_acl extended attribute even on filesystems where we don't actually support nfs4_acl. This is inconsistent with the e.g. ext3 POSIX ACL behaviour, and seems to annoy cp. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4aba15ad1d27..47ece1dd3c67 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3559,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) { size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode))) + return 0; if (buf && buflen < len) return -ERANGE; if (buf) -- cgit v1.2.3